<a href="https://colab.research.google.com/github/lukuenya/Bispectrum_Analysis/blob/master/Audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!unzip "/content/drive/MyDrive/Colab Notebooks/audio_lanzhou_2015_org.zip" -d "/content/"

Archive:  /content/drive/MyDrive/Colab Notebooks/audio_lanzhou_2015_org.zip
   creating: /content/audio_lanzhou_2015_org/HC/
   creating: /content/audio_lanzhou_2015_org/HC/02020004/
   creating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/
   creating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/13.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/14.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/15.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/16.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/17.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/18.wav  
   creating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Neutral/
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Neutral/07.wav  
  

In [8]:
import os
import numpy as np
import cupy as cp
import cupyx.scipy.signal as cusignal
import cupyx.scipy.fft as cufft
import matplotlib.pyplot as plt
from scipy.io import wavfile  # Reading audio files can remain on CPU



In [None]:
cupy_version = cp.__version__
print(f"cupy version: {cupy_version}")


cupy version: 12.2.0


In [9]:
fs = 44100  # Sampling frequency in Hz
nfft = 512  # Adjust as needed

# Compute frequency bins once since fs is constant
freqs = np.fft.fftfreq(nfft, d=1/fs)


def compute_bispectrum_gpu(audio_data, nfft=256, noverlap=None):
    """
    Compute the bispectrum of an audio signal using GPU acceleration.

    Parameters:
    - audio_data: 1D numpy array of audio samples.
    - nfft: FFT length.
    - noverlap: Number of points to overlap between segments.

    Returns:
    - bispec: Bispectrum array on GPU.
    """
    if noverlap is None:
        noverlap = nfft // 2

    # Generate window function on CPU
    window_cpu = np.hanning(nfft)

    # Segment the data on CPU
    step = nfft - noverlap
    shape = ((audio_data.size - noverlap) // step, nfft)
    strides = (audio_data.strides[0] * step, audio_data.strides[0])
    segments_cpu = np.lib.stride_tricks.as_strided(audio_data, shape=shape, strides=strides)

    # Apply window function on CPU
    segments_cpu = segments_cpu * window_cpu

    # Transfer windowed segments to GPU
    segments_gpu = cp.asarray(segments_cpu)

    # Compute FFT on GPU
    fft_segments = cufft.fft(segments_gpu, n=nfft, axis=1)

    # Initialize bispectrum accumulator on GPU
    bispec_accum = cp.zeros((nfft, nfft), dtype=cp.complex128)

    # Compute bispectrum on GPU
    num_segments = fft_segments.shape[0]
    for i in range(num_segments):
        X = fft_segments[i]
        X_conj = cp.conj(X)
        # Compute the triple product
        outer_prod = X[:, None] * X[None, :]  # Outer product X(f1) * X(f2)
        sum_indices = (cp.arange(nfft)[:, None] + cp.arange(nfft)) % nfft  # Indices for X*(f1 + f2)
        X_sum_conj = X_conj[sum_indices]
        bispec_accum += outer_prod * X_sum_conj

    # Average over segments
    bispec = bispec_accum / num_segments

    return bispec



In [10]:
# def process_audio_files_gpu(
#     dataset_path,
#     participant_types,
#     emotions,
#     session,
#     nfft=256,
#     output_dir='/content/bispec_data',
#     output_plot_dir='/content/plots'
# ):
#     fs = 44100  # Sampling frequency in Hz
#     freqs = np.fft.fftfreq(nfft, d=1/fs)

#     # Ensure the output directories exist
#     if not os.path.exists(output_dir):
#         os.makedirs(output_dir)
#     if not os.path.exists(output_plot_dir):
#         os.makedirs(output_plot_dir)

#     for participant_type in participant_types:
#         participant_type_path = os.path.join(dataset_path, participant_type)
#         if not os.path.exists(participant_type_path):
#             continue

#         participant_dirs = sorted(os.listdir(participant_type_path))

#         for participant in participant_dirs:
#             participant_path = os.path.join(participant_type_path, participant)
#             session_path = os.path.join(participant_path, session)
#             if not os.path.exists(session_path):
#                 continue

#             for emotion in emotions:
#                 emotion_path = os.path.join(session_path, emotion)
#                 if not os.path.exists(emotion_path):
#                     continue

#                 audio_files = [f for f in os.listdir(emotion_path) if f.endswith('.wav')]

#                 # Initialize variables for accumulating bispectra
#                 total_bispec_gpu = None
#                 file_count = 0

#                 for audio_file in audio_files:
#                     file_path = os.path.join(emotion_path, audio_file)
#                     try:
#                         fs_read, audio_data = wavfile.read(file_path)
#                         # No need to check fs; assuming all files have the same fs
#                         if audio_data.ndim > 1:
#                             audio_data = audio_data[:, 0]  # Use the first channel
#                         audio_data = audio_data.astype(np.float64)

#                         # Compute bispectrum using GPU
#                         bispec_gpu = compute_bispectrum_gpu(audio_data, nfft=nfft)

#                         # Accumulate bispectra
#                         if total_bispec_gpu is None:
#                             total_bispec_gpu = bispec_gpu
#                         else:
#                             total_bispec_gpu += bispec_gpu

#                         file_count += 1

#                         # Free GPU memory for this iteration
#                         del bispec_gpu
#                         cp._default_memory_pool.free_all_blocks()

#                     except Exception as e:
#                         print(f"Error processing {file_path}: {e}")
#                         continue

#                 # After processing all files for this participant, session, and emotion
#                 if file_count > 0:
#                     # Compute the average bispectrum
#                     avg_bispec_gpu = total_bispec_gpu / file_count
#                     avg_bispec = avg_bispec_gpu.get()  # Transfer to CPU

#                     # Save bispectrum data
#                     data_filename = f'{participant}_{session}_{emotion}_average_bispectrum.npy'
#                     data_save_path = os.path.join(output_dir, data_filename)
#                     np.save(data_save_path, avg_bispec)
#                     print(f"Bispectrum data saved to {data_save_path}")

#                     # Plotting
#                     title = f'Bispectrum - {participant} - {session} - {emotion}'
#                     plot_filename = f'{participant}_{session}_{emotion}_average_bispectrum.png'
#                     plot_save_path = os.path.join(output_plot_dir, plot_filename)
#                     plot_bispectrum_triangle_contour(
#                         avg_bispec,
#                         freqs,
#                         max_freq=8000,
#                         levels=20,
#                         cmap='jet',
#                         title=title,
#                         save_path=plot_save_path
#                     ) # or plot_bispectrum

#                     # Free GPU memory
#                     del avg_bispec_gpu, total_bispec_gpu
#                     cp._default_memory_pool.free_all_blocks()
#                 else:
#                     print(f"No valid audio files processed for {participant} - {session} - {emotion}")

def process_audio_files_gpu(
    dataset_path,
    participant_types,
    emotions,
    session,
    nfft=512, # or 256
    output_dir='/content/bispec_data_RV',
    output_plot_dir='/content/plots_RV'
):
    fs = 44100  # Sampling frequency in Hz
    freqs = np.fft.fftfreq(nfft, d=1/fs)

    # Ensure the output directories exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not os.path.exists(output_plot_dir):
        os.makedirs(output_plot_dir)

    for participant_type in participant_types:
        participant_type_path = os.path.join(dataset_path, participant_type)
        if not os.path.exists(participant_type_path):
            continue

        # Create subdirectories for participant types in output directories
        output_dir_participant = os.path.join(output_dir, participant_type)
        output_plot_dir_participant = os.path.join(output_plot_dir, participant_type)
        if not os.path.exists(output_dir_participant):
            os.makedirs(output_dir_participant)
        if not os.path.exists(output_plot_dir_participant):
            os.makedirs(output_plot_dir_participant)

        participant_dirs = sorted(os.listdir(participant_type_path))

        for participant in participant_dirs:
            participant_path = os.path.join(participant_type_path, participant)
            session_path = os.path.join(participant_path, session)
            if not os.path.exists(session_path):
                continue

            for emotion in emotions:
                emotion_path = os.path.join(session_path, emotion)
                if not os.path.exists(emotion_path):
                    continue

                audio_files = [f for f in os.listdir(emotion_path) if f.endswith('.wav')]

                # Initialize variables for accumulating bispectra
                total_bispec_gpu = None
                file_count = 0

                for audio_file in audio_files:
                    file_path = os.path.join(emotion_path, audio_file)
                    try:
                        fs_read, audio_data = wavfile.read(file_path)
                        # No need to check fs; assuming all files have the same fs
                        if audio_data.ndim > 1:
                            audio_data = audio_data[:, 0]  # Use the first channel
                        audio_data = audio_data.astype(np.float64)

                        # Compute bispectrum using GPU
                        bispec_gpu = compute_bispectrum_gpu(audio_data, nfft=nfft)

                        # Accumulate bispectra
                        if total_bispec_gpu is None:
                            total_bispec_gpu = bispec_gpu
                        else:
                            total_bispec_gpu += bispec_gpu

                        file_count += 1

                        # Free GPU memory for this iteration
                        del bispec_gpu
                        cp._default_memory_pool.free_all_blocks()

                    except Exception as e:
                        print(f"Error processing {file_path}: {e}")
                        continue

                # After processing all files for this participant, session, and emotion
                if file_count > 0:
                    # Compute the average bispectrum
                    avg_bispec_gpu = total_bispec_gpu / file_count
                    avg_bispec = avg_bispec_gpu.get()  # Transfer to CPU

                    # Save bispectrum data
                    data_filename = f'{participant}_{session}_{emotion}_average_bispectrum.npy'
                    data_save_path = os.path.join(output_dir_participant, data_filename)
                    np.save(data_save_path, avg_bispec)
                    print(f"Bispectrum data saved to {data_save_path}")

                    # Plotting
                    title = f'Bispectrum - {participant} - {session} - {emotion}'
                    plot_filename = f'{participant}_{session}_{emotion}_average_bispectrum.png'
                    plot_save_path = os.path.join(output_plot_dir_participant, plot_filename)
                    plot_bispectrum(
                        avg_bispec,
                        freqs,
                        max_freq=1500, # To be adjusted
                        levels=30,
                        cmap='jet',
                        title=title,
                        save_path=plot_save_path
                    )

                    # Free GPU memory
                    del avg_bispec_gpu, total_bispec_gpu
                    cp._default_memory_pool.free_all_blocks()
                else:
                    print(f"No valid audio files processed for {participant} - {session} - {emotion}")




In [12]:
def plot_bispectrum(bispec, freqs, max_freq=1500, levels=30, cmap='jet', title='Bispectrum', save_path=None):
    # Compute magnitude
    magnitude = np.abs(bispec)

    # Limit frequency range
    freq_indices = np.where((freqs >= 0) & (freqs <= max_freq))[0]
    freqs_limited = freqs[freq_indices]
    magnitude_limited = magnitude[np.ix_(freq_indices, freq_indices)]

    # Plot
    plt.figure(figsize=(10, 8))
    CS = plt.contour(freqs_limited, freqs_limited, magnitude_limited, levels=levels, cmap=cmap)
    plt.colorbar(label='Magnitude')
    plt.title(title)
    plt.xlabel('f1 (Hz)')
    plt.ylabel('f2 (Hz)')
    plt.xlim([0, max_freq])
    plt.ylim([0, max_freq])
    plt.grid(True)

    # Add diagonal line where f1 = f2
    plt.plot([0, max_freq], [0, max_freq], 'k--', linewidth=1)  # Dashed black line

    if save_path:
        plt.savefig(save_path, dpi=300)
        plt.close()
        print(f"Plot saved to {save_path}")
    else:
        plt.show()

# def plot_bispectrum_triangle_contour(
#     bispec,
#     freqs,
#     max_freq=8000,
#     levels=20,
#     cmap='jet',
#     title='Bispectrum',
#     save_path=None
# ):
#     # Compute magnitude
#     magnitude = np.abs(bispec)

#     # Limit frequency range
#     freq_indices = np.where((freqs >= 0) & (freqs <= max_freq))[0]
#     freqs_limited = freqs[freq_indices]
#     magnitude_limited = magnitude[np.ix_(freq_indices, freq_indices)]

#     # Normalize magnitude
#     magnitude_limited /= np.max(magnitude_limited)

#     # Create a mask for the triangular region
#     f1_grid, f2_grid = np.meshgrid(freqs_limited, freqs_limited)
#     # Define the triangular region: f1 >= f2 and f1 + f2 <= max_freq
#     mask = np.logical_and(f1_grid >= f2_grid, (f1_grid + f2_grid) <= max_freq)
#     # Apply mask to the magnitude
#     magnitude_masked = np.where(mask, magnitude_limited, np.nan)

#     # Plot
#     plt.figure(figsize=(10, 8))
#     CS = plt.contour(
#         freqs_limited,
#         freqs_limited,
#         magnitude_masked,
#         levels=levels,
#         cmap=cmap
#     )
#     plt.colorbar(label='Normalized Magnitude')
#     plt.title(title)
#     plt.xlabel('f₁ (Hz)')
#     plt.ylabel('f₂ (Hz)')
#     plt.xlim([0, max_freq])
#     plt.ylim([0, max_freq])
#     plt.grid(True)

#     # Outline the triangular region
#     plt.plot([0, max_freq], [0, max_freq], 'k-', linewidth=0.5)  # Diagonal line f1 = f2
#     plt.plot([0, max_freq], [max_freq, 0], 'k-', linewidth=0.5)  # Line f1 + f2 = max_freq
#     plt.plot([0, 0], [0, max_freq], 'k-', linewidth=0.5)         # y-axis
#     plt.plot([0, max_freq], [0, 0], 'k-', linewidth=0.5)         # x-axis

#     if save_path:
#         plt.savefig(save_path, dpi=300, bbox_inches='tight')
#         plt.close()
#         print(f"Plot saved to {save_path}")
#     else:
#         plt.show()




In [13]:
# # Define the dataset path and parameters
# dataset_path = '/path/to/Audio_lanzhou_2015_org'  # Update this path
# participant_types = ['MDD', 'HC']
# emotions = ['Positive', 'Neutral', 'Negative']
# session = 'Interview'  # Or 'Read_Vocabulary'
# nfft = 256  # Adjust as needed
# fs = 44100  # Sampling frequency in Hz

# # Compute frequency bins once since fs is constant
# freqs = np.fft.fftfreq(nfft, d=1/fs)

# # Define the output directory
# output_directory = '/path/to/output_directory'  # Update this path

# # Create the output directory if it doesn't exist
# if not os.path.exists(output_directory):
#     os.makedirs(output_directory)

# # Compute the average bispectra for each emotion using GPU
# avg_bispecs = process_audio_files_gpu(
#     dataset_path, participant_types, emotions, session, nfft=nfft
# )

# # Plot bispectra for each emotion and save data
# for emotion in emotions:
#     bispec = avg_bispecs[emotion]
#     if bispec is not None:
#         # Save bispectrum data
#         data_filename = f'avg_bispectrum_{session}_{emotion}.npy'
#         data_save_path = os.path.join(output_directory, data_filename)
#         np.save(data_save_path, bispec)
#         print(f"Bispectrum data saved to {data_save_path}")

#         # Plotting
#         title = f'Bispectrum - {session} - {emotion}'
#         plot_filename = f'bispectrum_{session}_{emotion}.png'
#         plot_save_path = os.path.join(output_directory, plot_filename)
#         plot_bispectrum(bispec, freqs, max_freq=8000, levels=20, cmap='jet', title=title, save_path=plot_save_path)
#     else:
#         print(f"No bispectrum data available for emotion: {emotion}")

# Define the dataset path and parameters
dataset_path = '/content/audio_lanzhou_2015_org'  # Update this path
participant_types = ['MDD', 'HC']
emotions = ['Positive', 'Neutral', 'Negative']
session = 'Read_Vocabulary'  # Or 'Interview'
nfft = 512 # or 256 Adjust as needed

# Define the output directories
output_dir = '/content/bispec_data_RV'  # Update this path
output_plot_dir = '/content/plots_RV'  # Update this path

# Call the function to process audio files and save averaged bispectrum data and plots
process_audio_files_gpu(
    dataset_path,
    participant_types,
    emotions,
    session,
    nfft=nfft,
    output_dir=output_dir,
    output_plot_dir=output_plot_dir
)



Bispectrum data saved to /content/bispec_data_RV/MDD/02010001_Read_Vocabulary_Positive_average_bispectrum.npy
Plot saved to /content/plots_RV/MDD/02010001_Read_Vocabulary_Positive_average_bispectrum.png
Bispectrum data saved to /content/bispec_data_RV/MDD/02010001_Read_Vocabulary_Neutral_average_bispectrum.npy
Plot saved to /content/plots_RV/MDD/02010001_Read_Vocabulary_Neutral_average_bispectrum.png
Bispectrum data saved to /content/bispec_data_RV/MDD/02010001_Read_Vocabulary_Negative_average_bispectrum.npy
Plot saved to /content/plots_RV/MDD/02010001_Read_Vocabulary_Negative_average_bispectrum.png
Bispectrum data saved to /content/bispec_data_RV/MDD/02010002_Read_Vocabulary_Positive_average_bispectrum.npy
Plot saved to /content/plots_RV/MDD/02010002_Read_Vocabulary_Positive_average_bispectrum.png
Bispectrum data saved to /content/bispec_data_RV/MDD/02010002_Read_Vocabulary_Neutral_average_bispectrum.npy
Plot saved to /content/plots_RV/MDD/02010002_Read_Vocabulary_Neutral_average_bisp

  fs_read, audio_data = wavfile.read(file_path)


Bispectrum data saved to /content/bispec_data_RV/MDD/02010004_Read_Vocabulary_Neutral_average_bispectrum.npy
Plot saved to /content/plots_RV/MDD/02010004_Read_Vocabulary_Neutral_average_bispectrum.png
Error processing /content/audio_lanzhou_2015_org/MDD/02010004/Read_Vocabulary/Negative/25.wav: File format b'v\x9f\x8e+' not understood. Only 'RIFF' and 'RIFX' supported.
Error processing /content/audio_lanzhou_2015_org/MDD/02010004/Read_Vocabulary/Negative/24.wav: File format b'\xda\xed[\x9d' not understood. Only 'RIFF' and 'RIFX' supported.
No valid audio files processed for 02010004 - Read_Vocabulary - Negative
Bispectrum data saved to /content/bispec_data_RV/MDD/02010005_Read_Vocabulary_Positive_average_bispectrum.npy
Plot saved to /content/plots_RV/MDD/02010005_Read_Vocabulary_Positive_average_bispectrum.png
Bispectrum data saved to /content/bispec_data_RV/MDD/02010005_Read_Vocabulary_Neutral_average_bispectrum.npy
Plot saved to /content/plots_RV/MDD/02010005_Read_Vocabulary_Neutral_

In [14]:
# zip plots directory
import shutil
shutil.make_archive('/content/plots_RV', 'zip', '/content/plots_RV')

'/content/plots_RV.zip'

**Features Extraction**

In [19]:
import os
import numpy as np
import pandas as pd
from scipy.stats import entropy, skew, kurtosis

# Function to extract features from bispectrum data (as defined above)
def extract_bispectrum_features(bispec, fs):
    import numpy as np
    from scipy.stats import entropy, skew, kurtosis

    # Compute magnitude and phase
    bispec_magnitude = np.abs(bispec)
    bispec_phase = np.angle(bispec)

    # Flatten the bispectrum magnitude and phase
    bispec_mag_flat = bispec_magnitude.flatten()
    bispec_phase_flat = bispec_phase.flatten()

    # Avoid log(0) by adding a small epsilon
    eps = 1e-12

    # Bispectral Magnitude Features
    mean_mag = np.mean(bispec_mag_flat)
    max_mag = np.max(bispec_mag_flat)
    sum_mag = np.sum(bispec_mag_flat)
    spectral_flatness = np.exp(np.mean(np.log(bispec_mag_flat + eps))) / (mean_mag + eps)
    entropy_mag = entropy(bispec_mag_flat + eps)

    # Bispectral Phase Features
    mean_phase = np.mean(bispec_phase_flat)
    std_phase = np.std(bispec_phase_flat)
    skew_phase = skew(bispec_phase_flat)
    kurt_phase = kurtosis(bispec_phase_flat)

    # Quadratic Phase Coupling Features
    N = bispec.shape[0]
    indices_qpc = np.unravel_index(np.argmax(bispec_magnitude), bispec_magnitude.shape)
    qpc_strength = bispec_magnitude[indices_qpc]
    qpc_freqs = (indices_qpc[0] * fs / N, indices_qpc[1] * fs / N)

    # Frequency Band Features
    freq_bins = np.fft.fftfreq(N, d=1/fs)
    freq_bins = freq_bins[:N//2]
    bispec_magnitude_half = bispec_magnitude[:N//2, :N//2]

    bands = {
        'low': (0, 1000),
        'mid': (1000, 5000),
        'high': (5000, fs/2)
    }

    band_energies = {}
    for band_name, (fmin, fmax) in bands.items():
        idx = np.where((freq_bins >= fmin) & (freq_bins < fmax))[0]
        if len(idx) > 0:
            band_energy = np.sum(bispec_magnitude_half[np.ix_(idx, idx)])
        else:
            band_energy = 0
        band_energies[f'band_energy_{band_name}'] = band_energy

    # Bispectrum Peaks
    threshold = mean_mag + 2 * np.std(bispec_mag_flat)
    num_peaks = np.sum(bispec_mag_flat > threshold)

    # Statistical Features
    std_mag = np.std(bispec_mag_flat)
    skew_mag = skew(bispec_mag_flat)
    kurt_mag = kurtosis(bispec_mag_flat)

    # Collect all features into a dictionary
    features = {
        'mean_mag': mean_mag,
        'max_mag': max_mag,
        'sum_mag': sum_mag,
        'spectral_flatness': spectral_flatness,
        'entropy_mag': entropy_mag,
        'mean_phase': mean_phase,
        'std_phase': std_phase,
        'skew_phase': skew_phase,
        'kurt_phase': kurt_phase,
        'qpc_strength': qpc_strength,
        'qpc_freq1': qpc_freqs[0],
        'qpc_freq2': qpc_freqs[1],
        **band_energies,
        'num_peaks': num_peaks,
        'std_mag': std_mag,
        'skew_mag': skew_mag,
        'kurt_mag': kurt_mag,
    }

    return features


# Define paths and parameters
output_dir = '/content/bispec_data'  # Update with your output directory
session = 'Interview'  # Or 'Read_Vocabulary'
participant_types = ['MDD', 'HC']
emotions = ['Positive', 'Neutral', 'Negative']

# Initialize a list to store features
features_list = []

# Load bispectrum data and extract features
for participant_type in participant_types:
    participant_output_dir = os.path.join(output_dir, participant_type)
    if not os.path.exists(participant_output_dir):
        continue

    # Assign label based on participant type
    label = 1 if participant_type == 'MDD' else 0

    # List participants in the directory
    participant_files = os.listdir(participant_output_dir)
    for bispec_file in participant_files:
        bispec_path = os.path.join(participant_output_dir, bispec_file)

        # Ensure we're only processing .npy files
        if not bispec_file.endswith('_average_bispectrum.npy'):
            continue

        # Load bispectrum data
        bispec = np.load(bispec_path)

        # Extract participant ID and emotion from filename
        filename_parts = bispec_file.split('_')
        participant = filename_parts[0]
        emotion = filename_parts[2]

        # Sampling frequency
        fs = 44100  # Update if necessary

        # Extract features
        features = extract_bispectrum_features(bispec, fs)

        # Add additional information to features
        features['participant'] = participant
        features['participant_type'] = participant_type
        features['session'] = session
        features['emotion'] = emotion
        features['label'] = label

        # Append features to the list
        features_list.append(features)

# Convert the list of features to a DataFrame
df_features = pd.DataFrame(features_list)

# Save the DataFrame to an Excel or CSV file
excel_file = '/content/features_Interview.xlsx'  # Update the path
df_features.to_excel(excel_file, index=False)
print(f"Features saved to {excel_file}")

# # Optionally, save to CSV
# csv_file = '/path/to/features.csv'  # Update the path
# df_features.to_csv(csv_file, index=False)
# print(f"Features saved to {csv_file}")


Features saved to /content/features_Interview.xlsx
