<a href="https://colab.research.google.com/github/lukuenya/Bispectrum_Analysis/blob/master/Audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!unzip "/content/drive/MyDrive/Colab Notebooks/audio_lanzhou_2015_org.zip" -d "/content/"

Archive:  /content/drive/MyDrive/Colab Notebooks/audio_lanzhou_2015_org.zip
   creating: /content/audio_lanzhou_2015_org/HC/
   creating: /content/audio_lanzhou_2015_org/HC/02020004/
   creating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/
   creating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/13.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/14.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/15.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/16.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/17.wav  
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Negative/18.wav  
   creating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Neutral/
  inflating: /content/audio_lanzhou_2015_org/HC/02020004/Interview/Neutral/07.wav  
  

In [2]:
import os
import numpy as np
import cupy as cp
import cupyx.scipy.signal as cusignal
import cupyx.scipy.fft as cufft
import matplotlib.pyplot as plt
from scipy.io import wavfile  # Reading audio files can remain on CPU



In [18]:
# Record which CuPy release produced the results below
cupy_version = cp.__version__
print("cupy version:", cupy_version)


cupy version: 12.2.0


In [3]:
# Shared analysis settings
fs = 44100   # Sampling frequency in Hz
nfft = 512   # FFT length; adjust as needed

# Centre frequency (Hz) of every FFT bin, computed once because fs is constant
freqs = np.fft.fftfreq(nfft, 1 / fs)


def compute_bispectrum_gpu(audio_data, nfft=256, noverlap=None):
    """
    Estimate the bispectrum of an audio signal by segment averaging on the GPU.

    The signal is cut into overlapping frames of ``nfft`` samples. Each frame is
    multiplied by a Hann window and Fourier transformed, and the triple product
    ``X(f1) * X(f2) * conj(X(f1 + f2))`` is averaged over all frames. The bin
    index of ``f1 + f2`` is taken modulo ``nfft`` (circular indexing).

    Parameters:
    - audio_data: 1D numpy array (or array-like) of audio samples.
    - nfft: FFT length, which is also the frame length in samples.
    - noverlap: Number of points to overlap between segments
      (defaults to nfft // 2).

    Returns:
    - bispec: (nfft, nfft) complex128 bispectrum array on GPU.

    Raises:
    - ValueError: if nfft is not positive, if noverlap is outside [0, nfft),
      if audio_data is not one-dimensional, or if it holds fewer than nfft
      samples (no complete frame could be formed).
    """
    if nfft <= 0:
        raise ValueError(f"nfft must be a positive integer, got {nfft}")
    if noverlap is None:
        noverlap = nfft // 2
    # A step of zero would never advance, and a negative overlap would make the
    # frame grid reach past the end of the signal.
    if not 0 <= noverlap < nfft:
        raise ValueError(
            f"noverlap must satisfy 0 <= noverlap < nfft, got noverlap={noverlap}, nfft={nfft}"
        )

    audio_data = np.asarray(audio_data)
    if audio_data.ndim != 1:
        raise ValueError(
            f"audio_data must be one-dimensional, got {audio_data.ndim} dimensions"
        )
    # Without at least one full frame the average below would divide by zero
    # and silently return an all-NaN bispectrum.
    if audio_data.size < nfft:
        raise ValueError(
            f"audio_data has {audio_data.size} samples but at least nfft={nfft} are required"
        )

    # Segment the data on CPU: a bounds-checked, read-only view of every
    # nfft-long window, keeping one window per hop of `step` samples.
    step = nfft - noverlap
    segments_cpu = np.lib.stride_tricks.sliding_window_view(audio_data, nfft)[::step]

    # Apply the Hann window on CPU (this materialises the frames as a new array)
    segments_cpu = segments_cpu * np.hanning(nfft)

    # Transfer windowed segments to GPU and compute the FFT of every frame
    segments_gpu = cp.asarray(segments_cpu)
    fft_segments = cufft.fft(segments_gpu, n=nfft, axis=1)

    # Lookup table of (f1 + f2) mod nfft; identical for every segment, so it is
    # built once instead of inside the loop.
    bin_index = cp.arange(nfft)
    sum_indices = (bin_index[:, None] + bin_index[None, :]) % nfft

    # Accumulate the triple product one segment at a time so only a single
    # (nfft, nfft) temporary lives on the GPU at any moment.
    bispec_accum = cp.zeros((nfft, nfft), dtype=cp.complex128)
    num_segments = fft_segments.shape[0]
    for i in range(num_segments):
        X = fft_segments[i]
        outer_prod = X[:, None] * X[None, :]  # X(f1) * X(f2)
        bispec_accum += outer_prod * cp.conj(X)[sum_indices]  # * conj(X(f1 + f2))

    # Average over segments
    return bispec_accum / num_segments



In [4]:
# def process_audio_files_gpu(
#     dataset_path,
#     participant_types,
#     emotions,
#     session,
#     nfft=256,
#     output_dir='/content/bispec_data',
#     output_plot_dir='/content/plots'
# ):
#     fs = 44100  # Sampling frequency in Hz
#     freqs = np.fft.fftfreq(nfft, d=1/fs)

#     # Ensure the output directories exist
#     if not os.path.exists(output_dir):
#         os.makedirs(output_dir)
#     if not os.path.exists(output_plot_dir):
#         os.makedirs(output_plot_dir)

#     for participant_type in participant_types:
#         participant_type_path = os.path.join(dataset_path, participant_type)
#         if not os.path.exists(participant_type_path):
#             continue

#         participant_dirs = sorted(os.listdir(participant_type_path))

#         for participant in participant_dirs:
#             participant_path = os.path.join(participant_type_path, participant)
#             session_path = os.path.join(participant_path, session)
#             if not os.path.exists(session_path):
#                 continue

#             for emotion in emotions:
#                 emotion_path = os.path.join(session_path, emotion)
#                 if not os.path.exists(emotion_path):
#                     continue

#                 audio_files = [f for f in os.listdir(emotion_path) if f.endswith('.wav')]

#                 # Initialize variables for accumulating bispectra
#                 total_bispec_gpu = None
#                 file_count = 0

#                 for audio_file in audio_files:
#                     file_path = os.path.join(emotion_path, audio_file)
#                     try:
#                         fs_read, audio_data = wavfile.read(file_path)
#                         # No need to check fs; assuming all files have the same fs
#                         if audio_data.ndim > 1:
#                             audio_data = audio_data[:, 0]  # Use the first channel
#                         audio_data = audio_data.astype(np.float64)

#                         # Compute bispectrum using GPU
#                         bispec_gpu = compute_bispectrum_gpu(audio_data, nfft=nfft)

#                         # Accumulate bispectra
#                         if total_bispec_gpu is None:
#                             total_bispec_gpu = bispec_gpu
#                         else:
#                             total_bispec_gpu += bispec_gpu

#                         file_count += 1

#                         # Free GPU memory for this iteration
#                         del bispec_gpu
#                         cp._default_memory_pool.free_all_blocks()

#                     except Exception as e:
#                         print(f"Error processing {file_path}: {e}")
#                         continue

#                 # After processing all files for this participant, session, and emotion
#                 if file_count > 0:
#                     # Compute the average bispectrum
#                     avg_bispec_gpu = total_bispec_gpu / file_count
#                     avg_bispec = avg_bispec_gpu.get()  # Transfer to CPU

#                     # Save bispectrum data
#                     data_filename = f'{participant}_{session}_{emotion}_average_bispectrum.npy'
#                     data_save_path = os.path.join(output_dir, data_filename)
#                     np.save(data_save_path, avg_bispec)
#                     print(f"Bispectrum data saved to {data_save_path}")

#                     # Plotting
#                     title = f'Bispectrum - {participant} - {session} - {emotion}'
#                     plot_filename = f'{participant}_{session}_{emotion}_average_bispectrum.png'
#                     plot_save_path = os.path.join(output_plot_dir, plot_filename)
#                     plot_bispectrum_triangle_contour(
#                         avg_bispec,
#                         freqs,
#                         max_freq=8000,
#                         levels=20,
#                         cmap='jet',
#                         title=title,
#                         save_path=plot_save_path
#                     ) # or plot_bispectrum

#                     # Free GPU memory
#                     del avg_bispec_gpu, total_bispec_gpu
#                     cp._default_memory_pool.free_all_blocks()
#                 else:
#                     print(f"No valid audio files processed for {participant} - {session} - {emotion}")

def process_audio_files_gpu(
    dataset_path,
    participant_types,
    emotions,
    session,
    nfft=256,
    output_dir='/content/bispec_data',
    output_plot_dir='/content/plots',
    fs=44100
):
    """
    Average the bispectra of all recordings per participant/emotion and save them.

    The dataset is walked as
    ``dataset_path/<participant_type>/<participant>/<session>/<emotion>/*.wav``.
    For every (participant, emotion) pair the bispectra of its .wav files are
    averaged; the result is written as a .npy file under
    ``output_dir/<participant_type>/`` and as a triangular contour plot under
    ``output_plot_dir/<participant_type>/``. Missing directories are skipped.

    Parameters:
    - dataset_path: Root directory of the dataset.
    - participant_types: Iterable of top-level group directory names.
    - emotions: Iterable of emotion directory names to process.
    - session: Name of the session directory inside each participant directory.
    - nfft: FFT length passed to compute_bispectrum_gpu.
    - output_dir: Directory receiving the averaged bispectrum .npy files.
    - output_plot_dir: Directory receiving the .png plots.
    - fs: Expected sampling frequency in Hz. It defines the frequency axis of
      the plots; files recorded at any other rate are reported and skipped.

    Returns:
    - None. Results are written to disk and progress is printed.

    A file that cannot be read or processed is reported and skipped; it never
    aborts the whole run.
    """
    freqs = np.fft.fftfreq(nfft, d=1/fs)

    # Ensure the output directories exist
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(output_plot_dir, exist_ok=True)

    for participant_type in participant_types:
        participant_type_path = os.path.join(dataset_path, participant_type)
        if not os.path.isdir(participant_type_path):
            continue

        # Create subdirectories for participant types in output directories
        output_dir_participant = os.path.join(output_dir, participant_type)
        output_plot_dir_participant = os.path.join(output_plot_dir, participant_type)
        os.makedirs(output_dir_participant, exist_ok=True)
        os.makedirs(output_plot_dir_participant, exist_ok=True)

        for participant in sorted(os.listdir(participant_type_path)):
            session_path = os.path.join(participant_type_path, participant, session)
            if not os.path.isdir(session_path):
                continue

            for emotion in emotions:
                emotion_path = os.path.join(session_path, emotion)
                if not os.path.isdir(emotion_path):
                    continue

                # Sorted so the accumulation order (and therefore the
                # floating-point result) does not depend on directory order.
                audio_files = sorted(
                    f for f in os.listdir(emotion_path) if f.endswith('.wav')
                )

                # Running sum of bispectra for this participant/emotion
                total_bispec_gpu = None
                file_count = 0

                for audio_file in audio_files:
                    file_path = os.path.join(emotion_path, audio_file)
                    try:
                        fs_read, audio_data = wavfile.read(file_path)
                        # The frequency axis is derived from `fs`, so a file at
                        # another rate would be averaged into the wrong bins.
                        if fs_read != fs:
                            raise ValueError(
                                f"sampling rate {fs_read} Hz does not match the expected {fs} Hz"
                            )
                        if audio_data.ndim > 1:
                            audio_data = audio_data[:, 0]  # Use the first channel
                        # A clip shorter than one frame yields no segments and
                        # would contribute NaNs to the average.
                        if audio_data.size < nfft:
                            raise ValueError(
                                f"only {audio_data.size} samples, need at least nfft={nfft}"
                            )
                        audio_data = audio_data.astype(np.float64)

                        # Compute bispectrum using GPU
                        bispec_gpu = compute_bispectrum_gpu(audio_data, nfft=nfft)

                        # Accumulate bispectra
                        if total_bispec_gpu is None:
                            total_bispec_gpu = bispec_gpu
                        else:
                            total_bispec_gpu += bispec_gpu

                        file_count += 1

                        # Free GPU memory for this iteration
                        del bispec_gpu
                        cp.get_default_memory_pool().free_all_blocks()

                    except Exception as e:
                        # Deliberate best effort: report the file and move on
                        print(f"Error processing {file_path}: {e}")
                        continue

                # After processing all files for this participant, session, and emotion
                if file_count > 0:
                    # Compute the average bispectrum
                    avg_bispec_gpu = total_bispec_gpu / file_count
                    avg_bispec = avg_bispec_gpu.get()  # Transfer to CPU

                    # Save bispectrum data
                    data_filename = f'{participant}_{session}_{emotion}_average_bispectrum.npy'
                    data_save_path = os.path.join(output_dir_participant, data_filename)
                    np.save(data_save_path, avg_bispec)
                    print(f"Bispectrum data saved to {data_save_path}")

                    # Plotting
                    title = f'Bispectrum - {participant} - {session} - {emotion}'
                    plot_filename = f'{participant}_{session}_{emotion}_average_bispectrum.png'
                    plot_save_path = os.path.join(output_plot_dir_participant, plot_filename)
                    plot_bispectrum_triangle_contour(
                        avg_bispec,
                        freqs,
                        max_freq=8000,
                        levels=20,
                        cmap='jet',
                        title=title,
                        save_path=plot_save_path
                    )

                    # Free GPU memory
                    del avg_bispec_gpu, total_bispec_gpu
                    cp.get_default_memory_pool().free_all_blocks()
                else:
                    print(f"No valid audio files processed for {participant} - {session} - {emotion}")




In [5]:
# def plot_bispectrum(bispec, freqs, max_freq=8000, levels=20, cmap='jet', title='Bispectrum', save_path=None):
#     # Compute magnitude
#     magnitude = np.abs(bispec)

#     # Limit frequency range
#     freq_indices = np.where((freqs >= 0) & (freqs <= max_freq))[0]
#     freqs_limited = freqs[freq_indices]
#     magnitude_limited = magnitude[np.ix_(freq_indices, freq_indices)]

#     # Plot
#     plt.figure(figsize=(10, 8))
#     CS = plt.contour(freqs_limited, freqs_limited, magnitude_limited, levels=levels, cmap=cmap)
#     plt.colorbar(label='Magnitude')
#     plt.title(title)
#     plt.xlabel('f1 (Hz)')
#     plt.ylabel('f2 (Hz)')
#     plt.xlim([0, max_freq])
#     plt.ylim([0, max_freq])
#     plt.grid(True)

#     # Add diagonal line where f1 = f2
#     plt.plot([0, max_freq], [0, max_freq], 'k--', linewidth=1)  # Dashed black line

#     if save_path:
#         plt.savefig(save_path, dpi=300)
#         plt.close()
#         print(f"Plot saved to {save_path}")
#     else:
#         plt.show()

def plot_bispectrum_triangle_contour(
    bispec,
    freqs,
    max_freq=8000,
    levels=20,
    cmap='jet',
    title='Bispectrum',
    save_path=None
):
    """
    Draw a contour plot of the bispectrum magnitude inside a triangular region.

    Only bins with 0 <= f <= max_freq are kept. The magnitude is scaled so its
    largest kept value is 1, and everything outside the region
    ``f1 >= f2`` and ``f1 + f2 <= max_freq`` is blanked out before plotting.

    Parameters:
    - bispec: 2D complex array indexed by frequency bin along both axes.
    - freqs: 1D array with the frequency in Hz of each bin of bispec.
    - max_freq: Upper frequency limit in Hz for both axes.
    - levels: Contour levels, forwarded to matplotlib's contour.
    - cmap: Colormap name, forwarded to matplotlib's contour.
    - title: Figure title.
    - save_path: If given, the figure is written there and closed; otherwise it
      is shown interactively.

    Raises:
    - ValueError: if no entry of freqs lies within [0, max_freq].
    """
    # Compute magnitude
    magnitude = np.abs(bispec)

    # Limit frequency range
    freq_indices = np.where((freqs >= 0) & (freqs <= max_freq))[0]
    if freq_indices.size == 0:
        raise ValueError(f"no frequency bins fall within [0, {max_freq}] Hz")
    freqs_limited = freqs[freq_indices]
    magnitude_limited = magnitude[np.ix_(freq_indices, freq_indices)]

    # Normalize magnitude; an all-zero (silent) bispectrum is left as is, since
    # dividing by a zero peak would turn every value into NaN.
    peak = np.max(magnitude_limited)
    if peak > 0:
        magnitude_limited = magnitude_limited / peak

    # Create a mask for the triangular region: f1 >= f2 and f1 + f2 <= max_freq
    f1_grid, f2_grid = np.meshgrid(freqs_limited, freqs_limited)
    mask = np.logical_and(f1_grid >= f2_grid, (f1_grid + f2_grid) <= max_freq)
    # NaN entries are simply not drawn by the contour plot
    magnitude_masked = np.where(mask, magnitude_limited, np.nan)

    # Plot
    fig, ax = plt.subplots(figsize=(10, 8))
    contours = ax.contour(
        freqs_limited,
        freqs_limited,
        magnitude_masked,
        levels=levels,
        cmap=cmap
    )
    fig.colorbar(contours, ax=ax, label='Normalized Magnitude')
    ax.set_title(title)
    ax.set_xlabel('f₁ (Hz)')
    ax.set_ylabel('f₂ (Hz)')
    ax.set_xlim([0, max_freq])
    ax.set_ylim([0, max_freq])
    ax.grid(True)

    # Guide lines around the triangular region
    ax.plot([0, max_freq], [0, max_freq], 'k-', linewidth=0.5)  # Diagonal line f1 = f2
    ax.plot([0, max_freq], [max_freq, 0], 'k-', linewidth=0.5)  # Line f1 + f2 = max_freq
    ax.plot([0, 0], [0, max_freq], 'k-', linewidth=0.5)         # y-axis
    ax.plot([0, max_freq], [0, 0], 'k-', linewidth=0.5)         # x-axis

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        # Close explicitly so figures do not pile up when called in a loop
        plt.close(fig)
        print(f"Plot saved to {save_path}")
    else:
        plt.show()




In [6]:
# # Define the dataset path and parameters
# dataset_path = '/path/to/Audio_lanzhou_2015_org'  # Update this path
# participant_types = ['MDD', 'HC']
# emotions = ['Positive', 'Neutral', 'Negative']
# session = 'Interview'  # Or 'Read_Vocabulary'
# nfft = 256  # Adjust as needed
# fs = 44100  # Sampling frequency in Hz

# # Compute frequency bins once since fs is constant
# freqs = np.fft.fftfreq(nfft, d=1/fs)

# # Define the output directory
# output_directory = '/path/to/output_directory'  # Update this path

# # Create the output directory if it doesn't exist
# if not os.path.exists(output_directory):
#     os.makedirs(output_directory)

# # Compute the average bispectra for each emotion using GPU
# avg_bispecs = process_audio_files_gpu(
#     dataset_path, participant_types, emotions, session, nfft=nfft
# )

# # Plot bispectra for each emotion and save data
# for emotion in emotions:
#     bispec = avg_bispecs[emotion]
#     if bispec is not None:
#         # Save bispectrum data
#         data_filename = f'avg_bispectrum_{session}_{emotion}.npy'
#         data_save_path = os.path.join(output_directory, data_filename)
#         np.save(data_save_path, bispec)
#         print(f"Bispectrum data saved to {data_save_path}")

#         # Plotting
#         title = f'Bispectrum - {session} - {emotion}'
#         plot_filename = f'bispectrum_{session}_{emotion}.png'
#         plot_save_path = os.path.join(output_directory, plot_filename)
#         plot_bispectrum(bispec, freqs, max_freq=8000, levels=20, cmap='jet', title=title, save_path=plot_save_path)
#     else:
#         print(f"No bispectrum data available for emotion: {emotion}")

# Where the extracted dataset lives and which recordings to analyse
dataset_path = '/content/audio_lanzhou_2015_org'  # Update this path
participant_types = ['MDD', 'HC']
emotions = ['Positive', 'Neutral', 'Negative']
session = 'Interview'  # Or 'Read_Vocabulary'
nfft = 256  # Adjust as needed

# Destinations for the averaged bispectra (.npy) and their plots (.png)
output_dir = '/content/bispec_data'  # Update this path
output_plot_dir = '/content/plots'  # Update this path

# Run the full pipeline: average the bispectra and write data files and plots
process_audio_files_gpu(
    dataset_path=dataset_path,
    participant_types=participant_types,
    emotions=emotions,
    session=session,
    nfft=nfft,
    output_dir=output_dir,
    output_plot_dir=output_plot_dir,
)



Bispectrum data saved to /content/bispec_data/02010001_Interview_Positive_average_bispectrum.npy
Plot saved to /content/plots/02010001_Interview_Positive_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010001_Interview_Neutral_average_bispectrum.npy
Plot saved to /content/plots/02010001_Interview_Neutral_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010001_Interview_Negative_average_bispectrum.npy
Plot saved to /content/plots/02010001_Interview_Negative_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010002_Interview_Positive_average_bispectrum.npy
Plot saved to /content/plots/02010002_Interview_Positive_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010002_Interview_Neutral_average_bispectrum.npy
Plot saved to /content/plots/02010002_Interview_Neutral_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010002_Interview_Negative_average_bispectrum.npy
Plot saved to /content

  fs_read, audio_data = wavfile.read(file_path)


Bispectrum data saved to /content/bispec_data/02010025_Interview_Positive_average_bispectrum.npy
Plot saved to /content/plots/02010025_Interview_Positive_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010025_Interview_Neutral_average_bispectrum.npy
Plot saved to /content/plots/02010025_Interview_Neutral_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010025_Interview_Negative_average_bispectrum.npy
Plot saved to /content/plots/02010025_Interview_Negative_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010034_Interview_Positive_average_bispectrum.npy
Plot saved to /content/plots/02010034_Interview_Positive_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010034_Interview_Neutral_average_bispectrum.npy
Plot saved to /content/plots/02010034_Interview_Neutral_average_bispectrum.png
Bispectrum data saved to /content/bispec_data/02010034_Interview_Negative_average_bispectrum.npy
Plot saved to /content

In [9]:
# Bundle every generated plot into /content/plots.zip
import shutil
shutil.make_archive(base_name='/content/plots', format='zip', root_dir='/content/plots')

'/content/plots.zip'