In [None]:
# Mount Google Drive at /content/drive so the dataset paths used further down
# (which all live under /content/drive/MyDrive) resolve inside this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The way we will go about it:
- we define a new function that takes input and output path params
- we initialize the iyer stuff (window length, overlap, FFT size, DPI) as variables/constants
- we'll use enumerate to loop over the audio files, so each file is paired with its position in the list


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
import os
import glob
import librosa

def _normalized_spectrogram_db(audio_data, sample_rate, window_ms, overlap, fft_size):
    """Compute the max-normalised log-magnitude spectrogram of one mono signal.

    Returns a ``(frequencies, times, spectrogram_db)`` tuple as produced by
    ``scipy.signal.spectrogram`` with the magnitude converted via
    ``10 * log10(|S| / max(|S|))``.
    """
    # calculate window and overlap in samples
    window_size = int(sample_rate * window_ms / 1000)
    noverlap = int(window_size * overlap)

    # scipy rejects nfft < nperseg. With the default 32 ms window that happens
    # for any sample rate above 32 kHz (e.g. 44.1 kHz -> 1411 samples > 1024),
    # so widen the FFT to the window length instead of failing the whole file.
    nfft = max(fft_size, window_size)

    frequencies, times, spectrogram = signal.spectrogram(
        audio_data,
        fs=sample_rate,
        window='hann',
        nperseg=window_size,
        noverlap=noverlap,
        nfft=nfft,
        mode='magnitude'
    )

    # normalize energy equation: 10*log10(|S| / max(|S|))
    max_val = np.max(spectrogram)
    if max_val == 0:
        max_val = 1e-10  # epsilon: avoids dividing by zero on an all-silent file
    # the additive epsilon keeps log10 finite for bins that are exactly zero
    spectrogram_db = 10 * np.log10(spectrogram / max_val + 1e-10)
    return frequencies, times, spectrogram_db


def _save_spectrogram_image(frequencies, times, spectrogram_db, image_path, dpi):
    """Render one spectrogram as an axis-free, margin-free JPEG at ``image_path``."""
    fig, ax = plt.subplots(figsize=(6, 6), dpi=dpi)
    try:
        ax.pcolormesh(times, frequencies, spectrogram_db, shading='gouraud', cmap='viridis')
        ax.axis('off')  # hide axes
        ax.set_position([0, 0, 1, 1])  # remove margins
        # save through the figure object rather than pyplot's implicit "current figure"
        fig.savefig(image_path, dpi=dpi, format='jpg', bbox_inches=None, pad_inches=0)
    finally:
        # always release the figure, even when plotting/saving raised, so a
        # long batch does not accumulate open figures
        plt.close(fig)


def audio_to_spectrogram(input_path, output_path, window_ms=32, overlap=0.5,
                         fft_size=1024, dpi=100):
    """Convert every ``*.wav`` file in ``input_path`` into a spectrogram JPEG.

    Each file is loaded with librosa at its native sample rate (mono), turned
    into a Hann-windowed magnitude spectrogram, normalised to its own maximum
    on a log scale, and saved as ``<output_path>/<wav basename>.jpg``.

    Args:
        input_path: Directory searched (non-recursively) for ``*.wav`` files.
        output_path: Directory the images are written to; created if missing.
        window_ms: Analysis window length in milliseconds.
        overlap: Fraction of the window shared by consecutive frames (0.5 = 50%).
        fft_size: Minimum FFT length; raised to the window length in samples
            when the window is longer than this.
        dpi: Resolution used for both the figure and the saved image
            (the figure is 6x6 inches).

    Returns:
        None. Progress and per-file errors are reported with ``print``.

    Per-file failures are caught and reported so one bad recording does not
    abort the rest of the batch.
    """
    # sorted() makes the processing order (and the i/N progress log) reproducible;
    # glob on its own returns files in arbitrary filesystem order
    audio_files = sorted(glob.glob(os.path.join(input_path, '*.wav')))
    total = len(audio_files)

    # savefig does not create missing directories
    os.makedirs(output_path, exist_ok=True)

    for i, audio_file in enumerate(audio_files):
        try:
            # load with librosa; sr=None keeps the file's native sample rate
            audio_data, sample_rate = librosa.load(audio_file, sr=None, mono=True)

            # skip anything that did not load as a non-empty numpy array
            if not isinstance(audio_data, np.ndarray):
                print(f"Skipping {audio_file}: audio_data is {type(audio_data)} instead of np.ndarray")
                continue
            if audio_data.size == 0:
                print(f"Skipping {audio_file}: Empty audio array")
                continue

            frequencies, times, spectrogram_db = _normalized_spectrogram_db(
                audio_data, sample_rate, window_ms, overlap, fft_size
            )

            base_filename = os.path.splitext(os.path.basename(audio_file))[0]
            _save_spectrogram_image(
                frequencies, times, spectrogram_db,
                os.path.join(output_path, f'{base_filename}.jpg'),
                dpi,
            )

            print(f"Processed {i+1}/{total}: {base_filename}")

        except Exception as e:
            # deliberate best-effort: report the failure and move on to the next file
            print(f"Error processing {audio_file}: {e}")

# Source WAV folders on the mounted Drive: PD_AH and HC_AH recordings
pd_audio_path = '/content/drive/MyDrive/wav_Dataset/PD_AH'
hc_audio_path = '/content/drive/MyDrive/wav_Dataset/HC_AH'

# (banner, WAV input folder, image output folder), in the order they are converted
_conversion_jobs = [
    ("converting pd", pd_audio_path, '/content/drive/MyDrive/dataset/PD_AH'),
    ("converting hc", hc_audio_path, '/content/drive/MyDrive/dataset/HC_AH'),
]

for _job_number, (_banner, _wav_dir, _image_dir) in enumerate(_conversion_jobs):
    # visual separator between consecutive datasets in the cell output
    if _job_number > 0:
        print("\n" + "="*50 + "\n")

    print(_banner)
    audio_to_spectrogram(_wav_dir, _image_dir)

print("all done lol")

converting pd
Processed 1/40: AH_545622718-C052AD58-5E6B-4ADC-855C-F76B66BAFA6E
Processed 2/40: AH_545622719-52C23861-6E0D-41E0-A3D8-9358C28C019B
Processed 3/40: AH_545622722-3C79DA68-36BB-43A2-B29C-61AEF480E07E
Processed 4/40: AH_545622720-E1486AF6-8C95-47EB-829B-4D62698C987A
Processed 5/40: AH_545789671-794D2256-DDFF-4009-8BA8-8A306C8FA14F
Processed 6/40: AH_545812846-0C14B32A-6C50-4B62-BC89-0A815C2DEEFA
Processed 7/40: AH_545643618-82A143AC-B643-4273-A923-C42A83AEEC5F
Processed 8/40: AH_545789675-243F18DB-4432-4C87-B12C-6EEC2D2D30D6
Processed 9/40: AH_545789682-7554E0C7-4E25-49C3-9E6C-04D525455E28
Processed 10/40: AH_545806325-8A17002B-CFD3-4DCF-8854-04F0F2BFF21B
Processed 11/40: AH_545692315-C2972597-9AEC-4060-A186-F1F59340640C
Processed 12/40: AH_545713224-1B3708B0-8792-4FEE-B03B-C7CB9CB03D58
Processed 13/40: AH_545743929-E2EAE1A3-7E46-4DCF-8DB7-37A5CA47DB9D
Processed 14/40: AH_545753015-58CAA743-BA9A-47E0-B9EF-CC35E9EFB839
Processed 15/40: AH_545789690-DA26461A-AF40-4A43-9662-3A9