In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import numpy as np 
import os 
import librosa
from glob import glob
from tqdm import tqdm
import pdb 
import IPython.display as ipd

In [3]:
data_dir = "/Users/sanchit/Documents/Projects/datasets/audio_data/speech_commands/"

In [4]:
# define parameters 
n_fft = 2048
hop_length = int(n_fft/4)
save_whole_dataset = False 

In [5]:
def compute_mel_spectrogram(data, sampling_rate):
    """compute mel spectrogram of the signal"""
    mel_spectrogram = librosa.feature.melspectrogram(data, sr=sampling_rate)

    return librosa.power_to_db(abs(mel_spectrogram))

In [6]:
def load_data_cleaned(dir_path="", class_mode='binary', classes=None):
    
    dataset = dict() 
    directories=[d for d in os.listdir(dir_path) if os.path.isdir(d) or (not d.startswith("."))]
    for label, class_name in enumerate(directories):
        dataset[class_name] = list() 
        print(f"loading data for class: {class_name}")
        class_dir = os.path.join(dir_path, class_name) 

        print(f"number of files: {len(glob(class_dir + '/*.wav'))}")
        for file_path in glob(class_dir + '/*.wav'):
            audio_data, sampling_rate = librosa.load(file_path, duration=1.0)

            if len(audio_data) > sampling_rate or len(audio_data) < sampling_rate:
                continue 

            mel_spec = compute_mel_spectrogram(audio_data, sampling_rate)

            # save the mel spectrogram features 
            dataset[class_name].append(mel_spec)

    return dataset 

In [7]:
audio_clean_dataset = load_data_cleaned(dir_path=data_dir)

loading data for class: right
number of files: 2367
loading data for class: background
number of files: 0
loading data for class: go
number of files: 2372
loading data for class: no
number of files: 2375
loading data for class: left
number of files: 2353
loading data for class: stop
number of files: 2380
loading data for class: up
number of files: 2375
loading data for class: down
number of files: 2359
loading data for class: yes
number of files: 2377
loading data for class: on
number of files: 2367
loading data for class: off
number of files: 2357


In [21]:
for class_name, class_dataset in audio_clean_dataset.items():
    print(f"checking class {class_name}") 
    for mel_spec in class_dataset:
        if mel_spec.shape != (128, 44):
            print(f"####### ERROR ######")

checking class right
checking class background
checking class go
checking class no
checking class left
checking class stop
checking class up
checking class down
checking class yes
checking class on
checking class off
