In [None]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import sklearn.preprocessing
import librosa
import librosa.display
import IPython.display
import os
import pandas as pd

In [None]:
# List every entry of the dataset root, then keep only the entries that are
# not named in the exclusion list below; what remains is the set of classes.
audioFolders = os.listdir("speech_commands_v0.01")
filesToRemove = ["README.md","LICENSE", "testing_list.txt", "validation_list.txt",".DS_Store", "_background_noise_", 'nine','eight','tree','three', 'sheila', 'one', 'bird', 'zero', 'seven', 'up', 'marvin', 'two', 'house', 'down', 'six', 'yes', 'on', 'five', 'off', 'four','cat', 'bed', 'happy','wow','dog']

# Filter rather than calling list.remove() per entry: remove() raises
# ValueError when an entry is absent from the directory listing (for example
# ".DS_Store" may not exist), which would abort the whole cell.
# The listing order returned by os.listdir is preserved.
excluded = set(filesToRemove)
audioFolders = [name for name in audioFolders if name not in excluded]
print(audioFolders)
print(f"Number of classes: {len(audioFolders)}")

In [None]:
def _fit_frames(feature, n_frames):
    """Return a copy of a 2-D array with exactly ``n_frames`` columns (truncate or right-pad with zeros)."""
    feature = feature[:, :n_frames]
    missing = n_frames - feature.shape[1]
    return np.pad(feature, pad_width=((0, 0), (0, missing)), mode='constant')


def extract_feature_2d(file_name, n_mfcc, window, hop_length, n_fft, pad):
    """Compute a log-mel spectrogram and MFCCs for one audio file, each with ``pad`` columns.

    Parameters
    ----------
    file_name : str
        Path of the audio file; loaded with ``librosa.load`` using its defaults.
    n_mfcc : int
        Number of MFCC coefficients to keep.
    window : str
        Window specification forwarded to the mel spectrogram computation.
    hop_length : int
        Hop length forwarded to the mel spectrogram computation.
    n_fft : int
        FFT size forwarded to the mel spectrogram computation.
    pad : int
        Number of columns (second axis) both returned arrays are fitted to.

    Returns
    -------
    tuple
        ``(mel_spec, mfccs)``: the dB-scaled mel spectrogram and the MFCC
        matrix. Inputs shorter than ``pad`` columns are right-padded with
        zeros; longer inputs are truncated to the first ``pad`` columns
        (previously a longer input made ``np.pad`` fail on a negative width).
    """
    audio, sample_rate = librosa.load(file_name)

    # Melspectrogram, converted from power to dB
    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate, window=window, hop_length=hop_length, n_fft=n_fft)
    mel_db = librosa.power_to_db(mel)

    # MFCC: computed from the log-mel spectrogram above instead of from the
    # raw signal, so the STFT / mel filtering is not run a second time.
    # librosa builds the same default log-mel input itself when given `y`.
    mfccs = librosa.feature.mfcc(S=mel_db, n_mfcc=n_mfcc)

    # NOTE: the padding value is 0 for both features, i.e. 0 dB for the
    # mel spectrogram (unchanged from the original behaviour).
    mel_spec = _fit_frames(mel_db, pad)
    mfccs = _fit_frames(mfccs, pad)

    return mel_spec, mfccs


In [None]:
from tqdm.notebook import tqdm

# Accumulators. `features` and `features2d` are initialised here but not
# filled in this cell; the loop below only appends to the two lists after them.
features = []
features2d = []
featuresMelspec = []
featuresMfcc = []

# Extraction settings passed to extract_feature_2d for every file
n_mfcc = 12
window = "hamming"
hop_length = 128
n_fft = 882
# Number of frames each feature matrix is fitted to.
# pad = sr/hop = 22050/hop
pad = 173

for label in tqdm(audioFolders):
    print(f"Audio: {label}")

    audios = os.listdir(f"speech_commands_v0.01/{label}")

    for audio in tqdm(audios):
        # Check the actual extension: a substring test (".wav" in name) would
        # also accept names such as "clip.wav.bak" and try to decode them.
        if not audio.endswith(".wav"):
            print(f"Not audio: {audio}")
            continue

        file_name = f"speech_commands_v0.01/{label}/{audio}"

        mel_spec, mfccs = extract_feature_2d(file_name, n_mfcc, window, hop_length, n_fft, pad)

        # One record per file: [feature matrix, class label, source file name]
        featuresMelspec.append([mel_spec,label, audio])
        featuresMfcc.append([mfccs,label, audio])

In [None]:
# Wrap the collected [feature, label, file name] records in pandas DataFrames,
# one frame per feature type.
featuresMelspecDf = pd.DataFrame(
    data=featuresMelspec,
    columns=['feature', 'class_label', 'file_name'],
)
featuresMfccDf = pd.DataFrame(
    data=featuresMfcc,
    columns=['feature2d', 'class_label', 'file_name'],
)


In [None]:
# Save features
import pickle

# open(..., 'wb') does not create missing directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs("features", exist_ok=True)

# The file names encode the extraction settings so different runs do not
# overwrite each other.
with open(f"features/melspec_N{n_mfcc}_W{window}_WL{n_fft}_HL{hop_length}.pickle", 'wb') as output:
    pickle.dump(featuresMelspecDf, output)

with open(f"features/mfcc_2d_N{n_mfcc}_W{window}_WL{n_fft}_HL{hop_length}.pickle", 'wb') as output:
    pickle.dump(featuresMfccDf, output)