- Computes Mel-frequencies and deltas
- Computes MFCC and deltas
- Creates train and test sets for both Mel and MFCC features

In [24]:
import glob
import random as rndm

import audioread
import librosa
import numpy as np
import pandas as pd

# import librosa.display
# from matplotlib import pyplot as plt

# import IPython.display as ipd
# import os, audioread, re
# from matplotlib.pyplot import specgram

# from random import shuffle





In [18]:
def load_and_read_data(files_path, sr=16000):
    """Load every ``.wav`` file matching ``files_path + '*.wav'``.

    Args:
        files_path: Path prefix concatenated with ``'*.wav'`` to build the
            glob pattern, so a directory must end with a path separator.
        sr: Target sample rate handed to ``librosa.load`` for every file.
            Passing ``None`` asks librosa to keep each file's native rate.

    Returns:
        A tuple ``(audio, sample_rates, channels, sound_file_paths)`` of
        parallel lists: the decoded time series, the sample rate librosa
        reported for each file, the channel count reported by audioread,
        and the matched file paths.
    """
    audio = []
    sample_rates = []
    channels = []

    # Collect all wav files matching the prefix.
    sound_file_paths = glob.glob(files_path + '*.wav')

    for file in sound_file_paths:
        # Keep the returned rate in its own name. Rebinding ``sr`` here would
        # make the first file's rate the resampling target of every later
        # file whenever the caller passed ``sr=None``.
        ts, file_sr = librosa.load(file, sr=sr)
        audio.append(ts)
        sample_rates.append(file_sr)
        with audioread.audio_open(file) as input_file:
            channels.append(input_file.channels)

    return audio, sample_rates, channels, sound_file_paths
    
def get_more_audio_features(audio, sr=16000):
    """Compute mel spectrograms, MFCCs and their deltas for each signal.

    Args:
        audio: Iterable of audio time series.
        sr: Sample rate passed to the librosa feature functions.

    Returns:
        A tuple ``(frequencies, mel_deltas, mfccs, mfcc_deltas)`` of lists,
        each holding one entry per input signal, in input order.
    """
    frequencies, mel_deltas, mfccs, mfcc_deltas = [], [], [], []

    for signal in audio:
        # Mel spectrogram and its delta.
        mel = librosa.feature.melspectrogram(y=signal, sr=sr)
        frequencies.append(mel)
        mel_deltas.append(librosa.feature.delta(mel))

        # MFCCs are derived from the dB-scaled mel spectrogram.
        mel_db = librosa.power_to_db(mel)
        coefficients = librosa.feature.mfcc(S=mel_db, sr=sr)
        mfccs.append(coefficients)
        mfcc_deltas.append(librosa.feature.delta(coefficients))

    return frequencies, mel_deltas, mfccs, mfcc_deltas

# Folder holding the raw .wav clips; the loader appends '*.wav' directly,
# so the trailing slash is required.
raw_files_path = 'data/cats_dogs/'

# Decode every clip. Note that `sr` is bound to the per-file list of sample
# rates returned by the loader, not to a single number.
audio, sr, channels, file_names = load_and_read_data(files_path=raw_files_path)

# Mel spectrograms, MFCCs and their deltas for every decoded clip.
frequencies, mel_deltas, mfccs, mfcc_deltas = get_more_audio_features(audio=audio)



In [26]:
# Inspect the first row of the transposed mel spectrogram of the first clip
# (presumably a single time frame with one value per mel band - confirm the
# axis order against the librosa documentation).
a = np.transpose(frequencies[0])[0]
a.shape

(128,)

In [27]:
def build_features_dataframe1(features_name, features_cats, features_dogs, files_id, removeSilentFrames=False, labelSilentFrames=False):
    """Build one frame-level dataframe from a list of per-file feature arrays.

    Args:
        features_name: Despite its name, the sequence of per-file feature
            arrays to iterate over. Each array is transposed and every row
            of the transposed array becomes one dataframe row.
        features_cats: Unused; kept for signature compatibility.
        features_dogs: Unused; kept for signature compatibility.
        files_id: Sequence of file identifiers, indexed in parallel with
            ``features_name``.
        removeSilentFrames: Unused; kept for signature compatibility.
        labelSilentFrames: Unused; kept for signature compatibility.

    Returns:
        A dataframe with column ``0`` holding one feature vector per row and
        column ``'file_id'`` holding the identifier of the originating file.
        Row labels restart at 0 for every file. An empty dataframe is
        returned when there are no feature arrays.
    """
    per_file_frames = []
    for index, item in enumerate(features_name):
        # One row per frame, built in a single constructor call instead of
        # appending row by row (DataFrame.append was removed in pandas 2.0).
        df_file = pd.DataFrame([[frame] for frame in np.transpose(item)])
        df_file['file_id'] = files_id[index]
        per_file_frames.append(df_file)

    # pd.concat rejects an empty list, so handle "no files" explicitly.
    if not per_file_frames:
        return pd.DataFrame()
    return pd.concat(per_file_frames)

In [3]:

# GENERIC DATAFRAME that accepts any size features:
#features_name='mel', 'mel_delta', 'mfccs', 'mfcc_delta'
#features_cats is a list with the features extracted for each file.
def build_features_Dataframe(features_name, features_cats, features_dogs, files_id, removeSilentFrames=False, labelSilentFrames=False):
    """Build frame-level dataframes for the cat and dog classes.

    Reads the module-level names ``catLabel``, ``dogLabel`` and ``n_cats``;
    they must be defined before this function is called.

    Args:
        features_name: Name given to the feature column, e.g. ``'mel'``,
            ``'mel_delta'``, ``'mfccs'`` or ``'mfcc_delta'``.
        features_cats: List with the feature array of each cat file.
        features_dogs: List with the feature array of each dog file.
        files_id: Sequence of file identifiers. Cat file ``i`` is looked up
            at ``files_id[i]`` and dog file ``j`` at ``files_id[j + n_cats]``.
        removeSilentFrames: Unused; kept for signature compatibility.
        labelSilentFrames: Unused; kept for signature compatibility.

    Returns:
        A tuple ``(df_cats, df_dogs)``. Each dataframe has the columns
        ``[features_name, 'File_id', 'Label']`` and one row per row of the
        transposed feature array. Row labels restart at 0 for every file.
        A class without files yields an empty dataframe with those columns.
    """
    column_names = [features_name, 'File_id', 'Label']

    def _class_dataframe(features, first_file_index, label):
        # Build the dataframe of one class from its per-file feature arrays.
        per_file_frames = []
        for offset, feature in enumerate(features):
            # One constructor call per file replaces the row-by-row
            # DataFrame.append loop (removed in pandas 2.0, and quadratic).
            df_file = pd.DataFrame([[frame] for frame in np.transpose(feature)])
            df_file['file_id'] = files_id[first_file_index + offset]
            per_file_frames.append(df_file)

        # pd.concat rejects an empty list; return a correctly-shaped frame.
        if not per_file_frames:
            return pd.DataFrame(columns=column_names)

        df_class = pd.concat(per_file_frames)
        df_class['label'] = label
        df_class.columns = column_names
        return df_class

    df_cats = _class_dataframe(features_cats, 0, catLabel)
    # n_cats is only needed to locate dog identifiers, so it is only read
    # when there are dog files to process.
    dogs_offset = n_cats if len(features_dogs) else 0
    df_dogs = _class_dataframe(features_dogs, dogs_offset, dogLabel)
    return df_cats, df_dogs




def split_dataframes(cats_dataframe,dogs_dataframe,test_size=0.3):
    """Split the cat and dog dataframes into train and test sets by file.

    Reads the module-level names ``n_cats``, ``n_dogs``, ``files_id`` and
    ``rndm``. The first ``n_cats`` entries of ``files_id`` are treated as cat
    files and the following ``n_dogs`` entries as dog files.

    The split is made on file identifiers, not rows, so all rows sharing a
    ``'File_id'`` end up on the same side. Classes are not re-balanced:
    each class contributes ``int((1 - test_size) * n_files)`` training files.

    Args:
        cats_dataframe: Dataframe with a ``'File_id'`` column for cats.
        dogs_dataframe: Dataframe with a ``'File_id'`` column for dogs.
        test_size: Fraction of files of each class reserved for testing.

    Returns:
        A tuple ``(df_TRAIN, df_TEST)`` with rows in shuffled order.
    """
    n_train_files_cats = int((1 - test_size) * n_cats)
    n_train_files_dogs = int((1 - test_size) * n_dogs)

    # Copy into fresh lists so the in-place shuffles below can never
    # reorder the shared ``files_id`` sequence.
    files_id_cats = list(files_id[0:n_cats])
    files_id_dogs = list(files_id[n_cats:n_cats + n_dogs])

    rndm.shuffle(files_id_cats)
    files_id_cats_train = files_id_cats[:n_train_files_cats]
    files_id_cats_test = files_id_cats[n_train_files_cats:]

    rndm.shuffle(files_id_dogs)
    files_id_dogs_train = files_id_dogs[:n_train_files_dogs]
    files_id_dogs_test = files_id_dogs[n_train_files_dogs:]

    dftrain_cats = cats_dataframe.loc[cats_dataframe['File_id'].isin(files_id_cats_train)]
    dftest_cats = cats_dataframe.loc[cats_dataframe['File_id'].isin(files_id_cats_test)]
    dftrain_dogs = dogs_dataframe.loc[dogs_dataframe['File_id'].isin(files_id_dogs_train)]
    dftest_dogs = dogs_dataframe.loc[dogs_dataframe['File_id'].isin(files_id_dogs_test)]

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df_TRAIN = pd.concat([dftrain_cats, dftrain_dogs])
    df_TEST = pd.concat([dftest_cats, dftest_dogs])

    # Shuffle the rows; test first, then train, as in the original order.
    df_TEST = df_TEST.sample(frac=1)
    df_TRAIN = df_TRAIN.sample(frac=1)
    return df_TRAIN, df_TEST

In [4]:
# Load the files.
# NOTE(review): load_data and the compute_* helpers are not defined in the
# visible part of this notebook. load_data is called with sr=None while the
# feature helpers below receive sr=16000 - confirm the loaded audio really is
# at 16 kHz.
Cats_audio, cats_sr, channel_cats, Dogs_audio, dogs_sr, channel_dogs= load_data( sound_file_paths, sr=None)
sr=16000
# Feature computation: mel frequencies, their deltas, MFCCs and MFCC deltas,
# each returned as a (cats, dogs) pair.

cats_mel_frequencies,dogs_mel_frequencies=compute_mel_frequencies(Cats_audio,Dogs_audio , sr)
cats_mel_deltas, dogs_mel_deltas=compute_MEL_deltas(cats_mel_frequencies,dogs_mel_frequencies)
cats_mfccs, dogs_mfccs= compute_mfccs(cats_mel_frequencies,dogs_mel_frequencies,sr)
cats_mfcc_delta, dogs_mfcc_delta=compute_MFCC_deltas(cats_mfccs,dogs_mfccs)

In [5]:
# Per-class dataframes, one pair (cats, dogs) for each feature type.
df_cats_melDeltas, df_dogs_melDeltas = build_features_Dataframe(
    'Mel_deltas', cats_mel_deltas, dogs_mel_deltas, files_id)
df_cats_mel, df_dogs_mel = build_features_Dataframe(
    'Mel', cats_mel_frequencies, dogs_mel_frequencies, files_id)
df_cats_MFCCDeltas, df_dogs_MFCCDeltas = build_features_Dataframe(
    'MFCC_deltas', cats_mfcc_delta, dogs_mfcc_delta, files_id)
df_cats_MFCC, df_dogs_MFCC = build_features_Dataframe(
    'MFCC', cats_mfccs, dogs_mfccs, files_id)

# Side-by-side concatenation: each base feature dataframe gains the matching
# delta column on its right.
df_cats_melANDdeltas = pd.concat(
    [df_cats_mel, df_cats_melDeltas['Mel_deltas']], axis=1)
df_cats_MFCC_AND_Deltas = pd.concat(
    [df_cats_MFCC, df_cats_MFCCDeltas['MFCC_deltas']], axis=1)
df_dogs_melANDdeltas = pd.concat(
    [df_dogs_mel, df_dogs_melDeltas['Mel_deltas']], axis=1)
df_dogs_MFCC_AND_Deltas = pd.concat(
    [df_dogs_MFCC, df_dogs_MFCCDeltas['MFCC_deltas']], axis=1)

In [6]:
# Create the train and test sets and save the dataframes as pickles.
# The output folder is written with forward slashes so the path contains no
# backslash escape sequence and resolves to the same folder on every platform.

# Mel + mel-delta features.
df_TRAIN_final, df_TEST_final = split_dataframes(df_cats_melANDdeltas, df_dogs_melANDdeltas, test_size=0.3)
df_TRAIN_final.to_pickle('./Features_sets/' + 'Train_MELandDeltas.pkl')
df_TEST_final.to_pickle('./Features_sets/' + 'Test_MELandDeltas.pkl')

# MFCC + MFCC-delta features. This is a second, independent call to
# split_dataframes, separate from the split used for the mel sets above.
df_TRAIN_final, df_TEST_final = split_dataframes(df_cats_MFCC_AND_Deltas, df_dogs_MFCC_AND_Deltas, test_size=0.3)
df_TRAIN_final.to_pickle('./Features_sets/' + 'Train_MFCCandDeltas.pkl')
df_TEST_final.to_pickle('./Features_sets/' + 'Test_MFCCandDeltas.pkl')