In [None]:
import glob, audioread
import os

import librosa
import numpy as np
import pandas as pd

from util_functions import capture_class, random_shuffle, load_and_save_mel_data, load_and_save_mel_delta_data

In [2]:
def load_and_read_data(files_path, sr=16000):
    """Load every ``.wav`` file found under ``files_path`` with basic metadata.

    Parameters
    ----------
    files_path : str
        Folder holding the recordings, with or without a trailing separator.
        If it is not an existing directory it is treated as a path prefix and
        ``'*.wav'`` is appended to it verbatim.
    sr : int or None, default 16000
        Target sample rate handed unchanged to ``librosa.load`` for every
        file (``None`` asks librosa to keep each file's native rate).

    Returns
    -------
    audio : list
        Time series returned by ``librosa.load``, one entry per file.
    sample_rates : list
        Sample rate returned by ``librosa.load`` for each file.
    channels : list
        Channel count reported by ``audioread`` for each file.
    sound_file_paths : list of str
        Matched paths in sorted order, aligned index-for-index with the
        three lists above.
    """
    audio, sample_rates, channels = [], [], []

    # Join properly when given a directory so a missing trailing slash does
    # not turn the pattern into '<dir>*.wav', which matches nothing useful.
    if os.path.isdir(files_path):
        pattern = os.path.join(files_path, '*.wav')
    else:
        pattern = files_path + '*.wav'

    # glob makes no ordering promise; sort so that row order (and any seeded
    # shuffle applied later) is reproducible across machines.
    sound_file_paths = sorted(glob.glob(pattern))

    for path in sound_file_paths:
        # Bind the returned rate to its own name: rebinding `sr` would make
        # the first file's rate the target for all following files when the
        # caller passed sr=None.
        ts, file_sr = librosa.load(path, sr=sr)
        audio.append(ts)
        sample_rates.append(file_sr)
        with audioread.audio_open(path) as input_file:
            channels.append(input_file.channels)

    return audio, sample_rates, channels, sound_file_paths
    
def get_more_audio_features(audio, sr=16000):
    """Derive mel-spectrogram and MFCC features (plus deltas) for each clip.

    Parameters
    ----------
    audio : iterable
        Audio time series, one item per clip; each is passed as ``y`` to
        ``librosa.feature.melspectrogram``.
    sr : int, default 16000
        Sample rate forwarded to the mel-spectrogram and MFCC calls.

    Returns
    -------
    tuple of four lists
        ``(frequencies, mel_deltas, mfccs, mfcc_deltas)``; each list has one
        entry per input clip, in input order.
    """
    # Result buckets, in return order: mel, mel delta, mfcc, mfcc delta.
    buckets = ([], [], [], [])

    for clip in audio:
        # Mel spectrogram of the clip and its delta.
        mel = librosa.feature.melspectrogram(y=clip, sr=sr)
        mel_delta = librosa.feature.delta(mel)

        # MFCCs are computed from the dB-scaled mel spectrogram, then their delta.
        coeffs = librosa.feature.mfcc(S=librosa.power_to_db(mel), sr=sr)
        coeffs_delta = librosa.feature.delta(coeffs)

        for bucket, value in zip(buckets, (mel, mel_delta, coeffs, coeffs_delta)):
            bucket.append(value)

    frequencies, mel_deltas, mfccs, mfcc_deltas = buckets
    return frequencies, mel_deltas, mfccs, mfcc_deltas

In [3]:
# Folder holding the raw .wav recordings.
raw_files_path = '../data/cats_dogs/'

# Folder that receives the pickled training/test splits.
processed_files_path = '../data_processed/'

# One sample rate shared by loading and feature extraction, so the two steps
# cannot drift apart by relying on matching default arguments.
SAMPLE_RATE = 16000

# Load the recordings. Note that `sr` here is the per-file LIST of sample
# rates returned by the loader, not a single number.
audio, sr, channels, file_names = load_and_read_data(raw_files_path, sr=SAMPLE_RATE)

# Get additional features from the audio.
frequencies, mel_deltas, mfccs, mfcc_deltas = get_more_audio_features(audio, sr=SAMPLE_RATE)

# Combine the lists into a single dataframe: one row per file.
# File_id is the file name without folder or extension. It is derived with
# os.path so it does not depend on a second hard-coded copy of the folder
# path, on the platform's path separator, or on '.wav' appearing elsewhere
# in the name.
features_df = pd.DataFrame({'audio': audio,
                            'sample_rates': sr,
                            'channels': channels,
                            'file_name': file_names,
                            'Mel': frequencies,
                            'Mel_deltas': mel_deltas,
                            'mfccs': mfccs,
                            'mfcc_deltas': mfcc_deltas,
                            'File_id': [os.path.splitext(os.path.basename(f))[0] for f in file_names]
                           })

# Add the class label, computed by capture_class from each File_id.
# Series.map avoids building a full row object per file just to read one column.
features_df['Label'] = features_df['File_id'].map(capture_class)

# Split into training and test sets via random_shuffle with a fixed seed
# (presumably shuffled per class, per the helper's intent — verify in util_functions).
training_df, test_df = random_shuffle(features_df, seed=1)

# Save as pickles; create the output folder first so a fresh checkout does
# not fail on the write.
os.makedirs(processed_files_path, exist_ok=True)
training_df.to_pickle(processed_files_path + 'training_set.pkl')
test_df.to_pickle(processed_files_path + 'test_set.pkl')



In [None]:
# Export mel-based features as numpy files for the clustering step.
# NOTE(review): these exports use sr=22050 while load_and_read_data defaults
# to 16000 — confirm the mismatch is intentional.
# NOTE(review): 'spectograms' in the second destination is spelled as in the
# original path; it is a runtime string and is deliberately left unchanged.
files_path = '../data/cats_dogs/'
for export_features, dest_path in (
    (load_and_save_mel_data, '../data_processed/features_mel_spectrograms/'),
    (load_and_save_mel_delta_data, '../data_processed/features_delta_spectograms/'),
):
    export_features(files_path=files_path, sr=22050, dest_path=dest_path)