In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import re
import scipy
import time
import collections
import itertools
import librosa
import pickle

In [9]:
data_raw = pd.read_json(path_or_buf='nsynth-test/examples.json', orient='index')

In [12]:
def feature_extract(file):
    
    y, sr = librosa.load(file)
        

    y_harmonic, y_percussive = librosa.effects.hpss(y)
    if np.mean(y_harmonic)>np.mean(y_percussive):
        harmonic=1
    else:
        harmonic=0
        
    #Mel-frequency cepstral coefficients (MFCCs)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    #temporal averaging
    mfcc=np.mean(mfcc,axis=1)
    
    #get the mel-scaled spectrogram
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,fmax=8000)  
    #temporally average spectrogram
    spectrogram = np.mean(spectrogram, axis = 1)
    
    #compute chroma energy
    chroma = librosa.feature.chroma_cens(y=y, sr=sr)
    #temporally average chroma
    chroma = np.mean(chroma, axis = 1)
    
    #compute spectral contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = np.mean(contrast, axis= 1)
    
    return [harmonic, mfcc, spectrogram, chroma, contrast]

In [55]:
#create dictionary to store all test features
filenames_test = data_raw.index.tolist()
dict_test = {}
count = 500
for i in range(0, 100):
    features = feature_extract("nsynth-test/audio/" + filenames_test[i] + '.wav') #specify directory and .wav
    dict_test[filenames_test[i]] = features

In [58]:
features = pd.DataFrame.from_dict(dict_test, orient='index',
                                       columns=['harmonic', 'mfcc', 'spectro', 'chroma', 'contrast'])
features.head()

Unnamed: 0,harmonic,mfcc,spectro,chroma,contrast
bass_synthetic_068-049-025,1,"[-522.8658, 46.664143, 34.379803, 24.787596, 1...","[0.010030984, 0.026242843, 0.043270785, 0.0930...","[0.14567767, 0.8257, 0.07661895, 0.034161784, ...","[35.980837578584854, 29.09917598685963, 27.838..."
keyboard_electronic_001-021-127,0,"[-444.644, 193.89053, 27.298918, -3.6895258, 6...","[0.055242054, 0.1996687, 11.73172, 20.330856, ...","[0.08902063, 0.10597091, 0.077824585, 0.392217...","[19.94874740592431, 19.84175193955027, 18.7577..."
guitar_acoustic_010-066-100,0,"[-439.3734, 59.546818, 2.8950782, -12.039842, ...","[0.0014635242, 0.0026336873, 0.0022025616, 0.0...","[0.10209415, 0.11988667, 0.064387664, 0.072573...","[12.373146069245387, 34.62253462234786, 35.319..."
reed_acoustic_037-068-127,1,"[-372.98746, 68.7968, -39.474766, -22.089417, ...","[0.0001457585, 0.00046337646, 0.00040781248, 2...","[0.05025812, 0.038772635, 0.023343502, 0.36616...","[19.619532208524717, 25.605338163547735, 30.76..."
flute_acoustic_002-077-100,1,"[-364.77966, 12.56691, -42.82651, 19.884623, -...","[4.9267728e-05, 0.00012820741, 0.00046132298, ...","[0.07728491, 0.08046897, 0.073985286, 0.037789...","[20.51479887365293, 10.440213092344615, 40.267..."


In [86]:
#extract mfccs
mfcc_test = pd.DataFrame(features.mfcc.values.tolist(),index=features.index)
mfcc_test = mfcc_test.add_prefix('mfcc_')

#extract spectro
spectro_test = pd.DataFrame(features.spectro.values.tolist(),index=features.index)
spectro_test = spectro_test.add_prefix('spectro_')


#extract chroma
chroma_test = pd.DataFrame(features.chroma.values.tolist(),index=features.index)
chroma_test = chroma_test.add_prefix('chroma_')


#extract contrast
contrast_test = pd.DataFrame(features.contrast.values.tolist(),index=features.index)
contrast_test = chroma_test.add_prefix('contrast_')

#drop the old columns
features = features.drop(labels=['mfcc', 'spectro', 'chroma', 'contrast'], axis=1)

#concatenate
df_features_test=pd.concat([features, mfcc_test, spectro_test, chroma_test, contrast_test],
                           axis=1, join='inner')

In [87]:
def instrument_code(filename):
    """
    Function that takes in a filename and returns instrument based on naming convention
    """
    class_names=['bass', 'brass', 'flute', 'guitar', 
             'keyboard', 'mallet', 'organ', 'reed', 
             'string', 'synth_lead', 'vocal']
    
    for name in class_names:
        if name in filename:
            return class_names.index(name)
    else:
        return None
targets_test = []
for name in df_features_test.index.tolist():
    targets_test.append(instrument_code(name))

df_features_test['targets'] = targets_test
df_features_test.head()

Unnamed: 0,harmonic,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,...,contrast_chroma_3,contrast_chroma_4,contrast_chroma_5,contrast_chroma_6,contrast_chroma_7,contrast_chroma_8,contrast_chroma_9,contrast_chroma_10,contrast_chroma_11,targets
bass_synthetic_068-049-025,1,-522.865784,46.664143,34.379803,24.787596,17.420424,10.198802,4.609535,1.380599,-1.023339,...,0.034162,0.035634,0.025365,0.019434,0.039115,0.092514,0.086638,0.061319,0.091409,0
keyboard_electronic_001-021-127,0,-444.644012,193.890533,27.298918,-3.689526,6.295263,8.093067,10.283858,6.089182,2.495069,...,0.392218,0.563529,0.143236,0.164834,0.211634,0.35358,0.422365,0.01923,0.009007,4
guitar_acoustic_010-066-100,0,-439.373413,59.546818,2.895078,-12.039842,-12.553243,-11.451952,-12.535439,-20.456726,-19.256969,...,0.072573,0.091863,0.09099,0.840737,0.093613,0.048391,0.030883,0.015003,0.009667,3
reed_acoustic_037-068-127,1,-372.987457,68.796799,-39.474766,-22.089417,-50.149693,-16.565742,-29.849129,-16.627388,-17.712208,...,0.366166,0.036109,0.006731,0.023056,0.020713,0.885966,0.061736,0.016221,0.002263,7
flute_acoustic_002-077-100,1,-364.779663,12.56691,-42.826511,19.884623,-29.598228,-21.321129,-18.30831,-9.495083,7.672029,...,0.037789,0.052419,0.91079,0.022027,0.030132,0.009579,0.028886,0.048302,0.01697,2


In [88]:
X = df_features_test.drop(labels=['targets'], axis=1)
y = df_features_test['targets']

In [89]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)
X_train.head()

Unnamed: 0,harmonic,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,...,contrast_chroma_2,contrast_chroma_3,contrast_chroma_4,contrast_chroma_5,contrast_chroma_6,contrast_chroma_7,contrast_chroma_8,contrast_chroma_9,contrast_chroma_10,contrast_chroma_11
keyboard_electronic_001-063-075,0,-486.980774,61.603725,6.447635,4.832292,14.117064,1.65068,-6.994359,-1.949207,0.456022,...,0.081251,0.855828,0.127744,0.08988,0.060506,0.161025,0.081554,0.010892,0.159763,0.0
bass_synthetic_068-042-075,0,-511.22522,56.006729,45.305599,35.635181,28.023079,20.877127,14.919964,11.143857,8.461707,...,0.07312,0.054922,0.055106,0.108761,0.828937,0.116662,0.070248,0.04524,0.025391,0.024701
bass_synthetic_034-086-075,1,-601.557983,-13.29833,-15.405071,7.378573,-12.147111,13.712168,6.80619,13.47465,-7.904801,...,0.771877,0.131279,0.10865,0.06472,0.034783,0.027477,0.022424,0.340835,0.028006,0.042188
bass_synthetic_033-085-050,1,-485.005341,15.44673,-2.398754,-2.644888,4.577124,12.059195,19.964092,18.835772,4.725045,...,0.300473,0.150608,0.100349,0.085223,0.081332,0.04113,0.066574,0.053438,0.03034,0.009502
organ_electronic_113-068-050,0,-365.117584,110.905922,-41.027218,1.542724,7.61323,-32.344093,-22.845663,-5.534832,1.187749,...,0.047647,0.046029,0.041839,0.048347,0.05038,0.324329,0.81618,0.382194,0.034572,0.029736


In [90]:
from sklearn.naive_bayes import GaussianNB
clf_NB = GaussianNB()

#fit to training data
clf_NB.fit(X_train, y_train)

In [93]:
y_pred_NB = clf_NB.predict(X_test)
accuracy_NB = np.mean(y_pred_NB == y_test)
print("The accuracy of Naive Bayes is {0:.2%}".format(accuracy_NB))

The accuracy of Naive Bayes is 23.33%


In [91]:
from sklearn.ensemble import RandomForestClassifier
clf_Rf =RandomForestClassifier(n_estimators=20, max_depth=50, warm_start=True)
clf_Rf.fit(X_train, y_train)

In [92]:
y_pred_RF = clf_Rf.predict(X_test)
accuracy_RF = np.mean(y_pred_RF == y_test)
print("The accuracy of Random Forest is {0:.2%}".format(accuracy_RF))

The accuracy of Random Forest is 36.67%
