In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
import re
from os import path, listdir
from ipywidgets import FloatProgress
from matplotlib.colors import ListedColormap
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
def tolerant_score(expected, predicted):
    score = 0.
    for one, two in zip(expected, predicted):
        for xx, yy in zip(one, two):
            if xx == yy:
                score += 0.33
    return score / len(expected)

In [3]:
def get_frames(audio, fr = 44100., fs = 0.33):
    return [audio[x:x+int(fr * fs)] for x in [k * int(fr * fs) for k in range(int(1 / fs))]]

In [4]:
from essentia.standard import Windowing, ZeroCrossingRate, Energy
from essentia.standard import Spectrum, Centroid, CentralMoments, DistributionShape, MFCC

window = Windowing()
zcr = ZeroCrossingRate()
energy = Energy()
spectrum = Spectrum()
centroid = Centroid()
cm = CentralMoments()
dshape = DistributionShape()
mfcc = MFCC()

def get_features(frame):
    spec = spectrum(window(frame))
    bands, coefs = mfcc(spec)
    res = [zcr(frame), energy(frame), centroid(spec)]
    res.extend(dshape(cm(spec)))
    res.extend(bands)
    res.extend(coefs)
    return res

In [5]:
from sklearn.preprocessing import StandardScaler

data = pd.read_csv('thebestclasses.csv', index_col='Unnamed: 0')

X = data.drop(['Label', 'Bands mean', 'Coefs mean'], axis = 1)
y = data['Label']

sc = StandardScaler()
X_train = StandardScaler.fit_transform(sc, X)

In [6]:
from essentia.standard import MonoLoader

list_of_files = listdir(path.join(path.abspath(path.curdir), 'testing'))
list_of_instr = ['pia', 'vio', 'tru']

wav_ptn = re.compile('wav')

audios = []

for fname in list_of_files:
    if re.search(wav_ptn, fname):
        audios.append(MonoLoader(filename=path.join(path.abspath(path.curdir), 'testing', fname))())
        
print list_of_files

['01 - Da Duh Dah-5.wav', '01 - Chet Baker - Prayer For The Newborn-20.txt', "01. I Don't Worry About a Thing-4.wav", '01 - Da Duh Dah-5.txt', 'Tool - Lateralis-5.wav', '01 - Chet Baker - Prayer For The Newborn-20.wav', "01. I Don't Worry About a Thing-4.txt", 'Tool - Lateralis-5.txt', 'Debussy, Sonata for Violin & Piano - 1 Allegro vivo-2.wav', 'Debussy, Sonata for Violin & Piano - 1 Allegro vivo-2.txt']


In [7]:
answers = np.array([[1., 0., 1.], [1., 0., 0.], [0., 0., 0.], [1., 0., 1.], [1., 1., 0.]], dtype=float64)

In [8]:
features = []

for audio in audios:
    for frame in get_frames(audio, fs=0.2):
        features.append(get_features(frame))

print np.array(features).shape
print np.asmatrix(features)[:2]

(25, 59)
[[  2.26417229e-01   1.24546108e+01   3.58231723e-01   8.42445940e-02
    1.95207939e-01  -1.48568106e+00   2.36716824e-05   2.26124175e-05
    1.51999411e-05   1.59696028e-05   7.12649080e-06   1.62243778e-05
    3.67105531e-05   8.59241652e-07   1.45555703e-06   4.45174152e-07
    1.85010704e-05   8.36041636e-06   4.28339490e-07   6.91948117e-06
    2.24877858e-05   4.34578737e-07   2.29469651e-06   2.95310565e-06
    1.83786881e-07   1.47582011e-06   1.14042713e-07   3.43558924e-07
    3.99035230e-07   2.58517105e-07   1.40421236e-07   5.93999374e-08
    1.32796373e-07   1.64011212e-07   8.11032237e-08   1.13056643e-07
    7.49943680e-08   4.64492445e-08   5.44427543e-08   1.32837670e-07
    2.19250964e-07   2.38088347e-07   2.21603585e-07   1.41953393e-07
    1.73251735e-07   2.27876697e-07  -7.73353638e+02   9.54976654e+01
    2.44143009e+01  -1.29470272e+01   1.51408958e+01   1.04692383e+01
    1.01076031e+01   4.83669281e-01  -1.14575424e+01  -4.95921516e+00
   -7.76071

Single label

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.grid_search import RandomizedSearchCV

rf_params = {'n_estimators': range(1, 50, 3), 'max_depth': range(1, 20, 1), 'min_samples_leaf': range(1, 10, 1)}

model = RandomForestClassifier(n_estimators=43, max_depth=12, min_samples_leaf=3, random_state=0)
optimizer = RandomizedSearchCV(estimator=model, param_distributions=rf_params, cv=5)
optimizer.fit(X_train, y)
preds = optimizer.predict(features)

print 'Best params: %s' % optimizer.best_params_
print preds

Best params: {'n_estimators': 22, 'max_depth': 15, 'min_samples_leaf': 1}
[ 1.  1.  1.  1.  1.  2.  2.  1.  2.  1.  1.  1.  1.  1.  1.  0.  2.  2.
  2.  2.  1.  0.  0.  2.  1.]


In [15]:
preds_by_comp = [preds[x:x+5] for x in [k * 5 for k in range(5)]]
preds_mean = [round(x.mean(), 0) for x in preds_by_comp]
preds_by_comp

[array([ 1.,  1.,  1.,  1.,  1.]),
 array([ 2.,  2.,  1.,  2.,  1.]),
 array([ 1.,  1.,  1.,  1.,  1.]),
 array([ 0.,  2.,  2.,  2.,  2.]),
 array([ 1.,  0.,  0.,  2.,  1.])]

Multilabel

In [11]:
answers = np.array([[1., 0., 1.],
                    [1., 0., 0.],
                    [0., 0., 0.],
                    [1., 0., 1.],
                    [1., 1., 0.]], dtype=float64)

In [12]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer

base = RandomForestClassifier(n_estimators=43, max_depth=12, min_samples_leaf=3, random_state=0)

clf = OneVsRestClassifier(base)
clf.fit(X_train, MultiLabelBinarizer().fit_transform([[yy] for yy in y]))

y_pred = clf.predict(features)

ypreds_by_comp = [y_pred[x:x+5] for x in [k * 5 for k in range(5)]]

res = []
for pred in ypreds_by_comp:
    res.append([x.mean() for x in pred.T])

for one, two in zip(answers, y_pred):
    print one, two

[ 1.  0.  1.] [0 0 0]
[ 1.  0.  0.] [0 0 0]
[ 0.  0.  0.] [1 0 0]
[ 1.  0.  1.] [0 0 1]
[ 1.  1.  0.] [0 0 0]


In [13]:
for one, two in zip(answers, res):
    print one, two

[ 1.  0.  1.] [0.20000000000000001, 0.0, 0.20000000000000001]
[ 1.  0.  0.] [0.0, 0.20000000000000001, 0.0]
[ 0.  0.  0.] [0.0, 0.20000000000000001, 0.59999999999999998]
[ 1.  0.  1.] [0.0, 0.59999999999999998, 0.59999999999999998]
[ 1.  1.  0.] [0.0, 0.80000000000000004, 0.20000000000000001]
