In [1]:
%matplotlib inline

import os

import IPython.display as ipd

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,precision_recall_curve
from sklearn.metrics import precision_recall_fscore_support,f1_score,fbeta_score
from sklearn.metrics import make_scorer
from sklearn.metrics import classification_report
from sklearn.svm import SVC


import librosa
import librosa.display
import ast

import utils

plt.rcParams['figure.figsize'] = (17, 5)

In [2]:
# Directory where mp3 are stored.
AUDIO_DIR = os.environ.get('AUDIO_DIR')

# Load metadata and features.
tracks = utils.load('data/fma_metadata/smalltracks.csv')
genres = utils.load('data/fma_metadata/smallgenres.csv')
features = utils.load('data/fma_metadata/smallfeatures.csv')
echonest = utils.load('data/fma_metadata/smallechonest.csv')

np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

tracks.shape, genres.shape, features.shape, echonest.shape

((8000, 53), (8, 4), (8000, 518), (1294, 249))

In [3]:
tracks['track','genre_top'].value_counts()

Rock             1000
Pop              1000
International    1000
Instrumental     1000
Hip-Hop          1000
Folk             1000
Experimental     1000
Electronic       1000
Name: (track, genre_top), dtype: int64

In [4]:
filename = 'data/fma_small/000/000010.mp3'
x, sr = librosa.load(filename, sr=None, mono=True)
start, end = 7, 17
ipd.Audio(data=x[start*sr:end*sr], rate=sr)



In [5]:

train = tracks['set', 'split'] == 'training'
val = tracks['set', 'split'] == 'validation'
test = tracks['set', 'split'] == 'test'

y_train = tracks.loc[train, ('track', 'genre_top')]
y_test = tracks.loc[test, ('track', 'genre_top')]
X_train = features.loc[train, 'mfcc']
X_test = features.loc[test, 'mfcc']
X_val = features.loc[val, 'mfcc']
y_val = tracks.loc[val, ('track', 'genre_top')]


print('{} training examples, {} testing examples'.format(y_train.size, y_test.size))
print('{} features, {} classes'.format(X_train.shape[1], np.unique(y_train).size))

6400 training examples, 800 testing examples
140 features, 8 classes


# First Model

In [6]:

# Support vector classification.
clf = SVC()
clf.fit(X_train, y_train)
trainscore = clf.score(X_train, y_train)
testscore = clf.score(X_test, y_test)
print('Test Accuracy: {:.2%}'.format(testscore))
print('Train Accuracy: {:.2%}'.format(trainscore))

Test Accuracy: 40.50%
Train Accuracy: 49.12%


In [74]:
svc_pipeline = Pipeline([('ss',StandardScaler()),
                    ('svc',SVC())])

In [75]:
params = {'svc__C':[0.001,0.01,0.1,1,2.5,10,50,100],'svc__gamma':[0.001,0.01,0.1,1,'auto','scale'],
            'svc__kernel':['linear', 'poly', 'rbf', 'sigmoid']}

In [76]:
svc_cv = GridSearchCV(estimator=svc_pipeline,param_grid=params,cv=5)

In [43]:
svc_cv.fit(X_val,y_val)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('ss', StandardScaler()),
                                       ('svc', SVC())]),
             param_grid={'svc__C': [0.001, 0.01, 0.1, 1, 2.5, 5, 10, 50, 100],
                         'svc__gamma': [0.001, 0.01, 0.1, 1, 'auto', 'scale'],
                         'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid']})

In [44]:
svc_best = svc_cv.best_estimator_

In [45]:
svc_cv.best_params_

{'svc__C': 2.5, 'svc__gamma': 'auto', 'svc__kernel': 'rbf'}

In [46]:
svc_best.score(X_train,y_train),svc_best.score(X_test,y_test),svc_best.score(X_val,y_val)

(0.39234375, 0.385, 0.96875)

In [47]:
svc_preds = svc_best.predict(X_test)

In [None]:
svc_cv.fit(X_train,y_train)

In [20]:

# Standardize features by removing the mean and scaling to unit variance.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled =scaler.transform(X_test)
X_val_scaled =scaler.transform(X_val)

In [21]:
svc_pipeline = Pipeline([
                    ('svc',SVC())])

In [22]:
svc_cv_2 = GridSearchCV(estimator=svc_pipeline,param_grid=params,cv=5)

In [28]:
svc_cv_2.fit(X_val_scaled,y_val)

GridSearchCV(cv=5, estimator=Pipeline(steps=[('svc', SVC())]),
             param_grid={'svc__C': [0.001, 0.01, 0.1, 1],
                         'svc__gamma': [0.001, 0.01, 0.1, 1, 'auto', 'scale'],
                         'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid']})

In [29]:
svc_best_2 = svc_cv_2.best_estimator_

In [30]:
svc_cv_2.best_params_

{'svc__C': 1, 'svc__gamma': 'scale', 'svc__kernel': 'rbf'}

In [32]:
svc_best_2.score(X_train_scaled,y_train),svc_best_2.score(X_test_scaled,y_test),svc_best_2.score(X_val_scaled,y_val)

(0.40109375, 0.40875, 0.8725)

In [24]:

# Standardize features by removing the mean and scaling to unit variance.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled =scaler.transform(X_test)

In [25]:
svc_best.score(X_train_scaled,y_train),svc_best.score(X_test_scaled,y_test),svc_best.score(X_val,y_val)

(0.1653125, 0.16, 0.76625)

In [None]:
X_train, y_train = skl.utils.shuffle(X_train, y_train)

# Support vector classification.
clf = skl.svm.SVC(C=0.1, gamma=1, kernel='poly')
clf.fit(X_train, y_train)
trainscore = clf.score(X_train, y_train)
testscore = clf.score(X_test, y_test)
print('Test Accuracy: {:.2%}'.format(testscore))
print('Train Accuracy: {:.2%}'.format(trainscore))

In [326]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [327]:

genre_list = tracks['track', 'genre_top']
encoder = LabelEncoder()
tracks['track', 'genre_top'] = encoder.fit_transform(genre_list)


TypeError: Encoders require their input to be uniformly strings or numbers. Got ['float', 'str']

In [328]:
y

array([3, 3, 6, ..., 6, 6, 3])

In [329]:
# scaler = StandardScaler()
# X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [353]:
smalldf  = tracks.loc[tracks['subset']=='small']

In [354]:
encoder = LabelEncoder()

smalldf['labels'] = encoder.fit_transform(smalldf['track', 'genre_top'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [355]:
smalldf['labels']

track_id
2         3
5         3
10        6
140       2
141       2
         ..
154308    3
154309    3
154413    6
154414    6
155066    3
Name: labels, Length: 8000, dtype: int64

In [356]:
y_train = smalldf.loc[train, ('labels')]
y_test = smalldf.loc[test, ('labels')]
y_val = y_test = smalldf.loc[val, ('labels')]


In [357]:
y_train.value_counts()

7    800
3    800
6    800
2    800
5    800
1    800
4    800
0    800
Name: labels, dtype: int64

In [358]:
y_test.value_counts()

7    100
6    100
5    100
4    100
3    100
2    100
1    100
0    100
Name: labels, dtype: int64

In [368]:
train = smalldf['set', 'split'] == 'training'
val = smalldf['set', 'split'] == 'validation'
test = smalldf['set', 'split'] == 'test'


X_train = features.loc[small & train, 'mfcc']
X_test = features.loc[small & test, 'mfcc']
X_val = features.loc[small & val, 'mfcc']
print('{} training examples, {} testing examples'.format(y_train.size, y_test.size))
print('{} features, {} classes'.format(X_train.shape[1], np.unique(y_train).size))

6400 training examples, 800 testing examples
140 features, 8 classes


In [369]:
X_train, y_train = skl.utils.shuffle(X_train, y_train, random_state=42)

# Standardize features by removing the mean and scaling to unit variance.
scaler = skl.preprocessing.StandardScaler(copy=False)
scaler.fit_transform(X_train)
scaler.transform(X_test)

array([[ 2.02407870e+00,  7.28341156e-01,  2.04241274e+00, ...,
        -5.21931263e-01, -8.24199676e-01, -7.80570294e-01],
       [ 1.40072301e-02, -3.89110770e-01, -2.80515927e-01, ...,
         2.49215243e-01,  1.43903422e-01,  7.42925519e-01],
       [-3.27751541e-02, -4.64044606e-01,  1.57717431e-01, ...,
         4.74054870e-02, -6.77014218e-03, -1.44250996e-01],
       ...,
       [-3.04122154e-01, -4.97753390e-01, -8.27489341e-06, ...,
         7.09121936e-01,  2.35810789e+00,  1.56903540e+00],
       [-2.59806394e-01, -2.07064389e-01,  1.37721511e-01, ...,
        -4.98516891e-01,  1.47795198e-01,  2.76447211e-01],
       [-2.95374053e-01, -7.25148255e-01,  3.32040621e-01, ...,
         4.31275266e-02, -1.65205994e-01,  8.37615946e-01]])

In [370]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(140, activation='relu', input_shape=(X_train.shape[1],)))

model.add(layers.Dense(128, activation='relu'))

model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(8, activation='softmax'))



In [371]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [372]:
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [373]:
test_loss, test_acc = model.evaluate(X_test,y_test)

print('test_acc: ',test_acc)

test_acc:  0.13625


### 5.2 From audio