# Initial experiments with FMA

In [169]:
## Imports
import numpy as np
import matplotlib.pyplot as plt
import utils
import pandas as pd
import sklearn as skl
import sklearn.utils
import sklearn.preprocessing
import sklearn.decomposition
import sklearn.svm
import sklearn.tree
import sklearn.ensemble
import sklearn.neural_network
import sklearn.naive_bayes
import sklearn.discriminant_analysis
import sklearn.neighbors

In [22]:
## Load data
tracks = utils.load("fma_metadata/tracks.csv")
features = utils.load("fma_metadata/features.csv")
genres = utils.load("fma_metadata/genres.csv")

## Exploration of genre data

Genres seem to be hierarchical, we will need some kind of method of encoding it in a single number/string

In [36]:
tracks["track"].head()

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2,256000,0,,2008-11-26 01:48:12,2008-11-26,168,2,Hip-Hop,[21],[21],,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,256000,0,,2008-11-26 01:48:14,2008-11-26,237,1,Hip-Hop,[21],[21],,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
5,256000,0,,2008-11-26 01:48:20,2008-11-26,206,6,Hip-Hop,[21],[21],,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,192000,0,Kurt Vile,2008-11-25 17:49:06,2008-11-26,161,178,Pop,[10],[10],,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
20,256000,0,,2008-11-26 01:48:56,2008-01-01,311,0,,"[76, 103]","[17, 10, 76, 103]",,978,en,Attribution-NonCommercial-NoDerivatives (aka M...,361,,3,,[],Spiritual Level


In [38]:
tracks["track"][["genre_top", "genres", "genres_all"]]

Unnamed: 0_level_0,genre_top,genres,genres_all
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,Hip-Hop,[21],[21]
3,Hip-Hop,[21],[21]
5,Hip-Hop,[21],[21]
10,Pop,[10],[10]
20,,"[76, 103]","[17, 10, 76, 103]"
...,...,...,...
155316,Rock,[25],"[25, 12]"
155317,Rock,[25],"[25, 12]"
155318,Rock,[25],"[25, 12]"
155319,Rock,[25],"[25, 12]"


## Selection of subset

In [50]:
tracks['set', 'subset']

track_id
2          small
3         medium
5          small
10         small
20         large
           ...  
155316     large
155317     large
155318     large
155319     large
155320     large
Name: (set, subset), Length: 106574, dtype: object

In [64]:
small = tracks[tracks['set', 'subset'] == 'small']
small["set"]

Unnamed: 0_level_0,split,subset
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2,training,small
5,training,small
10,training,small
140,training,small
141,training,small
...,...,...
154308,test,small
154309,test,small
154413,training,small
154414,training,small


In [67]:
medium = tracks[tracks['set', 'subset'] == 'medium']
medium["set"]

Unnamed: 0_level_0,split,subset
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1
3,training,medium
134,training,medium
136,training,medium
139,training,medium
181,test,medium
...,...,...
155297,training,medium
155298,training,medium
155306,training,medium
155307,training,medium


In [70]:
small_indexes = tracks['set', 'subset'] == 'small'
print(small)

track_id
2          True
3         False
5          True
10         True
20        False
          ...  
155316    False
155317    False
155318    False
155319    False
155320    False
Name: (set, subset), Length: 106574, dtype: bool


## Complete selection of training, validation and test data

In [103]:
small_indexes = tracks['set', 'subset'] == 'small'
train_indexes = tracks['set', 'split'] == 'training'
val_indexes   = tracks['set', 'split'] == 'validation'
test_indexes  = tracks['set', 'split'] == 'test'

In [104]:
# Can we really use genres_top? It seems to sometimes be nan
y_train = tracks.loc[small_indexes & train_indexes, ('track', 'genre_top')]
y_val = tracks.loc[small_indexes & val_indexes, ('track', 'genre_top')]
y_test = tracks.loc[small_indexes & test_indexes, ('track', 'genre_top')]

In [105]:
print(y_train.unique())
print(y_train.shape)

print(y_val.unique())
print(y_val.shape)

print(y_test.unique())
print(y_test.shape)

['Hip-Hop' 'Pop' 'Folk' 'Rock' 'Experimental' 'International' 'Electronic'
 'Instrumental']
(6400,)
['Experimental' 'Rock' 'Folk' 'Pop' 'International' 'Electronic' 'Hip-Hop'
 'Instrumental']
(800,)
['Rock' 'International' 'Folk' 'Experimental' 'Instrumental' 'Pop'
 'Hip-Hop' 'Electronic']
(800,)


In [133]:
## Indexing of the features themselves
X_train = features.loc[small_indexes & train_indexes]['mfcc']
X_val = features.loc[small_indexes & val_indexes]['mfcc']
X_test = features.loc[small_indexes & test_indexes]['mfcc']

In [132]:
X_train

statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,std,std,std,std,std,std,std
number,01,02,03,04,05,06,07,08,09,10,...,11,12,13,14,15,16,17,18,19,20
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,3.856789,1.541901,0.000816,0.330728,0.118731,-0.342687,-0.259252,0.146735,0.410656,-0.162872,...,10.059609,8.601942,9.284250,9.245516,8.520863,8.560472,7.651871,7.246555,7.077188,7.391859
5,2.624517,2.415293,0.440233,-0.782131,-0.771069,-0.724216,0.090260,0.152119,0.261731,-0.608905,...,8.863638,9.581952,8.895723,8.141456,8.201844,7.780963,7.132692,7.539753,8.452527,7.334442
10,5.076893,1.161854,2.095651,1.372743,-0.203574,-0.345354,-0.529139,0.561974,0.281350,-0.150672,...,8.289734,7.985110,7.075400,6.972649,7.071393,7.270959,7.051070,6.928591,6.430473,6.186294
140,0.384906,1.203378,0.429393,0.639471,-0.080144,-0.440946,-0.432567,-0.126163,0.507695,0.408915,...,9.883532,8.250456,8.085891,8.114858,8.832722,8.157245,7.297486,7.734456,7.807103,7.986255
141,-0.038922,4.771146,-0.378949,-0.309047,-0.661308,-0.531602,0.029186,0.662524,-0.084562,0.965640,...,9.464321,8.119837,8.918522,8.059117,9.339061,8.451561,8.426965,8.341757,8.572934,9.351485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153955,-0.094801,0.671608,0.272434,0.206937,-0.028461,0.412274,-0.185880,0.579378,-0.116734,0.426431,...,9.264652,7.839735,8.126788,7.582359,8.076923,7.397376,7.137416,6.690812,6.939083,6.786241
153956,2.000893,3.728883,1.277116,2.308511,0.184435,0.112066,0.152872,0.180372,0.020499,-0.061386,...,8.954394,7.874153,9.674955,6.872311,7.636909,7.255323,7.721473,7.024552,7.117678,7.499773
154413,1.811922,1.701329,0.074595,1.446981,-0.049799,0.446221,-0.056744,1.145449,-0.152812,-0.095076,...,10.746084,8.901895,8.683184,8.785449,9.296292,9.567595,8.954231,9.067839,8.833537,8.499075
154414,0.823031,1.711429,0.904345,0.756484,-0.112708,0.160333,-0.107313,-0.201079,-0.288760,-0.048646,...,10.293734,9.340322,9.545247,9.065882,8.945354,8.646461,8.212511,9.037906,8.509675,7.942321


## Testing some common classifiers

In [137]:
# Transfer labels from strings to numbers
enc = skl.preprocessing.LabelEncoder()
y_train_encoded = enc.fit_transform(y_train)
y_val_encoded = enc.transform(y_val)
y_test_encoded = enc.transform(y_test)

In [138]:
y_train_encoded

array([3, 3, 6, ..., 6, 6, 3])

In [150]:
# Standardize features
scaler = skl.preprocessing.StandardScaler(copy=False)
scaler.fit_transform(X_train)
scaler.transform(X_val)
scaler.transform(X_test)

array([[ 2.02407870e+00,  7.28341156e-01,  2.04241274e+00, ...,
        -5.21931263e-01, -8.24199676e-01, -7.80570294e-01],
       [ 1.40072301e-02, -3.89110770e-01, -2.80515927e-01, ...,
         2.49215243e-01,  1.43903422e-01,  7.42925519e-01],
       [-3.27751541e-02, -4.64044606e-01,  1.57717431e-01, ...,
         4.74054870e-02, -6.77014218e-03, -1.44250996e-01],
       ...,
       [-3.04122154e-01, -4.97753390e-01, -8.27489341e-06, ...,
         7.09121936e-01,  2.35810789e+00,  1.56903540e+00],
       [-2.59806394e-01, -2.07064389e-01,  1.37721511e-01, ...,
        -4.98516891e-01,  1.47795198e-01,  2.76447211e-01],
       [-2.95374053e-01, -7.25148255e-01,  3.32040621e-01, ...,
         4.31275266e-02, -1.65205994e-01,  8.37615946e-01]])

In [194]:
# Using an SVM
X_train, y_train_encoded = skl.utils.shuffle(X_train, y_train_encoded, random_state=42)
#clf = skl.neighbors.KNeighborsClassifier(n_neighbors=200)
#clf = skl.svm.SVC()
#clf = skl.tree.DecisionTreeClassifier(max_depth=5)
#clf = skl.ensemble.AdaBoostClassifier(n_estimators=10)
#clf = skl.ensemble.RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
#clf = skl.naive_bayes.GaussianNB()
#clf = skl.discriminant_analysis.QuadraticDiscriminantAnalysis()
clf = skl.neural_network.MLPClassifier(hidden_layer_sizes=(200,50), max_iter=2000)
clf.fit(X_train, y_train_encoded)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(200, 50), learning_rate='constant',
       learning_rate_init=0.001, max_iter=2000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [195]:
print(clf.score(X_train, y_train_encoded))
print(clf.score(X_val, y_val_encoded))
print(clf.score(X_test, y_test_encoded))



0.99984375
0.125
0.4025
