In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
import librosa
import numpy as np
import librosa.display
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from server.database_wrapper import PostgresqlWrapper
from server.utils import Util
from feature_extractor import FeatureExtractor
import xgboost as xgb

In [18]:
# Song downloading from database
db = PostgresqlWrapper(5)
data = db.fetch_songs(150, genres=['classical', 'metal', 'blues', 'hiphop', 'disco', 'pop', 'rock'])

2018-04-30 01:28:40,709 - server.database_wrapper - INFO - Creating pool
2018-04-30 01:28:40,709 - server.database_wrapper - INFO - Creating pool
2018-04-30 01:28:40,918 - server.database_wrapper - INFO - Done register types
2018-04-30 01:28:40,918 - server.database_wrapper - INFO - Done register types
2018-04-30 01:28:40,921 - server.database_wrapper - INFO - Creating table
2018-04-30 01:28:40,921 - server.database_wrapper - INFO - Creating table
2018-04-30 01:28:41,010 - server.database_wrapper - INFO - Start fetching 150 songs
2018-04-30 01:28:41,010 - server.database_wrapper - INFO - Start fetching 150 songs
2018-04-30 01:28:41,024 - server.database_wrapper - INFO - Statement Select * from music where genre = classical order by id limit 50 offset 0
2018-04-30 01:28:41,024 - server.database_wrapper - INFO - Statement Select * from music where genre = classical order by id limit 50 offset 02018-04-30 01:28:41,028 - server.database_wrapper - INFO - Statement Select * from music where 

2018-04-30 01:30:10,956 - server.database_wrapper - INFO - Statement Select * from music where genre = rock order by id limit 50 offset 50
2018-04-30 01:30:10,956 - server.database_wrapper - INFO - Statement Select * from music where genre = rock order by id limit 50 offset 50


In [19]:
# Create dataset
util = Util()
songs, genres = util.to_dataset(data)

Prepared 50 songs
Prepared 100 songs
Prepared 150 songs
Prepared 200 songs
Prepared 250 songs
Prepared 300 songs
Prepared 350 songs
Prepared 400 songs
Prepared 450 songs
Prepared 500 songs
Prepared 550 songs
Prepared 600 songs
Prepared 650 songs
Prepared 700 songs


In [20]:
# Extract features (mfcc means and zero_crossing_rate for now)
extractor = FeatureExtractor(songs)
mfcc_means = extractor.generate_mfcc(n_mfcc=20, sr=22050)

Got mfcc for 0 songs
Got mfcc for 50 songs
Got mfcc for 100 songs
Got mfcc for 150 songs
Got mfcc for 200 songs
Got mfcc for 250 songs
Got mfcc for 300 songs
Got mfcc for 350 songs
Got mfcc for 400 songs
Got mfcc for 450 songs
Got mfcc for 500 songs
Got mfcc for 550 songs
Got mfcc for 600 songs
Got mfcc for 650 songs


In [21]:
zcrs = extractor.generate_zero_crossing_rate(None, None)

Got zero_cross_rate for 0 songs
Got zero_cross_rate for 50 songs
Got zero_cross_rate for 100 songs
Got zero_cross_rate for 150 songs
Got zero_cross_rate for 200 songs
Got zero_cross_rate for 250 songs
Got zero_cross_rate for 300 songs
Got zero_cross_rate for 350 songs
Got zero_cross_rate for 400 songs
Got zero_cross_rate for 450 songs
Got zero_cross_rate for 500 songs
Got zero_cross_rate for 550 songs
Got zero_cross_rate for 600 songs
Got zero_cross_rate for 650 songs


In [22]:
X = np.hstack((mfcc_means, zcrs))

In [23]:
# Prepare data for xgboost. Encode genres. Train/test split
y = LabelEncoder().fit_transform(genres)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [24]:
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [25]:
# Xgboost training
param = {'objective': "multi:softmax", "num_class": np.unique(y_train).size}
evallist = [(dtrain, 'train'), (dtest, 'eval')]
bst = xgb.train(param, dtrain, evals=evallist)

[0]	train-merror:0.140816	eval-merror:0.433333
[1]	train-merror:0.077551	eval-merror:0.428571
[2]	train-merror:0.04898	eval-merror:0.414286
[3]	train-merror:0.034694	eval-merror:0.404762
[4]	train-merror:0.026531	eval-merror:0.404762
[5]	train-merror:0.016327	eval-merror:0.4
[6]	train-merror:0.008163	eval-merror:0.395238
[7]	train-merror:0.006122	eval-merror:0.380952
[8]	train-merror:0.002041	eval-merror:0.366667
[9]	train-merror:0.002041	eval-merror:0.37619


In [31]:
print(classification_report(y_test, bst.predict(dtest), target_names=['classical', 'metal', 'blues', 'hiphop', 'disco', 'pop', 'rock']))

             precision    recall  f1-score   support

  classical       0.60      0.50      0.55        30
      metal       0.87      0.87      0.87        30
      blues       0.45      0.47      0.46        30
     hiphop       0.61      0.63      0.62        30
      disco       0.80      0.80      0.80        30
        pop       0.73      0.73      0.73        30
       rock       0.33      0.37      0.35        30

avg / total       0.63      0.62      0.63       210



In [None]:
# Pay attention to rock. SICK!