In [14]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
import librosa
import numpy as np
import librosa.display
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
from server.database_wrapper import PostgresqlWrapper
from server.utils import Util
from server.feature_extractor import FeatureAggregator
import xgboost as xgb
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import StratifiedKFold
from numpy import binary_repr

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Импортим данные

In [15]:
data = pd.DataFrame.from_csv("extracted_data_new.csv", encoding="utf-8").values
X = np.array(data[:, :-1], dtype = float)
genre_list = data[:, -1]

## Определяем модель

In [16]:
rfc = RandomForestClassifier()
svc = SVC()
skf = StratifiedKFold(n_splits=3)

param_grid = { 
    'n_estimators': [ 70, 150, 370],
    'max_features': ['log2'],
    'max_depth' : [10,15, 20],
    'criterion' :['gini']
}

grid_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=skf, n_jobs = -1, verbose=True)
grid_svc = GridSearchCV(svc, {"C": np.logspace(0,2,num=20), "kernel": ["poly", "rbf", "sigmoid"]}, n_jobs = -1, verbose=True, cv = skf)
eclf = VotingClassifier(estimators=[('svc', grid_svc), ('rfc', grid_rfc)], voting='hard')

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

 ## Модель из 6 жанров

In [42]:
mask6 = (genre_list != 'country') & (genre_list != 'blues') &\
        (genre_list != 'disco') & (genre_list != 'reggae')

In [48]:
X6 = X_scaled[mask6]
encoder6 = LabelEncoder()
y6 = encoder6.fit_transform(genre_list[mask6])

In [49]:
X_train, X_test, y_train, y_test = train_test_split(X6, y6, test_size=0.2, random_state=5, stratify=y6)
eclf.fit(X_train, y_train)
target = encoder6.inverse_transform(np.arange(6))
print(classification_report(y_test, eclf.predict(X_test), target_names=target))

Fitting 3 folds for each of 60 candidates, totalling 180 fits
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    1.5s finished
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    5.4s finished


             precision    recall  f1-score   support

  classical       0.95      1.00      0.98        20
     hiphop       0.74      0.85      0.79        20
       jazz       1.00      0.95      0.97        20
      metal       0.90      0.90      0.90        20
        pop       0.70      0.80      0.74        20
       rock       0.86      0.60      0.71        20

avg / total       0.86      0.85      0.85       120



  if diff:
  if diff:


## Модель из 7 жанров

In [50]:
mask7 = (genre_list != 'country') & (genre_list != 'blues') &\
        (genre_list != 'disco')

In [51]:
X7 = X_scaled[mask7]
encoder7 = LabelEncoder()
y7 = encoder7.fit_transform(genre_list[mask7])

In [52]:
X_train, X_test, y_train, y_test = train_test_split(X7, y7, test_size=0.2, random_state=5, stratify=y7)
eclf.fit(X_train, y_train)
target = encoder7.inverse_transform(np.arange(7))
print(classification_report(y_test, eclf.predict(X_test), target_names=target))

Fitting 3 folds for each of 60 candidates, totalling 180 fits
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    2.0s finished
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    5.8s finished


             precision    recall  f1-score   support

  classical       0.83      1.00      0.91        20
     hiphop       0.76      0.80      0.78        20
       jazz       1.00      0.90      0.95        20
      metal       0.78      0.90      0.84        20
        pop       0.89      0.80      0.84        20
     reggae       0.73      0.80      0.76        20
       rock       0.86      0.60      0.71        20

avg / total       0.84      0.83      0.83       140



  if diff:
  if diff:


## Модель из 8 жанров

In [53]:
mask8 = (genre_list != 'disco') & (genre_list != 'reggae')

In [54]:
X8 = X_scaled[mask8]
encoder8 = LabelEncoder()
y8 = encoder8.fit_transform(genre_list[mask8])

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X8, y8, test_size=0.2, random_state=5, stratify=y8)
eclf.fit(X_train, y_train)
target = encoder8.inverse_transform(np.arange(8))
print(classification_report(y_test, eclf.predict(X_test), target_names=target))

Fitting 3 folds for each of 60 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    2.6s finished


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    7.0s finished


             precision    recall  f1-score   support

      blues       0.67      0.80      0.73        20
  classical       0.95      0.90      0.92        20
    country       0.62      0.75      0.68        20
     hiphop       0.73      0.80      0.76        20
       jazz       0.88      0.70      0.78        20
      metal       0.76      0.95      0.84        20
        pop       0.68      0.75      0.71        20
       rock       0.75      0.30      0.43        20

avg / total       0.75      0.74      0.73       160



  if diff:
  if diff:


## Модель из 9 жанров

In [56]:
mask9 = (genre_list != 'disco')

In [57]:
X9 = X_scaled[mask9]
encoder9 = LabelEncoder()
y9 = encoder9.fit_transform(genre_list[mask9])

In [58]:
X_train, X_test, y_train, y_test = train_test_split(X9, y9, test_size=0.2, random_state=5, stratify=y9)
eclf.fit(X_train, y_train)
target = encoder9.inverse_transform(np.arange(9))
print(classification_report(y_test, eclf.predict(X_test), target_names=target))

Fitting 3 folds for each of 60 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    2.5s


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    3.1s finished
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    6.2s finished


             precision    recall  f1-score   support

      blues       0.73      0.80      0.76        20
  classical       0.90      0.90      0.90        20
    country       0.54      0.75      0.63        20
     hiphop       0.61      0.70      0.65        20
       jazz       0.89      0.80      0.84        20
      metal       0.70      0.95      0.81        20
        pop       0.65      0.65      0.65        20
     reggae       0.78      0.70      0.74        20
       rock       0.75      0.15      0.25        20

avg / total       0.73      0.71      0.69       180



  if diff:
  if diff:


## Модель из 10 жанров

In [59]:
encoder10 = LabelEncoder()
y10 = encoder10.fit_transform(genre_list)

In [60]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y10, test_size=0.2, random_state=5, stratify=y10)
eclf.fit(X_train, y_train)
target = encoder10.inverse_transform(np.arange(10))
print(classification_report(y_test, eclf.predict(X_test), target_names=target))

Fitting 3 folds for each of 60 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    3.8s finished


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    6.9s finished


             precision    recall  f1-score   support

      blues       0.76      0.80      0.78        20
  classical       0.82      0.90      0.86        20
    country       0.61      0.85      0.71        20
      disco       0.37      0.50      0.43        20
     hiphop       0.86      0.60      0.71        20
       jazz       0.84      0.80      0.82        20
      metal       0.89      0.85      0.87        20
        pop       0.71      0.75      0.73        20
     reggae       0.68      0.65      0.67        20
       rock       0.50      0.25      0.33        20

avg / total       0.71      0.69      0.69       200



  if diff:
  if diff:


## Средние параметры моделей

In [313]:
model_6

Unnamed: 0_level_0,precision,recall,f1-score,support
score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
classical,0.873,0.94,0.906,30.0
metal,0.783,0.896,0.834,30.0
hiphop,0.741,0.838,0.786,30.0
pop,0.799,0.79,0.791,30.0
rock,0.765,0.614,0.678,30.0
jazz,0.928,0.774,0.843,30.0
avg / total,0.814,0.808,0.806,180.0


In [314]:
model_7

Unnamed: 0_level_0,precision,recall,f1-score,support
score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
classical,0.88,0.933,0.905,30.0
metal,0.783,0.877,0.826,30.0
hiphop,0.7,0.791,0.742,30.0
pop,0.748,0.799,0.771,30.0
rock,0.691,0.608,0.643,30.0
reggae,0.762,0.637,0.692,30.0
jazz,0.902,0.785,0.836,30.0
avg / total,0.781,0.778,0.771,210.0


In [315]:
model_8

Unnamed: 0_level_0,precision,recall,f1-score,support
score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
classical,0.878,0.934,0.905,30.0
metal,0.74,0.883,0.804,30.0
blues,0.656,0.813,0.725,30.0
hiphop,0.756,0.81,0.779,30.0
pop,0.758,0.779,0.765,30.0
rock,0.555,0.433,0.485,30.0
country,0.769,0.541,0.628,30.0
jazz,0.881,0.727,0.791,30.0
avg / total,0.748,0.739,0.736,240.0


In [316]:
model_9

Unnamed: 0_level_0,precision,recall,f1-score,support
score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
classical,0.865,0.947,0.902,30.0
metal,0.737,0.859,0.791,30.0
blues,0.657,0.807,0.722,30.0
hiphop,0.684,0.754,0.712,30.0
pop,0.732,0.797,0.761,30.0
rock,0.525,0.445,0.482,30.0
country,0.727,0.547,0.621,30.0
reggae,0.71,0.6,0.65,30.0
jazz,0.895,0.702,0.784,30.0
avg / total,0.724,0.719,0.713,270.0


In [317]:
model_10

Unnamed: 0_level_0,precision,recall,f1-score,support
score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
classical,0.86,0.936,0.895,30.0
metal,0.708,0.877,0.783,30.0
blues,0.715,0.784,0.743,30.0
hiphop,0.613,0.694,0.649,30.0
disco,0.523,0.631,0.569,30.0
pop,0.706,0.749,0.725,30.0
rock,0.45,0.33,0.377,30.0
country,0.714,0.615,0.658,30.0
reggae,0.725,0.546,0.619,30.0
jazz,0.91,0.699,0.789,30.0
