In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import precision_score, classification_report
from sklearn import tree

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## Get data

In [2]:
# Time-domain features
root = np.load("./root_mean_square_2048_512.npy")
cross_rate = np.load("./zero_crossing_rate_2048_512.npy")
amplitude = np.load("./amplitude_envelope_2048_512.npy")

print(cross_rate.shape)
print(root.shape)
print(amplitude.shape)


# Frequency domain features
spec_bandwidth = np.load("./spectral_bandwidth_2048_512.npy")
spec_centroid = np.load("./spectral_centroid_2048_512.npy")
spec_flatness = np.load("./spectral_flatness_2048_512.npy")

print(spec_bandwidth.shape)
print(spec_centroid.shape)
print(spec_flatness.shape)

(7994, 1271)
(7994, 1271)
(7994, 1271)
(7994, 1271)
(7994, 1271)
(7994, 1271)


In [3]:
# Y data
music_data = pd.read_csv("tracks_genre_small.csv")

blacklist = {"fma_small/098/098565.mp3", "fma_small/098/098567.mp3", "fma_small/098/098569.mp3",
             "fma_small/099/099134.mp3", "fma_small/108/108925.mp3", "fma_small/133/133297.mp3"}

music_data = music_data.query("filepath not in @blacklist")

print(music_data.genre_top.value_counts())  # Everything is in balance


Pop              1000
Instrumental     1000
Folk             1000
International    1000
Experimental      999
Rock              999
Electronic        999
Hip-Hop           997
Name: genre_top, dtype: int64


## Stupid way. Just get mean values that characterise our genre

In [4]:
X = pd.DataFrame({"root_mean": root.mean(axis=1),
                  "cross_rate_mean": cross_rate.mean(axis=1),
                  "amplitude_mean": amplitude.mean(axis=1),
                  "spec_bandwidth_mean": spec_bandwidth.mean(axis=1),
                  "spec_centroid_mean": spec_centroid.mean(axis=1),
                  "spec_flatness_mean": spec_flatness.mean(axis=1),
                  
                  "root_min": root.min(axis=1),
                  "cross_rate_min": cross_rate.min(axis=1),
                  "amplitude_min": amplitude.min(axis=1),
                  "spec_bandwidth_min": spec_bandwidth.min(axis=1),
                  "spec_centroid_min": spec_centroid.min(axis=1),
                  "spec_flatness_min": spec_flatness.min(axis=1),
                  
                  "root_max": root.max(axis=1),
                  "cross_rate_max": cross_rate.max(axis=1),
                  "amplitude_max": amplitude.max(axis=1),
                  "spec_bandwidth_max": spec_bandwidth.max(axis=1),
                  "spec_centroid_max": spec_centroid.max(axis=1),
                  "spec_flatness_max": spec_flatness.max(axis=1),
                
                  "root_std": root.std(axis=1),
                  "cross_rate_std": cross_rate.std(axis=1),
                  "amplitude_std": amplitude.std(axis=1),
                  "spec_bandwidth_std": spec_bandwidth.std(axis=1),
                  "spec_centroid_std": spec_centroid.std(axis=1),
                  "spec_flatness_std": spec_flatness.std(axis=1),
                  })

y = music_data.genre_top


In [5]:
print(X.head(4))

   root_mean  cross_rate_mean  amplitude_mean  spec_bandwidth_mean  \
0   0.145564         0.162650        0.472331          2683.269690   
1   0.148352         0.100928        0.417225          2603.937132   
2   0.188408         0.148650        0.647179          2050.288798   
3   0.068198         0.043890        0.170641          2484.904356   

   spec_centroid_mean  spec_flatness_mean  root_min  cross_rate_min  \
0         3023.868820            0.058018  0.021373        0.029297   
1         2426.681473            0.026787  0.032495        0.009277   
2         2348.362119            0.022396  0.099909        0.070801   
3         1743.051804            0.009453  0.002950        0.007812   

   amplitude_min  spec_bandwidth_min  ...  amplitude_max  spec_bandwidth_max  \
0       0.067645         1565.565735  ...       0.944079         3733.309385   
1       0.108328         1374.243640  ...       0.617887         3958.035621   
2       0.294963         1389.244424  ...       1.006

In [6]:
print(y.head(4))

0    Hip-Hop
1    Hip-Hop
2        Pop
3       Folk
Name: genre_top, dtype: object


## Get train and test data

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [8]:
clf_tr = RandomForestClassifier()

params = {
    "n_estimators": range(60, 151, 5),
    "max_depth": range(3, 30, 5),
    "min_samples_leaf": range(1, 5),
    "min_samples_split": range(2, 10),
         }

grid_search_cv_clf = GridSearchCV(clf_tr, params, cv=3)


## Train model

In [9]:
# grid_search_cv_clf.fit(X_train, y_train)


GridSearchCV(cv=3, estimator=RandomForestClassifier(),
             param_grid={'max_depth': range(3, 30, 5),
                         'min_samples_leaf': range(1, 5),
                         'min_samples_split': range(2, 10),
                         'n_estimators': range(60, 151, 5)})

## What are the best params

In [10]:
best_estimator = grid_search_cv_clf.best_params_
print(best_estimator)

{'max_depth': 18, 'min_samples_leaf': 2, 'min_samples_split': 4, 'n_estimators': 100}


In [110]:
## After analysis
params = {'max_depth': 18, 'min_samples_leaf': 2, 'min_samples_split': 4, 'n_estimators': 100}
grid_search_cv_clf = GridSearchCV(clf_tr, params, cv=3)
grid_search_cv_clf.fit(X_train, y_train)

# Let's see, how bad is it :)

In [111]:
# Mean and std fitures 
y_pred = grid_search_cv_clf.predict(X_test)
train_score = grid_search_cv_clf.score(X_train, y_train)
test_score = grid_search_cv_clf.score(X_test, y_test)
clf_report = classification_report(y_true=y_test, y_pred=y_pred)

print(train_score, test_score)
print()
print(clf_report)

0.9940242763772176 0.458128078817734

               precision    recall  f1-score   support

   Electronic       0.44      0.47      0.45       327
 Experimental       0.42      0.28      0.34       317
         Folk       0.50      0.61      0.55       319
      Hip-Hop       0.52      0.57      0.55       359
 Instrumental       0.48      0.57      0.52       319
International       0.44      0.41      0.42       353
          Pop       0.30      0.24      0.26       319
         Rock       0.48      0.52      0.50       326

     accuracy                           0.46      2639
    macro avg       0.45      0.46      0.45      2639
 weighted avg       0.45      0.46      0.45      2639



# Let's try to predict BI_data

In [109]:
all_data = pd.read_csv("./IB_data_with_features.csv").drop(columns=["Unnamed: 0"])
all_data.genre.value_counts()

Electronica         39
Rock                34
Metal               32
rock                20
electro             18
                    ..
Funk_Rock            1
Heavy_metal          1
indi_pop             1
Pop rock             1
Alternative_Rock     1
Name: genre, Length: 62, dtype: int64

In [84]:
def fix_genre(name):
    if "roc" in name.lower():
        return "Rock"
    elif "class" in name.lower():
        return "Classic"
    elif "elect" in name.lower():
        return "Electronic"
    elif "hip" in name.lower():
        return "Hip-Hop"
    elif "folk" in name.lower():
        return "Folk"
    else:
        return name.title()

In [85]:
all_data.genre = all_data.genre.apply(fix_genre)

In [86]:
X_data = all_data.drop(columns=["name", "genre"])
y_data = all_data.genre

## Try to predict genre from BI_data

In [87]:
y_BI_pred = grid_search_cv_clf.predict(X_data)


## Make it more readable

In [88]:
result_data = pd.DataFrame({
    "music": all_data.name.str[:20],
    "actual_genre": y_data,
    "predicted_genre": y_BI_pred
})

result_data.to_csv("Result_of_forest_classification.csv", encoding="UTF-8")
print(result_data)

                    music actual_genre predicted_genre
0    7Horse_-_Answer_the_         Rock   International
1    A_Simple_Life-Brian_      Classic    Instrumental
2    Adagio_in_G_Minor-Al      Classic    Instrumental
3    Age_Atomic-Solar_Bea   Electronic    Instrumental
4    Agnus_Dei-Enigma.mp3      New_Age    Experimental
..                    ...          ...             ...
320  Электрослабость-Тере         Rock   International
321  Я_говорю_тебе_да-Зоя     Art_Song    Instrumental
322  Я_не_один-Эм_Калинин        Indie    Experimental
323  Я_так_соскучился-Пор         Rock            Rock
324  Я_убью_себя-Слава_КП      Hip-Hop            Rock

[325 rows x 3 columns]


In [89]:
result_data.actual_genre.unique()


array(['Rock', 'Classic', 'Electronic', 'New_Age', 'Rap', 'Hip-Hop',
       'Jazz', 'Symphonic_Metal', 'Britpop', 'Metal', 'Country', 'Disco',
       'Pop', 'Folk', 'Alternative', 'Heavy_Metal', 'Indie', 'Indi_Pop',
       'Instrumental', 'Synth-Punk', 'J-Pop', 'Indie_Pop',
       'Alternative Metal', 'Blues', 'Alternative_Metal', 'Orchestral',
       'Ethnic', 'Trap', 'Art_Song', 'Audiobook', 'Dance', 'Romance'],
      dtype=object)

In [90]:
result_data.actual_genre = result_data.actual_genre.apply(fix_genre)


In [91]:
result_data.predicted_genre.value_counts()

Rock             83
International    80
Instrumental     59
Experimental     46
Electronic       29
Pop              16
Folk              9
Hip-Hop           3
Name: predicted_genre, dtype: int64

In [92]:
result_data.actual_genre.value_counts()

Rock                 80
Electronic           69
Metal                32
Hip-Hop              24
Symphonic_Metal      18
Pop                  18
Folk                 15
Indie                13
Classic               8
New_Age               8
Jazz                  5
Instrumental          5
Orchestral            4
Country               3
Art_Song              3
Britpop               2
Indie_Pop             2
Ethnic                2
Indi_Pop              1
Synth-Punk            1
J-Pop                 1
Alternative           1
Audiobook             1
Blues                 1
Romance               1
Trap                  1
Disco                 1
Alternative Metal     1
Dance                 1
Heavy_Metal           1
Alternative_Metal     1
Rap                   1
Name: actual_genre, dtype: int64

In [94]:
print(grid_search_cv_clf.score(X_data, y_BI_pred))
print(grid_search_cv_clf.score(X_data, y_data))
print()
print(classification_report(y_true=result_data.actual_genre, 
                            y_pred=result_data.predicted_genre))

1.0
0.13538461538461538

                   precision    recall  f1-score   support

      Alternative       0.00      0.00      0.00         1
Alternative Metal       0.00      0.00      0.00         1
Alternative_Metal       0.00      0.00      0.00         1
         Art_Song       0.00      0.00      0.00         3
        Audiobook       0.00      0.00      0.00         1
            Blues       0.00      0.00      0.00         1
          Britpop       0.00      0.00      0.00         2
          Classic       0.00      0.00      0.00         8
          Country       0.00      0.00      0.00         3
            Dance       0.00      0.00      0.00         1
            Disco       0.00      0.00      0.00         1
       Electronic       0.62      0.26      0.37        69
           Ethnic       0.00      0.00      0.00         2
     Experimental       0.00      0.00      0.00         0
             Folk       0.00      0.00      0.00        15
      Heavy_Metal       0.00  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Let's try to predict not the whole BI_data

In [95]:
needed_genre = ["Rock", "International", "Instrumental", "Experimental", "Electronic", "Pop", "Folk", "Hip-Hop"]

In [96]:
short_data = all_data.query("genre in @needed_genre")
print(short_data.shape)


(211, 26)


In [97]:
X_short_data = short_data.drop(columns=["name", "genre"])
y_short_data = short_data.genre

## Try to predict short data

In [98]:
y_BI_short_pred = grid_search_cv_clf.predict(X_short_data)


In [99]:
print(grid_search_cv_clf.score(X_short_data, y_BI_short_pred))
print(grid_search_cv_clf.score(X_short_data, y_short_data))
print()
print(classification_report(y_true=y_short_data, 
                            y_pred=y_BI_short_pred))

1.0
0.20853080568720378

               precision    recall  f1-score   support

   Electronic       0.69      0.26      0.38        69
 Experimental       0.00      0.00      0.00         0
         Folk       0.00      0.00      0.00        15
      Hip-Hop       0.50      0.04      0.08        24
 Instrumental       0.08      0.60      0.14         5
International       0.00      0.00      0.00         0
          Pop       0.08      0.06      0.06        18
         Rock       0.53      0.26      0.35        80

     accuracy                           0.21       211
    macro avg       0.23      0.15      0.13       211
 weighted avg       0.49      0.21      0.27       211



  _warn_prf(average, modifier, msg_start, len(result))


In [100]:
result_short_data = pd.DataFrame({
    "music": short_data.name.str[:20],
    "actual_genre": y_short_data,
    "predicted_genre": y_BI_short_pred
})

# result_data.to_csv("Result_of_forest_classification.csv", encoding="UTF-8")
print(result_data)

                    music actual_genre predicted_genre
0    7Horse_-_Answer_the_         Rock   International
1    A_Simple_Life-Brian_      Classic    Instrumental
2    Adagio_in_G_Minor-Al      Classic    Instrumental
3    Age_Atomic-Solar_Bea   Electronic    Instrumental
4    Agnus_Dei-Enigma.mp3      New_Age    Experimental
..                    ...          ...             ...
320  Электрослабость-Тере         Rock   International
321  Я_говорю_тебе_да-Зоя     Art_Song    Instrumental
322  Я_не_один-Эм_Калинин        Indie    Experimental
323  Я_так_соскучился-Пор         Rock            Rock
324  Я_убью_себя-Слава_КП      Hip-Hop            Rock

[325 rows x 3 columns]
