In [23]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [14]:
# load data
df = pd.read_csv('/content/features_3_sec.csv')
df.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


In [15]:
# drop filename column
df = df.dropna()
df = df.drop(['filename'],axis=1)

In [16]:
# check if dataset is balanced
df['label'].value_counts()

blues        1000
jazz         1000
disco         999
classical     998
hiphop        998
country       997
metal         616
Name: label, dtype: int64

In [17]:
# split data into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df.drop('label', axis=1), df['label'], test_size=0.2, random_state=42)

# scale data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [18]:
# train model
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=200)
model.fit(X_train, y_train)

In [19]:
# evaluate model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
y_pred = model.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, y_pred))
print('Confusion Matrix: ', confusion_matrix(y_test, y_pred))
print('Classification Report: ', classification_report(y_test, y_pred))

Accuracy:  0.8956127080181543
Confusion Matrix:  [[182   1  10   4   2   4   2]
 [  0 196   3   0   0   8   0]
 [ 10   0 149   1   1  10   1]
 [  2   1  12 184   6   2   1]
 [  2   0   3  11 185   2   3]
 [  6  11   9   2   0 172   0]
 [  3   0   0   3   0   2 116]]
Classification Report:                precision    recall  f1-score   support

       blues       0.89      0.89      0.89       205
   classical       0.94      0.95      0.94       207
     country       0.80      0.87      0.83       172
       disco       0.90      0.88      0.89       208
      hiphop       0.95      0.90      0.93       206
        jazz       0.86      0.86      0.86       200
       metal       0.94      0.94      0.94       124

    accuracy                           0.90      1322
   macro avg       0.90      0.90      0.90      1322
weighted avg       0.90      0.90      0.90      1322



In [24]:
# improve model
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8],
    'criterion' :['gini', 'entropy']
}
CV_rfc = GridSearchCV(estimator=model, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train, y_train)
print(CV_rfc.best_params_)
print(CV_rfc.best_score_)
y_pred = CV_rfc.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, y_pred))
print('Confusion Matrix: ', confusion_matrix(y_test, y_pred))
print('Classification Report: ', classification_report(y_test, y_pred))

{'criterion': 'entropy', 'max_depth': 8, 'max_features': 'auto', 'n_estimators': 500}
0.8658773180149261
Accuracy:  0.8426626323751891
Confusion Matrix:  [[173   1  13   5   4   5   4]
 [  0 190   4   0   0  13   0]
 [ 14   0 135   9   0  12   2]
 [  5   1  13 169  14   5   1]
 [  7   1   4  20 171   0   3]
 [  8  15  10   2   0 165   0]
 [  5   0   0   6   0   2 111]]
Classification Report:                precision    recall  f1-score   support

       blues       0.82      0.84      0.83       205
   classical       0.91      0.92      0.92       207
     country       0.75      0.78      0.77       172
       disco       0.80      0.81      0.81       208
      hiphop       0.90      0.83      0.87       206
        jazz       0.82      0.82      0.82       200
       metal       0.92      0.90      0.91       124

    accuracy                           0.84      1322
   macro avg       0.85      0.84      0.84      1322
weighted avg       0.84      0.84      0.84      1322

