## Download the GTZAN dataset with the Kaggle CLI

In [2]:
# Download the GTZAN dataset
!kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification

# Unzip the downloaded dataset
!unzip -q gtzan-dataset-music-genre-classification.zip

Downloading gtzan-dataset-music-genre-classification.zip to /content
 99% 1.21G/1.21G [00:10<00:00, 133MB/s]
100% 1.21G/1.21G [00:10<00:00, 125MB/s]


## Load the data into a pandas DataFrame

In [None]:
import pandas as pd

# Location of the features_3_sec.csv feature table (expected under /content/Data)
data_path = "/content/Data/features_3_sec.csv"

# Parse the CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer=data_path)

# Preview the first five rows
data.head(n=5)

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


## Encode labels, scale features, and split the data

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Single seed reused by every seeded step below (the split and the models)
seed = 42

# Separate the raw feature columns from the genre labels
features = data.drop(columns=['filename', 'label'])

# Encode the string genre labels as integer class ids;
# `encoder` is kept so predictions can be mapped back to genre names.
encoder = LabelEncoder()
y = encoder.fit_transform(data['label'])

# Split BEFORE scaling so the validation rows cannot influence the scaler statistics.
# NOTE(review): the filenames shown by data.head() (blues.00000.0.wav, blues.00000.1.wav, ...)
# suggest several rows per track; a random row-level split may then put segments of
# one track on both sides. Consider a group-aware split -- TODO confirm.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    features, y, test_size=0.2, random_state=seed
)

# Fit the scaler on the training rows only and keep it for reuse on new data
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

# Full scaled feature matrix, kept under its original name for any cell that uses `X`
X = scaler.transform(features)

X_train.shape, X_val.shape, y_train.shape, y_val.shape

((7992, 58), (1998, 58), (7992,), (1998,))

## XGBoost Approach

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import numpy as np

# XGBoost classifier with 100 estimators, seeded for repeatability
xgboost_model = XGBClassifier(
    n_estimators=100,
    random_state=seed,
)
xgboost_model.fit(X_train, y_train)

# Predicted class ids for the validation rows
y_val_predict = xgboost_model.predict(X_val)

# Evaluation: per-genre precision / recall / F1 on the validation split
report = classification_report(
    y_true=y_val,
    y_pred=y_val_predict,
    target_names=encoder.classes_,
)
print(report)

              precision    recall  f1-score   support

       blues       0.90      0.89      0.89       208
   classical       0.93      0.97      0.95       203
     country       0.81      0.87      0.84       186
       disco       0.89      0.90      0.90       199
      hiphop       0.96      0.90      0.93       218
        jazz       0.88      0.92      0.90       192
       metal       0.94      0.96      0.95       204
         pop       0.95      0.96      0.96       180
      reggae       0.93      0.91      0.92       211
        rock       0.91      0.84      0.87       197

    accuracy                           0.91      1998
   macro avg       0.91      0.91      0.91      1998
weighted avg       0.91      0.91      0.91      1998



array([4, 5, 0, ..., 4, 3, 8])

In [None]:
# Hard-coded example scores for a single sample (one row, ten values -- presumably
# class probabilities copied from a model output), used to check index -> genre decoding.
sample_probabilities = [[0.05301428213715553, 0.001104326220229268, 0.017865989357233047, 0.0024623898789286613, 0.14880643784999847, 0.004810688551515341, 0.10332601517438889, 0.06957555562257767, 0.5924625396728516, 0.006571735721081495]]

# Index of the largest score in each row, decoded with the encoder fitted on the dataset
class_index = np.argmax(sample_probabilities, axis=1)
print(class_index, encoder.inverse_transform(class_index))

# Rebuild an encoder from the literal genre names and decode the same index with it.
# Only fitting is needed here; the transformed labels were never used.
encoder2 = LabelEncoder()
encoder2.fit(['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz',
       'metal', 'pop', 'reggae', 'rock'])
for obj in encoder2.inverse_transform(class_index):
    print(obj)


[8] ['reggae']
reggae


### Test XGBoost with different `n_estimators`

In [None]:
# Train one XGBoost classifier per n_estimators value and print its validation report.
# `classifier` and `report` are overwritten on every iteration, so only the values
# from the last setting remain after the loop.
for n_estimators in [25, 50, 100, 200, 500, 1000]:
    print(f"\n\n# n_estimators: {n_estimators}\n")
    classifier = XGBClassifier(n_estimators=n_estimators, random_state=seed)
    classifier.fit(X_train, y_train)

    # Predict class ids for the validation rows with the current model
    y_sweep_predict = classifier.predict(X_val)

    # Evaluation
    report = classification_report(y_val, y_sweep_predict, target_names=encoder.classes_)
    print(report)



# n_estimators: 25

              precision    recall  f1-score   support

       blues       0.89      0.85      0.87       208
   classical       0.93      0.95      0.94       203
     country       0.72      0.80      0.76       186
       disco       0.81      0.82      0.81       199
      hiphop       0.93      0.86      0.89       218
        jazz       0.82      0.86      0.84       192
       metal       0.94      0.92      0.93       204
         pop       0.91      0.94      0.92       180
      reggae       0.91      0.86      0.89       211
        rock       0.80      0.78      0.79       197

    accuracy                           0.86      1998
   macro avg       0.87      0.86      0.86      1998
weighted avg       0.87      0.86      0.87      1998



# n_estimators: 50

              precision    recall  f1-score   support

       blues       0.88      0.88      0.88       208
   classical       0.94      0.96      0.95       203
     country       0.77      0.85 