## Download dataset

_Dataset details_: https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification

In [2]:
# Download the GTZAN dataset
!kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification

# Unzip the downloaded dataset
!unzip -q gtzan-dataset-music-genre-classification.zip

Downloading gtzan-dataset-music-genre-classification.zip to /content
100% 1.21G/1.21G [00:09<00:00, 104MB/s]
100% 1.21G/1.21G [00:09<00:00, 132MB/s]


## Load data to pandas dataframe

In [None]:
import pandas as pd

# Load data to dataframe
data_path = '/content/Data/features_3_sec.csv'
data = pd.read_csv(data_path)

data.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


## Split the data

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Splitting data into features and labels
X = data.drop(['filename', 'label'], axis=1)
y = data['label']

# Encoding labels and feature normalization
encoder = LabelEncoder()
y = encoder.fit_transform(y)
X = StandardScaler().fit_transform(X)

# Split data into training and validation parts
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((7992, 58), (1998, 58), (7992,), (1998,))

## CNN Random Forest Ensemble

### Data preparation

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical

# Data preparation for CNN model
X_val_reshaped = np.array(X_val).reshape(X_val.shape[0], X_val.shape[1], 1)
y_val_encoded = encoder.fit_transform(y_val)
y_val_categorical = to_categorical(y_val_encoded)

y_train_encoded = encoder.fit_transform(y_train)
y_train_categorical = to_categorical(y_train_encoded)

### Creating the CNN feature extractor

In [None]:
# Defining the CNN feature extractor
activation = 'relu'

cnn_feature_extractor = Sequential([
    Conv1D(filters=64, kernel_size=5, activation=activation, input_shape=(X_val_reshaped.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=128, kernel_size=5, activation=activation),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation=activation)
])

dl_layer = Dense(len(np.unique(y_val)), activation='softmax')(cnn_feature_extractor.output)
cnn_model = Model(inputs=cnn_feature_extractor.input, outputs=dl_layer)

# Compiling the model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### Tranining the CNN feature extractor

In [None]:
# Training the model
cnn_model.fit(X_train, y_train_categorical, epochs=25, batch_size=32, validation_data=(X_val_reshaped, y_val_categorical), verbose=1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x7ff1913faa40>

In [None]:
y_val_predict = np.argmax(cnn_model.predict(X_val_reshaped), axis=1)

# Evaluation
report = classification_report(y_val_encoded, y_val_predict, target_names=LabelEncoder().fit(y_val).classes_.astype(str))
print(report)

              precision    recall  f1-score   support

           0       0.88      0.91      0.89       208
           1       0.92      0.95      0.93       203
           2       0.85      0.83      0.84       186
           3       0.88      0.88      0.88       199
           4       0.91      0.91      0.91       218
           5       0.86      0.91      0.88       192
           6       0.96      0.95      0.95       204
           7       0.92      0.93      0.93       180
           8       0.92      0.89      0.90       211
           9       0.86      0.80      0.83       197

    accuracy                           0.90      1998
   macro avg       0.90      0.90      0.90      1998
weighted avg       0.90      0.90      0.90      1998



## Ensemble with random forest

In [None]:
# Estimating prediction
from sklearn.ensemble import RandomForestClassifier

X_train_reshaped = np.array(X_train).reshape(X_train.shape[0], X_train.shape[1], 1)
cnn_model_features = cnn_model.predict(X_train_reshaped)

# Create and train the Random Forest classifier on the extracted features
random_forest_model = RandomForestClassifier(n_estimators=10)
random_forest_model.fit(cnn_model_features, y_train)



In [None]:
val_features = cnn_model.predict(X_val_reshaped)
random_forest_predictions = random_forest_model.predict(val_features)

# Evaluation
report = classification_report(y_val_encoded, random_forest_predictions, target_names=LabelEncoder().fit(y_val).classes_.astype(str))
print(report)
random_forest_predictions

              precision    recall  f1-score   support

           0       0.88      0.90      0.89       208
           1       0.90      0.95      0.93       203
           2       0.85      0.83      0.84       186
           3       0.89      0.88      0.88       199
           4       0.91      0.91      0.91       218
           5       0.87      0.91      0.89       192
           6       0.95      0.95      0.95       204
           7       0.92      0.93      0.93       180
           8       0.92      0.89      0.90       211
           9       0.85      0.81      0.83       197

    accuracy                           0.90      1998
   macro avg       0.89      0.89      0.89      1998
weighted avg       0.90      0.90      0.90      1998



array([4, 5, 0, ..., 4, 3, 8])

## Ensemble with KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

features = cnn_feature_extractor.predict(X_train)
val_features = cnn_feature_extractor.predict(X_val)

# Step 3: Train the KNN classifier
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(features, y_train)



In [None]:
val_features = cnn_feature_extractor.predict(X_val)
knn_predictions = knn_model.predict(val_features)

report = classification_report(y_val_encoded, knn_predictions, target_names=LabelEncoder().fit(y_val).classes_.astype(str))
print(report)

knn_predictions

              precision    recall  f1-score   support

           0       0.88      0.93      0.90       208
           1       0.92      0.98      0.95       203
           2       0.84      0.85      0.84       186
           3       0.91      0.89      0.90       199
           4       0.94      0.94      0.94       218
           5       0.90      0.92      0.91       192
           6       0.97      0.98      0.97       204
           7       0.96      0.94      0.95       180
           8       0.95      0.92      0.94       211
           9       0.92      0.83      0.87       197

    accuracy                           0.92      1998
   macro avg       0.92      0.92      0.92      1998
weighted avg       0.92      0.92      0.92      1998



array([4, 5, 0, ..., 4, 3, 8])