## Download dataset

_Dataset details_: https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification

In [2]:
# Download the GTZAN dataset
!kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification

# Unzip the downloaded dataset
!unzip -q gtzan-dataset-music-genre-classification.zip

Downloading gtzan-dataset-music-genre-classification.zip to /content
 99% 1.20G/1.21G [00:10<00:00, 117MB/s]
100% 1.21G/1.21G [00:10<00:00, 120MB/s]


## Load data to pandas dataframe

In [None]:
import pandas as pd

# Load data to dataframe
data_path = '/content/Data/features_3_sec.csv'
data = pd.read_csv(data_path)

data.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


## Split the data

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Splitting data into features and labels
X = data.drop(['filename', 'label'], axis=1)
y = data['label']

# Encoding labels and feature normalization
encoder = LabelEncoder()
y = encoder.fit_transform(y)
X = StandardScaler().fit_transform(X)

# Split data into training and validation parts
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((7992, 58), (1998, 58), (7992,), (1998,))

## CNN Approach

### Data preparation

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical

# Data preparation for CNN model
X_val_reshaped = np.array(X_val).reshape(X_val.shape[0], X_val.shape[1], 1)
y_val_encoded = encoder.fit_transform(y_val)
y_val_categorical = to_categorical(y_val_encoded)

y_train_encoded = encoder.fit_transform(y_train)
y_train_categorical = to_categorical(y_train_encoded)

### Creating the CNN model

In [None]:
# Defining the CNN model
cnn_model = Sequential([
    Conv1D(filters=64, kernel_size=5, activation='relu', input_shape=(X_val_reshaped.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(np.unique(y_val)), activation='softmax')
])

# Compiling the model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### Tranining the CNN model

In [None]:
# Training the model
cnn_model.fit(X_train, y_train_categorical, epochs=15, batch_size=32, validation_data=(X_val_reshaped, y_val_categorical), verbose=1)

prediction = cnn_model.predict(X_val_reshaped)
prediction

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


array([[2.1438545e-03, 6.8000480e-07, 5.6510249e-05, ..., 2.2657728e-06,
        1.1498729e-01, 1.9487288e-03],
       [1.5510838e-03, 8.8094663e-07, 9.4257366e-06, ..., 7.6161200e-08,
        8.4036913e-08, 8.7675602e-05],
       [8.4565312e-01, 2.0567336e-08, 2.9570772e-09, ..., 1.3806348e-06,
        3.7207725e-03, 7.3012496e-05],
       ...,
       [2.6524538e-05, 1.4763718e-09, 2.4575361e-06, ..., 8.9198575e-06,
        1.9463139e-04, 1.1524180e-03],
       [4.3674943e-05, 6.1902341e-07, 7.1895035e-04, ..., 4.0318095e-04,
        8.5970953e-02, 4.5952767e-01],
       [7.6711015e-04, 2.4852909e-07, 5.1885513e-07, ..., 2.6860166e-06,
        9.9482590e-01, 8.7831082e-05]], dtype=float32)

### CNN model evaluation

In [None]:
# Estimating prediction
y_val_predict = np.argmax(prediction, axis=1)

# Evaluation
report = classification_report(y_val_encoded, y_val_predict, target_names=LabelEncoder().fit(y_val).classes_.astype(str))
print(report)

prediction[0].tolist()

              precision    recall  f1-score   support

           0       0.91      0.88      0.89       208
           1       0.92      0.97      0.94       203
           2       0.86      0.84      0.85       186
           3       0.92      0.82      0.87       199
           4       0.86      0.92      0.89       218
           5       0.88      0.88      0.88       192
           6       0.91      0.97      0.94       204
           7       0.91      0.92      0.92       180
           8       0.92      0.90      0.91       211
           9       0.83      0.81      0.82       197

    accuracy                           0.89      1998
   macro avg       0.89      0.89      0.89      1998
weighted avg       0.89      0.89      0.89      1998



[0.002143854508176446,
 6.800048026889272e-07,
 5.651024912367575e-05,
 0.0011121140560135245,
 0.8796737790107727,
 7.171065226430073e-05,
 3.10112022816611e-06,
 2.265772764076246e-06,
 0.11498729139566422,
 0.0019487288082018495]