## Install Kaggle and download dataset

In [1]:
# Install the Kaggle API client
!pip install kaggle

# Upload your Kaggle API credentials (kaggle.json) file
from google.colab import files
files.upload()

# Move the Kaggle API key to the appropriate directory
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the GTZAN dataset
!kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification

# Unzip the downloaded dataset
!unzip gtzan-dataset-music-genre-classification.zip



Saving kaggle.json to kaggle.json
Downloading gtzan-dataset-music-genre-classification.zip to /content
 99% 1.20G/1.21G [00:10<00:00, 63.0MB/s]
100% 1.21G/1.21G [00:10<00:00, 120MB/s] 
Archive:  gtzan-dataset-music-genre-classification.zip
  inflating: Data/features_30_sec.csv  
  inflating: Data/features_3_sec.csv  
  inflating: Data/genres_original/blues/blues.00000.wav  
  inflating: Data/genres_original/blues/blues.00001.wav  
  inflating: Data/genres_original/blues/blues.00002.wav  
  inflating: Data/genres_original/blues/blues.00003.wav  
  inflating: Data/genres_original/blues/blues.00004.wav  
  inflating: Data/genres_original/blues/blues.00005.wav  
  inflating: Data/genres_original/blues/blues.00006.wav  
  inflating: Data/genres_original/blues/blues.00007.wav  
  inflating: Data/genres_original/blues/blues.00008.wav  
  inflating: Data/genres_original/blues/blues.00009.wav  
  inflating: Data/genres_original/blues/blues.00010.wav  
  inflating: Data/genres_original/blues/blu

## Load data to pandas dataframe

In [2]:
import pandas as pd

# Load data to dataframe
data_path = '/content/Data/features_3_sec.csv'
data = pd.read_csv(data_path)

data.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


## Split the data

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

seed = 42

# Splitting data into features and labels
X = data.drop(['filename', 'label'], axis=1)
y = data['label']

# Encoding labels and feature normalization
encoder = LabelEncoder()
y = encoder.fit_transform(y)
X = StandardScaler().fit_transform(X)

# Split data into training and validation parts
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((7992, 58), (1998, 58), (7992,), (1998,))

## Deep Learning Approach

### DP model without dropout

In [4]:
import tensorflow as tf

dp_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])

print(dp_model.summary())

dp_model.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics='accuracy')

history = dp_model.fit(X_train,
                    y_train,
                    validation_data=(X_val, y_val),
                    epochs=150,
                    batch_size=32)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               30208     
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 64)                8256      
                                                                 
 dense_4 (Dense)             (None, 10)                650       
                                                                 
Total params: 203338 (794.29 KB)
Trainable params: 203338 (794.29 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/150

#### Score evaluation



In [5]:
import numpy as np
from sklearn.metrics import classification_report

y_val_predict = np.argmax(dp_model.predict(X_val), axis=1)

# Evaludation
report = classification_report(y_val, y_val_predict, target_names=encoder.classes_)
print(report)

              precision    recall  f1-score   support

       blues       0.94      0.93      0.93       208
   classical       0.95      0.98      0.96       203
     country       0.87      0.90      0.89       186
       disco       0.92      0.92      0.92       199
      hiphop       0.95      0.94      0.95       218
        jazz       0.91      0.92      0.91       192
       metal       0.98      0.99      0.98       204
         pop       0.95      0.96      0.96       180
      reggae       0.92      0.93      0.93       211
        rock       0.90      0.85      0.87       197

    accuracy                           0.93      1998
   macro avg       0.93      0.93      0.93      1998
weighted avg       0.93      0.93      0.93      1998



### Test DP model with different dropout

In [8]:
def create_dp_model_with_dropout(dropout):
    dp_model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dropout(dropout),

        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(dropout),

        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(dropout),

        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(dropout),

        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    dp_model.compile(optimizer='adam',
                    loss='sparse_categorical_crossentropy',
                    metrics='accuracy')

    return dp_model

for i in range(1, 6):
    dropout = i * 0.1
    print(f"\n\n# Dropout: {dropout}\n")
    model = create_dp_model_with_dropout(dropout)
    dp_model.fit(X_train,
                    y_train,
                    validation_data=(X_val, y_val),
                    epochs=150,
                    batch_size=32,
                    verbose=None)

    y_val_predict = np.argmax(dp_model.predict(X_val), axis=1)

    # Evaludation
    report = classification_report(y_val, y_val_predict, target_names=encoder.classes_)
    print(report)



# Dropout: 0.1

              precision    recall  f1-score   support

       blues       0.93      0.93      0.93       208
   classical       0.97      0.93      0.95       203
     country       0.89      0.90      0.90       186
       disco       0.92      0.90      0.91       199
      hiphop       0.92      0.95      0.94       218
        jazz       0.88      0.97      0.92       192
       metal       0.95      0.98      0.96       204
         pop       0.94      0.95      0.95       180
      reggae       0.92      0.91      0.92       211
        rock       0.94      0.83      0.88       197

    accuracy                           0.93      1998
   macro avg       0.93      0.93      0.93      1998
weighted avg       0.93      0.93      0.93      1998



# Dropout: 0.2

              precision    recall  f1-score   support

       blues       0.94      0.92      0.93       208
   classical       0.94      0.95      0.94       203
     country       0.82      0.91      0.8

## Test larger DP model

In [None]:
dp_model_1024 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(1024, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(10, activation='softmax'),
])

print(dp_model_1024.summary())

dp_model_1024.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics='accuracy')

history = dp_model_1024.fit(X_train,
                    y_train,
                    validation_data=(X_val, y_val),
                    epochs=150,
                    batch_size=32)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 1024)              60416     
                                                                 
 dropout_8 (Dropout)         (None, 1024)              0         
                                                                 
 dense_11 (Dense)            (None, 512)               524800    
                                                                 
 dropout_9 (Dropout)         (None, 512)               0         
                                                                 
 dense_12 (Dense)            (None, 256)               131328    
                                                                 
 dropout_10 (Dropout)        (None, 256)               0         
                                                                 
 dense_13 (Dense)            (None, 128)              

In [None]:
from sklearn.metrics import classification_report

y_val_predict = np.argmax(dp_model_1024.predict(X_val), axis=1)

# Evaludation
report = classification_report(y_val, y_val_predict, target_names=encoder.classes_)
print(report)

              precision    recall  f1-score   support

       blues       0.94      0.94      0.94       208
   classical       0.93      0.98      0.96       203
     country       0.88      0.90      0.89       186
       disco       0.91      0.94      0.93       199
      hiphop       0.99      0.92      0.95       218
        jazz       0.94      0.91      0.92       192
       metal       0.96      0.99      0.98       204
         pop       0.95      0.94      0.95       180
      reggae       0.94      0.95      0.95       211
        rock       0.91      0.88      0.90       197

    accuracy                           0.94      1998
   macro avg       0.94      0.94      0.94      1998
weighted avg       0.94      0.94      0.94      1998

