In [4]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
andradaolteanu_gtzan_dataset_music_genre_classification_path = kagglehub.dataset_download('andradaolteanu/gtzan-dataset-music-genre-classification')

print('Data source import complete.')


Using Colab cache for faster access to the 'gtzan-dataset-music-genre-classification' dataset.
Data source import complete.


In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [6]:
# =============================
# Music Genre Classification with RNN
# Dataset: GTZAN (3-sec chunks)
# =============================

import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings("ignore")

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# =============================
# 1. Load and Preprocess Data
# =============================

# Load 3-second features
# The kagglehub.dataset_download function returns the path to the extracted dataset.
# We will use the variable from the first cell which holds this path.
csv_path = os.path.join(andradaolteanu_gtzan_dataset_music_genre_classification_path, "Data/features_30_sec.csv") # Corrected path
df = pd.read_csv(csv_path)

# Remove filename and length columns (not features)
df = df.drop(columns=["filename", "length"])

# Separate features and labels
X = df.drop(columns=["label"]).values
y = df["label"].values

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Normalize features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reshape for RNN: (samples, time_steps, features)
# We'll treat each MFCC coefficient as a time step â†’ (20 timesteps, 1 feature)
# Or transpose: (samples, 20, 1)

# Extract MFCC means (20 coefficients)
mfcc_mean_cols = [f"mfcc{i}_mean" for i in range(1, 21)]
X_mfcc = df[mfcc_mean_cols].values  # Shape: (n_samples, 20)

X_reshaped = X_mfcc.reshape(X_mfcc.shape[0], 20, 1)  # (samples, seq_len, input_size)

# Train-val split
X_train, X_val, y_train, y_val = train_test_split(
    X_reshaped, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# =============================
# 2. Custom Dataset
# =============================

class GTZANDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.LongTensor(y)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = GTZANDataset(X_train, y_train)
val_dataset = GTZANDataset(X_val, y_val)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# =============================
# 3. RNN Model
# =============================

class MusicGenreRNN(nn.Module):
    def __init__(self, input_size=1, hidden_size=128, num_layers=2, num_classes=10, dropout=0.3):
        super(MusicGenreRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True
        )
        self.dropout = nn.Dropout(p=dropout)
        self.fc = nn.Linear(hidden_size * 2, num_classes)  # *2 for bidirectional

    def forward(self, x):
        # x: (batch, seq_len, input_size)
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.rnn(x, h0)  # out: (batch, seq_len, hidden*2)
        out = self.dropout(out[:, -1, :])  # Take last time step
        out = self.fc(out)
        return out

# Initialize model
model = MusicGenreRNN(
    input_size=1,
    hidden_size=128,
    num_layers=2,
    num_classes=len(le.classes_),
    dropout=0.3
).to(device)

print(model)

# =============================
# 4. Training Setup
# =============================

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5)

# =============================
# 5. Training Loop
# =============================

num_epochs = 50
best_val_acc = 0.0
patience = 7
trigger = 0

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Validation
    model.eval()
    val_preds = []
    val_true = []
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()

            preds = torch.argmax(outputs, dim=1)
            val_preds.extend(preds.cpu().numpy())
            val_true.extend(y_batch.cpu().numpy())

    val_acc = accuracy_score(val_true, val_preds)
    avg_train_loss = train_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)

    scheduler.step(avg_val_loss)

    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | "
          f"Val Acc: {val_acc:.4f}")

    # Early stopping
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        trigger = 0
        torch.save(model.state_dict(), "best_rnn_gtzan.pth") # Changed filename
    else:
        trigger += 1
        if trigger >= patience:
            print("Early stopping!")
            break

# =============================
# 6. Final Evaluation
# =============================

model.load_state_dict(torch.load("best_rnn_gtzan.pth")) # Changed filename
model.eval()
all_preds = []
all_true = []
with torch.no_grad():
    for X_batch, y_batch in val_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_true.extend(y_batch.numpy())

final_acc = accuracy_score(all_true, all_preds)
print(f"\nFinal Validation Accuracy: {final_acc:.4f}")
print("\nClassification Report:")
print(classification_report(all_true, all_preds, target_names=le.classes_))

# Save label encoder
import joblib
joblib.dump(le, "label_encoder.pkl")

Using device: cpu
MusicGenreRNN(
  (rnn): RNN(1, 128, num_layers=2, batch_first=True, dropout=0.3, bidirectional=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=10, bias=True)
)
Epoch 1/50 | Train Loss: 2.1998 | Val Loss: 2.0635 | Val Acc: 0.2300
Epoch 2/50 | Train Loss: 2.0442 | Val Loss: 1.9677 | Val Acc: 0.2850
Epoch 3/50 | Train Loss: 1.9146 | Val Loss: 1.8526 | Val Acc: 0.3050
Epoch 4/50 | Train Loss: 1.8233 | Val Loss: 1.7564 | Val Acc: 0.3350
Epoch 5/50 | Train Loss: 1.7220 | Val Loss: 1.6691 | Val Acc: 0.3450
Epoch 6/50 | Train Loss: 1.6711 | Val Loss: 1.6625 | Val Acc: 0.3550
Epoch 7/50 | Train Loss: 1.6568 | Val Loss: 1.5758 | Val Acc: 0.3600
Epoch 8/50 | Train Loss: 1.6420 | Val Loss: 1.5749 | Val Acc: 0.3900
Epoch 9/50 | Train Loss: 1.5989 | Val Loss: 1.5493 | Val Acc: 0.3700
Epoch 10/50 | Train Loss: 1.5945 | Val Loss: 1.5606 | Val Acc: 0.4050
Epoch 11/50 | Train Loss: 1.5487 | Val Loss: 1.5676 | Val Acc: 0.3950
Epoch 12/50 | T

['label_encoder.pkl']