<a href="https://colab.research.google.com/github/ronitavalani/467Project/blob/main/genre.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Load in the data set
url = 'https://raw.githubusercontent.com/ronitavalani/467Project/main/songs_normalize.csv'
df = pd.read_csv(url)
df.head()

Unnamed: 0,artist,song,duration_ms,explicit,year,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genre
0,Britney Spears,Oops!...I Did It Again,211160,False,2000,77,0.751,0.834,1,-5.444,0,0.0437,0.3,1.8e-05,0.355,0.894,95.053,pop
1,blink-182,All The Small Things,167066,False,1999,79,0.434,0.897,0,-4.918,1,0.0488,0.0103,0.0,0.612,0.684,148.726,"rock, pop"
2,Faith Hill,Breathe,250546,False,1999,66,0.529,0.496,7,-9.007,1,0.029,0.173,0.0,0.251,0.278,136.859,"pop, country"
3,Bon Jovi,It's My Life,224493,False,2000,78,0.551,0.913,0,-4.063,0,0.0466,0.0263,1.3e-05,0.347,0.544,119.992,"rock, metal"
4,*NSYNC,Bye Bye Bye,200560,False,2000,65,0.614,0.928,8,-4.806,0,0.0516,0.0408,0.00104,0.0845,0.879,172.656,pop


In [3]:
# Preprocess the data


# Mixed Genre Problem

# Attempt 1: Specify different types of genres instead of pop - indicates that pop is why the model is performing well
# def get_preferred_genre(genre_str):
#     genres = genre_str.split(",")
#     genres = [g.strip() for g in genres if g.strip()]  # clean and remove empty entries
#     if not genres:
#         return "Unknown"
#     if genres[0].lower() == "pop" and len(genres) > 1:
#         return genres[1]
#     return genres[0]

# Attempt 2: Create a separate "mixed" genre for multiple genre songs
# def get_preferred_genre(genre_str):
#     genres = genre_str.split(",")
#     genres = [g.strip() for g in genres if g.strip()]  # clean and remove empty entries
#     if len(genres) > 1:
#         return "Mixed"
#     return genres[0] if genres else "Unknown"

# df['genre'] = df['genre'].astype(str).apply(get_preferred_genre)

# Keep only the first genre when a song is classified with multiple
df['genre'] = df['genre'].astype(str).apply(lambda x: x.split(',')[0].strip())


# Drop qualitative data (song name, artist name)
non_numeric_cols = df.select_dtypes(include=['object']).columns.tolist()
non_numeric_cols.remove('genre')
df = df.drop(columns=non_numeric_cols)

# Drop empty values
df = df.dropna()

# Create input and output
X = df.drop(columns=['genre'])
y = df['genre']

# Underrepresented Data Problem
# Drop genre classes with only 1 example
le = LabelEncoder()
y_encoded = le.fit_transform(y)
value_counts = pd.Series(y_encoded).value_counts()
valid_classes = value_counts[value_counts > 1].index
valid_mask = pd.Series(y_encoded).isin(valid_classes)
X = X[valid_mask]
y = y[valid_mask].reset_index(drop=True)

# Redo output labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
# Prepare data for training
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Create a PyTorch data set
class SongDataset(Dataset):
    def __init__(self, features, labels):
        self.X = torch.tensor(features, dtype=torch.float32)
        self.y = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = SongDataset(X_train, y_train)
test_dataset = SongDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [5]:
# Define neural network
# Simple model with one hidden layer and
class GenreNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GenreNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Deeper model with 2 hidden layers and dropout - performs worse
class GenreNet2(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GenreNet2, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc3 = nn.Linear(hidden_dim // 2, output_dim)

    def forward(self, x):
        out = F.relu(self.fc1(x))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

# Model, loss, optimizer
input_dim = X_train.shape[1]
hidden_dim = 64
output_dim = len(np.unique(y_encoded))

model = GenreNet(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [6]:
# Training
epochs = 16
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

Epoch 1/16, Loss: 2.0776
Epoch 2/16, Loss: 1.3882
Epoch 3/16, Loss: 1.1139
Epoch 4/16, Loss: 1.0307
Epoch 5/16, Loss: 0.9998
Epoch 6/16, Loss: 0.9793
Epoch 7/16, Loss: 0.9666
Epoch 8/16, Loss: 0.9554
Epoch 9/16, Loss: 0.9452
Epoch 10/16, Loss: 0.9367
Epoch 11/16, Loss: 0.9298
Epoch 12/16, Loss: 0.9219
Epoch 13/16, Loss: 0.9143
Epoch 14/16, Loss: 0.9068
Epoch 15/16, Loss: 0.8997
Epoch 16/16, Loss: 0.8931


In [7]:
# Evaluation
# Consistently gaining around 67% classification accuracy - solid start but not good
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, predicted = torch.max(outputs, 1)
        total += y_batch.size(0)
        correct += (predicted == y_batch).sum().item()

print(f"\nTest Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 66.25%


In [8]:
# Hyperparameter Experimentation
%%script true
print("Experimentation")
import matplotlib.pyplot as plt
import torch.nn.functional as F

epoch_range = list(range(1, 50))  # Try from 1 to 50 epochs
train_losses = []
test_accuracies = []

# Convert test set to tensors
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


for num_epochs in epoch_range:
    # Re-initialize model for a fresh start each run
    model = GenreNet(input_dim, hidden_dim, output_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Train model for current epoch count
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

    # Store final training loss for this run
    train_losses.append(running_loss / len(train_loader))

    # Evaluate test accuracy
    model.eval()
    with torch.no_grad():
        outputs = model(X_test_tensor)
        predictions = torch.argmax(outputs, dim=1)
        accuracy = (predictions == y_test_tensor).float().mean().item()
        test_accuracies.append(accuracy)

# Plot results
plt.figure(figsize=(12, 6))
plt.plot(epoch_range, train_losses, label="Training Loss", color="blue")
plt.plot(epoch_range, test_accuracies, label="Test Accuracy", color="green")
plt.xlabel("Epochs")
plt.ylabel("Loss / Accuracy")
plt.title("Training Loss vs. Test Accuracy Over Epochs")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
print(f"\nMax Test Accuracy: {max(test_accuracies)*100:.2f}%")

In [9]:
# Error Analysis
# % of Pop Songs vs. Pop Songs Predicted
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(y_batch.cpu().numpy())

predicted_genres = le.inverse_transform(all_preds)

pop_count = sum(1 for genre in predicted_genres if genre.lower() == 'pop')
total = len(predicted_genres)

print(f"\nPredicted 'pop' in test set: {pop_count} out of {total} samples ({pop_count/total:.2%})")


y_train_genres = le.inverse_transform(y_train)

num_pop_train = sum(1 for genre in y_train_genres if genre.lower() == "pop")
total_samples = len(y_train)

print(f"'Pop' samples in training set: {num_pop_train} out of {total_samples} "
      f"({(num_pop_train / total_samples) * 100:.2f}%)")

# Analyze incorrect classifications
# Set model to evaluation mode
model.eval()
# Store misclassified indices
misclassified = []
# No need for gradient tracking during evaluation
with torch.no_grad():
    for i, (X_batch, y_batch) in enumerate(test_loader):  # Assuming you have a test_loader
        outputs = model(X_batch)
        predictions = torch.argmax(outputs, dim=1)

        # Find where prediction != actual
        incorrect = predictions != y_batch

        # Store indices or values of misclassified samples
        for j in range(X_batch.size(0)):
            if incorrect[j]:
                misclassified.append({
                    "index": i * test_loader.batch_size + j,
                    "true": y_batch[j].item(),
                    "pred": predictions[j].item()
                })

# Print results
print(f"\nTotal Misclassified: {len(misclassified)}")
for sample in misclassified[:10]:  # Show first 10
    true_label = le.inverse_transform([sample['true']])[0]
    pred_label = le.inverse_transform([sample['pred']])[0]
    print(f"Index {sample['index']}: True Label = {true_label}, Predicted = {pred_label}")


# Misclassifications by output
from collections import Counter, defaultdict
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
all_true_labels = le.inverse_transform(y_test_tensor.numpy())

total_per_genre = Counter(all_true_labels)

misclassified_counts = defaultdict(int)

for sample in misclassified:
    genre = le.inverse_transform([sample['true']])[0]
    misclassified_counts[genre] += 1

print("\n📉 Misclassification Rates by Genre (% of genre misclassified):")
for genre in sorted(total_per_genre.keys()):
    total = total_per_genre[genre]
    wrong = misclassified_counts[genre]
    percent = 100 * wrong / total
    print(f"{genre}: {wrong}/{total} misclassified ({percent:.1f}%)")



# Print sample distribution
genre_labels = le.inverse_transform(y_encoded)

from collections import Counter
original_distribution = Counter(genre_labels)

print("\n📊 Original Genre Distribution (after preprocessing):")
for genre, count in original_distribution.most_common():
    print(f"{genre}: {count}")



Predicted 'pop' in test set: 249 out of 400 samples (62.25%)
'Pop' samples in training set: 749 out of 1599 (46.84%)

Total Misclassified: 135
Index 0: True Label = pop, Predicted = hip hop
Index 1: True Label = hip hop, Predicted = pop
Index 9: True Label = hip hop, Predicted = pop
Index 10: True Label = rock, Predicted = pop
Index 12: True Label = pop, Predicted = hip hop
Index 22: True Label = Dance/Electronic, Predicted = pop
Index 23: True Label = hip hop, Predicted = pop
Index 25: True Label = hip hop, Predicted = pop
Index 38: True Label = rock, Predicted = pop
Index 39: True Label = pop, Predicted = rock

📉 Misclassification Rates by Genre (% of genre misclassified):
Dance/Electronic: 8/8 misclassified (100.0%)
Folk/Acoustic: 1/1 misclassified (100.0%)
R&B: 3/3 misclassified (100.0%)
World/Traditional: 2/2 misclassified (100.0%)
country: 2/2 misclassified (100.0%)
hip hop: 57/155 misclassified (36.8%)
latin: 3/3 misclassified (100.0%)
metal: 2/2 misclassified (100.0%)
pop: 31/