In [2]:
import torch 
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load dataset
df_tracks = pd.read_csv("high_popularity_spotify_data.csv")

# Audio features used as model inputs
features = ["energy", "tempo", "danceability", "loudness",
            "liveness", "valence", "time_signature", "speechiness"]

# Keep only the five most frequent genres
top5_genres = df_tracks['playlist_genre'].value_counts().nlargest(5).index
df_tracks = df_tracks[df_tracks['playlist_genre'].isin(top5_genres)]

X = df_tracks[features]
y = df_tracks["playlist_genre"]

# Train/test split comes FIRST so that no preprocessing statistic is computed
# from held-out rows. The split depends only on the number of rows and
# random_state, so it selects the same rows as splitting after scaling would.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Standardize features: fit mean/std on the training rows only, then apply the
# same transform to the test rows (fitting on all rows would leak test data).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test  = torch.tensor(X_test, dtype=torch.float32)

# Encode genre names as integer class indices (fitted on the training labels)
label_encoder = LabelEncoder()
y_train = torch.tensor(label_encoder.fit_transform(y_train), dtype=torch.long)
y_test  = torch.tensor(label_encoder.transform(y_test), dtype=torch.long)

# Device: use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Define MLP
class MLP(nn.Module):
    """Feed-forward classifier with two hidden layers.

    The network is Linear -> ReLU -> Dropout, twice, followed by a final
    Linear layer that emits one raw score (logit) per class. No softmax is
    applied in ``forward``.

    Args:
        input_size: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output logits.
        dropout: Drop probability used by both dropout layers.
    """

    def __init__(self, input_size: int = 8, hidden1: int = 64, hidden2: int = 32,
                 num_classes: int = 5, dropout: float = 0.2) -> None:
        super().__init__()
        # Layer positions inside the Sequential are kept stable so the
        # parameter names ("net.0.*", "net.3.*", "net.6.*") do not change.
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden1),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden2, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the per-class logits produced by the layer stack for ``x``."""
        return self.net(x)

# Size the network from the prepared data instead of repeating literals, so a
# change to the feature list or the genre filter cannot leave the model with
# mismatched input/output dimensions.
num_classes = len(label_encoder.classes_)  # genres seen by the label encoder
model = MLP(input_size=X_train.shape[1], num_classes=num_classes).to(device)

# Loss and optimizer: cross-entropy over the model outputs, optimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training: every epoch is one optimizer step on the entire training set
epochs = 200
log_every = 5  # report the loss once every this many epochs
model.train()  # nothing inside the loop leaves training mode
for epoch in range(epochs):
    # Forward pass and loss on the full training tensors
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Clear stale gradients, backpropagate, update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % log_every != 0:
        continue
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

# Evaluation: score the held-out split in eval mode, without tracking gradients
model.eval()
with torch.no_grad():
    preds = model(X_test)
    # The predicted class is the index of the largest output per row
    predicted = preds.argmax(dim=1)
    hits = predicted == y_test
    acc = hits.float().mean()
print(f"\nTest Accuracy: {acc.item()*100:.2f}%")

# To map predicted indices back to genre names:
#predicted_genres = label_encoder.inverse_transform(predicted.cpu().numpy())





Epoch [5/200], Loss: 1.5775
Epoch [10/200], Loss: 1.5474
Epoch [15/200], Loss: 1.5224
Epoch [20/200], Loss: 1.4882
Epoch [25/200], Loss: 1.4642
Epoch [30/200], Loss: 1.4342
Epoch [35/200], Loss: 1.3982
Epoch [40/200], Loss: 1.3718
Epoch [45/200], Loss: 1.3391
Epoch [50/200], Loss: 1.3101
Epoch [55/200], Loss: 1.2851
Epoch [60/200], Loss: 1.2545
Epoch [65/200], Loss: 1.2479
Epoch [70/200], Loss: 1.2190
Epoch [75/200], Loss: 1.2120
Epoch [80/200], Loss: 1.1886
Epoch [85/200], Loss: 1.1871
Epoch [90/200], Loss: 1.1642
Epoch [95/200], Loss: 1.1759
Epoch [100/200], Loss: 1.1489
Epoch [105/200], Loss: 1.1482
Epoch [110/200], Loss: 1.1436
Epoch [115/200], Loss: 1.1397
Epoch [120/200], Loss: 1.1465
Epoch [125/200], Loss: 1.1214
Epoch [130/200], Loss: 1.1404
Epoch [135/200], Loss: 1.1271
Epoch [140/200], Loss: 1.1297
Epoch [145/200], Loss: 1.1245
Epoch [150/200], Loss: 1.1285
Epoch [155/200], Loss: 1.1016
Epoch [160/200], Loss: 1.1087
Epoch [165/200], Loss: 1.1047
Epoch [170/200], Loss: 1.1141
