# WEEK 10 - Neural Networks
## Iowa Platypuses

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from scipy import stats
from re import X

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Data Preparation

### Upload and clean data

In this section, we load our raw data, remove the only row that has missing values (at index 65900), and drop duplicate rows, which would otherwise skew the data.

In [None]:
def clean_data(data):
    """Return a copy of ``data`` without incomplete or duplicated rows.

    Args:
        data: DataFrame to clean. It is not modified.

    Returns:
        A new DataFrame in which every row containing a missing value has
        been removed and, among fully identical rows, only the first
        occurrence is kept. Original index labels are preserved.
    """
    # Work on the argument (not a notebook global) and drop incomplete rows
    # by content rather than by a hard-coded index label.
    data_no_nans = data.dropna(axis=0, how="any")
    return data_no_nans.drop_duplicates(subset=None, keep="first", inplace=False)

In [None]:
# Load the raw CSV, using its first column as the row index.
data_raw = pd.read_csv("dataset.csv", index_col=0)
print(data_raw.shape)  # shape before cleaning
data = clean_data(data_raw)
print(data.shape)  # shape after cleaning

### Remove Numerical Outliers

Numerical outliers can strongly affect the PCA which we will use for our neural networks, so we want to remove outliers to ensure that we can accurately predict values. Many of these songs have unique features that make genre categorization difficult.

In [None]:
def get_outlier_counts(df, threshold):
    """Count, per numeric column, the values whose |z-score| exceeds a cutoff.

    Args:
        df: DataFrame to inspect. Non-numeric columns are ignored and the
            frame is not modified.
        threshold: Probability in (0, 1). The z-score cutoff is the
            standard-normal quantile at this probability.

    Returns:
        Series indexed by numeric column name holding the number of values
        in that column whose absolute z-score is above the cutoff.
    """
    # Use the argument, not a notebook-level global, so the function reports
    # on whatever frame the caller passes in.
    data_numerical = df.select_dtypes("number")

    # Get the z-score for specified threshold
    threshold_z_score = stats.norm.ppf(threshold)

    # Get the absolute z-scores for each value, keeping the frame's labels
    z_score_df = pd.DataFrame(
        np.abs(np.asarray(stats.zscore(data_numerical))),
        index=data_numerical.index,
        columns=data_numerical.columns,
    )

    # Compare z-scores to the threshold and count the outliers in each column
    return (z_score_df > threshold_z_score).sum(axis=0)


In [None]:
def remove_outliers(df, threshold):
    """Drop every row that holds an outlier in at least one numeric column.

    A value counts as an outlier when its absolute z-score (computed per
    numeric column, with NaNs omitted from the statistics) is greater than
    the standard-normal quantile at ``threshold``.

    Args:
        df: DataFrame to filter.
        threshold: Probability passed to ``stats.norm.ppf`` to get the cutoff.

    Returns:
        The remaining rows with a fresh 0..n-1 index.
    """
    numeric_part = df.select_dtypes(include=[np.number])
    z_cutoff = stats.norm.ppf(threshold)

    # Boolean table: True where a value lies beyond the cutoff.
    abs_z = np.abs(stats.zscore(numeric_part, nan_policy='omit'))
    exceeds = pd.DataFrame(abs_z, columns=numeric_part.columns).gt(z_cutoff)

    # A row is removed as soon as any of its numeric values is flagged.
    row_is_outlier = exceeds.any(axis=1)
    labels_to_drop = df.index[row_is_outlier]

    kept = df.drop(labels_to_drop, axis=0)
    return kept.reset_index(drop=True)


In [None]:
# Probability whose standard-normal quantile is used as the z-score cutoff.
outlier_threshold = 0.999999
# Preview how many values per numeric column exceed that cutoff.
get_outlier_counts(data, outlier_threshold)


In [None]:
# Drop every row flagged as an outlier; the result gets a fresh 0..n-1 index.
data = remove_outliers(data, outlier_threshold)

In [None]:
# Display the (rows, columns) shape after outlier removal.
data.shape

Our reduced dataset has 110,885 data points.

### Perform Principal Component Analysis

Here we perform principal component analysis using 11 principal components, as we determined was optimal in our PCA check-in.

In [None]:
# Keep only the numeric columns and z-score each of them
# (pandas' default ``std`` is the sample standard deviation).
data_numerical = data.select_dtypes("number")
data_standardized = (data_numerical - data_numerical.mean()) / data_numerical.std()

# Fit a PCA with the exact ("full") SVD solver on the standardized data.
pca = PCA(svd_solver="full")
pca.fit(data_standardized)

In [None]:
n_components = 11
# ``pca.components_`` stores one principal axis per ROW
# (shape: n_components x n_features). Select the first ``n_components`` rows
# and transpose, giving an (n_features x n_components) matrix whose columns
# are the principal axes. Slicing columns instead would truncate the features
# of every axis rather than select the leading components.
pca_final = pca.components_[:n_components].T
pca_df = pd.DataFrame(data=pca_final, columns=[f"PC {i+1}" for i in range(n_components)])

In [None]:
# Display the matrix of principal-component coefficients.
pca_df



---



### Generate New Data Frame With Principal Components and Genre

We verify that there are no null or NaN values within our data frames before we multiply them to express each data point in terms of its 11 principal components. Additionally, we add the genre back to our principal component data frame.

In [None]:
# Per-column count of missing values in both frames (all should be 0).
print(data_standardized.isna().sum())
print(pca_df.isna().sum())

In [None]:
# Shapes of the two operands of the upcoming matrix product.
print(data_standardized.shape)
print(pca_df.shape)

In [None]:
# Project the standardized data onto the selected principal components;
# the resulting columns are labelled 0..n_components-1.
projected = data_standardized.dot(pca_final)
data_pcs = pd.DataFrame(projected)

# Sanity checks: expected shape and no missing values.
print(data_pcs.shape)
print(data_pcs.isna().sum())

# Re-attach the genre label (aligned on the row index) and preview a few rows.
data_pcs["track_genre"] = data["track_genre"]
data_pcs.sample(5)

In [None]:
# List the column labels of data_pcs (the component columns plus "track_genre").
data_pcs.columns

# CLASSIFICATION - Genre Prediction

We will use select columns from the dataset to build a neural network that can ideally predict the genre of a song based on its other features (i.e., multi-class classification).

We will use only `danceability`, `energy`, `loudness`, `speechiness`, `acousticness`, `instrumentalness`, `liveness`, `valence`, `tempo`.
We believe the other columns are not as relevant to genre prediction, and can be safely ignored.

There are 114 genres in this dataset - that is simply too many, and would definitely reduce the accuracy of any model.
So, we will extract the genres that we judged to be the most relevant.
We chose the following 24:
- alt-rock
- ambient
- blues
- country
- disco
- edm
- electronic
- folk
- funk
- gospel
- grunge
- hip-hop
- indie
- k-pop
- latin
- metal
- pop
- punk
- r-n-b
- reggae
- rock
- singer-songwriter
- soul
- techno

### Data preparation

#### Standardize data and create genre mappings

In [None]:
# Restrict the data to the hand-picked genres.
genres = [
    "alt-rock", "ambient", "blues", "country", "disco", "edm",
    "electronic", "folk", "funk", "gospel", "grunge", "hip-hop",
    "indie", "k-pop", "latin", "metal", "pop", "punk",
    "r-n-b", "reggae", "rock", "singer-songwriter", "soul", "techno",
]
data_genres = data_pcs[data_pcs["track_genre"].isin(genres)]

# Inputs are the 11 principal-component columns; the target is the genre name.
features_c = list(range(11))
target_c = "track_genre"

ss_c_x = StandardScaler()
ss_c_y = StandardScaler()  # created here but not used in this cell

X_c = data_genres[features_c]
y_c = data_genres[target_c]

# Standardize the inputs and wrap them in a DataFrame with a fresh row index.
X_c = pd.DataFrame(ss_c_x.fit_transform(X_c), columns=features_c)

# Encode genre names as integer class ids.
le = LabelEncoder()
y_c = le.fit_transform(y_c)

# Genre name -> integer id, in the encoder's class order.
genre_mapping = {name: idx for idx, name in enumerate(le.classes_)}

X_c.sample(5)

#### Create training, validation, and testing sets

In [None]:
# First hold out 20% of the data as the test set, then take 25% of the
# remaining 80% as the validation set: 60% train / 20% validation / 20% test.
# A fixed random_state keeps both splits reproducible.
X_tv_c, X_test_c, y_tv_c, y_test_c = train_test_split(X_c, y_c, test_size=0.2, random_state=8)
X_train_c, X_val_c, y_train_c, y_val_c = train_test_split(X_tv_c, y_tv_c, test_size=0.25, random_state=8)

In [None]:
def _as_tensor_pair(features, labels):
    """Return ``(float32 feature tensor, int64 label tensor)`` for one split."""
    return (
        torch.tensor(features.values, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.long),
    )


# Convert each split to PyTorch tensors.
X_train_tensor_c, y_train_tensor_c = _as_tensor_pair(X_train_c, y_train_c)
X_val_tensor_c, y_val_tensor_c = _as_tensor_pair(X_val_c, y_val_c)
X_test_tensor_c, y_test_tensor_c = _as_tensor_pair(X_test_c, y_test_c)

# Pair inputs with labels.
train_dataset_c = TensorDataset(X_train_tensor_c, y_train_tensor_c)
val_dataset_c = TensorDataset(X_val_tensor_c, y_val_tensor_c)
test_dataset_c = TensorDataset(X_test_tensor_c, y_test_tensor_c)

# Mini-batch loaders: only the training data is reshuffled every epoch.
train_loader_c = DataLoader(train_dataset_c, batch_size=32, shuffle=True)
val_loader_c = DataLoader(val_dataset_c, batch_size=32, shuffle=False)
test_loader_c = DataLoader(test_dataset_c, batch_size=32, shuffle=False)

### Model setup

In [None]:
class GenreClassifier(nn.Module):
    """Feed-forward genre classifier with four hidden layers.

    ``forward`` returns raw, unnormalized class scores (logits).
    ``nn.CrossEntropyLoss`` applies log-softmax internally, so feeding it
    softmax probabilities would normalize the scores twice and weaken the
    training signal. Softmax preserves the ordering of scores, so argmax and
    top-k over the logits select the same classes as over probabilities.
    Call ``predict_proba`` when actual probabilities are needed.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        # first hidden layer
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)

        # second hidden layer
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.batch_norm2 = nn.BatchNorm1d(128)

        # third hidden layer
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(0.2)

        # fourth hidden layer
        self.fc4 = nn.Linear(64, 32)
        self.relu4 = nn.ReLU()
        # NOTE: despite its name this is a batch-norm layer; the attribute
        # name is kept so existing state_dict keys stay valid.
        self.dropout4 = nn.BatchNorm1d(32)

        # output layer
        self.output = nn.Linear(32, num_classes)
        # Only used by ``predict_proba``; never applied before the loss.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.dropout1(self.relu1(self.fc1(x)))
        x = self.batch_norm2(self.relu2(self.fc2(x)))
        x = self.dropout3(self.relu3(self.fc3(x)))
        x = self.dropout4(self.relu4(self.fc4(x)))
        return self.output(x)

    def predict_proba(self, x):
        """Return per-class probabilities (each row sums to 1)."""
        return self.softmax(self.forward(x))

#### Hyperparameters

In [None]:
num_features_c = len(features_c)  # network input width: one value per feature column
num_classes_c = len(genre_mapping)  # network output width: one score per encoded genre
learning_rate_c = 0.0005  # initial learning rate passed to the optimizer
num_epochs_c = 300  # number of epochs passed to the training loop

#### Architecture

In [None]:
# Build the classifier and move it to the selected device.
model_c = GenreClassifier(input_size=num_features_c, num_classes=num_classes_c).to(device)
model_c  # display the module structure

In [None]:
# Multi-class cross-entropy loss over integer class labels.
criterion_c = nn.CrossEntropyLoss()
# AdamW optimizer with a small weight decay.
optimizer_c = optim.AdamW(model_c.parameters(), lr=learning_rate_c, weight_decay=1e-5)
# Cosine-annealed learning rate; the training loop steps it once per epoch.
# NOTE(review): T_max=10 is much shorter than num_epochs_c (300), so the
# learning rate presumably cycles many times during training - confirm
# that this is intended.
scheduler_c = optim.lr_scheduler.CosineAnnealingLR(optimizer_c, T_max=10)

### Training loop

In [None]:
def train_model_c(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Every epoch runs one optimization pass over ``train_loader``, then one
    gradient-free pass over ``val_loader`` to measure loss and accuracy.
    Progress is printed every 10th epoch and ``scheduler`` is stepped once
    per epoch. The model is left in training mode on return.

    Args:
        model: Network to optimize; batches are moved to the global ``device``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the model parameters.
        scheduler: Learning-rate scheduler whose ``step()`` takes no arguments.
        num_epochs: Number of epochs to run.

    Returns:
        Tuple ``(train_losses, val_losses)`` of per-epoch mean batch losses.
    """
    train_history, val_history = [], []

    model.train()
    for epoch_idx in range(num_epochs):
        # ---- training pass ----
        running_train = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            outputs = model(features)
            batch_loss = criterion(outputs, labels)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            running_train += batch_loss.item()

        # ---- validation pass (no dropout, no gradients) ----
        model.eval()
        running_val = 0
        n_correct = 0
        n_seen = 0
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)

                outputs = model(features)
                running_val += criterion(outputs, labels).item()

                # The highest-scoring class is the prediction.
                predicted = torch.max(outputs, 1)[1]
                n_seen += labels.size(0)
                n_correct += (predicted == labels).sum().item()

        # Average the summed batch losses over the number of batches.
        mean_train = running_train / len(train_loader)
        mean_val = running_val / len(val_loader)
        val_accuracy = n_correct / n_seen

        train_history.append(mean_train)
        val_history.append(mean_val)

        epoch_number = epoch_idx + 1
        if epoch_number % 10 == 0:
            print(
                f"Epoch {epoch_number}/{num_epochs}, "
                f"Train Loss: {mean_train:.4f}, "
                f"Val Loss: {mean_val:.4f}, "
                f"Val Accuracy: {val_accuracy:.4f}"
            )

        # Advance the learning-rate schedule by one epoch; it is stepped
        # unconditionally and does not look at the validation loss.
        scheduler.step()

        # Back to training mode for the next epoch (and for the caller).
        model.train()

    return train_history, val_history


# Run the full training loop and keep the per-epoch loss curves for plotting.
train_losses, val_losses = train_model_c(model_c, train_loader_c, val_loader_c, criterion_c, optimizer_c, scheduler_c, num_epochs_c)

In [None]:
# Draw both per-epoch loss curves on a single set of axes.
fig, ax = plt.subplots()
ax.plot(train_losses, c="b", label='Training Loss')
ax.plot(val_losses, c="r", label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss')
ax.legend()
plt.show()

Based purely on the loss curves, one could assume that the model would perform well: there are no obvious oscillations, and training loss is steadily decreasing while validation loss seems to have plateaued.



### Model Evaluation

In [None]:
# Evaluation function
def evaluate_model_c(model, test_loader):
    """Print the top-1 accuracy of ``model`` over ``test_loader``.

    Args:
        model: Trained network; it is switched to evaluation mode.
        test_loader: Iterable of ``(inputs, labels)`` batches, moved to the
            global ``device`` before the forward pass.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for features, labels in test_loader:
            features = features.to(device)
            labels = labels.to(device)
            # The highest-scoring class of each row is the prediction.
            predicted = torch.max(model(features), 1)[1]
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    accuracy = n_correct / n_seen
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

# top n evaluation function
def evaluate_model_top_n(model, test_loader, n=3):
    """
    Evaluates the model accuracy based on top-N predictions.

    A sample counts as correct when its true label is among the ``n``
    highest-scoring classes predicted for it. The result is printed and
    also returned.

    Args:
        model: The trained model to evaluate (switched to evaluation mode).
        test_loader: DataLoader for the test dataset; batches are moved to
            the global ``device``.
        n: Number of top predictions to consider for accuracy.

    Returns:
        Top-N accuracy as a percentage (a float between 0 and 100).
    """
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            # Per-class scores; only their ranking matters here.
            predictions = model(X_batch)
            _, top_classes = torch.topk(predictions, n, dim=1)

            # Compare every label against its row of top-N classes in one
            # vectorized operation instead of a per-sample Python loop.
            hits = (top_classes == y_batch.unsqueeze(1)).any(dim=1)
            correct += hits.sum().item()
            total += y_batch.size(0)

    # Calculate Top-N accuracy
    top_n_accuracy = correct / total
    print(f"Top-{n} Accuracy: {top_n_accuracy * 100:.2f}%")
    return top_n_accuracy * 100
# Evaluate the model: print its top-1 accuracy on the held-out test set
evaluate_model_c(model_c, test_loader_c)

In [None]:
# Compare top-1 accuracy with top-2, top-3 and top-4 accuracy on the test set.
evaluate_model_c(model_c, test_loader_c)
evaluate_model_top_n(model_c, test_loader_c, 2)
evaluate_model_top_n(model_c, test_loader_c, 3)
evaluate_model_top_n(model_c, test_loader_c, 4)

In [None]:
# generate confusion matrix
def generate_confusion_matrix(model, test_loader, num_classes, genre_names):
    """Plot the confusion matrix of ``model`` over ``test_loader``.

    Args:
        model: Trained network (switched to evaluation mode).
        test_loader: Iterable of ``(inputs, labels)`` batches; inputs are
            moved to the global ``device``.
        num_classes: Total number of classes. The matrix always has this many
            rows and columns, even when a class never occurs in the data.
        genre_names: Tick labels, one per class id in ascending id order.
    """
    model.eval()
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            predictions = model(X_batch.to(device))
            _, predicted = torch.max(predictions, 1)
            all_predictions.extend(predicted.cpu().numpy())
            all_targets.extend(y_batch.cpu().numpy())

    # Fix the label set explicitly. Otherwise a class that appears in neither
    # the targets nor the predictions is dropped from the matrix and the tick
    # labels no longer line up with its rows and columns.
    cm = confusion_matrix(
        np.array(all_targets),
        np.array(all_predictions),
        labels=np.arange(num_classes),
    )

    # Plot the confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=genre_names, yticklabels=genre_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

# Take the tick labels from the label encoder so that they always follow the
# integer class ids, rather than relying on the hand-written genre list
# happening to be in the same order.
generate_confusion_matrix(model_c, test_loader_c, num_classes_c, le.classes_)

From the diagonal of the plot, we observe that the model performed very unevenly across classes. For example, Ambient and Techno have a large number of observations correctly predicted (probably because their music is very distinct compared to the other genres), whereas alt-rock, folk and indie have few or no correctly predicted observations (probably because their music can be reasonably categorized as other genres).