**Convolutional Neural Network**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import os

# --- Data loading and label encoding ---
# Read the preprocessed feature table (described by the author as the PCA
# dataset) from an absolute, machine-specific location.
file_path = "C:/Users/retae/GitHub/Machine_Learning_Final_Project/data/processed/final.csv"
df = pd.read_csv(file_path)

# The genre column is the prediction target; every column other than the
# track id and the genre itself is used as a model input.
y = df['primary_genre']
X = df.drop(['track_id', 'primary_genre'], axis=1)

# Fit the encoder on the genre names, turn them into integer class ids, and
# then expand those ids into one-hot rows for the softmax output layer.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)
y_onehot = to_categorical(y_encoded)

# Width of the one-hot matrix, used later as the size of the output layer.
num_classes = y_onehot.shape[-1]

# Lay each flat feature vector out on the smallest square grid that can hold
# it (e.g. 25 features -> 5x5). Grid cells past the last feature stay at 0.
num_features = X.shape[1]
side_len = int(np.ceil(np.sqrt(num_features)))
X_padded = np.zeros((X.shape[0], side_len ** 2))
X_padded[:, :num_features] = X.to_numpy()

# Append a single channel axis so the layout is (samples, height, width, 1).
X_cnn = X_padded.reshape((-1, side_len, side_len, 1))

# Hold out 30% of the samples for testing. Stratification uses the integer
# class ids (not the one-hot rows); the seed is fixed so the split repeats.
X_train, X_test, y_train, y_test = train_test_split(
    X_cnn,
    y_onehot,
    test_size=0.3,
    random_state=42,
    stratify=y_encoded,
)

# --- Model definition ---
# Two conv -> pool -> dropout stages feeding a dense classification head.
# NOTE(review): assuming Keras' default 'valid' padding and pool stride, each
# 3x3 conv trims 2 from every spatial side and each 2x2 pool halves it, so
# side_len looks like it must be >= 10 for the second pool to have anything
# left - confirm the dataset has enough features.
model = Sequential()

# Stage 1: 32 filters over the single-channel feature grid.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(side_len, side_len, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.3))

# Stage 2: 64 filters over the pooled feature maps.
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.3))

# Classifier head: flatten, one hidden layer, then one softmax unit per class.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(num_classes, activation='softmax'))

# Categorical cross-entropy matches the one-hot targets built earlier.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 25 epochs in mini-batches of 32; 20% of the training data is set aside for
# per-epoch validation. The returned history feeds the accuracy plot.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=25,
)

# --- Evaluation on the held-out split ---
# The predicted class is the column with the highest softmax probability.
y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)
# The targets are one-hot rows, so argmax recovers the integer class id.
y_true = y_test.argmax(axis=1)

# Translate integer ids back into the original genre names.
y_test_labels = label_encoder.inverse_transform(y_true)
y_pred_labels = label_encoder.inverse_transform(y_pred)

# Per-genre precision / recall / F1 summary.
print("=== CNN Classification Report ===")
print(classification_report(y_true=y_test_labels, y_pred=y_pred_labels))

# Confusion matrix
# The label order is pinned to label_encoder.classes_ for both the matrix and
# the tick labels, so rows/columns and their names always correspond.
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels, labels=label_encoder.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=label_encoder.classes_)

# ConfusionMatrixDisplay.plot() opens its own figure unless it is handed an
# axes, so a preceding plt.figure(figsize=...) call only leaves an empty
# figure behind and its size never reaches the matrix. Create the axes
# explicitly and pass them in so the 10x8 size applies to the drawn plot.
cm_fig, cm_ax = plt.subplots(figsize=(10, 8))
disp.plot(ax=cm_ax, xticks_rotation=45, cmap="Purples", values_format='d')
cm_ax.set_title("Confusion Matrix - CNN Genre Classifier")
cm_fig.tight_layout()
plt.show()

# --- Learning curves ---
# Per-epoch accuracy recorded by model.fit for the training portion and for
# the validation portion, drawn on a single set of axes.
plt.figure(figsize=(8, 4))
for metric_key, legend_text in (('accuracy', 'Train Acc'), ('val_accuracy', 'Val Acc')):
    plt.plot(history.history[metric_key], label=legend_text)
plt.title("CNN Training & Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()