# CS5691: Programming Assignment IV - SVM and PCA

This notebook provides a complete solution for Programming Assignment IV, focusing on Support Vector Machines (SVM) with various kernels and Principal Component Analysis (PCA) for different datasets.

**Note**: This notebook assumes the required datasets are located in a folder named `Team15` in the same directory as this notebook, in sub-folders `Dataset-1`, `Dataset-2`, and `Dataset-3`, each containing `train.csv`, `val.csv`, and `test.csv`.

In [None]:
# All necessary imports for the assignment
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib.colors import ListedColormap
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.mixture import GaussianMixture
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC
from tabulate import tabulate


## Helper Functions

These functions are used to print accuracy tables, plot confusion matrices, and visualize decision regions for the SVM classifiers.

In [None]:
def print_accuracies(accuracies, model_type, c_values, kernel_param_name=None, kernel_params=None):
    """
    Prints a formatted table of accuracies for different models and hyperparameters.

    Args:
        accuracies: Sequence of ``[train, validation, test]`` accuracy triples,
            one per trained model.
        model_type: Name shown in the table heading.
        c_values: The distinct C values that were tried (NOT pre-expanded to
            one entry per row).
        kernel_param_name: Optional column name for a second hyperparameter
            (e.g. ``"Degree"`` or ``"Gamma"``).
        kernel_params: The distinct values tried for that hyperparameter.

    When a kernel parameter is given, ``accuracies`` must be ordered the way a
    nested grid search with C in the OUTER loop and the kernel parameter in the
    INNER loop produces them, i.e. ``len(c_values) * len(kernel_params)`` rows.

    Raises:
        ValueError: If the number of accuracy rows does not match the grid size.
    """
    if kernel_param_name and kernel_params:
        columns = {
            # Inner-loop parameter cycles fastest ...
            kernel_param_name: list(kernel_params) * len(c_values),
            # ... while each C value stays fixed for a full inner sweep.
            'C Value': [c for c in c_values for _ in kernel_params],
        }
        expected_rows = len(c_values) * len(kernel_params)
    else:
        columns = {'C Value': list(c_values)}
        expected_rows = len(c_values)

    if len(accuracies) != expected_rows:
        raise ValueError(
            f"Expected {expected_rows} accuracy rows for the given hyperparameters, "
            f"got {len(accuracies)}"
        )

    for position, column_name in enumerate(('Train Accuracy', 'Validation Accuracy', 'Test Accuracy')):
        columns[column_name] = [acc[position] for acc in accuracies]

    accuracy_df = pd.DataFrame(columns)
    print(f"Accuracy Table for {model_type}:")
    print(tabulate(accuracy_df, headers='keys', tablefmt='grid'))
        
def plot_confusion_matrix(cm, title, labels):
    """
    Draws a confusion matrix as an annotated seaborn heatmap and shows it.

    Args:
        cm: Matrix of integer counts to display.
        title: Figure title.
        labels: Class labels used to build the tick captions on both axes.
    """
    column_ticks = [f'Predicted {label}' for label in labels]
    row_ticks = [f'Actual {label}' for label in labels]

    plt.figure(figsize=(8, 6))
    heatmap_options = dict(annot=True, fmt='d', cmap='Blues', cbar=False,
                           xticklabels=column_ticks, yticklabels=row_ticks)
    ax = sns.heatmap(cm, **heatmap_options)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(title)
    plt.show()

def plot_decision_region(X, y, model, title):
    """
    Plots the decision regions and support vectors for a given SVM model.

    Args:
        X: 2-D array; only the first two columns are used as plot axes. Must be
            in the same feature space the model was fitted on.
        y: Class label for each row of ``X``.
        model: Fitted estimator exposing ``predict``. If it has
            ``support_vectors_`` they are circled; if it is a ``LinearSVC`` its
            separating lines are drawn as well.
        title: Figure title.
    """
    # Create a mesh grid for plotting decision regions
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x1_min, x1_max, 0.01), np.arange(x2_min, x2_max, 0.01))

    classes = np.unique(y)
    n_classes = len(classes)

    # predict() works for every estimator (LinearSVC included; it has no
    # `estimators_` attribute) and already returns class labels.
    grid_labels = model.predict(np.c_[xx.ravel(), yy.ravel()])

    # Map labels to 0..K-1 so the contour levels and colours line up even when
    # the labels themselves are not consecutive integers starting at zero.
    Z = np.searchsorted(classes, grid_labels).reshape(xx.shape)
    y_index = np.searchsorted(classes, y)

    # Plot decision boundaries and support vectors
    plt.figure(figsize=(10, 8))
    colors = plt.cm.rainbow(np.linspace(0, 1, n_classes))
    light_colors = [plt.cm.rainbow(i, alpha=0.3) for i in np.linspace(0, 1, n_classes)]

    cmap_light = ListedColormap(light_colors)
    plt.contourf(xx, yy, Z, alpha=0.3, cmap=cmap_light, levels=np.arange(n_classes + 1) - 0.5)
    plt.scatter(X[:, 0], X[:, 1], c=y_index, cmap=ListedColormap(colors),
                vmin=-0.5, vmax=n_classes - 0.5, s=20)

    if hasattr(model, 'support_vectors_'):
        plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
                    facecolors='none', edgecolors='k', s=80, marker='o', label='Support Vectors')

    if isinstance(model, LinearSVC):
        x_vals = np.linspace(x1_min, x1_max, 100)
        for w, b in zip(model.coef_, model.intercept_):
            if w[1] == 0:
                # Vertical boundary: cannot be written as x2 = f(x1).
                if w[0] != 0:
                    plt.axvline(-b / w[0], color='k')
                continue
            plt.plot(x_vals, -(w[0] * x_vals + b) / w[1], 'k-')  # Decision boundary
        # Steep lines would otherwise stretch the axes far beyond the data.
        plt.xlim(xx.min(), xx.max())
        plt.ylim(yy.min(), yy.max())

    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)
    # Only draw a legend when something was actually labelled.
    handles, _ = plt.gca().get_legend_handles_labels()
    if handles:
        plt.legend()
    plt.show()

def plot_decision_region_polynomial(X, y, model, title):
    """
    Plots decision regions and bounded/unbounded support vectors for polynomial kernel SVM.

    A support vector is "bounded" when its dual coefficient magnitude has
    reached the box constraint ``model.C`` and "unbounded" (free) when it lies
    strictly below it.

    Args:
        X: 2-D array of the two plotted features, in the space the model was
            fitted on.
        y: Class label for each row of ``X`` (the colour maps assume two classes).
        model: Fitted SVC exposing ``predict``, ``support_vectors_``,
            ``dual_coef_`` and ``C``.
        title: Figure title.
    """
    # Create a mesh grid for plotting decision regions
    x1_min, x1_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    x2_min, x2_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x1_min, x1_max, 0.02), np.arange(x2_min, x2_max, 0.02))

    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA'])
    plt.contourf(xx, yy, Z, cmap=cmap_light, alpha=0.3)

    # Plot the data points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=ListedColormap(['#FF0000', '#00FF00']), edgecolors='k', s=30)

    # Identify bounded and unbounded support vectors.
    # dual_coef_ holds one row per binary sub-problem and one column per
    # support vector; reducing over rows yields exactly one value per support
    # vector, so the mask below always matches support_vectors_ in length.
    sv = model.support_vectors_
    alpha = np.abs(model.dual_coef_).max(axis=0)
    at_bound = np.isclose(alpha, model.C) | (alpha > model.C)

    bounded_sv = sv[at_bound]
    unbounded_sv = sv[~at_bound]

    # Plot support vectors
    plt.scatter(unbounded_sv[:, 0], unbounded_sv[:, 1], s=100, facecolors='none', edgecolors='b', marker='o', label='Unbounded Support Vectors')
    plt.scatter(bounded_sv[:, 0], bounded_sv[:, 1], s=100, facecolors='none', edgecolors='r', marker='o', label='Bounded Support Vectors')

    plt.title(title)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.show()
    
def plot_decision_region_gaussian(X, y, model, title, best_gamma, best_c):
    """
    Plots decision regions and bounded/unbounded support vectors for Gaussian kernel SVM.

    A support vector is "bounded" when its dual coefficient magnitude has
    reached the box constraint ``best_c`` and "unbounded" (free) when it lies
    strictly below it.

    Args:
        X: 2-D array of the two plotted features, in the space the model was
            fitted on.
        y: Class label for each row of ``X`` (the colour maps assume two classes).
        model: Fitted SVC exposing ``predict``, ``support_vectors_`` and
            ``dual_coef_``.
        title: Figure title.
        best_gamma: Accepted as part of the signature; not used by the body.
        best_c: The C value the model was trained with; used as the bound.
    """
    # Create a mesh grid for plotting decision regions
    x1_min, x1_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    x2_min, x2_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x1_min, x1_max, 0.02), np.arange(x2_min, x2_max, 0.02))

    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA'])
    plt.contourf(xx, yy, Z, cmap=cmap_light, alpha=0.3)

    # Plot the data points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=ListedColormap(['#FF0000', '#00FF00']), edgecolors='k', s=30)

    # Identify bounded and unbounded support vectors.
    # dual_coef_ holds one row per binary sub-problem and one column per
    # support vector; reducing over rows yields exactly one value per support
    # vector, so the mask below always matches support_vectors_ in length.
    sv = model.support_vectors_
    alpha = np.abs(model.dual_coef_).max(axis=0)
    at_bound = np.isclose(alpha, best_c) | (alpha > best_c)

    bounded_sv = sv[at_bound]
    unbounded_sv = sv[~at_bound]

    # Plot support vectors
    plt.scatter(unbounded_sv[:, 0], unbounded_sv[:, 1], s=100, facecolors='none', edgecolors='b', marker='o', label='Unbounded Support Vectors')
    plt.scatter(bounded_sv[:, 0], bounded_sv[:, 1], s=100, facecolors='none', edgecolors='r', marker='o', label='Bounded Support Vectors')

    plt.title(title)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.show()

def calculate_support_vector_percentages(model, X_train, y_train, C):
    """
    Calculates the percentage of bounded and unbounded support vectors.

    A support vector is counted as bounded when the magnitude of its dual
    coefficient has reached the box constraint ``C`` and as unbounded (free)
    when it stays strictly below ``C``. For multi-class models ``dual_coef_``
    has one row per binary sub-problem; by convention here a support vector is
    bounded if it is at the bound in at least one of them, so every support
    vector is counted exactly once.

    Args:
        model: Fitted estimator exposing ``dual_coef_`` (one column per
            support vector). Estimators without it raise ``AttributeError``.
        X_train: Training samples; only their count is used as the denominator.
        y_train: Unused; kept so existing callers keep working.
        C: Regularisation value the model was trained with.

    Returns:
        Tuple ``(percentage_bounded, percentage_unbounded)``, each relative to
        the number of training examples.
    """
    # Collapse the sub-problem axis so there is one magnitude per support vector.
    per_sv_alpha = np.abs(np.atleast_2d(model.dual_coef_)).max(axis=0)

    # isclose guards against tiny floating-point differences from exactly C.
    at_bound = np.isclose(per_sv_alpha, C) | (per_sv_alpha > C)
    bounded_sv_count = int(np.count_nonzero(at_bound))
    unbounded_sv_count = int(at_bound.size - bounded_sv_count)

    total_examples = len(X_train)
    percentage_bounded = 100.0 * bounded_sv_count / total_examples
    percentage_unbounded = 100.0 * unbounded_sv_count / total_examples

    return percentage_bounded, percentage_unbounded

def plot_cumulative_variance(pca, title):
    """
    Shows how the cumulative explained-variance ratio grows with the number
    of principal components.

    Args:
        pca: Fitted object exposing ``explained_variance_ratio_``.
        title: Figure title.
    """
    running_total = np.cumsum(pca.explained_variance_ratio_)
    component_counts = range(1, len(running_total) + 1)

    plt.figure(figsize=(8, 6))
    plt.plot(component_counts, running_total, marker='o', linestyle='--')
    plt.title(title)
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cumulative Explained Variance')
    plt.grid(True)
    plt.show()


## Exercise 1: Linear Kernel SVM for Dataset 1

This exercise involves training a linear kernel SVM on Dataset 1, which is linearly separable. We'll use a `LinearSVC` classifier and evaluate its performance using accuracy and confusion matrices. The decision region plot will also be visualized, marking the support vectors.

In [None]:
# Load Dataset 1
train_data_1 = pd.read_csv('./Team15/Dataset-1/train.csv')
val_data_1 = pd.read_csv('./Team15/Dataset-1/val.csv')
test_data_1 = pd.read_csv('./Team15/Dataset-1/test.csv')

X_train_1 = train_data_1[['x1', 'x2']].values
y_train_1 = train_data_1['label'].values
X_val_1 = val_data_1[['x1', 'x2']].values
y_val_1 = val_data_1['label'].values
X_test_1 = test_data_1[['x1', 'x2']].values
y_test_1 = test_data_1['label'].values

# Define the C values to try
c_values = [1, 10, 100]
accuracies = []
fitted_models = []

for c in c_values:
    # Create a pipeline with StandardScaler and LinearSVC
    svm_linear = make_pipeline(StandardScaler(), LinearSVC(C=c, max_iter=20000, multi_class='ovr', random_state=0))
    svm_linear.fit(X_train_1, y_train_1)

    # Calculate accuracies
    train_accuracy = svm_linear.score(X_train_1, y_train_1)
    val_accuracy = svm_linear.score(X_val_1, y_val_1)
    test_accuracy = svm_linear.score(X_test_1, y_test_1)
    accuracies.append([train_accuracy, val_accuracy, test_accuracy])
    fitted_models.append(svm_linear)

# Pick the best C once, after every candidate has been scored, and reuse the
# pipeline that was already fitted for it instead of retraining per iteration.
best_c_index = int(np.argmax([acc[1] for acc in accuracies]))
best_c = c_values[best_c_index]
best_model = fitted_models[best_c_index]

# Print accuracy table
print_accuracies(accuracies, "Linear Kernel SVM (Dataset 1)", c_values)

# Evaluate the best model
y_train_pred_1 = best_model.predict(X_train_1)
y_test_pred_1 = best_model.predict(X_test_1)

train_cm_1 = confusion_matrix(y_train_1, y_train_pred_1)
test_cm_1 = confusion_matrix(y_test_1, y_test_pred_1)

print("\nBest C value: ", best_c)
plot_confusion_matrix(train_cm_1, 'Training Confusion Matrix (Dataset 1)', np.unique(y_train_1))
plot_confusion_matrix(test_cm_1, 'Testing Confusion Matrix (Dataset 1)', np.unique(y_test_1))

# Plot decision region for the best model.
# The whole pipeline is passed so that grid points are standardized exactly
# like the training data before the LinearSVC step classifies them; handing the
# bare LinearSVC raw features would evaluate it in the wrong feature space.
plot_decision_region(X_train_1, y_train_1, best_model,
                     f'Decision Regions for Best Linear SVM (C={best_c})')


## Exercise 2: Polynomial and Gaussian Kernel SVM for Dataset 2

This exercise trains SVMs with polynomial and Gaussian kernels on Dataset 2, which is non-linearly separable. We will evaluate different hyperparameters (`C` and `degree`/`gamma`) and select the best performing model based on validation accuracy. The decision regions and support vectors for the best models will be plotted.

In [None]:
# Load Dataset 2
train_data_2 = pd.read_csv('./Team15/Dataset-2/train.csv')
val_data_2 = pd.read_csv('./Team15/Dataset-2/val.csv')
test_data_2 = pd.read_csv('./Team15/Dataset-2/test.csv')

X_train_2 = train_data_2[['x1', 'x2']].values
y_train_2 = train_data_2['output'].values
X_val_2 = val_data_2[['x1', 'x2']].values
y_val_2 = val_data_2['output'].values
X_test_2 = test_data_2[['x1', 'x2']].values
y_test_2 = test_data_2['output'].values

# Grid search for Polynomial Kernel
c_values = [1, 10, 100]
degree_values = [2, 3, 4, 5]
poly_accuracies = []
best_poly_accuracy = 0
best_poly_params = {}

for c in c_values:
    for degree in degree_values:
        svm_poly = make_pipeline(StandardScaler(), SVC(kernel='poly', C=c, degree=degree))
        svm_poly.fit(X_train_2, y_train_2)

        train_accuracy = svm_poly.score(X_train_2, y_train_2)
        val_accuracy = svm_poly.score(X_val_2, y_val_2)
        test_accuracy = svm_poly.score(X_test_2, y_test_2)
        poly_accuracies.append([train_accuracy, val_accuracy, test_accuracy])

        if val_accuracy > best_poly_accuracy:
            best_poly_accuracy = val_accuracy
            best_poly_params = {'C': c, 'degree': degree}
            best_poly_model = svm_poly

print("Polynomial Kernel SVM Accuracies:")
# Pass the distinct C values: print_accuracies expands them against the kernel
# parameter itself, so a pre-multiplied list makes the column lengths disagree.
print_accuracies(poly_accuracies, "Polynomial Kernel SVM (Dataset 2)", c_values, "Degree", degree_values)
print(f"\nBest Polynomial Kernel SVM parameters: {best_poly_params} with validation accuracy: {best_poly_accuracy:.4f}")

# Plot decision region for the best polynomial model.
# best_poly_params is reset above, so it is non-empty only if THIS run found a model.
if best_poly_params:
    # The SVC step was fitted on standardized inputs, so plot in that space.
    X_train_2_poly_scaled = best_poly_model.named_steps['standardscaler'].transform(X_train_2)
    plot_decision_region_polynomial(X_train_2_poly_scaled, y_train_2, best_poly_model.named_steps['svc'],
                                    f"Polynomial Kernel SVM (C={best_poly_params['C']}, Degree={best_poly_params['degree']})")

# Grid search for Gaussian Kernel (RBF)
gamma_values = [0.1, 1, 10, 100]
rbf_accuracies = []
best_rbf_accuracy = 0
best_rbf_params = {}

for c in c_values:
    for gamma in gamma_values:
        svm_rbf = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=c, gamma=gamma))
        svm_rbf.fit(X_train_2, y_train_2)

        train_accuracy = svm_rbf.score(X_train_2, y_train_2)
        val_accuracy = svm_rbf.score(X_val_2, y_val_2)
        test_accuracy = svm_rbf.score(X_test_2, y_test_2)
        rbf_accuracies.append([train_accuracy, val_accuracy, test_accuracy])

        if val_accuracy > best_rbf_accuracy:
            best_rbf_accuracy = val_accuracy
            best_rbf_params = {'C': c, 'gamma': gamma}
            best_rbf_model = svm_rbf

print("\nGaussian Kernel SVM Accuracies:")
print_accuracies(rbf_accuracies, "Gaussian Kernel SVM (Dataset 2)", c_values, "Gamma", gamma_values)
print(f"\nBest Gaussian Kernel SVM parameters: {best_rbf_params} with validation accuracy: {best_rbf_accuracy:.4f}")

# Plot decision region for the best Gaussian model
if best_rbf_params:
    # The SVC step was fitted on standardized inputs, so plot in that space.
    X_train_2_rbf_scaled = best_rbf_model.named_steps['standardscaler'].transform(X_train_2)
    plot_decision_region_gaussian(X_train_2_rbf_scaled, y_train_2, best_rbf_model.named_steps['svc'],
                                  f"Gaussian Kernel SVM (C={best_rbf_params['C']}, Gamma={best_rbf_params['gamma']})",
                                  best_rbf_params['gamma'], best_rbf_params['C'])

## Exercise 3: Polynomial and Gaussian Kernel SVM for Dataset 3

This exercise repeats the grid search for SVMs with polynomial and Gaussian kernels, but this time on the image data from Dataset 3. The best model will be selected based on validation accuracy, and its performance will be analyzed with confusion matrices and support vector percentages.

In [None]:
# Load Dataset 3
train_data_3 = pd.read_csv('./Team15/Dataset-3/train.csv')
val_data_3 = pd.read_csv('./Team15/Dataset-3/val.csv')
test_data_3 = pd.read_csv('./Team15/Dataset-3/test.csv')

X_train_3 = train_data_3.drop('output', axis=1).values
y_train_3 = train_data_3['output'].values
X_val_3 = val_data_3.drop('output', axis=1).values
y_val_3 = val_data_3['output'].values
X_test_3 = test_data_3.drop('output', axis=1).values
y_test_3 = test_data_3['output'].values

# Grid search for Polynomial Kernel
c_values = [1, 10, 100]
degree_values = [2, 3, 4, 5]
poly_accuracies_3 = []
best_poly_accuracy_3 = 0
best_poly_params_3 = {}

for c in c_values:
    for degree in degree_values:
        svm_poly_3 = make_pipeline(StandardScaler(), SVC(kernel='poly', C=c, degree=degree))
        svm_poly_3.fit(X_train_3, y_train_3)

        train_accuracy = svm_poly_3.score(X_train_3, y_train_3)
        val_accuracy = svm_poly_3.score(X_val_3, y_val_3)
        test_accuracy = svm_poly_3.score(X_test_3, y_test_3)
        poly_accuracies_3.append([train_accuracy, val_accuracy, test_accuracy])

        # Model selection uses validation accuracy only.
        if val_accuracy > best_poly_accuracy_3:
            best_poly_accuracy_3 = val_accuracy
            best_poly_params_3 = {'C': c, 'degree': degree}
            best_poly_model_3 = svm_poly_3

print("Polynomial Kernel SVM Accuracies (Dataset 3):")
print_accuracies(poly_accuracies_3, "Polynomial Kernel SVM", c_values, "Degree", degree_values)
print(f"\nBest Polynomial Kernel SVM parameters: {best_poly_params_3} with validation accuracy: {best_poly_accuracy_3:.4f}")

# best_poly_params_3 is reset at the top of this cell, so it is non-empty only
# when THIS run selected a model (a name-in-locals() check could pick up a
# stale model left over from an earlier notebook execution).
if best_poly_params_3:
    y_train_pred = best_poly_model_3.predict(X_train_3)
    y_test_pred = best_poly_model_3.predict(X_test_3)
    train_cm = confusion_matrix(y_train_3, y_train_pred)
    test_cm = confusion_matrix(y_test_3, y_test_pred)
    plot_confusion_matrix(train_cm, 'Training Confusion Matrix (Dataset 3)', np.unique(y_train_3))
    plot_confusion_matrix(test_cm, 'Testing Confusion Matrix (Dataset 3)', np.unique(y_test_3))

    # Calculate and print support vector percentages
    percentage_bounded, percentage_unbounded = calculate_support_vector_percentages(
        best_poly_model_3.named_steps['svc'], X_train_3, y_train_3, best_poly_params_3['C']
    )
    print(f"\nPercentage of Bounded Support Vectors (Polynomial): {percentage_bounded:.2f}%")
    print(f"Percentage of Unbounded Support Vectors (Polynomial): {percentage_unbounded:.2f}%")

# Grid search for Gaussian Kernel (RBF)
gamma_values = [0.1, 1, 10, 100]
rbf_accuracies_3 = []
best_rbf_accuracy_3 = 0
best_rbf_params_3 = {}

for c in c_values:
    for gamma in gamma_values:
        svm_rbf_3 = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=c, gamma=gamma))
        svm_rbf_3.fit(X_train_3, y_train_3)

        train_accuracy = svm_rbf_3.score(X_train_3, y_train_3)
        val_accuracy = svm_rbf_3.score(X_val_3, y_val_3)
        test_accuracy = svm_rbf_3.score(X_test_3, y_test_3)
        rbf_accuracies_3.append([train_accuracy, val_accuracy, test_accuracy])

        # Model selection uses validation accuracy only.
        if val_accuracy > best_rbf_accuracy_3:
            best_rbf_accuracy_3 = val_accuracy
            best_rbf_params_3 = {'C': c, 'gamma': gamma}
            best_rbf_model_3 = svm_rbf_3

print("\nGaussian Kernel SVM Accuracies (Dataset 3):")
print_accuracies(rbf_accuracies_3, "Gaussian Kernel SVM", c_values, "Gamma", gamma_values)
print(f"\nBest Gaussian Kernel SVM parameters: {best_rbf_params_3} with validation accuracy: {best_rbf_accuracy_3:.4f}")

# Same freshness guard as for the polynomial kernel above.
if best_rbf_params_3:
    y_train_pred = best_rbf_model_3.predict(X_train_3)
    y_test_pred = best_rbf_model_3.predict(X_test_3)
    train_cm = confusion_matrix(y_train_3, y_train_pred)
    test_cm = confusion_matrix(y_test_3, y_test_pred)
    plot_confusion_matrix(train_cm, 'Training Confusion Matrix (Dataset 3)', np.unique(y_train_3))
    plot_confusion_matrix(test_cm, 'Testing Confusion Matrix (Dataset 3)', np.unique(y_test_3))

    # Calculate and print support vector percentages
    percentage_bounded, percentage_unbounded = calculate_support_vector_percentages(
        best_rbf_model_3.named_steps['svc'], X_train_3, y_train_3, best_rbf_params_3['C']
    )
    print(f"\nPercentage of Bounded Support Vectors (Gaussian): {percentage_bounded:.2f}%")
    print(f"Percentage of Unbounded Support Vectors (Gaussian): {percentage_unbounded:.2f}%")
    




## Exercise 4: PCA for Dataset 3

This exercise performs Principal Component Analysis on Dataset 3 (image data). The goal is to determine a suitable reduced dimension by plotting the cumulative variance vs. the number of principal components. This will help in identifying how many components are needed to explain a significant portion of the data's variance.

In [None]:
# Load Dataset 3
train_data_3 = pd.read_csv('./Team15/Dataset-3/train.csv')
X_train_3 = train_data_3.drop('output', axis=1).values

# Perform PCA on the scaled data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_3)
# Leave n_components unset so PCA keeps min(n_samples, n_features) components;
# explicitly requesting n_features components raises when there are fewer
# training samples than features.
pca = PCA()
pca.fit(X_train_scaled)

# Plot cumulative variance
plot_cumulative_variance(pca, 'Cumulative Variance vs. Number of Principal Components')

# Determine a suitable reduced dimension
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_variance = np.cumsum(explained_variance_ratio)

# First index whose cumulative ratio reaches 95%, converted to a 1-based count.
n_components_95_percent = np.where(cumulative_variance >= 0.95)[0][0] + 1
print(f"Number of components to explain at least 95% of the variance: {n_components_95_percent}")


## Exercise 5: Classifiers on PCA-reduced Dataset 3

This exercise uses the reduced-dimension representation of Dataset 3 (from the previous exercise) as input for three different classifiers: GMM, MLFFNN, and the best-performing SVM from Exercise 3. The configurations for GMM and MLFFNN are the same as in Assignment 3.

In [None]:
# Load Dataset 3
train_data_3 = pd.read_csv('./Team15/Dataset-3/train.csv')
val_data_3 = pd.read_csv('./Team15/Dataset-3/val.csv')
test_data_3 = pd.read_csv('./Team15/Dataset-3/test.csv')

X_train_3 = train_data_3.drop('output', axis=1).values
y_train_3 = train_data_3['output'].values
X_val_3 = val_data_3.drop('output', axis=1).values
y_val_3 = val_data_3['output'].values
X_test_3 = test_data_3.drop('output', axis=1).values
y_test_3 = test_data_3['output'].values

# Reduce dimension using PCA
n_components = 4 # Using the same value as in the original report
pca = make_pipeline(StandardScaler(), PCA(n_components=n_components))
X_train_pca = pca.fit_transform(X_train_3)
X_val_pca = pca.transform(X_val_3)
X_test_pca = pca.transform(X_test_3)


def _fit_gmm_classifier(X, y, n_mixtures, cov_type):
    """Fit one GaussianMixture per class; return (classes, models, log_priors)."""
    classes = np.unique(y)
    models = [
        GaussianMixture(n_components=n_mixtures, covariance_type=cov_type,
                        random_state=0).fit(X[y == label])
        for label in classes
    ]
    log_priors = np.log([np.mean(y == label) for label in classes])
    return classes, models, log_priors


def _gmm_predict(classifier, X):
    """Assign each row of X to the class with the highest log prior + log-likelihood."""
    classes, models, log_priors = classifier
    log_joint = np.column_stack([gmm.score_samples(X) for gmm in models]) + log_priors
    return classes[np.argmax(log_joint, axis=1)]


# 5a. GMM based classifier
# GaussianMixture is a density model: its fit() ignores labels and its score()
# returns a mean log-likelihood, not an accuracy. Classification is therefore
# done with one mixture per class on the PCA-reduced features.
print("\n--- GMM based classifier ---")
best_gmm = None
best_accuracy = -1.0  # below any real accuracy, so the first candidate is always kept
smallest_class_size = min(int(np.sum(y_train_3 == label)) for label in np.unique(y_train_3))
for cov_type in ['full', 'diag']:
    for n_components_gmm in [1, 3, 5, 7]:
        # A mixture cannot have more components than the samples it is fitted on.
        if n_components_gmm > smallest_class_size:
            continue
        gmm_classifier = _fit_gmm_classifier(X_train_pca, y_train_3, n_components_gmm, cov_type)

        val_accuracy = accuracy_score(y_val_3, _gmm_predict(gmm_classifier, X_val_pca))
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_gmm = gmm_classifier

train_accuracy = accuracy_score(y_train_3, _gmm_predict(best_gmm, X_train_pca))
test_accuracy = accuracy_score(y_test_3, _gmm_predict(best_gmm, X_test_pca))

print(f"Best GMM Classifier Training Accuracy: {train_accuracy:.4f}")
print(f"Best GMM Classifier Testing Accuracy: {test_accuracy:.4f}")

# 5b. MLFFNN based classifier
print("\n--- MLFFNN based classifier ---")

# Convert data to PyTorch tensors
# NOTE(review): the labels are used directly as class indices by the loss, so
# this assumes 'output' is already coded 0..(num_classes - 1) - confirm.
X_train_tensor = torch.Tensor(X_train_pca)
y_train_tensor = torch.LongTensor(y_train_3)
X_test_tensor = torch.Tensor(X_test_pca)
y_test_tensor = torch.LongTensor(y_test_3)

class MLFFNN_PCA(nn.Module):
    """Feed-forward network with two tanh hidden layers (12 and 8 units).

    ``forward`` returns raw class scores (logits). ``nn.CrossEntropyLoss``
    applies log-softmax itself, so feeding it already-softmaxed outputs would
    normalise twice and flatten the gradients. ``argmax`` over logits selects
    the same class as ``argmax`` over probabilities.

    Args:
        input_dim: Number of input features.
        num_classes: Number of output classes (defaults to 5 for Dataset 3).
    """

    def __init__(self, input_dim, num_classes=5):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 12)
        self.fc2 = nn.Linear(12, 8)
        self.fc3 = nn.Linear(8, num_classes)
        self.tanh = nn.Tanh()
        # Slope factor applied to the pre-activations of both hidden layers.
        self.beta = 1.0
        # Not applied in forward(); kept so callers can turn logits into
        # probabilities with model.softmax(model(x)).
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return a (batch, num_classes) tensor of unnormalised class scores."""
        x = self.tanh(self.beta * self.fc1(x))
        x = self.tanh(self.beta * self.fc2(x))
        return self.fc3(x)
    
model_pca = MLFFNN_PCA(input_dim=n_components)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_pca.parameters(), lr=0.7, momentum=0.9)
num_epochs = 500

# Full-batch training: every epoch is one gradient step on the whole training set.
for epoch in range(num_epochs):
    outputs = model_pca(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 50 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

with torch.no_grad():
    model_pca.eval()
    train_outputs = model_pca(X_train_tensor)
    test_outputs = model_pca(X_test_tensor)
    train_pred = torch.argmax(train_outputs, dim=1)
    test_pred = torch.argmax(test_outputs, dim=1)

train_acc = accuracy_score(y_train_3, train_pred.numpy())
test_acc = accuracy_score(y_test_3, test_pred.numpy())
print(f"MLFFNN Training Accuracy (PCA): {train_acc:.4f}")
print(f"MLFFNN Testing Accuracy (PCA): {test_acc:.4f}")

# 5c. Best SVM classifier from Exercise 3
print("\n--- Best SVM classifier ---")
# Assuming the best SVM model from Exercise 3 is saved as best_rbf_model_3
# (the guard skips this part when the Exercise 3 cell has not been run).
if 'best_rbf_model_3' in locals():
    best_svm = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=best_rbf_params_3['C'], gamma=best_rbf_params_3['gamma']))
    # Train and score on the PCA-reduced features: the results are reported as
    # "(PCA)", so the unreduced X_train_3 / X_test_3 must not be used here.
    best_svm.fit(X_train_pca, y_train_3)

    train_accuracy = best_svm.score(X_train_pca, y_train_3)
    test_accuracy = best_svm.score(X_test_pca, y_test_3)

    print(f"Best SVM Training Accuracy (PCA): {train_accuracy:.4f}")
    print(f"Best SVM Testing Accuracy (PCA): {test_accuracy:.4f}")
