In [None]:
import logging
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Configure the root logger at INFO level so the logging.info progress
# messages emitted by the training functions are displayed.
logging.basicConfig(level=logging.INFO)

# Function to load preprocessed MNIST data
def load_preprocessed_mnist_data(data_dir=None):
    """Load the preprocessed MNIST train/test splits as flat NumPy arrays.

    Each ``.pt`` file is unpacked as a ``(data, targets)`` pair of PyTorch
    tensors.

    Parameters
    ----------
    data_dir : str or os.PathLike, optional
        Directory containing ``preprocessed_train_mnist.pt`` and
        ``preprocessed_test_mnist.pt``. When omitted, the original
        hard-coded project directory is used, so existing no-argument
        calls keep working.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Samples reshaped to ``(n_samples, -1)``, i.e. one flat row per
        sample, for scikit-learn compatibility.
    y_train, y_test : numpy.ndarray
        The target tensors converted to NumPy arrays.
    """
    if data_dir is None:
        # Raw string: the Windows separators must stay literal backslashes.
        # In a normal string, sequences such as \A, \F, \d or \p are invalid
        # escapes (SyntaxWarning on Python 3.12+), and a folder starting with
        # n or t would silently turn into a newline/tab.
        data_dir = r'D:\ASU\Fall 2024\EEE549\Final term project\sml-final\data\preprocessed\MNIST'
    data_dir = Path(data_dir)

    # NOTE(review): torch.load unpickles its input; only point this at trusted files.
    train_data, train_targets = torch.load(data_dir / 'preprocessed_train_mnist.pt')
    test_data, test_targets = torch.load(data_dir / 'preprocessed_test_mnist.pt')

    # Convert tensors to numpy arrays for sklearn compatibility, and reshape images to 1D
    X_train = train_data.numpy().reshape(train_data.shape[0], -1)
    X_test = test_data.numpy().reshape(test_data.shape[0], -1)
    y_train = train_targets.numpy()
    y_test = test_targets.numpy()

    return X_train, X_test, y_train, y_test

# Load the data: flattened train/test feature matrices and their label arrays
X_train, X_test, y_train, y_test = load_preprocessed_mnist_data()



In [2]:
# Hold out 20% of the training data as a validation set (fixed seed for a repeatable split).
# NOTE(review): this rebinds X_train / y_train, so re-running the cell without
# reloading the data splits the already-reduced training set again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Show the shapes of the training and test arrays, one per line.
print(*(arr.shape for arr in (X_train, X_test, y_train, y_test)), sep="\n")


(9585, 784)
(1984, 784)
(9585,)
(1984,)


## Logistic regression

In [17]:
def train_logistic_regression(X_train, y_train, X_val, y_val, random_state=42):
    """Fit an unregularized logistic regression and report validation accuracy.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``fit``.
    X_val, y_val
        Validation features and labels used only for the printed accuracy.
    random_state : int or None, default=42
        Seed forwarded to ``LogisticRegression``. The ``saga`` solver is
        stochastic, so a fixed seed keeps the fit repeatable across runs.

    Returns
    -------
    LogisticRegression
        The fitted model.
    """
    logging.info("Training Logistic Regression: penalty=none, solver=saga")
    lr_model = LogisticRegression(
        penalty=None,
        solver='saga',
        max_iter=5000,
        random_state=random_state,
    )
    lr_model.fit(X_train, y_train)
    y_pred_val = lr_model.predict(X_val)
    logging.info("Completed Training Logistic Regression")
    print("Validation Accuracy (Logistic Regression without reg):", accuracy_score(y_val, y_pred_val))
    return lr_model

def train_logistic_regression_l2(X_train, y_train, X_val, y_val, C=1.0, random_state=42):
    """Fit an L2-regularized logistic regression and report validation accuracy.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``fit``.
    X_val, y_val
        Validation features and labels used only for the printed accuracy.
    C : float, default=1.0
        Value forwarded as ``LogisticRegression(C=...)``; the default keeps
        the previously hard-coded setting.
    random_state : int or None, default=42
        Seed forwarded to ``LogisticRegression``. The ``saga`` solver is
        stochastic, so a fixed seed keeps the fit repeatable across runs.

    Returns
    -------
    LogisticRegression
        The fitted model.
    """
    logging.info("Training Logistic Regression with L2 regularization: penalty=l2, solver=saga")
    lr_model_reg = LogisticRegression(
        penalty='l2',
        solver='saga',
        max_iter=5000,
        C=C,
        random_state=random_state,
    )
    lr_model_reg.fit(X_train, y_train)
    y_pred_val_reg = lr_model_reg.predict(X_val)
    logging.info("Completed Training Logistic Regression with L2")
    print("Validation Accuracy (Logistic Regression with L2):", accuracy_score(y_val, y_pred_val_reg))
    return lr_model_reg


## SVM

In [18]:
def train_svm_rbf(X_train, y_train, X_val, y_val):
    """Fit an RBF-kernel SVC on the training split and report validation accuracy.

    The classifier is built with ``kernel='rbf'`` and ``gamma='scale'``,
    fitted on ``(X_train, y_train)``, scored on ``(X_val, y_val)`` with
    ``accuracy_score`` (printed to stdout), and returned.
    """
    logging.info("Training SVM RBF")

    classifier = SVC(gamma='scale', kernel='rbf')
    classifier.fit(X_train, y_train)
    val_predictions = classifier.predict(X_val)

    logging.info("Completed SVM RBF")

    val_accuracy = accuracy_score(y_val, val_predictions)
    print("Validation Accuracy (SVM RBF Kernel):", val_accuracy)
    return classifier

def train_svm_poly(X_train, y_train, X_val, y_val):
    """Fit a degree-3 polynomial-kernel SVC and report validation accuracy.

    The classifier is built with ``kernel='poly'``, ``degree=3`` and
    ``gamma='scale'``, fitted on ``(X_train, y_train)``, scored on
    ``(X_val, y_val)`` with ``accuracy_score`` (printed to stdout), and
    returned.
    """
    logging.info("Training SVM Poly")

    svc_options = {'kernel': 'poly', 'degree': 3, 'gamma': 'scale'}
    classifier = SVC(**svc_options)
    classifier.fit(X_train, y_train)
    val_predictions = classifier.predict(X_val)

    logging.info("Completed SVM Poly")

    val_accuracy = accuracy_score(y_val, val_predictions)
    print("Validation Accuracy (SVM Polynomial Kernel):", val_accuracy)
    return classifier

## Training models

In [19]:
# Fit all four classifiers on the training split. Each call logs its progress
# and prints the accuracy on the validation split before returning the model.
lr_model = train_logistic_regression(X_train, y_train, X_val, y_val)
lr_model_reg = train_logistic_regression_l2(X_train, y_train, X_val, y_val)
svm_rbf = train_svm_rbf(X_train, y_train, X_val, y_val)
svm_poly = train_svm_poly(X_train, y_train, X_val, y_val)

INFO:root:Training Logistic Regression: penalty=none, solver=saga
INFO:root:Completed Training Logistic Regression
INFO:root:Training Logistic Regression with L2 regularization: penalty=l2, solver=saga


Validation Accuracy (Logistic Regression without reg): 0.9616186900292032


INFO:root:Completed Training Logistic Regression with L2
INFO:root:Training SVM RBF


Validation Accuracy (Logistic Regression with L2): 0.9616186900292032


INFO:root:Completed SVM RBF
INFO:root:Training SVM Poly


Validation Accuracy (SVM RBF Kernel): 0.9899874843554443


INFO:root:Completed SVM Poly


Validation Accuracy (SVM Polynomial Kernel): 0.9920734251147267


In [20]:
# Evaluate the polynomial-kernel SVM on the test set (X_test, y_test come from
# the data-loading cell): overall accuracy, then the classification report.
y_pred_test_poly = svm_poly.predict(X_test)
test_accuracy_poly = accuracy_score(y_test, y_pred_test_poly)
print("Test Accuracy (SVM Polynomial Kernel):", test_accuracy_poly)
print(classification_report(y_test, y_pred_test_poly))

Test Accuracy (SVM Polynomial Kernel): 0.9924395161290323
              precision    recall  f1-score   support

           3       0.99      0.99      0.99      1010
           8       0.99      0.99      0.99       974

    accuracy                           0.99      1984
   macro avg       0.99      0.99      0.99      1984
weighted avg       0.99      0.99      0.99      1984



## Log plots