In [1]:
import numpy as np
from scipy.io import loadmat
from scipy.optimize import minimize

In [2]:
def preprocess(mat_file='mnist_all.mat', n_validation=1000):
    """
     Load the MNIST digits from a MATLAB file and split them into training,
     validation and test sets.

     The file is expected to hold one matrix per digit and split, stored under
     the keys 'train0'..'train9' and 'test0'..'test9', with one image per row.

     Input:
     mat_file: path of the .mat file to read (defaults to 'mnist_all.mat')
     n_validation: number of leading rows of every 'train<d>' matrix that are
       moved into the validation set (defaults to 1000)

     Output:
     train_data: matrix of the training set. Each row of train_data contains
       the feature vector of an image
     train_label: column vector (N x 1) with the digit of each training image
     validation_data: matrix of the validation set. Each row of
       validation_data contains the feature vector of an image
     validation_label: column vector with the digit of each validation image
     test_data: matrix of the test set. Each row of test_data contains the
       feature vector of an image
     test_label: column vector with the digit of each test image

     All data matrices are float64, divided by 255, and restricted to the
     feature columns whose standard deviation over the training set exceeds
     0.001. Labels are float64 values 0.0 .. 9.0.

     Raises:
     ValueError: if n_validation is negative or a digit has fewer than
       n_validation training rows.
    """
    n_class = 10

    if n_validation < 0:
        raise ValueError("n_validation must be non-negative")

    mat = loadmat(mat_file)  # loads the MAT object as a Dictionary

    # Look every per-digit matrix up once instead of on each use.
    train_parts = [mat["train" + str(digit)] for digit in range(n_class)]
    test_parts = [mat["test" + str(digit)] for digit in range(n_class)]

    for digit, part in enumerate(train_parts):
        if part.shape[0] < n_validation:
            raise ValueError(
                "train" + str(digit) + " has only " + str(part.shape[0])
                + " rows; cannot reserve " + str(n_validation) + " for validation"
            )

    digits = np.arange(n_class, dtype=np.float64)

    # The first n_validation rows of every digit form the validation set ...
    validation_data = np.vstack([part[:n_validation] for part in train_parts]).astype(np.float64)
    validation_label = np.repeat(digits, n_validation).reshape(-1, 1)

    # ... and the remaining rows form the training set.
    train_data = np.vstack([part[n_validation:] for part in train_parts]).astype(np.float64)
    train_counts = [part.shape[0] - n_validation for part in train_parts]
    train_label = np.repeat(digits, train_counts).reshape(-1, 1)

    # The test set is used as stored.
    test_data = np.vstack(test_parts).astype(np.float64)
    test_counts = [part.shape[0] for part in test_parts]
    test_label = np.repeat(digits, test_counts).reshape(-1, 1)

    # Drop features that are (nearly) constant over the training set: they
    # carry no information for the classifiers. The same columns are removed
    # from every split so the feature layout stays aligned.
    informative = np.std(train_data, axis=0) > 0.001

    # Select the informative columns and scale pixel values to [0, 1].
    train_data = train_data[:, informative] / 255.0
    validation_data = validation_data[:, informative] / 255.0
    test_data = test_data[:, informative] / 255.0

    return train_data, train_label, validation_data, validation_label, test_data, test_label

In [3]:
def sigmoid(z):
    """
    Element-wise logistic function 1 / (1 + exp(-z)).

    Input:
        z: scalar or array-like of real numbers

    Output:
        Values in [0, 1] with the same shape as z (a NumPy scalar for scalar
        input).

    The naive formula evaluates exp(-z), which overflows for large negative z.
    Here only exp(-|z|) <= 1 is ever computed, and the algebraically equivalent
    form exp(z) / (1 + exp(z)) is used for negative inputs.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1], so it cannot overflow
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where turns scalar input into a 0-d array; hand back a scalar instead.
    return out[()] if out.ndim == 0 else out

In [4]:
def blrObjFunction(initialWeights, *args):
    """
    blrObjFunction computes 2-class Logistic Regression error function and
    its gradient.

    Input:
        initialWeights: the weight vector (w_k) with D + 1 entries (bias
                        first); any shape that can be reshaped to (D + 1) x 1
        train_data: the data matrix of size N x D (first element of args)
        labeli: the label vector (y_k) of size N x 1 (or N) where each entry
                is either 0 or 1 (second element of args)

    Output:
        error: the scalar mean negative log-likelihood of the labels
        error_grad: flat vector with D + 1 entries holding the gradient of
                    the error with respect to the weights
    """
    train_data, labeli = args

    n_data, n_features = train_data.shape

    # Add the bias term to the input data.
    biased_data = np.hstack((np.ones((n_data, 1)), train_data))
    weights = np.asarray(initialWeights).reshape((n_features + 1, 1))
    # Force a column vector: a 1-D label array would otherwise broadcast
    # against the N x 1 predictions into an N x N matrix.
    labels = np.asarray(labeli).reshape((n_data, 1))

    activation = np.dot(biased_data, weights)

    # With theta = sigmoid(a):
    #   -[y*log(theta) + (1 - y)*log(1 - theta)] == log(1 + exp(a)) - y*a
    # Evaluating the right-hand side with logaddexp stays finite even when
    # theta saturates to exactly 0 or 1, where log(theta) would give
    # -inf and 0 * -inf would turn the whole error into NaN.
    error = np.sum(np.logaddexp(0.0, activation) - labels * activation) / n_data

    theta = sigmoid(activation)
    error_grad = (np.dot(biased_data.T, theta - labels) / n_data).flatten()

    return error, error_grad

In [5]:
def blrPredict(W, data):
    """
     blrPredict predicts the label of data given the data and parameter W
     of one-vs-all Logistic Regression

     Input:
         W: the matrix of weight of size (D + 1) x K. Each column is the weight
         vector of one Logistic Regression classifier (bias weight first).
         data: the data matrix of size N x D

     Output:
         label: integer vector of size N x 1 holding, for every row of data,
         the index of the classifier with the highest score

    """
    n_data = data.shape[0]

    # Add the bias term to the input data.
    biased_data = np.hstack((np.ones((n_data, 1)), data))
    scores = np.dot(biased_data, W)

    # The sigmoid is strictly increasing, so the most probable class is the
    # one with the largest raw score. Comparing raw scores avoids the false
    # ties that appear once several sigmoid outputs round to exactly 1.0
    # (argmax would then always return the first of them).
    return np.argmax(scores, axis=1).reshape((n_data, 1))

In [6]:
def mlrObjFunction(params, *args):
    """
    mlrObjFunction computes multi-class Logistic Regression error function and
    its gradient.

    Input:
        params: the weights, (D + 1) * K values that are reshaped row-major
                to a (D + 1) x K matrix (bias row first)
        train_data: the data matrix of size N x D (first element of args)
        labeli: the one-hot label matrix of size N x K; its number of columns
                defines the number of classes K (second element of args)

    Output:
        error: the scalar mean cross-entropy between labels and softmax output
        error_grad: flat vector with (D + 1) * K entries, the row-major
                    flattening of the (D + 1) x K gradient matrix
    """
    train_data, labeli = args
    n_data, n_feature = train_data.shape
    n_class = labeli.shape[1]

    # Add the bias term to the input data.
    biased_data = np.hstack((np.ones((n_data, 1)), train_data))
    weights = np.asarray(params).reshape((n_feature + 1, n_class))

    scores = np.dot(biased_data, weights)
    # Shift by the row maximum so that exp() never overflows.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_shifted = np.exp(shifted)
    normaliser = np.sum(exp_shifted, axis=1, keepdims=True)
    probabilities = exp_shifted / normaliser

    # Log-softmax computed directly from the scores. Taking log(probabilities)
    # would give -inf wherever a probability underflows to 0, and the product
    # 0 * -inf with a zero label would make the error NaN.
    log_probabilities = shifted - np.log(normaliser)

    error = -np.sum(labeli * log_probabilities) / n_data

    error_grad = np.dot(biased_data.T, probabilities - labeli) / n_data
    error_grad = error_grad.flatten()

    return error, error_grad

In [7]:
def mlrPredict(W, data):
    """
     mlrObjFunction predicts the label of data given the data and parameter W
     of Logistic Regression

     Input:
         W: the matrix of weight of size (D + 1) x 10. Each column is the weight
         vector of a Logistic Regression classifier.
         X: the data matrix of size N x D

     Output:
         label: vector of size N x 1 representing the predicted label of
         corresponding feature vector given in data matrix

    """
    label = np.zeros((data.shape[0], 1))

    n_data = data.shape[0]
    data = np.hstack((np.ones((n_data, 1)), data))

    scores = np.dot(data, W)
    exp_scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    probabilities = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    label = np.argmax(probabilities, axis=1).reshape((n_data, 1))

    return label

In [8]:
"""
Script for Logistic Regression
"""
train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()

# Problem dimensions: ten digit classes, N training samples, D features.
n_class = 10
n_train, n_feature = train_data.shape

# One-hot encoding of the training labels: Y[n, k] is 1 when sample n shows
# digit k and 0 otherwise.
Y = (train_label == np.arange(n_class)).astype(float)

# One-vs-all Logistic Regression: column k of W holds the weights (bias first)
# of the binary classifier "digit k versus the rest".
W = np.zeros((n_feature + 1, n_class))

# Every binary classifier starts from the same all-zero 1D weight vector.
initialWeights = np.zeros(n_feature + 1)

opts = {'maxiter': 100}
for i in range(n_class):
    labeli = Y[:, [i]]  # column i, kept as an (n_train, 1) array
    args = (train_data, labeli)
    nn_params = minimize(blrObjFunction, initialWeights, jac=True, args=args, method='CG', options=opts)
    W[:, i] = nn_params.x

# Report the accuracy on the training, validation and testing datasets.
for heading, split_data, split_label in (
    ('\n Training set Accuracy:', train_data, train_label),
    ('\n Validation set Accuracy:', validation_data, validation_label),
    ('\n Testing set Accuracy:', test_data, test_label),
):
    predicted_label = blrPredict(W, split_data)
    print(heading + str(100 * np.mean((predicted_label == split_label).astype(float))) + '%')


 Training set Accuracy:92.66799999999999%

 Validation set Accuracy:91.45%

 Testing set Accuracy:91.99000000000001%


In [None]:
"""
Script for Support Vector Machine
"""

from sklearn.svm import SVC
import matplotlib.pyplot as plt

print('\n\n--------------SVM-------------------\n\n')

# scikit-learn works with 1-D target vectors. Flatten the (N, 1) label columns
# once and use the same vectors for fitting and for scoring.
svm_train_target = train_label.ravel()
svm_validation_target = validation_label.ravel()
svm_test_target = test_label.ravel()


def svm_accuracies(model):
    """Return the (training, validation, testing) accuracy of a fitted model in percent."""
    return (
        model.score(train_data, svm_train_target) * 100,
        model.score(validation_data, svm_validation_target) * 100,
        model.score(test_data, svm_test_target) * 100,
    )


# Three baseline models: linear kernel, RBF kernel with default settings,
# and RBF kernel with gamma fixed to 1.
svc_linear = SVC(kernel='linear')
svc_linear.fit(train_data, svm_train_target)

svc_rbf_default = SVC(kernel='rbf')
svc_rbf_default.fit(train_data, svm_train_target)

svc_rbf_gamma1 = SVC(kernel='rbf', gamma=1)
svc_rbf_gamma1.fit(train_data, svm_train_target)

baseline_scores = {}
for title, model in (
    ('Linear Kernel', svc_linear),
    ('RBF (gamma=1)', svc_rbf_gamma1),
    ('RBF (default gamma)', svc_rbf_default),
):
    baseline_scores[title] = svm_accuracies(model)
    train_pct, val_pct, test_pct = baseline_scores[title]
    print('\n ' + title + ' Training Accuracy:', train_pct)
    print('\n ' + title + ' Testing Accuracy:', test_pct)
    print('\n ' + title + ' Validation Accuracy:', val_pct)


# Sweep the regularisation parameter C of the RBF kernel.
C_values = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
train_accuracy = []
val_accuracy = []
test_accuracy = []

for C in C_values:
    if C == 1:
        # SVC's default C is 1.0, so this configuration is exactly
        # svc_rbf_default; reuse it instead of paying for another full fit.
        svc_rbf = svc_rbf_default
        train_pct, val_pct, test_pct = baseline_scores['RBF (default gamma)']
    else:
        svc_rbf = SVC(kernel='rbf', C=C)
        svc_rbf.fit(train_data, svm_train_target)
        train_pct, val_pct, test_pct = svm_accuracies(svc_rbf)
    train_accuracy.append(train_pct)
    val_accuracy.append(val_pct)
    test_accuracy.append(test_pct)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(C_values, train_accuracy, label='Training Accuracy', marker='o')
ax.plot(C_values, val_accuracy, label='Validation Accuracy', marker='x')
ax.plot(C_values, test_accuracy, label='Testing Accuracy', marker='s')
ax.set_xlabel('C value')
ax.set_ylabel('Accuracy (%)')
ax.set_title('Accuracy vs C for RBF Kernel')
ax.legend()
ax.grid(True)
plt.show()



--------------SVM-------------------


