In [14]:
import numpy as np
import pandas as pd
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve

In [2]:
def multiclass_accuracy(Y_orig, Y_pred):
    """Print the one-vs-rest accuracy of every class and return their mean.

    For each distinct label found in ``Y_orig``, a sample counts as correct
    when the true label and the predicted label agree on whether the sample
    belongs to that class (i.e. true positives plus true negatives).

    The classes are taken from the labels actually present in ``Y_orig``, so
    the labels do not have to be the integers ``0..k-1``.

    Parameters
    ----------
    Y_orig : array-like
        Ground-truth labels, one per sample.
    Y_pred : array-like
        Predicted labels, aligned element-wise with ``Y_orig``.

    Returns
    -------
    numpy.float64
        Unweighted mean of the per-class accuracies; ``nan`` for empty input.

    Raises
    ------
    ValueError
        If ``Y_orig`` and ``Y_pred`` do not contain the same number of samples.
    """
    y_true = np.asarray(Y_orig).ravel()
    y_hat = np.asarray(Y_pred).ravel()
    if len(y_true) != len(y_hat):
        raise ValueError(
            "Y_orig and Y_pred must have the same length, got "
            + str(len(y_true)) + " and " + str(len(y_hat))
        )

    n_samples = len(y_true)
    if n_samples == 0:
        # No samples means no classes to score; the mean is undefined.
        return np.float64("nan")

    accuracy_dict = {}
    # tolist() yields plain Python scalars so the printed labels look the same
    # regardless of the numpy version's scalar formatting.
    for label in np.unique(y_true).tolist():
        # A sample is correct for this class when truth and prediction agree
        # on membership: both equal the label (TP) or both differ from it (TN).
        agree = (y_true == label) == (y_hat == label)
        accuracy_dict[label] = np.count_nonzero(agree) / n_samples

    for i in accuracy_dict:
        print("Class ",i," : ",accuracy_dict[i])

    return(np.array(list(accuracy_dict.values())).mean())

In [3]:
# NOTE(review): pickle can execute arbitrary code while loading; only unpickle
# dataset files that come from a trusted source.
# The context managers close each file handle as soon as its contents are loaded.
with open("MNIST_dataset/train_set.pkl","rb") as train_file:
    train_data = pickle.load(train_file)
with open("MNIST_dataset/test_set.pkl","rb") as test_file:
    test_data = pickle.load(test_file)

In [4]:
# Pull the `Image` and `Labels` attributes of each split into numpy arrays.
X_temp_train, Y_train = np.array(train_data.Image), np.array(train_data.Labels)

X_temp_test, Y_test = np.array(test_data.Image), np.array(test_data.Labels)

# 784 represents 28*28.
# The data was originally stored as an image datatype;
# converting an image to a numpy array gives a 2D array of shape (28, 28).
# Logistic regression needs every data point to be represented by a single vector,
# so each image is flattened into a 784-element vector in the cells below.

In [5]:
# Preallocate one 784-wide (28 * 28) float row per sample for the flattened images.
X_train = np.zeros(shape=(len(X_temp_train), 28 * 28))
X_test = np.zeros(shape=(len(X_temp_test), 28 * 28))

In [6]:
# Copy every image, reshaped to a single 784-element row, into the
# preallocated matrices: the training split first, then the test split.
for target, images in ((X_train, X_temp_train), (X_test, X_temp_test)):
    for row in range(len(target)):
        target[row] = np.array(images[row]).reshape((1, 784))[0]

In [7]:
# Display the shape of the flattened training matrix as a sanity check.
X_train.shape

(10000, 784)

In [8]:
# Display the shape of the test label array as a sanity check.
Y_test.shape

(2000,)

In [10]:
def select_reg(regulaiser = "l2"):
    """Train a logistic-regression classifier and print its accuracy report.

    The model is fitted on the notebook-level ``X_train``/``Y_train`` arrays
    and then scored on both the training data and ``X_test``/``Y_test`` using
    ``multiclass_accuracy``, which prints the per-class figures itself.

    Parameters
    ----------
    regulaiser : str, default "l2"
        Passed as ``penalty`` to ``LogisticRegression``; its upper-cased form
        is used in the printed headings.

    Returns
    -------
    LogisticRegression
        The fitted model.
    """
    banner = "\n \n ****************"

    def report(split_name, features, labels):
        # Predict first, then print the heading; multiclass_accuracy prints
        # the per-class lines and hands back their mean.
        predictions = model.predict(features)
        print(banner)
        print(f"{split_name} Accuracy for each class : {regulaiser.upper()} regularization")
        return multiclass_accuracy(labels, predictions)

    model = LogisticRegression(
        penalty=regulaiser,
        C=2,
        solver="liblinear",
        multi_class="ovr",
        max_iter=1000,
        random_state=0,
        verbose=5,
    )
    model.fit(X_train, Y_train)

    avg_train_acc = report("Training", X_train, Y_train)
    avg_test_acc = report("Testing", X_test, Y_test)

    # Closing summary with the averaged accuracies of both splits.
    print(banner)
    print(f"In {regulaiser.upper()} Average accuracy for")
    print("Training :", avg_train_acc)
    print("Testing  :", avg_test_acc)

    return model

In [15]:
# Fit and report the L1-penalised model.
l1_model = select_reg(regulaiser="l1")

[LibLinear]
 
 ****************
Training Accuracy for each class : L1 regularization
Class  0  :  0.9998
Class  1  :  0.9999
Class  2  :  0.9956
Class  3  :  0.9925
Class  4  :  0.9998
Class  5  :  0.9941
Class  6  :  1.0
Class  7  :  0.9994
Class  8  :  0.9922
Class  9  :  0.9951

 
 ****************
Testing Accuracy for each class : L1 regularization
Class  0  :  0.985
Class  1  :  0.9855
Class  2  :  0.9595
Class  3  :  0.9625
Class  4  :  0.969
Class  5  :  0.957
Class  6  :  0.978
Class  7  :  0.968
Class  8  :  0.9485
Class  9  :  0.959

 
 ****************
In L1 Average accuracy for
Training : 0.9968400000000001
Testing  : 0.9671999999999998


In [16]:
# Fit and report the L2-penalised model.
l2_model = select_reg(regulaiser="l2")

[LibLinear]
 
 ****************
Training Accuracy for each class : L2 regularization
Class  0  :  0.9998
Class  1  :  1.0
Class  2  :  0.9954
Class  3  :  0.9924
Class  4  :  0.9999
Class  5  :  0.9943
Class  6  :  1.0
Class  7  :  0.9998
Class  8  :  0.9926
Class  9  :  0.9956

 
 ****************
Testing Accuracy for each class : L2 regularization
Class  0  :  0.984
Class  1  :  0.9855
Class  2  :  0.961
Class  3  :  0.9595
Class  4  :  0.9665
Class  5  :  0.957
Class  6  :  0.9755
Class  7  :  0.965
Class  8  :  0.9465
Class  9  :  0.9555

 
 ****************
In L2 Average accuracy for
Training : 0.99698
Testing  : 0.9656
