# CSCI 6390: Assignment 6
## Due November 1st
### By: Nicholas Lutrzykowski 
The goal of this assignment is to train and evaluate a multi-class logistic regression classifier on the Fashion-MNIST dataset.


In [133]:
# Import Statements 
import numpy as np
import csv
import matplotlib.pyplot as plt
import warnings 
from sklearn.preprocessing import StandardScaler
from scipy.special import softmax

### Import and set up data

In [135]:
def read_data(filename):
    """Load a comma-separated integer data file and return its rows shuffled.

    The file is parsed with ``np.genfromtxt`` as 64-bit integers. The first
    parsed row (the column-label header) is dropped, and the remaining rows
    are shuffled in place after re-seeding NumPy's global random generator
    with 42. Per the notebook's convention, column 0 holds the ground-truth
    label and the remaining columns hold the attributes.

    Side effects: resets the global ``np.random`` seed and prints the file
    name, the attribute count (columns minus the label column) and the
    number of rows.

    Parameters
    ----------
    filename : path of the CSV file to read.

    Returns
    -------
    2-D integer array of the shuffled data rows (header excluded).
    """
    parsed = np.genfromtxt(filename, dtype="i8", delimiter=',')

    # Row 0 is the header line; everything below it is data.
    rows = parsed[1:, :]

    # Fixed seed so every load of the same file yields the same row order.
    np.random.seed(42)
    np.random.shuffle(rows)

    n_rows, n_cols = rows.shape
    print(filename)
    print("Number of attributes:", n_cols - 1)
    print("Number of points:", n_rows)

    return rows

In [136]:
# Number of (already shuffled) rows kept from each file.
NUM_TEST = 5_000
NUM_TRAINING = 10_000

# read_data returns the rows in shuffled order, so taking the leading rows
# yields a random subset of each file.
test = read_data('fashion-mnist_test.csv')[:NUM_TEST]
training = read_data('fashion-mnist_train.csv')[:NUM_TRAINING]


fashion-mnist_test.csv
Number of attributes: 784
Number of points: 10000
fashion-mnist_train.csv
Number of attributes: 784
Number of points: 60000


### Multi-Class Logistic Algorithm

In [137]:
def get_rand_order(D, rand, num_classes=None):
    """Split a labelled data matrix into a design matrix and one-hot targets.

    Column 0 of ``D`` is read as the integer class label; the remaining
    columns are the attributes. The returned design matrix has a leading
    column of ones (bias term) followed by the attributes.

    Parameters
    ----------
    D : 2-D array-like, one row per point, label in column 0.
    rand : if truthy, the rows are returned in a random order drawn from
        NumPy's global random generator. The caller's array is NOT modified;
        a permuted copy is used instead.
    num_classes : optional number of classes (width of the one-hot matrix).
        When omitted it is inferred as ``max(label) + 1``, which is only
        correct if the largest class label is present in ``D``.

    Returns
    -------
    (X, y) : ``X`` is a float array of shape (n, 1 + n_attributes) and ``y``
        is a float one-hot array of shape (n, num_classes), row-aligned
        with ``X``.

    Raises
    ------
    ValueError
        If a label is negative or not smaller than ``num_classes`` (and, as
        raised by NumPy, if ``D`` is empty while ``num_classes`` is omitted).
    """
    D = np.asarray(D)
    if rand:
        # Index with a permutation instead of np.random.shuffle so the
        # caller's data keeps its original row order.
        D = D[np.random.permutation(D.shape[0])]

    labels = D[:, 0].astype(int)
    if num_classes is None:
        num_classes = np.amax(labels) + 1

    # A negative label would silently index from the end of the one-hot row,
    # so reject out-of-range labels explicitly.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError("class labels must lie in [0, num_classes)")

    y = np.zeros((labels.shape[0], num_classes))
    y[np.arange(labels.shape[0]), labels] = 1

    # Prepend the bias column of ones to the attribute columns.
    X = np.concatenate((np.ones((D.shape[0], 1)), D[:, 1:]), axis=1)

    return X, y

In [138]:
def logistic_regression_multi_class(D_orig, eta, epsilon, max_iter=None):
    """Fit multi-class (softmax) logistic regression by full-batch gradient ascent.

    Each iteration computes the softmax class probabilities for every point,
    forms the gradient ``D^T (y - pi)`` over the whole data set and takes a
    step of size ``eta`` along it. Iteration stops once the squared change in
    the weights, ``sum((W - W_prev)**2)``, is no larger than ``epsilon``.

    Because every step uses all points, the row order has no effect on the
    gradient. The design matrix and one-hot targets are therefore built once
    up front, and ``D_orig`` is never reshuffled or modified.

    Parameters
    ----------
    D_orig : 2-D array, one row per point, class label in column 0.
    eta : step size.
    epsilon : convergence threshold on the squared weight change.
    max_iter : optional cap on the number of updates. ``None`` (default)
        means no cap, in which case a too-small ``epsilon`` can loop forever.

    Returns
    -------
    W : float array of shape (1 + n_attributes, n_classes); row 0 holds the
        bias weights.
    """
    # Encode once: bias column + attributes, and one-hot labels.
    D, y = get_rand_order(D_orig, False)

    W = np.zeros((D.shape[1], y.shape[1]))
    # Start above the threshold so a positive epsilon always gets one update.
    dif = 2 * epsilon
    n_iter = 0

    while dif > epsilon:
        if max_iter is not None and n_iter >= max_iter:
            warnings.warn(
                "logistic_regression_multi_class stopped after reaching "
                "max_iter before the weight change fell below epsilon",
                RuntimeWarning,
            )
            break

        W_prev = W

        # axis=1: normalise across classes for each point.
        pi = softmax(np.matmul(D, W_prev), axis=1)
        gradient = np.matmul(D.T, y - pi)
        W = W_prev + eta * gradient

        dif = np.sum((W - W_prev) ** 2)
        n_iter += 1

    return W
    
    

In [139]:
# Step size 1e-6 and convergence threshold 2.5 on the squared weight change.
W = logistic_regression_multi_class(training, eta=1e-6, epsilon=2.5)

# Show the learned weights with three significant digits.
np.set_printoptions(precision=3)
print("The weights after training are:")
print(W)


The weights after training are:
[[ 4.133e-04 -5.376e-04 -1.269e-03 ... -1.770e-03 -6.738e-03 -1.301e-02]
 [-1.936e-04 -4.600e-06 -2.560e-05 ... -4.600e-06 -6.106e-04 -1.060e-05]
 [-1.032e-03 -1.087e-04  7.622e-04 ... -1.469e-05 -7.847e-05 -7.173e-05]
 ...
 [-1.172e-01 -1.672e-02  2.353e-01 ...  7.341e-04 -1.223e-01 -6.656e-02]
 [-4.983e-02  1.832e-03  6.784e-02 ...  3.665e-03 -7.626e-02 -3.627e-02]
 [-7.379e-03 -2.141e-04  3.071e-03 ... -7.551e-04 -1.048e-02 -3.651e-03]]


In [140]:
def get_f_measure(y, y_hat, num_classes):
    """Return the macro-averaged F-measure over classes ``0 .. num_classes-1``.

    For each class ``i`` the per-class score is ``2 * n_ii / (n_i + m_i)``
    where ``n_i`` is the number of points whose true label is ``i``, ``m_i``
    the number predicted as ``i`` and ``n_ii`` the number correctly predicted
    as ``i``. The result is the unweighted mean of those scores.

    A class that appears in neither ``y`` nor ``y_hat`` has an undefined
    score (0/0); it is counted as 0 so the average stays finite instead of
    turning into NaN.

    Parameters
    ----------
    y : array-like of true integer labels.
    y_hat : array-like of predicted integer labels, same shape as ``y``.
    num_classes : number of classes to average over.

    Returns
    -------
    The macro-averaged F-measure as a NumPy float.
    """
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)

    # Loop-invariant: which points were predicted correctly.
    correct = y == y_hat

    scores = np.zeros(num_classes)
    for i in range(num_classes):
        is_class = y == i
        ni = np.count_nonzero(is_class)             # points truly in class i
        mi = np.count_nonzero(y_hat == i)           # points predicted as class i
        nii = np.count_nonzero(is_class & correct)  # correct predictions of class i

        denom = ni + mi
        if denom > 0:
            scores[i] = (2 * nii) / denom
        # else: class absent from both vectors -> score stays 0

    return np.sum(scores) / num_classes

In [142]:
def find_accuracy(data, y, w):
    """Return the accuracy of a binary linear classifier with labels in {-1, +1}.

    A point is predicted as -1 when ``data @ w`` is negative and +1
    otherwise; the result is the fraction of points whose prediction equals
    the corresponding entry of ``y``.

    Parameters
    ----------
    data : 2-D array of shape (n, d), one row per point.
    y : array-like of n true labels (-1 or +1); flattened before comparison.
    w : weight vector with d entries (shape (d,) or (d, 1)).

    Returns
    -------
    Fraction of correctly classified points, in [0, 1].
    """
    w = np.reshape(w, (-1, 1))
    scores = np.matmul(data, w)
    y_hat = np.where(scores < 0, -1, 1).reshape(-1)

    # Flatten y so a column vector cannot broadcast into an (n, n) comparison.
    y = np.reshape(y, (-1,))

    # Accuracy is the share of matches (the old formula gave the share of
    # mismatches, i.e. the error rate).
    accuracy = np.mean(y_hat == y)

    return accuracy
    

In [141]:
# Find the accuracy on test data
D, y = get_rand_order(test, False)

# Predicted class = most probable class under the trained model.
y_hat = np.argmax(softmax(np.matmul(D, W), axis=1), axis=1)
# Collapse the one-hot targets back to integer labels.
y = np.argmax(y, axis=1)

# Fraction of matching labels, normalised by the number of rows actually
# scored rather than by the NUM_TEST configuration constant.
accuracy = np.mean(y == y_hat)

# One column of W per class, so the class count follows the trained model.
f_measure = get_f_measure(y, y_hat, W.shape[1])

print("The accuracy is: {0:.2f}%".format(accuracy*100))
print("The F1-Score is: {0:.4f}".format(f_measure))


The accuracy is: 82.92%
The F1-Score is: 0.8306


These results match the expected performance of the multi-class logistic regression algorithm. After adjusting the epsilon value, I was able to slightly improve the results while keeping a reasonable training run-time of under a minute. If epsilon is decreased much further, the squared change in the weights between iterations never falls below the threshold, so the weights never converge by this criterion and the training loop runs forever.