In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

In [2]:
train_X = pd.read_csv('train.csv')
test_X = pd.read_csv('test.csv' )
train_X.drop(train_X.columns[train_X.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
test_X.drop(test_X.columns[test_X.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
print(train_X.shape)
print(test_X.shape)

(50000, 1568)
(10000, 1568)


In [3]:
# Shuffle your dataset 
shuffle_train_X = train_X.sample(frac=1, random_state=999)
print(len(shuffle_train_X))

# Define a size for your train set 
train_size = int(0.8 * len(train_X))

# Split your dataset 
train_set = shuffle_train_X[:train_size]
val_set = shuffle_train_X[train_size:]
print(len(train_set))
#print(train_set)
print(len(val_set))
#print(val_set)

50000
40000
10000


In [4]:
X_train = train_set.to_numpy()
X_val = val_set.to_numpy()
X_test = test_X.to_numpy()
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

(40000, 1568)
(10000, 1568)
(10000, 1568)


In [5]:
val_idx = val_set.index.to_list()
train_idx = train_set.index.to_list()
#print(val_idx)
#print(train_idx)
train_result = pd.read_csv('train_result.csv')
new_train_y = train_result.iloc[train_idx]
new_val_y = train_result.iloc[val_idx]

val_y_class = new_val_y['Class']
val_y = val_y_class.to_numpy()

train_y_class = new_train_y['Class']
train_y = train_y_class.to_numpy()
print(val_y.shape)
print(train_y.shape)

(10000,)
(40000,)


In [6]:
def one_hot(y, c):
    
    # y--> label/ground truth.
    # c--> Number of classes.
    
    # A zero matrix of size (m, c)
    y_hot = np.zeros((len(y), c))
    
    # Putting 1 for column where the label is,
    # Using multidimensional indexing.
    y_hot[np.arange(len(y)), y] = 1
    
    return y_hot

In [7]:
def softmax(z):
    
    # z--> linear part.
    
    # subtracting the max of z for numerical stability.
    exp = np.exp(z - np.max(z))
    
    # Calculating softmax for all examples.
    for i in range(len(z)):
        exp[i] /= np.sum(exp[i])
        
    return exp

In [8]:
def predict(X, w, b):
    
    # X --> Input.
    # w --> weights.
    # b --> bias.
    
    # Predicting
    z = X@w + b
    y_hat = softmax(z)
    
    # Returning the class with highest probability.
    return np.argmax(y_hat, axis=1)

In [9]:
def prediction_accuracy(y, y_hat):
    return np.sum(y==y_hat)/len(y)

In [10]:
def model_predict_accuracy(X, y, w, b):
        
    preds = predict(X, w, b)
    model_accuracy = prediction_accuracy(y, preds)

    n_classes = len(np.unique(y))
    
    confusion_matrix = np.zeros((n_classes,n_classes))
    for (true, pred) in zip(y, preds):
        confusion_matrix[int(true-1), int(pred-1)] += 1

    #misclassification_error(confusion_matrix):
    sum_preds = np.sum(confusion_matrix)
    sum_correct = np.sum(np.diag(confusion_matrix))
    misclassification_error = 1.0 - (float(sum_correct) / float(sum_preds))
    
    #print('\n')
    #print("Accuracy:",model_accuracy)
    #print('confusion_matrix:')
    #print(confusion_matrix, '\n')
    #print("sum_preds               :", int(sum_preds)) 
    #print("sum_correct_predictions :", int(sum_correct)) 
    #print("sum_wrong_predictions   :", int(sum_preds - sum_correct))
    #print("misclassification_error :", misclassification_error)
    #plt.plot(losses)
    
    return model_accuracy, confusion_matrix, sum_preds, sum_correct, misclassification_error

In [11]:
def fit_reg_lamb(X, y, lr, epochs, lamb):
    
    # X --> Input.
    # y --> true/target value.
    # lr --> Learning rate.
    # c --> Number of classes.
    # epochs --> Number of iterations.
    
        
    # m-> number of training examples
    # n-> number of features 
    m, n = X.shape
    #print(X.shape)
    
    c = len(np.unique(y)) # number of classes based on unique y values usually train_y values

    
    # Initializing weights and bias randomly.
    np.random.seed(999)
    w = np.random.random((n, c))
    b = np.random.random(c)
    #print(w.shape)
    #print(b.shape)
    
    # Empty list to store losses.
    losses = []
    
    # Training loop.
    for epoch in range(epochs):
        
        # Calculating hypothesis/prediction.
        z = X@w + b
        #print(z.shape)
       
        y_hat = softmax(z)
        #print(y_hat.shape)
       
        
        # One-hot encoding y.
        y_hot = one_hot(y, c)
        #print(y_hot.shape)
        # Calculating the gradient of loss w.r.t w and b.
        w_grad = (1/m)* ( np.dot(X.T, (y_hat - y_hot)) + lamb*w)
        #w_grad1 = (1/m)* ( np.dot(X.T, (y_hat - y_hot))) + (lamb/m)*w
        #print("w_grad - w_grad1")
        #print(np.round(w_grad - w_grad1, 12))
       
        b_grad = (1/m)*np.sum(y_hat - y_hot)

        # Updating the parameters.
        w = w - lr*w_grad
        b = b - lr*b_grad

        # Calculating loss and appending it in the list.
        #loss = -np.mean(np.log(y_hat[np.arange(len(y)), y]))
        #loss_old=-np.sum(y_hot*np.log(y_hat))/float(y_hat.shape[0])
        
        cross_entropy = - np.sum(np.log(y_hat) * (y_hot), axis=1)
        loss = np.mean(cross_entropy)
        
        reg_cost = (lamb/(2*m))*np.sum(w*w)
        total_costJ = loss + reg_cost 
        #print('loss, reg_cost, total_costJ')
        #print(loss, reg_cost, total_costJ)
        
        losses.append(total_costJ)
        
        # Printing out the total cost after regularization at every 100th iteration.
        #if epoch%100==0:
            #print(f'Epoch {epoch} ==> Total CostJ = {total_costJ}')

    return w, b, losses

In [12]:
#

In [13]:
epochs = 2000
for lr in  [0.001, 0.005, 0.01, 0.05, 0.1]:
    for lamb in [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]:
        #print("Training Accuracy \n")
        w, b, losses =  fit_reg_lamb(X_train, train_y, lr=lr, epochs=epochs, lamb=lamb)
        train_accuracy, train_confusion_matrix, train_sum_preds, train_sum_correct, train_misclassification_error = model_predict_accuracy(X_train, train_y, w, b)
        val_accuracy, val_confusion_matrix, val_sum_preds, val_sum_correct, val_misclassification_error = model_predict_accuracy(X_val, val_y, w, b)
        print(f'Lr: {lr:10.4f} Lambda: {lamb: 10.4f} Epochs: {epochs} Train Accuracy: {train_accuracy: 10.4f} Validation Accuracy: {val_accuracy: 10.4f}')

Lr:     0.0010 Lambda:     0.0010 Epochs: 2000 Train Accuracy:     0.0794 Validation Accuracy:     0.0785
Lr:     0.0010 Lambda:     0.0100 Epochs: 2000 Train Accuracy:     0.0794 Validation Accuracy:     0.0785
Lr:     0.0010 Lambda:     0.1000 Epochs: 2000 Train Accuracy:     0.0794 Validation Accuracy:     0.0785
Lr:     0.0010 Lambda:     1.0000 Epochs: 2000 Train Accuracy:     0.0794 Validation Accuracy:     0.0785
Lr:     0.0010 Lambda:    10.0000 Epochs: 2000 Train Accuracy:     0.0794 Validation Accuracy:     0.0785
Lr:     0.0010 Lambda:   100.0000 Epochs: 2000 Train Accuracy:     0.0794 Validation Accuracy:     0.0786
Lr:     0.0010 Lambda:  1000.0000 Epochs: 2000 Train Accuracy:     0.0794 Validation Accuracy:     0.0793
Lr:     0.0050 Lambda:     0.0010 Epochs: 2000 Train Accuracy:     0.0976 Validation Accuracy:     0.0948
Lr:     0.0050 Lambda:     0.0100 Epochs: 2000 Train Accuracy:     0.0976 Validation Accuracy:     0.0948
Lr:     0.0050 Lambda:     0.1000 Epochs: 2000

In [15]:
lr = 0.10 
for epochs in [5000, 8000, 12000]:
    for lamb in [100.0, 1000.0]:
        #print("Training Accuracy \n")
        w, b, losses =  fit_reg_lamb(X_train, train_y, lr=lr, epochs=epochs, lamb=lamb)
        train_accuracy, train_confusion_matrix, train_sum_preds, train_sum_correct, train_misclassification_error = model_predict_accuracy(X_train, train_y, w, b)
        val_accuracy, val_confusion_matrix, val_sum_preds, val_sum_correct, val_misclassification_error = model_predict_accuracy(X_val, val_y, w, b)
        print(f'Lr: {lr:10.4f} Lambda: {lamb: 10.4f} Epochs: {epochs} Train Accuracy: {train_accuracy: 10.4f} Validation Accuracy: {val_accuracy: 10.4f}')

Lr:     0.1000 Lambda:   100.0000 Epochs: 5000 Train Accuracy:     0.2613 Validation Accuracy:     0.2095
Lr:     0.1000 Lambda:  1000.0000 Epochs: 5000 Train Accuracy:     0.2258 Validation Accuracy:     0.1927
Lr:     0.1000 Lambda:   100.0000 Epochs: 8000 Train Accuracy:     0.2730 Validation Accuracy:     0.2175
Lr:     0.1000 Lambda:  1000.0000 Epochs: 8000 Train Accuracy:     0.2258 Validation Accuracy:     0.1927
Lr:     0.1000 Lambda:   100.0000 Epochs: 12000 Train Accuracy:     0.2759 Validation Accuracy:     0.2173
Lr:     0.1000 Lambda:  1000.0000 Epochs: 12000 Train Accuracy:     0.2258 Validation Accuracy:     0.1927
