In [1]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random as rand

In [2]:
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: scalar or NumPy array (anything supporting unary minus and
            ``np.exp``).

    Returns:
        The logistic of ``x``; for array input the result has the same shape.
    """
    return 1.0 / (np.exp(-x) + 1.0)

In [3]:
def softmax(x):
    """Softmax normalised along axis 0 (one distribution per column).

    Args:
        x: array-like of scores; for a 2-D input each column is normalised
            independently.

    Returns:
        Array of the same shape as ``x`` whose entries along axis 0 sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on a zero-length axis.
        return np.exp(x)

    # Softmax is invariant to subtracting a per-column constant. Shifting by
    # the column maximum keeps every exponent <= 0, so np.exp cannot overflow
    # to inf (which previously produced inf/inf = nan for large scores).
    shifted = x - np.max(x, axis=0)
    e = np.exp(shifted)
    return np.divide(e, np.sum(e, axis=0))

In [4]:
def one_hot_encoding(y, m, num_classes=10):
    """Convert a row of integer class labels into a one-hot matrix.

    Args:
        y: 2-D integer array whose first row holds the labels; only
            ``y[0, :m]`` is read.
        m: number of samples to encode.
        num_classes: number of columns in the result (defaults to 10, the
            value that used to be hard-coded).

    Returns:
        Float array of shape ``(m, num_classes)`` where row ``i`` has a 1.0
        in column ``y[0, i]`` and 0.0 elsewhere.

    Raises:
        IndexError: if ``y`` holds fewer than ``m`` labels or a label is
            ``>= num_classes``.
    """
    encoded_matrix = np.zeros((m, num_classes))

    labels = np.asarray(y)[0, :m]
    if labels.shape[0] != m:
        # Slicing truncates silently; keep the error the per-element loop
        # used to raise when asked for more labels than exist.
        raise IndexError(
            'expected {} labels but y has only {}'.format(m, labels.shape[0]))

    # One vectorised assignment instead of a Python loop over samples.
    encoded_matrix[np.arange(m), labels] = 1.0
    return encoded_matrix

In [5]:
def forward_propagation(w1, w2, b1, b2, a0):
    """Run one forward pass through the two-layer sigmoid network.

    Args:
        w1, b1: weights and bias applied to the input ``a0``.
        w2, b2: weights and bias applied to the hidden activations.
        a0: network input, consumed as ``np.dot(w1, a0)``.

    Returns:
        Tuple ``(a1, a2)``: the hidden-layer and output-layer sigmoid
        activations.
    """
    hidden_activation = sigmoid(np.dot(w1, a0) + b1)
    output_activation = sigmoid(np.dot(w2, hidden_activation) + b2)
    return hidden_activation, output_activation

In [6]:
def backward_propagation(a1, a2, x, y, m, w2):
    """Compute parameter gradients for the two-layer sigmoid network.

    Args:
        a1: hidden-layer activations.
        a2: output-layer activations.
        x: network input that produced ``a1``.
        y: targets, same shape as ``a2``.
        m: divisor used to average the gradients (the sample count).
        w2: output-layer weights, used to push the error back to the hidden
            layer.

    Returns:
        Tuple ``(dw1, dw2, db1, db2)``.
    """
    scale = 1.0 / m

    # Output layer: the error term is simply prediction minus target.
    delta_out = a2 - y
    grad_w2 = scale * np.dot(delta_out, a1.T)
    grad_b2 = scale * np.sum(delta_out, axis=1, keepdims=True)

    # Hidden layer: a1 * (1 - a1) is the sigmoid derivative written in terms
    # of the sigmoid's own output.
    sigmoid_slope = a1 * (1 - a1)
    delta_hidden = np.dot(w2.T, delta_out) * sigmoid_slope
    grad_w1 = scale * np.dot(delta_hidden, x.T)
    grad_b1 = scale * np.sum(delta_hidden, axis=1, keepdims=True)

    return grad_w1, grad_w2, grad_b1, grad_b2

In [7]:
def calc_accuracy(actual, test, w1, w2, b1, b2, m):
    """Return the percentage of samples whose predicted class row matches.

    Args:
        actual: one-hot target matrix with one row per sample.
        test: network input handed to ``forward_propagation``.
        w1, w2, b1, b2: network parameters.
        m: number of samples to score.

    Returns:
        Accuracy as a percentage (0-100).
    """
    _, output_activation = forward_propagation(w1, w2, b1, b2, test)
    class_scores = softmax(output_activation)

    # Build a one-hot prediction matrix (one row per sample) by flagging the
    # highest-scoring class of every column.
    predicted = np.zeros(class_scores.shape).T
    winners = np.argmax(class_scores, axis=0)
    predicted[np.arange(m), winners[:m]] = 1

    # A sample counts as correct only when its whole one-hot row matches.
    row_matches = np.all(predicted == actual, axis=1)
    num_correct = np.sum(row_matches)
    return (num_correct / m) * 100

In [8]:
def _load_dataset(csv_path):
    """Read a CSV with a ``y`` label column and return ``(features, labels)``.

    ``features`` is a NumPy array with one column per CSV row (every column
    except ``y``); ``labels`` is the ``y`` column reshaped to ``(1, m)``.
    """
    frame = pd.read_csv(csv_path)
    # Convert to a plain ndarray once so the training loop does not pay for
    # DataFrame-to-array conversion and DataFrame transposes on every epoch.
    features = frame.drop('y', axis=1).T.values
    labels = frame['y'].values.reshape(1, -1)
    return features, labels


def gradient_descent(epochs, alpha, train_path='ex3_train.csv',
                     test_path='ex3_test.csv', hidden_units=25):
    """Train the two-layer sigmoid network with batch gradient descent.

    Side effects: reseeds NumPy's global RNG with 1, draws the cost curve on
    the current matplotlib axes (labelled ``'alpha <alpha>'``) without calling
    ``plt.show()``, and prints the final cost plus training/test accuracy.

    Args:
        epochs: number of full-batch update steps.
        alpha: learning rate.
        train_path: CSV used for training; must contain a ``y`` column.
        test_path: CSV used for the held-out accuracy; same layout.
        hidden_units: width of the hidden layer.

    Returns:
        Accuracy (percent) on the data read from ``test_path``.
    """
    x, labels = _load_dataset(train_path)
    m = labels.shape[1]
    y = one_hot_encoding(labels, m).T

    # Layer sizes follow the data instead of being hard-coded.
    n_features = x.shape[0]
    n_classes = y.shape[0]

    # Small symmetric random weights, zero biases; fixed seed for
    # reproducible runs. w1 is drawn before w2 to keep the same random stream.
    np.random.seed(1)
    w1 = np.random.uniform(-1, 1, (hidden_units, n_features)) * 0.01
    b1 = np.zeros((hidden_units, 1))
    w2 = np.random.uniform(-1, 1, (n_classes, hidden_units)) * 0.01
    b2 = np.zeros((n_classes, 1))

    # Guard for the reported loss only: a saturated sigmoid output of exactly
    # 0 or 1 would otherwise give 0 * log(0) = nan and poison the whole sum.
    eps = np.finfo(float).eps
    cost = []

    for _ in range(epochs):
        a1, a2 = forward_propagation(w1, w2, b1, b2, x)

        a2_safe = np.clip(a2, eps, 1.0 - eps)
        loss = -(1.0 / m) * np.sum(
            np.multiply(y, np.log(a2_safe))
            + np.multiply(1.0 - y, np.log(1.0 - a2_safe)))
        cost.append(float(loss))

        # Gradients use the unclipped activations.
        dw1, dw2, db1, db2 = backward_propagation(a1, a2, x, y, m, w2)

        w1 = w1 - (alpha * dw1)
        b1 = b1 - (alpha * db1)
        w2 = w2 - (alpha * dw2)
        b2 = b2 - (alpha * db2)

    plt.ylabel('Cost')
    plt.xlabel('Epochs')
    plt.plot(cost, label='alpha {}'.format(alpha))

    # With epochs == 0 there is no cost to report; avoid indexing an empty list.
    final_cost = cost[-1] if cost else float('nan')
    print('Cost for alpha {} with epochs {}: {}'.format(alpha, epochs, final_cost))

    # Training accuracy: reuse the data already in memory rather than reading
    # and encoding the training CSV a second time.
    train_acc = calc_accuracy(y.T, x, w1, w2, b1, b2, m)
    print('Accuracy for training data {}'.format(train_acc))

    # Held-out accuracy.
    x_test, labels_test = _load_dataset(test_path)
    m_test = labels_test.shape[1]
    actual_test = one_hot_encoding(labels_test, m_test)
    test_acc = calc_accuracy(actual_test, x_test, w1, w2, b1, b2, m_test)
    print('Accuracy for test data {}\n'.format(test_acc))
    return test_acc


# Learning-rate sweep: each call trains from scratch and adds its cost curve
# to the shared figure.
# Earlier trials, kept for reference: alpha=0.5 / epochs=7500,
# alpha=0.0 / epochs=15000, and alpha=0.05 / epochs=50000 (annotated "93.26",
# presumably its test accuracy).
acc1 = gradient_descent(epochs=10000, alpha=0.1)
acc2 = gradient_descent(epochs=40000, alpha=0.075)
acc3 = gradient_descent(epochs=50000, alpha=0.03)

# Test accuracy keyed by the learning rate that produced it.
acc_list = dict(zip((0.1, 0.075, 0.03), (acc1, acc2, acc3)))

# One legend entry per curve, in the order the runs were plotted.
curve_labels = ['alpha 0.1', 'alpha 0.075', 'alpha 0.03']
plt.legend(curve_labels, loc='upper right')
plt.show()

# Report the learning rate with the highest test accuracy.
best = max(acc_list, key=lambda rate: acc_list[rate])
print('Best test accuracy {} with alpha {}'.format(acc_list[best], best))




Cost for alpha 0.1 with epochs 10000: 0.30799505364495006


Accuracy for training data 96.28571428571429


Accuracy for test data 92.86666666666666



Cost for alpha 0.075 with epochs 40000: 0.10525777108224572


Accuracy for training data 99.45714285714286
Accuracy for test data 93.0

