In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from numpy import genfromtxt
from sklearn.metrics import accuracy_score
import warnings
#from acc_calc import accuracy

In [12]:
#read all data in train_data.csv and train_labels.csv and save in x and y respectively
X = genfromtxt('train_data.csv', delimiter=',', filling_values = 0)
y = genfromtxt('train_labels.csv', delimiter=',', filling_values = 0)

print("size of inputs is X: ", X.shape,"\nsize of labels is y: ", y.shape)

size of inputs is X:  (24754, 784) 
size of labels is y:  (24754, 4)


In [13]:
#using scikit learn to split data into training and learning

# an 80/20 split was used (based on the pareto principle) initially, however not much difference was observed in accuracy with a 60/40 split, I chose to go for 60/40 in final run as it is less computationally intensive with a dataset of 24,000 records, additionally it gives us more data for validation. However, changing epochs from 100 to 150 did make a difference hence we chose to increase that instead

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

print("size of train inputs is X_train: ", X_train.shape,"\nsize of train labels is y: ", y_train.shape,"\nsize of test inputs is X_test: ", X_test.shape,"\nsize of train labels is y_test: ", y_test.shape)

size of train inputs is X_train:  (14852, 784) 
size of train labels is y:  (14852, 4) 
size of test inputs is X_test:  (9902, 784) 
size of train labels is y_test:  (9902, 4)


In [14]:
# Sigmoid activation function

def sigmoid(A):
    #suppress warnings needed because numpy uses float(64) by default and this function can return results larger than that
    return 1 / (1 + np.exp(-A))

#Derivative of sigmoid activation function
def sigmoid_derivative(A):
    return A * (1-A)

# softmax activation function - used for output layer
def softmax(A):
    expA = np.exp(A - np.max (A))
    return expA / expA.sum(axis=1, keepdims=True)

In [15]:
#declaring the number of hidden layer neurons, classes, learning rate and number of epoch for the training
#these values are selected after some trial and error and seem to give accuracy > 90
n_hidden = 50
num_classes = 4
learning_rate = 0.0002
epochs  = 100

In [16]:
#initialize weight matrix for hidden layer of size n_hidden x n_inputs (number of hidden layers) - we will use a numpy random matrix function that will generate a random matrix with values from a normal distribution

n_inputs = X_train.shape[1] #number of features
#print(n_inputs)

#hidden layer weights of size (no of features x no of nodes) - this eliminates the need to transpose later
weight_hidden = np.random.randn(n_inputs, n_hidden)
#print(weight_hidden)
print("Hidden layer weights matrix dimension: {}".format(weight_hidden.shape))

#initialize random weights for output layer of size num_classes x n_hidden
weight_output = np.random.randn(n_hidden, num_classes)
print("Output layer weights matrix dimension: {}".format(weight_output.shape))
#print(weight_output)

#initilaize bias matrix of size 50x1 for hiddden layer
hidden_bias = np.random.randn(n_hidden)
print("Hidden layer biases matrix dimension: {}".format(hidden_bias.shape))

#initilaize bias matrix of size 4x1 for output layer
output_bias = np.random.randn(num_classes)
print("Output layer biases matrix dimension: {}".format(output_bias.shape))

Hidden layer weights matrix dimension: (784, 50)
Output layer weights matrix dimension: (50, 4)
Hidden layer biases matrix dimension: (50,)
Output layer biases matrix dimension: (4,)


In [17]:
# forward propagation

def forward(X_training_data, hidden_layer_weights, output_layer_weights, hidden_layer_bias, output_layer_bias):
    hidden_layer_Sum = np.dot(X_training_data, hidden_layer_weights) + hidden_layer_bias
    hidden_layer_Sig = sigmoid(hidden_layer_Sum)
    output_layer_Sum = np.dot(hidden_layer_Sig, output_layer_weights) + output_layer_bias
    final_output = softmax(output_layer_Sum)

    #we are interested in getting the final output and output of hidden layer from forward propagation to use it for backpropagation
    return final_output, hidden_layer_Sig

In [18]:
# backpropagation
def backward(X_training_data, y_training_data, hidden_layer_output, output_weight_matrix, act_output):
    #calculating weight delta for output layer
    output_error = act_output - y_training_data
    delta_weights_output = np.dot(hidden_layer_output.T, output_error)

    #calculating weight delta for hidden layer
    hidden_layer_error = np.dot(output_error, output_weight_matrix.T)
    hidden_layer_error_output_mul = sigmoid(hidden_layer_output) * hidden_layer_error #just an intermediate step
    delta_weights_hidden = np.dot(X_training_data.T, hidden_layer_error_output_mul)

    #calculating delta biases for output and hidden layer
    delta_bias_output = output_error
    delta_bias_hidden = hidden_layer_error_output_mul

    return delta_weights_output, delta_weights_hidden, delta_bias_output, delta_bias_hidden

In [19]:
#main program

#suppressing warnings for float overflow when using exp in sigmoid function
warnings.filterwarnings('ignore')

for i in range (epochs):
    #forward run through network
    network_output, hidden_layer_output = forward(X_train, weight_hidden, weight_output, hidden_bias, output_bias)

    #backward run through network
    delta_output_weights, delta_hidden_weights, delta_output_bias, delta_hidden_bias = backward(X_train, y_train, hidden_layer_output, weight_output, network_output)

    #update weights as per the delta derived from back propagation
    weight_hidden = weight_hidden - learning_rate * delta_hidden_weights
    hidden_bias = hidden_bias - learning_rate * delta_hidden_bias.sum(axis = 0)
    weight_output = weight_output - learning_rate * delta_output_weights
    output_bias = output_bias - learning_rate * delta_output_bias.sum(axis = 0)


    #The code below will run test data through FFN after every 10 epochs to see improving accuracy - this is commented out to make epochs quicker - uncomment to see accuracy as epochs run

    if i % 10 == 0:

        y_predicted= forward(X_test, weight_hidden, weight_output, hidden_bias, output_bias)[0] #extract first return value of forward function

        #onehot encode the output
        #reference - https://www.adamsmith.haus/python/answers/how-to-replace-elements-in-a-numpy-array-if-a-condition-is-met-in-python
        y_predicted = np.where(y_predicted < 0.5, 0, y_predicted)
        y_predicted = np.where(y_predicted >= 0.5, 1, y_predicted)
        print("Epoch No.: {} , Accuracy: {:.2f}%".format(i, accuracy_score(y_test, y_predicted) * 100))
        #print("Epoch No.: {} , Accuracy: {:.2f}%".format(i, accuracy(y_test, y_predicted) * 100))

print("Model Training Completed")

Epoch No.: 0 , Accuracy: 24.46%
Epoch No.: 10 , Accuracy: 85.26%
Epoch No.: 20 , Accuracy: 91.03%
Epoch No.: 30 , Accuracy: 92.77%
Epoch No.: 40 , Accuracy: 93.21%
Epoch No.: 50 , Accuracy: 93.44%
Epoch No.: 60 , Accuracy: 93.89%
Epoch No.: 70 , Accuracy: 94.58%
Epoch No.: 80 , Accuracy: 94.68%
Epoch No.: 90 , Accuracy: 94.86%
Model Training Completed


In [20]:
#Run test input through trained model and see accuracy

y_predicted= forward(X_test, weight_hidden, weight_output, hidden_bias, output_bias)[0] #extract first return value of forward function

#onehot encode the output
#reference - https://www.adamsmith.haus/python/answers/how-to-replace-elements-in-a-numpy-array-if-a-condition-is-met-in-python
y_predicted = np.where(y_predicted < 0.5, 0, y_predicted)
y_predicted = np.where(y_predicted >= 0.5, 1, y_predicted)

print("Test Data Prediction Accuracy: {:.2f}%".format(accuracy_score(y_test, y_predicted) * 100))
#print("Epoch No.: {} , Accuracy: {:.2f}%".format(i, accuracy(y_test, y_predicted) * 100))
print(y_predicted)
print(y_predicted.shape)

Test Data Prediction Accuracy: 95.02%
[[0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 ...
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]]
(9902, 4)


In [21]:
np.save('Trained_weight_hidden.npy', weight_hidden)
np.save('Trained_bias_hidden.npy', hidden_bias)
np.save('Trained_weight_output.npy', weight_output)
np.save('Trained_bias_output.npy', output_bias)