<a href="https://colab.research.google.com/github/phoumithona/matrix_computation/blob/master/Assignment_9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm 
import time

file_data   = "mnist.csv"

# Read every record up front.  The context manager closes the file even if
# reading fails.  Blank lines (e.g. a trailing newline at the end of the
# file) are dropped so that every remaining entry is one
# "label,pixel,pixel,..." record and num_image matches the record count.
with open(file_data, "r") as handle_file:
    data = [line for line in handle_file if line.strip()]

size_row    = 28    # height of the image
size_col    = 28    # width of the image

num_image   = len(data)
count       = 0     # count for the number of images

# normalize the values of the input data to be [0, 1]
def normalize(data):
    """Linearly rescale *data* so its minimum maps to 0 and its maximum to 1.

    Args:
        data: array-like of numbers (converted to a float NumPy array).

    Returns:
        A float array of the same shape as *data*.  When every element is
        equal (max == min) the range is zero, so an all-zero array is
        returned instead of dividing by zero.
    """
    data = np.asarray(data, dtype=float)

    # Compute the extremes once, with NumPy reductions rather than the
    # Python builtins, which iterate element by element.
    lowest = data.min()
    value_range = data.max() - lowest

    if value_range == 0:
        return np.zeros_like(data)

    return (data - lowest) / value_range

# make a matrix each column of which represents an image in a vector form
list_image  = np.empty((size_row * size_col, num_image), dtype=float)
list_label  = np.empty(num_image, dtype=int)

for index, line in enumerate(data):
    # each record is "label,pixel,pixel,..."; strip the trailing newline
    line_data   = line.strip().split(',')
    label       = int(line_data[0])

    # np.asfarray was removed in NumPy 2.0; asarray with dtype=float is the
    # documented replacement and parses the numeric strings the same way
    im_vector   = np.asarray(line_data[1:], dtype=float)
    im_vector   = normalize(im_vector)

    list_label[index]       = label
    list_image[:, index]    = im_vector

# keep the module-level counter equal to the number of images processed
count = len(data)

In [3]:
# Split the dataset: the first 6000 samples are used for training and the
# remaining ones for testing.  Labels are sliced along their only axis;
# images are stored one per column, so they are sliced along axis 1.
train_label, test_label = list_label[:6000], list_label[6000:]
train_image, test_image = list_image[:, :6000], list_image[:, 6000:]

# Transpose so that each row of x_* is one sample.
x_train, y_train = train_image.T, train_label.T
x_test, y_test = test_image.T, test_label.T

# Report the resulting shapes.
print('train_image', x_train.shape)
print('train_label', y_train.shape)
print('test_image', x_test.shape)
print('test_label', y_test.shape)

train_image (6000, 784)
train_label (6000,)
test_image (4000, 784)
test_label (4000,)


In [4]:
# images count for each label
# y_value[0, i] holds the number of training images whose label is i
y_value = np.zeros((1,10))
for i in range (10):
    label_count = np.count_nonzero(y_train==i)
    print("label:",i,"=",label_count)
    # index by i (not i-1): with i-1 the count for label 0 was written to
    # the last slot and every other count was shifted down by one
    y_value[0,i] = label_count

label: 0 = 568
label: 1 = 686
label: 2 = 625
label: 3 = 595
label: 4 = 599
label: 5 = 548
label: 6 = 562
label: 7 = 609
label: 8 = 587
label: 9 = 621


In [0]:
class DeepNeuralNetwork():
    """Fully connected network with two hidden layers and sigmoid activations.

    The weight matrices live in ``self.params`` under the keys ``'u'``
    (input -> first hidden layer), ``'v'`` (first -> second hidden layer)
    and ``'w'`` (second hidden layer -> output).  ``forward_pass`` stores the
    pre-activations ``'Z1'``..``'Z3'`` and activations ``'A0'``..``'A3'`` in
    the same dictionary so that ``backward_pass`` can reuse them.
    """

    def __init__(self, sizes, epochs = 10, lr = 0.001):
        """Store the hyper-parameters and create the initial weights.

        Args:
            sizes: sequence of four layer widths
                ``[input, hidden_1, hidden_2, output]``.
            epochs: number of passes over the training set in ``train``.
            lr: learning rate used by ``update_network_parameters``.
        """
        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = lr

        # we save all parameters in the neural network in this dictionary
        self.params = self.initialization()

    def sigmoid(self, Z, derivative=False):
        """Return the logistic sigmoid of *Z*, or its derivative.

        The derivative is computed as ``s * (1 - s)``.  This is algebraically
        equal to ``exp(-Z) / (exp(-Z) + 1)**2`` but does not evaluate to
        ``inf / inf`` (NaN) when ``exp(-Z)`` overflows.
        """
        activation = 1 / (1 + np.exp(-Z))
        if derivative:
            return activation * (1 - activation)
        return activation

    def initialization(self):
        """Create randomly initialised weight matrices for the three layers.

        Returns:
            dict with keys ``'u'``, ``'v'`` and ``'w'``; each matrix has shape
            ``(fan_out, fan_in)`` and is scaled by ``sqrt(1 / fan_out)``.
        """
        # number of nodes in each layer
        x = self.sizes[0]   # input layer: x
        y = self.sizes[1]   # first hidden layer: y
        z = self.sizes[2]   # second hidden layer: z
        h = self.sizes[3]   # output layer: h

        params = {
            'u': np.random.randn(y, x) * np.sqrt(1. / y),
            'v': np.random.randn(z, y) * np.sqrt(1. / z),
            'w': np.random.randn(h, z) * np.sqrt(1. / h),
        }

        return params

    def _one_hot(self, y):
        """Return *y* as a float target; a scalar class label becomes one-hot."""
        if np.ndim(y) == 0:
            # the number of rows of 'w' is the number of output units
            target = np.zeros(self.params['w'].shape[0])
            target[int(y)] = 1.0
            return target
        return np.asarray(y, dtype=float)

    def forward_pass(self, x_train):
        """Propagate one input through the network and cache the intermediates.

        Args:
            x_train: input that ``params['u']`` can be multiplied with
                (a vector whose length is the input-layer width).

        Returns:
            The output-layer activation ``A3``.
        """
        params = self.params

        # input layer activations becomes sample
        params['A0'] = x_train

        # input layer to first hidden layer
        params['Z1'] = np.dot(params["u"], params['A0'])
        params['A1'] = self.sigmoid(params['Z1'])

        # first hidden layer to second hidden layer
        params['Z2'] = np.dot(params["v"], params['A1'])
        params['A2'] = self.sigmoid(params['Z2'])

        # second hidden layer to output layer
        params['Z3'] = np.dot(params["w"], params['A2'])
        params['A3'] = self.sigmoid(params['Z3'])

        return params['A3']

    # Function to calculate the loss
    def calculate_cost(self, A3, y_train):
        """Return the binary cross-entropy between *A3* and the target.

        Args:
            A3: output activations; a 1-D vector for a single sample or a
                2-D array with one column per sample.
            y_train: scalar class label (converted to one-hot) or an array
                of targets with the same shape as *A3*.

        Returns:
            The summed cross-entropy divided by the number of samples.
        """
        A3 = np.asarray(A3, dtype=float)
        target = self._one_hot(y_train)

        # number of samples: one per column, or a single sample for a vector
        m = A3.shape[1] if A3.ndim > 1 else 1

        # keep the activations strictly inside (0, 1) so log() stays finite
        eps = np.finfo(float).eps
        clipped = np.clip(A3, eps, 1 - eps)

        cost = -np.sum(np.multiply(target, np.log(clipped))
                       + np.multiply(1 - target, np.log(1 - clipped))) / m
        cost = np.squeeze(cost)
        return cost

    def backward_pass(self, y_train, output):
        """Compute the weight gradients for the sample last fed to ``forward_pass``.

        Args:
            y_train: scalar class label (converted to one-hot) or a target
                vector with the same shape as *output*.
            output: the activation returned by ``forward_pass``.

        Returns:
            dict with keys ``'w'``, ``'v'`` and ``'u'``; each entry has the
            same shape as the corresponding weight matrix.
        """
        params = self.params
        change_w = {}

        # Calculate w update.  For a sigmoid output combined with the
        # cross-entropy cost of calculate_cost, the gradient with respect to
        # Z3 is simply (output - target).  The weight gradient is the outer
        # product of that error with the activations feeding the layer.
        error = output - self._one_hot(y_train)
        change_w['w'] = np.outer(error, params['A2'])

        # Calculate v update
        error = np.multiply(np.dot(params['w'].T, error),
                            self.sigmoid(params['Z2'], derivative=True))
        change_w['v'] = np.outer(error, params['A1'])

        # Calculate u update
        error = np.multiply(np.dot(params['v'].T, error),
                            self.sigmoid(params['Z1'], derivative=True))
        change_w['u'] = np.outer(error, params['A0'])

        return change_w

    def update_network_parameters(self, changes_to_w):
        """Apply one gradient-descent step: ``param -= l_rate * gradient``.

        Args:
            changes_to_w: dict mapping a weight key to a gradient of the
                same shape as that weight matrix.
        """
        for key, value in changes_to_w.items():
            # update the whole matrix at once; the gradient has its shape
            self.params[key] -= self.l_rate * value

    def compute_accuracy(self, x_test, y_test):
        """Return the fraction of samples whose predicted class equals its label.

        Args:
            x_test: iterable of input vectors.
            y_test: iterable of integer class labels, aligned with *x_test*.

        Returns:
            A float in ``[0, 1]``; ``0.0`` when there are no samples.
        """
        predictions = [
            np.argmax(self.forward_pass(x)) == y
            for x, y in zip(x_test, y_test)
        ]

        if not predictions:
            return 0.0

        # divide by the actual number of samples, not a fixed constant
        return float(np.mean(predictions))

    def train(self, x_train, y_train, x_test, y_test):
        """Train with per-sample gradient descent and report test accuracy.

        One line with the elapsed time and the accuracy on
        ``(x_test, y_test)`` is printed after every epoch.
        """
        start_time = time.time()
        for iteration in range(self.epochs):
            for x, y in zip(x_train, y_train):
                output = self.forward_pass(x)
                changes_to_w = self.backward_pass(y, output)
                self.update_network_parameters(changes_to_w)

            accuracy = self.compute_accuracy(x_test, y_test)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2}'.format(
                iteration+1, time.time() - start_time, accuracy
            ))