In [19]:
import numpy as np
from datetime import datetime

class MNIST:
    """Fully-connected network with one hidden layer and sigmoid activations.

    The network is trained one sample at a time by plain gradient descent on a
    cross-entropy loss.  Gradients are not derived analytically: every weight
    and bias entry is perturbed in turn and the loss is re-evaluated
    (central differences), which makes training simple but slow.

    Attributes:
        W2, b2: weights (num_input x num_hidden) and bias of the hidden layer.
        W3, b3: weights (num_hidden x num_output) and bias of the output layer.
        learning_rate: step size used by train().
        delta: small constant used both as the finite-difference step and as
            the offset added inside log() so that log(0) is never evaluated.
        input_data, target_data: the sample most recently passed to train();
            feed_forward() and loss_val() read them.
    """

    def __init__(self, num_input, num_hidden, num_output, learning_rate = 1e-2):
        """Create the network and randomly initialise its parameters.

        Args:
            num_input: number of input features.
            num_hidden: number of hidden units.
            num_output: number of output units.
            learning_rate: gradient-descent step size.
        """
        print("*Info: Processing MNIST testing ...")

        self.num_input   = num_input
        self.num_hidden  = num_hidden
        self.num_output  = num_output

        # current training sample; populated by train()
        self.input_data  = []
        self.target_data = []

        # weights: standard-normal draws divided by sqrt(fan_in / 2)
        # biases:  uniform draws from [0, 1)
        self.W2 = np.random.randn(self.num_input, self.num_hidden) / np.sqrt(self.num_input/2)
        self.b2 = np.random.rand(self.num_hidden)
        self.W3 = np.random.randn(self.num_hidden, self.num_output) / np.sqrt(self.num_hidden/2)
        self.b3 = np.random.rand(self.num_output)

        self.learning_rate = learning_rate

        # finite-difference step and log() safety offset
        self.delta = 1e-7

    def _forward(self, input_data):
        """Return the output-layer activations for input_data."""
        z2 = np.dot(input_data, self.W2) + self.b2
        y2 = self.sigmoid(z2)
        z3 = np.dot(y2, self.W3) + self.b3
        return self.sigmoid(z3)

    def _cross_entropy(self, y):
        """Return the summed cross-entropy between y and self.target_data."""
        return  -np.sum(self.target_data*np.log(y + self.delta) + (1-self.target_data)*np.log((1 - y) + self.delta))

    def train(self, input_data, target_data):
        """Perform one gradient-descent step on a single sample.

        All four gradients are evaluated at the same (pre-update) parameter
        values and only then applied, so the step is a true gradient step
        rather than a sequence of updates that each see the previous one.

        Args:
            input_data: input vector (already normalised by the caller).
            target_data: target vector with one entry per output unit.
        """
        self.input_data  = input_data
        self.target_data = target_data

        # The argument is ignored on purpose: numerical_derivative() perturbs
        # the parameter arrays in place, and feed_forward() reads them from self.
        f = lambda x : self.feed_forward()

        params = (self.W2, self.b2, self.W3, self.b3)
        grads  = [self.numerical_derivative(f, p) for p in params]

        for param, grad in zip(params, grads):
            # in-place update keeps self.W2 etc. bound to the same arrays
            param -= self.learning_rate * grad

    def feed_forward(self):
        """Return the cross-entropy loss of the current training sample."""
        return self._cross_entropy(self._forward(self.input_data))

    def loss_val(self):
        """Return the cross-entropy loss of the current training sample.

        Computes the same value as feed_forward(); kept as a separate method
        for callers that report the loss.
        """
        return self._cross_entropy(self._forward(self.input_data))

    def predict(self, input_data):
        """Return the index of the largest output activation for input_data."""
        return np.argmax(self._forward(input_data))

    def accuracy(self, input_data, target_data):
        """Classify every row of input_data and report the hit rate.

        Each row is normalised with ``row / 255.0 * 0.99 + 0.01`` before
        prediction, so callers pass raw (un-normalised) values.

        Args:
            input_data: 2-D array, one sample per row.
            target_data: sequence of labels, one per row (converted with int()).

        Returns:
            (matched_list, not_matched_list, index_label_prediction_list):
            indices classified correctly, indices classified incorrectly, and
            a list of [index, label, prediction] for every incorrect sample.
        """
        matched_list = []
        not_matched_list = []

        # one [index, label, prediction] entry per misclassified sample
        index_label_prediction_list = []

        for index in range(len(input_data)):

            label = int(target_data[index])

            # normalize
            data = (input_data[index, :] / 255.0 * 0.99) + 0.01

            predicted_num = self.predict(data)

            if label == predicted_num:
                matched_list.append(index)
            else:
                not_matched_list.append(index)
                index_label_prediction_list.append([index, label, predicted_num])

        # an empty dataset would otherwise divide by zero
        total = len(input_data)
        current_accuracy = len(matched_list) / total if total else 0.0

        print("       Current Accuracy = ", current_accuracy )

        return matched_list, not_matched_list, index_label_prediction_list

    def sigmoid(self, x):
        """Return the element-wise logistic function 1 / (1 + exp(-x))."""
        return 1 / (1+np.exp(-x))

    def numerical_derivative(self, f, x):
        """Estimate the gradient of f with respect to x by central differences.

        Every entry of x is shifted by +delta and -delta in place, f is
        evaluated at both points, and the entry is restored afterwards.

        Args:
            f: callable invoked as f(x); expected to return a scalar.
            x: numpy array that is perturbed in place while f is evaluated.

        Returns:
            Array shaped like x holding (f(x+delta) - f(x-delta)) / (2*delta)
            for each entry.
        """
        grad = np.zeros_like(x)

        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

        while not it.finished:
            idx = it.multi_index
            tmp_val = x[idx]
            try:
                x[idx] = float(tmp_val) + self.delta
                fx1 = f(x) # f(x+delta)

                x[idx] = tmp_val - self.delta
                fx2 = f(x) # f(x-delta)
            finally:
                # restore the entry even if f raised, so x is never left perturbed
                x[idx] = tmp_val

            grad[idx] = (fx1 - fx2) / (2*self.delta)
            it.iternext()

        return grad


In [20]:
# Load the MNIST training set from CSV.
# Expected layout: one sample per row, 785 columns per row
#   column 0       -> the label (a digit 0-9)
#   columns 1..784 -> the 784 pixel values
training_data = np.loadtxt('./mnist_train.csv', dtype=np.float32, delimiter=',')

print("*Info: Reading training data with shape of", training_data.shape)

# Network dimensions and step size.
#   input nodes  : every column except the label (784 for MNIST)
#   hidden nodes : 8, a free choice -- any positive number works
#   output nodes : 10, one per digit
#
# Shapes through the network for the full data set:
#   (60000 x 784) . W2 (784 x 8) . W3 (8 x 10) -> (60000 x 10)
# i.e. the hidden layer splits what would otherwise be a single
# (784 x 10) mapping into (784 x 8) followed by (8 x 10).
learning_rate = 1e-2
num_outputs   = 10
num_hiddens   = 8
num_inputs    = training_data.shape[1] - 1


*Info: Reading training data with shape of (60000, 785)


In [22]:
# Smoke-test the preprocessing on one randomly chosen training row.

# np.random.randint excludes the upper bound, so pass the row count itself:
# this makes every row (including the last one) selectable and keeps the
# cell correct for data sets of any size.
index         = np.random.randint(0, len(training_data))

# normalizing input_data
# input_data is a 1x784 vector of raw pixel values (assumed 0 - 255),
# rescaled here into the range 0.01 - 1.0
input_data = ((training_data[index, 1:] / 255.0) * 0.99) + 0.01
print(training_data[index, 1:], input_data)

# normalizing target_data
# the entry at the label's position is 0.99 and all others are 0.01,
# e.g. label 7.0 -> [0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.99 0.01 0.01]
target_data = np.zeros(num_outputs) + 0.01    
target_data[int(training_data[index, 0])] = 0.99
        
print(training_data[index, 0], target_data) 
    

[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.  32. 121. 121. 206. 254. 183.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.  46. 157. 241. 244. 253. 253. 253. 253. 253.
 242.  12.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0. 

In [None]:
# Train the network one sample at a time and record the loss after every step.
# Each train() call re-evaluates the loss for every single weight and bias
# entry, so a full pass over the data is very slow.
print("*Info: Starting neural network learning using numerical derivative...")
obj = MNIST(num_inputs, num_hiddens, num_outputs, learning_rate)

epochs        = 1
loss_val_list = []
num_data      = len(training_data)
start_time    = datetime.now()
for step in range(epochs):
    
    for index in range(num_data):
                
        # normalizing input_data and target_data    
        input_data = ((training_data[index, 1:] / 255.0) * 0.99) + 0.01
        target_data = np.zeros(num_outputs) + 0.01    
        target_data[int(training_data[index, 0])] = 0.99
        
        obj.train(input_data, target_data)
        
        # evaluate the loss once and reuse it for both the log line and the history
        current_loss = obj.loss_val()
        
        if (index % 200 == 0):
            print("       epochs = {0} index = {1} loss = {2}" \
                  .format(step, index, current_loss))
            
        loss_val_list.append(current_loss)        

end_time = datetime.now()
        
print()
print("*Info: Done with neural network learning using numerical derivative.")
print("       Elapsed Time: {0}" .format(end_time - start_time))

*Info: Starting neural network learning using numerical derivative...
*Info: Processing MNIST testing ...
       epochs = 0 index = 0 loss = 6.215174990605565
       epochs = 0 index = 200 loss = 3.3825424979876284
       epochs = 0 index = 400 loss = 3.080731693840619
       epochs = 0 index = 600 loss = 3.0942331082376198
       epochs = 0 index = 800 loss = 3.246262665766574
       epochs = 0 index = 1000 loss = 2.7356147580295525
       epochs = 0 index = 1200 loss = 2.3938351915116387
       epochs = 0 index = 1400 loss = 2.6312685478554045
       epochs = 0 index = 1600 loss = 2.8080123370649686
       epochs = 0 index = 1800 loss = 2.9555217533102085
       epochs = 0 index = 2000 loss = 3.4024204719935347
       epochs = 0 index = 2200 loss = 3.219151499579796
       epochs = 0 index = 2400 loss = 2.5250405558708833
       epochs = 0 index = 2600 loss = 3.432782967297394
       epochs = 0 index = 2800 loss = 2.647006092629606
       epochs = 0 index = 3000 loss = 2.922009328230