In [1]:
import numpy as np
import mnist_loader
import scipy.special
import random

In [2]:
class Network:
    def __init__(self,neurons):
        self.layers = len(neurons)
        self.neurons = neurons
        self.weights = []
        self.biases = []
        
        for i in range(1,self.layers):
            rows = neurons[i]
            cols = neurons[i-1]
            
            #Creates a weight numpy array with dimension = dims
            #Initializes them with random normal distributed value between 0-1
            
            #layer_weight = np.zeros((rows,cols))
            layer_weight = np.random.randn(rows,cols)
            self.weights.append(layer_weight)
            layer_bias = np.random.randn(rows,1)
            #layer_bias = np.zeros((rows,1))
            self.biases.append(layer_bias)

In [3]:
train_data, valid_data, test_data = mnist_loader.load_data_wrapper()

In [4]:
def sigmoid(z):
    
    return (1.0/(1.0+scipy.special.expit(-z)))

In [5]:
def feedforward(my_net,X):
    layer_activation_list = []
    weighted_sum_list = []
    for i in range(1,my_net.layers):
        if i==1:
            a = X
        else:
            a = layer_activation_list[-1]
            
        weight = my_net.weights[i-1]
        biases = my_net.biases[i-1]
        z = np.dot(weight,a) + biases
        weighted_sum_list.append(z)
        a = sigmoid(z)
        layer_activation_list.append(a)
        
    return (layer_activation_list,weighted_sum_list)
    

In [6]:
def test(network,data):
    
    activation_list,_ = feedforward(network,data[0])
    prediction = np.argmax(activation_list[-1],axis=0)
    
    accuracy = (sum((prediction == data[1]).astype(np.float32))/data[1].shape[0])*100
    
    return accuracy

In [7]:
def cost_gradient(activation,target):
    #print "Cost Gradients =",activation-target
    
    return (activation - target)

In [8]:
def sigmoid_derivative(z):
    #print "Sigmoid_Derivatives =",sigmoid(z)*(1-sigmoid(z))
    
    return sigmoid(z)*(1-sigmoid(z))

In [9]:
def update_weights(network,activation_list,delta_list,alpha,X):
    
    for i in range(network.layers-1):
        #print "Delta List [",i,"]:",delta_list[i]
        if i==0:
            dcdw = np.dot(delta_list[i],X.transpose())
        else:        
            dcdw = np.dot(delta_list[i],activation_list[i-1].transpose())
        dcdb = np.average(delta_list[i],axis=1).reshape(delta_list[i].shape[0],1)
        
        #print dcdw
        #print "**** Max_Before : {0} , Min_Before : {1} ".format(np.amax(network.weights[i]),np.amin(network.weights[i]))
        
        network.weights[i] -= alpha*dcdw
        network.biases[i] -= alpha*dcdb
        
        #print "Weights of {0} ".format(i),network.weights[i]
        
        #print "**** Max_After : {0} , Min_After : {1} ".format(np.amax(network.weights[i]),np.amin(network.weights[i]))
    return

In [10]:
def errorback(network,last_delta,weighted_list):
    
    delta_list = []
    delta_list.append(last_delta)
    
    for i in range(network.layers-2,0,-1):
        delta = (np.dot((network.weights[i]).transpose(),delta_list[-1]))*(sigmoid(weighted_list[i-1]))
        delta_list.append(delta)
        
    return delta_list

In [11]:
def gradient_descent(network,train_data,valid_data,alpha,epochs):
    
    for i in range(epochs):
        X = train_data[0]
        target = train_data[1]

        activation_list, weighted_list = feedforward(my_net,X)

        last_layer_delta = cost_gradient(activation_list[-1],target)*sigmoid_derivative(weighted_list[-1])

        final_delta_list = errorback(my_net,last_layer_delta,weighted_list)
        final_delta_list = final_delta_list[::-1]

        update_weights(my_net,activation_list,final_delta_list,alpha,X)
        
        accuracy = test(network,valid_data)
        print "Accuracy after Epoch [",i,"]",accuracy
        
    return

In [12]:
my_net = Network([784, 16, 10])
#X = train_data[0]
#activation_list, weighted_list = feedforward(my_net,X)
#print "Last Activation =",activation_list[-1]
#target = train_data[1]
#print target.shape
#test(activation_list[-1],target)
#last_layer_delta = cost_gradient(activation_list[-1],target)*sigmoid_derivative(weighted_list[-1])
#final_delta_list = errorback(my_net,last_layer_delta,weighted_list)
#print last_layer_delta

#Reverse a list
#final_delta_list = final_delta_list[::-1]
#print final_delta_list
epochs = 50
alpha = 1
#update_weights(my_net,activation_list,final_delta_list,alpha,X)
#print my_net.weights
gradient_descent(my_net,train_data,valid_data,alpha,epochs)

Weights of 0  [[ 0.70026457 -0.62464159 -0.09193716 ..., -0.31574784  1.93584355
   1.02249842]
 [-2.00160571 -0.0941441   0.55190387 ..., -0.59356357  1.02823932
  -2.26108415]
 [-1.20389499 -0.17908109  0.96611776 ...,  0.61489999 -0.63692344
  -0.68398859]
 ..., 
 [ 0.27022388 -2.0083846   0.37044703 ...,  0.51091014  0.54152901
  -0.52570161]
 [-0.54796152 -0.39349886  0.97773076 ..., -1.65354212 -0.69426343
  -0.43742664]
 [-0.42743148 -0.05141475  1.18697514 ...,  0.93757159  0.63821346
  -0.73630351]]
Weights of 1  [[ -805.8336811   -750.5176864   -587.97800621  -595.5064649   -817.08406198
   -572.23003062  -841.60595549  -719.4565519   -624.60550096
   -719.69160245  -633.54786007  -569.40717905  -793.90209126  -456.1140454
   -554.05991619  -541.62449282]
 [-4669.11486699 -3912.09230059 -3775.59706086 -3542.59827769 -4851.1729509
  -3318.9361908  -4833.48553401 -3869.18237906 -3766.45324783
  -4436.42000875 -3892.92714149 -3019.53290247 -4861.89812931
  -2639.69971645 -3569.9

Accuracy after Epoch [ 2 ] 9.91
Weights of 0  [[ 0.70026457 -0.62464159 -0.09193716 ..., -0.31574784  1.93584355
   1.02249842]
 [-2.00160571 -0.0941441   0.55190387 ..., -0.59356357  1.02823932
  -2.26108415]
 [-1.20389499 -0.17908109  0.96611776 ...,  0.61489999 -0.63692344
  -0.68398859]
 ..., 
 [ 0.27022388 -2.0083846   0.37044703 ...,  0.51091014  0.54152901
  -0.52570161]
 [-0.54796152 -0.39349886  0.97773076 ..., -1.65354212 -0.69426343
  -0.43742664]
 [-0.42743148 -0.05141475  1.18697514 ...,  0.93757159  0.63821346
  -0.73630351]]
Weights of 1  [[-15856.8336811  -13293.0176864  -13130.47800621 -15646.5064649
  -13359.58406198 -15623.23003062 -13384.10595549 -15770.4565519
  -15675.60550096 -13884.06657552 -15684.54786007 -15620.40717905
  -15844.90209126 -15507.1140454  -15605.05991619 -15592.62449282]
 [-19160.61486699 -15988.34230059 -15851.84706086 -18034.09827769
  -16927.4229509  -17810.4361908  -16909.73553401 -18360.68237906
  -18257.95324783 -16455.13456172 -18384.4271

Accuracy after Epoch [ 5 ] 9.91
Weights of 0  [[ 0.70026457 -0.62464159 -0.09193716 ..., -0.31574784  1.93584355
   1.02249842]
 [-2.00160571 -0.0941441   0.55190387 ..., -0.59356357  1.02823932
  -2.26108415]
 [-1.20389499 -0.17908109  0.96611776 ...,  0.61489999 -0.63692344
  -0.68398859]
 ..., 
 [ 0.27022388 -2.0083846   0.37044703 ...,  0.51091014  0.54152901
  -0.52570161]
 [-0.54796152 -0.39349886  0.97773076 ..., -1.65354212 -0.69426343
  -0.43742664]
 [-0.42743148 -0.05141475  1.18697514 ...,  0.93757159  0.63821346
  -0.73630351]]
Weights of 1  [[-30907.8336811  -28344.0176864  -28181.47800621 -30697.5064649
  -28410.58406198 -30674.23003062 -28435.10595549 -30821.4565519
  -30726.60550096 -28935.06657552 -30735.54786007 -30671.40717905
  -30895.90209126 -30558.1140454  -30656.05991619 -30643.62449282]
 [-33652.11486699 -30479.84230059 -30343.34706086 -32525.59827769
  -31418.9229509  -32301.9361908  -31401.23553401 -32852.18237906
  -32749.45324783 -30946.63456172 -32875.9271

Accuracy after Epoch [ 8 ] 9.91
Weights of 0  [[ 0.70026457 -0.62464159 -0.09193716 ..., -0.31574784  1.93584355
   1.02249842]
 [-2.00160571 -0.0941441   0.55190387 ..., -0.59356357  1.02823932
  -2.26108415]
 [-1.20389499 -0.17908109  0.96611776 ...,  0.61489999 -0.63692344
  -0.68398859]
 ..., 
 [ 0.27022388 -2.0083846   0.37044703 ...,  0.51091014  0.54152901
  -0.52570161]
 [-0.54796152 -0.39349886  0.97773076 ..., -1.65354212 -0.69426343
  -0.43742664]
 [-0.42743148 -0.05141475  1.18697514 ...,  0.93757159  0.63821346
  -0.73630351]]
Weights of 1  [[-45958.8336811  -43395.0176864  -43232.47800621 -45748.5064649
  -43461.58406198 -45725.23003062 -43486.10595549 -45872.4565519
  -45777.60550096 -43986.06657552 -45786.54786007 -45722.40717905
  -45946.90209126 -45609.1140454  -45707.05991619 -45694.62449282]
 [-48143.61486699 -44971.34230059 -44834.84706086 -47017.09827769
  -45910.4229509  -46793.4361908  -45892.73553401 -47343.68237906
  -47240.95324783 -45438.13456172 -47367.4271

Accuracy after Epoch [ 11 ] 9.91
Weights of 0  [[ 0.70026457 -0.62464159 -0.09193716 ..., -0.31574784  1.93584355
   1.02249842]
 [-2.00160571 -0.0941441   0.55190387 ..., -0.59356357  1.02823932
  -2.26108415]
 [-1.20389499 -0.17908109  0.96611776 ...,  0.61489999 -0.63692344
  -0.68398859]
 ..., 
 [ 0.27022388 -2.0083846   0.37044703 ...,  0.51091014  0.54152901
  -0.52570161]
 [-0.54796152 -0.39349886  0.97773076 ..., -1.65354212 -0.69426343
  -0.43742664]
 [-0.42743148 -0.05141475  1.18697514 ...,  0.93757159  0.63821346
  -0.73630351]]
Weights of 1  [[-61009.8336811  -58446.0176864  -58283.47800621 -60799.5064649
  -58512.58406198 -60776.23003062 -58537.10595549 -60923.4565519
  -60828.60550096 -59037.06657552 -60837.54786007 -60773.40717905
  -60997.90209126 -60660.1140454  -60758.05991619 -60745.62449282]
 [-62635.11486699 -59462.84230059 -59326.34706086 -61508.59827769
  -60401.9229509  -61284.9361908  -60384.23553401 -61835.18237906
  -61732.45324783 -59929.63456172 -61858.927

KeyboardInterrupt: 