In [1]:
import numpy as np
import random
random.random_state = 42
import gzip
import pickle
import matplotlib.pyplot as plt
from scipy.misc import imread, imresize, imshow
import time

In [2]:
%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'


%load_ext autoreload
%autoreload 2

In [3]:
class Network(object):
    '''layer_dims =list holding all units of each layer including input'''

    def __init__(self, layer_dims):
        '''Initialize all weights and biases
        
        Parameters =  a dict holding all weights and biases'''
        
        
        
        

        self.num_layers = len(layer_dims)#input, hidden, hidden output
        self.layer_dims = layer_dims #[3,7,7,1]
        self.parameters = {}
        
        for i in range(1,self.num_layers):
            self.parameters['w{}'.format(i)]= np.random.randn(layer_dims[i-1], layer_dims[i])*0.01
            self.parameters['b{}'.format(i)] = np.random.randn(1,layer_dims[i])*0.01
        

    def feedforward(self, a):
        
        '''feedforward function using sigmoid
        for all layers'''
        
        for i in range(1, self.num_layers):
            a =sigmoid(np.dot(a,self.parameters['w'+str(i)])+ self.parameters['b'+str(i)])
        return a
            
    def gather_backprop_data(self, x,y ):
        
        '''feedforward function used to gather
        all the zs (logits) and activations for backpropagation
        we also initialize all weight and biase gradients to 0
        
        '''
        
        gradients = {}    
        for i in range(0,self.num_layers-1):
            gradients['w{}'.format(str(i+1))]= np.zeros((self.layer_dims[i], self.layer_dims[i+1]), dtype = np.float32)
            gradients['b{}'.format(str(i+1))] = np.zeros((1,self.layer_dims[i+1]),dtype = np.float32)
    
        activations =[x] 
        zs = []
        a =x
        
        
        for i in range(1, self.num_layers):
            z= np.dot(a,self.parameters['w'+str(i)])+ self.parameters['b'+str(i)]
            zs.append(z)
            a = sigmoid(z)
            activations.append(a)
            
        return gradients, activations, zs
    

    def calculate_gradients(self,x, y, lambd):
        
        
        '''calculate all gradients with respect to
        cost. Here our cost function is cross_entroupy
        
        last_layer_z_error = dC/dZ  (z is logit)
        All weight gradients also include regularazation gradients
        
        '''
        
        
        
##### First we calculate the output layer gradients #########
        
        gradients, activations, zs = self.gather_backprop_data(x,y)
        
        #gradient of cost with respect to  Z of last layer
        last_layer_z_error = ((activations[-1] - y)) 
        
        
        
        #updating the weight_derivatives of final layer
        gradients['w'+ str(self.num_layers -1)] = np.dot(activations[-2].T,last_layer_z_error)/x.shape[0] + (lambd/x.shape[0])*(self.parameters['w'+ str(self.num_layers -1)])
        
        gradients['b'+ str(self.num_layers -1)] = np.mean(last_layer_z_error, axis =0)
        gradients['b'+ str(self.num_layers -1)] = np.expand_dims(gradients['b'+ str(self.num_layers -1)],0)
    

###HIDDEN LAYER GRADIENTS###

        z_previous_layer = last_layer_z_error
        
        
       
        for i in reversed(range(1,self.num_layers -1)):
            z_previous_layer =np.dot(z_previous_layer,self.parameters['w'+ str(i+1)].T, )*\
                                 (sigmoid_derivative(zs[i-1]))
                
            gradients['w'+str(i)] = np.dot((activations[i-1].T),z_previous_layer)/x.shape[0] + (lambd/x.shape[0])*(self.parameters['w'+str(i)])
            gradients['b'+str(i)] = np.mean(z_previous_layer, axis =0) 
            gradients['b'+str(i)] = np.expand_dims(gradients['b'+str(i)],0)
            
 
        return gradients
    
    
    
    def accuracy (self, testing_x, testing_y):
        
        results =  [(np.argmax(self.feedforward(x)),np.argmax(y))\
                    for x,y in zip(testing_x,testing_y)]
        
        return results, sum([x==y for x,y in results])/len(results)
    
    def predict(self,x):
        a = (self.feedforward(x))
        return np.argmax(a)
        



    def stochastic_gradient_decent(self,training_X, training_Y,testing_X,testing_Y,epochs,\
                                                    splits,learning_rate, lambd):
        print('pre training accuracy:', self.accuracy(testing_X,testing_Y)[1])
        
    
        for epoch in range(epochs):
            indices =np.random.permutation(len(training_X))
            training_X = training_X[indices]
            training_Y = training_Y[indices]
            #random.shuffle(zip(training_X,training_Y))
            mini_batches_X, mini_batches_Y =\
                                 create_mini_batches(training_X, training_Y, splits = splits)
            for  mini_batch_X, mini_batch_Y in zip(mini_batches_X, mini_batches_Y):
               
            
                    
                    
                    gradients = self.calculate_gradients(mini_batch_X, mini_batch_Y, lambd)
                  
                    for i in range(1,self.num_layers):
                        
                        self.parameters['w'+str(i)] = self.parameters['w'+str(i)]- learning_rate * (gradients['w'+str(i)])
                        self.parameters['b'+str(i)] = self.parameters['b'+str(i)] - learning_rate * (gradients['b'+str(i)])
                        
            
            print('post {} epochs training accuracy:'.format(str(epoch+1)), self.accuracy(testing_X,testing_Y)[1])
            
           
            
        #print('error is {0:.3f}'.format(error/len(testing_data)))
        return self.parameters

 

#### Activation functions

In [4]:
#activation functions

def sigmoid(z):
        return 1.0/(1.0+np.exp(-z))
    
def sigmoid_derivative(z):
    
    return sigmoid(z) * (1-sigmoid(z))

#### load data function

In [5]:
def load_data():

    f = gzip.open('C:\\Users\\Moondra\\Desktop\\Computer Vision\\My attempts\\data\\mnist_expanded.pkl.gz', 'rb')
    training_data, validation_data, test_data = pickle.load(f)
    f.close()  
    training_X =training_data[0].reshape(250000, 784)
    training_Y =training_data[1].reshape(250000,1)
    training_Y = np.eye(10)[training_Y].reshape(250000,10)   #np.eye(n_labels)[target_vector]
    test_X = test_data[0].reshape(10000, 784)
    test_Y = test_data[1].reshape(10000,1)
    test_Y = np.eye(10)[test_Y].reshape(10000,10)
    validation_X = validation_data[0]
    validation_Y= validation_data[1].reshape(10000,1)
    validation_Y = np.eye(10)[validation_Y].reshape(10000,10)
    
    return (training_X,training_Y ,test_X, test_Y, validation_X,validation_Y)
    

####  Create batches for SGD function

In [6]:
def create_mini_batches(training_X, training_Y, splits = 5000):
    
    
    
    mini_batches_training_X = np.split(training_X, 5000)
        
    mini_batches_training_Y = np.split(training_Y, 5000)
    
    assert len(mini_batches_training_X) == len(mini_batches_training_Y)
                
    
    return mini_batches_training_X, mini_batches_training_Y

####  Run main code


In [16]:
training_X,training_Y,test_X, test_Y, VD_X, VD_Y= load_data()

In [17]:
NN =Network([784,60,20, 10]) #input, hidden nodes, output nodes

new_parameters= NN.stochastic_gradient_decent(training_X, training_Y,test_X,test_Y,epochs =100,\
                                                    splits =5000,learning_rate =.1, lambd = .50)  #train data, test_data,
                                                                     #epochs =30, batch_size =10
                                                                    #learning_rate = 1.5

pre training accuracy: 0.0982
post 1 epochs training accuracy: 0.101
post 2 epochs training accuracy: 0.101
post 3 epochs training accuracy: 0.1135
post 4 epochs training accuracy: 0.1135
post 5 epochs training accuracy: 0.0974
post 6 epochs training accuracy: 0.1135
post 7 epochs training accuracy: 0.1009
post 8 epochs training accuracy: 0.1028
post 9 epochs training accuracy: 0.0982
post 10 epochs training accuracy: 0.1135
post 11 epochs training accuracy: 0.1135
post 12 epochs training accuracy: 0.1135
post 13 epochs training accuracy: 0.1135
post 14 epochs training accuracy: 0.1135
post 15 epochs training accuracy: 0.1135
post 16 epochs training accuracy: 0.1135
post 17 epochs training accuracy: 0.1135
post 18 epochs training accuracy: 0.1135
post 19 epochs training accuracy: 0.1135
post 20 epochs training accuracy: 0.1028
post 21 epochs training accuracy: 0.1135
post 22 epochs training accuracy: 0.1135
post 23 epochs training accuracy: 0.1135
post 24 epochs training accuracy: 0.10

KeyboardInterrupt: 

# Scores:

1) new_parameters= NN.stochastic_gradient_decent(training_X, training_Y,test_X,test_Y,epochs =100,\
                                                    splits =5000,learning_rate =.01, lambd = .50)


**post 57 epochs training accuracy: 0.1135**
    
    
2) new_parameters= NN.stochastic_gradient_decent(training_X, training_Y,test_X,test_Y,epochs =100,\
                                                    splits =5000,learning_rate =.1, lambd = .50)

**post 40 epochs training accuracy .88 - .93 -  

(fluctuated back and forth for aboutr 20 epochs)


3) new_parameters= NN.stochastic_gradient_decent(training_X, training_Y,test_X,test_Y,epochs =100,\
                                                    splits =5000,learning_rate =.1, lambd = .90)

**post 20 epochs tranining accuracy  .11 percent steady**
                                                    
                                                    
                                                    
4) NN =Network([784,60,20, 10]) #input, hidden nodes, output nodes

new_parameters= NN.stochastic_gradient_decent(training_X, training_Y,test_X,test_Y,epochs =100,\
                                                    splits =5000,learning_rate =.1, lambd = 2.0) 
                                                    
                                                    
**post 20 epochs tranining accuracy  .11 percent steady**


5) NN =Network([784,60,20, 10]) #input, hidden nodes, output nodes

new_parameters= NN.stochastic_gradient_decent(training_X, training_Y,test_X,test_Y,epochs =100,\
                                                    splits =5000,learning_rate =1, lambd = .80)
                                                    
** post 10 epochs training accuracy .11 percent


5)NN =Network([784,60,20, 10]) #input, hidden nodes, output nodes

new_parameters= NN.stochastic_gradient_decent(training_X, training_Y,test_X,test_Y,epochs =100,\
                                                    splits =5000,learning_rate =.1, lambd = 0)
                                                    
**post 15 epochs training accuracy = 97%**
                                                   
                                                    

In [None]:
NN.accuracy(VD_X, VD_Y)

In [None]:
3+
2