In [186]:
import numpy as np
import random 

In [187]:
## class NetworkLayer
# The hidden layer and the output layer will be represented as neurons. 
# Each neuron needs a net input and an output (sigmoid on net input)
# 
# variables: 
#   
# methods: 
#   calc_weight_output 
# 
#   calc_output
#     sigmoid(calc_net_input)
#   
#   d_etot_out (target)
# 
#   d_out_net
# 
#   d_net_weight(index of weight)
class NetworkLayer:
    """A single network layer, represented only by its weight matrix.

    Row ``i`` of the matrix holds the weights leading into the ``i``-th
    neuron of this layer.
    """

    def __init__(self, weights):
        """Store the initial weights (a numpy matrix) for this layer.

        The object is kept by reference; no copy is made.
        """
        self.weights = weights


In [188]:
## class NeuralNetwork
# Groups of neurons form layers in the neural network. 
# The first layer is just the inputs (no need to represent these as neurons). 
# The second layer is the hidden layer. 
# The third layer is the output layer. 
# 
# #change to batches 
# Loop until (convergence or large number reached)
#   First, the network will make a forward pass through all the layers and output a prediction.
#   {DEBUG INFO} print total error before weight update 
#   Second, the network will perform backpropagation to update the neuron weights. 
#     [backpropagation on output layer] The network will first update the output layer weights. 
#     [backpropagation on hidden layer] The network will then update the hidden layer weights. 
#   {DEBUG INFO} print the total error after updating all weights, it should have decreased 
# methods: 
#   forward_pass(layer)
#     run calc_weight_output on every neuron in layer 
#   
#   backpropagation
#     calculate gradient
#     update weights 
#   
#   train(training_data, epochs, batch_size)
class NeuralNetwork:
    """Fully connected input -> hidden -> output network with sigmoid units.

    Both weight matrices are initialised from a standard normal distribution
    and there are no bias terms. Training performs one gradient-descent
    update per training example.

    ``backpropagation`` builds the gradient matrices as matrix products of a
    delta column with a transposed activation column, so network inputs and
    targets must be 2-D column vectors of shape ``(size, 1)``.
    """

    def __init__(self, input_layer_size, hidden_layer_size, output_layer_size):
        """Create the hidden and output layers with random initial weights.

        Args:
            input_layer_size: number of values in one network input.
            hidden_layer_size: number of hidden neurons.
            output_layer_size: number of output neurons.
        """
        # The input layer is just the data itself, so it needs no weights.

        # Hidden layer: each hidden neuron has one weight per network input,
        # giving a (hidden_layer_size, input_layer_size) matrix.
        self.hidden_layer_size = hidden_layer_size
        rand_hidden_weights = np.random.randn(hidden_layer_size, input_layer_size)
        self.hidden_layer = NetworkLayer(rand_hidden_weights)

        # Output layer: each output neuron has one weight per hidden neuron,
        # giving an (output_layer_size, hidden_layer_size) matrix.
        self.output_layer_size = output_layer_size
        rand_output_weights = np.random.randn(output_layer_size, hidden_layer_size)
        self.output_layer = NetworkLayer(rand_output_weights)

    def train(self, training_data, test_data, epochs, batch_size, alpha):
        """Train the network and print the test accuracy after every epoch.

        Args:
            training_data: iterable of ``(inputs, outputs)`` pairs, where both
                items are column vectors.
            test_data: iterable of ``(inputs, label)`` pairs, passed to
                ``run_test`` once per epoch.
            epochs: number of full passes over ``training_data``.
            batch_size: accepted for interface compatibility but currently
                unused; the weights are updated after every single example.
            alpha: learning rate.
        """
        # Both collections are re-read every epoch. Materialise them once so
        # that one-shot iterables (zip objects, generators) are not silently
        # exhausted after the first epoch.
        training_data = list(training_data)
        test_data = list(test_data)

        for _ in range(epochs):
            for inputs, outputs in training_data:
                # forward_pass caches the activations backpropagation reads.
                self.forward_pass(inputs)
                self.backpropagation(inputs, outputs, alpha)
            self.run_test(test_data)

    def run_test(self, test_data):
        """Print how many test examples the network classifies correctly.

        The prediction for an example is the index of the output neuron with
        the largest activation; it is compared with ``==`` against the
        expected value stored in the pair.

        Args:
            test_data: iterable of ``(inputs, label)`` pairs.
        """
        predicted_labels = []
        expected_labels = []
        for inputs, expected in test_data:
            prediction = self.forward_pass(inputs)
            # argmax along the neuron axis; ravel()[0] picks the single entry
            # for a column-vector prediction and also copes with a 1-D one.
            predicted_labels.append(int(np.ravel(np.argmax(prediction, axis=0))[0]))
            expected_labels.append(expected)

        total = len(expected_labels)
        correct = sum(int(predicted == expected)
                      for predicted, expected in zip(predicted_labels, expected_labels))
        # An empty test set would otherwise divide by zero.
        percent = (correct / float(total) * 100) if total else 0.0

        print("Test Results: " + str(correct) + "/" + str(total)
              + " = " + str(percent) + "%")

    def backpropagation(self, inputs, outputs, alpha):
        """Update both weight matrices from the most recent forward pass.

        Must be called after ``forward_pass`` for the same example: it reads
        the cached ``network_input``, ``hidden_layer_output`` and
        ``output_layer_output`` rather than the ``inputs`` argument.

        Args:
            inputs: the network input of the example (unused here, see above).
            outputs: target column vector, same shape as the network output.
            alpha: learning rate.
        """
        # Output-layer delta, one entry per output neuron:
        #   (output - target) * output * (1 - output)
        self.output_deltas = ((self.output_layer_output - outputs)
                              * (self.output_layer_output * (1 - self.output_layer_output)))

        # Hidden-layer delta, one entry per hidden neuron: the output deltas
        # propagated back through the (not yet updated) output weights, times
        # the sigmoid derivative hidden_output * (1 - hidden_output).
        propagated_error = np.dot(self.output_layer.weights.transpose(), self.output_deltas)
        self.hidden_deltas = (propagated_error
                              * self.hidden_layer_output * (1 - self.hidden_layer_output))

        # gradient = delta of neuron * input to neuron. Each product is
        # (n, 1) x (1, m), so it has the same shape as the weight matrix it
        # updates.
        self.output_gradients = np.dot(self.output_deltas, self.hidden_layer_output.transpose())
        self.hidden_gradients = np.dot(self.hidden_deltas, self.network_input.transpose())

        # Gradient-descent step: move against the gradient, scaled by alpha.
        self.output_layer.weights = self.output_layer.weights - alpha * self.output_gradients
        self.hidden_layer.weights = self.hidden_layer.weights - alpha * self.hidden_gradients

    def forward_pass(self, data):
        """Run ``data`` through both layers and return the output activations.

        The input and both layers' activations are cached on the instance
        (``network_input``, ``hidden_layer_output``, ``output_layer_output``)
        for use by ``backpropagation``.

        Args:
            data: network input; a column vector with one row per input value.

        Returns:
            The output layer's sigmoid activations.
        """
        self.network_input = data

        # Hidden layer: weighted sum of the network inputs, then sigmoid.
        hidden_layer_net_input = np.dot(self.hidden_layer.weights, self.network_input)
        self.hidden_layer_output = self.sigmoid(hidden_layer_net_input)

        # Output layer: weighted sum of the hidden activations, then sigmoid.
        output_layer_net_input = np.dot(self.output_layer.weights, self.hidden_layer_output)
        self.output_layer_output = self.sigmoid(output_layer_net_input)

        return self.output_layer_output

    def sigmoid(self, t):
        """Return the logistic function ``1 / (1 + exp(-t))``, element-wise."""
        return 1.0/(1.0+np.exp(-t))

    
    

In [189]:
import mnist_loader

# Hyperparameters, named so they can be tuned in one place.
SEED = 0
HIDDEN_LAYER_SIZE = 15
OUTPUT_LAYER_SIZE = 10
EPOCHS = 1000
BATCH_SIZE = 777       # forwarded to train(), which does not use it yet
LEARNING_RATE = 0.5

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# NeuralNetwork draws its initial weights with np.random.randn; seeding the
# global numpy RNG first makes the printed accuracies repeatable across runs.
np.random.seed(SEED)

# The input layer size is taken from the length of the first training input.
neuralnet = NeuralNetwork(len(training_data[0][0]), HIDDEN_LAYER_SIZE, OUTPUT_LAYER_SIZE)
neuralnet.train(training_data, test_data, EPOCHS, BATCH_SIZE, LEARNING_RATE)

Test Results: 8876/10000 = 88.76%
Test Results: 8957/10000 = 89.57%


KeyboardInterrupt: 