In [32]:
import numpy as np

In [64]:
class Network:
    """Container for the parameters of a fully connected feed-forward network.

    ``biases[i]`` and ``weights[i]`` belong to layer ``i + 1`` (layer 0 is the
    input layer and has no parameters of its own).
    """

    def __init__(self, network_structure, input_matrix=None, output_matrix=None):
        """Allocate zero-initialised biases and weights for every non-input layer.

        Parameters
        ----------
        network_structure : sequence of int
            e.g. ``[2, 3, 4, 5]``: each element is the number of nodes in that
            layer and the length of the sequence is the number of layers.
        input_matrix : sequence, optional
            Training inputs; its ``len()`` is recorded as the number of
            training examples. May be omitted and supplied later.
        output_matrix : sequence, optional
            Training targets matching ``input_matrix``.
        """
        # These used to be read from undefined (global) names, which raised a
        # NameError on a fresh kernel; they are explicit optional arguments now.
        self.input_matrix = input_matrix
        self.output_matrix = output_matrix
        self.number_of_training_examples = 0 if input_matrix is None else len(input_matrix)
        self.number_of_layers = len(network_structure)

        # Initial biases and weights are all zeros.
        # NOTE(review): with all-zero weights every node of a hidden layer
        # computes the same value and receives the same update, so hidden nodes
        # never differentiate; consider small random initial weights.
        layer_biases = [np.zeros((num_of_nodes, 1)) for num_of_nodes in network_structure[1:]]
        layer_weights = [
            np.zeros((current_layer_num_nodes, previous_layer_num_nodes))
            for previous_layer_num_nodes, current_layer_num_nodes
            in zip(network_structure[:-1], network_structure[1:])
        ]
        self.biases = self._as_object_array(layer_biases)
        self.weights = self._as_object_array(layer_weights)

    @staticmethod
    def _as_object_array(arrays):
        """Pack per-layer arrays into a 1-D object array, one element per layer.

        ``np.array(arrays)`` is not used because it raises ValueError for
        differently shaped layers on current NumPy, and silently builds a 3-D
        float array when all layers happen to share a shape.
        """
        packed = np.empty(len(arrays), dtype=object)
        for index, array in enumerate(arrays):
            packed[index] = array
        return packed
        

In [61]:
# Layer sizes from input to output: 2 nodes, then 4 nodes, then 3 nodes.
structure = np.array((2, 4, 3))
network = Network(network_structure=structure)

In [62]:
# Display the per-layer bias arrays the constructor created (one per non-input layer).
network.biases

array([array([[ 0.],
       [ 0.],
       [ 0.],
       [ 0.]]),
       array([[ 0.],
       [ 0.],
       [ 0.]])], dtype=object)

In [63]:
# Display the per-layer weight matrices; each has shape
# (nodes in this layer, nodes in the previous layer).
network.weights

array([ array([[ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.]]),
       array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])], dtype=object)

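Amazing! Things are going as planned: every non-input layer gets a column vector of biases and a weight matrix of shape (nodes in this layer, nodes in the previous layer), all initialised to zero.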

In [1]:
class Network:
    """Feed-forward network with sigmoid activations, trained by mini-batch
    gradient descent on the quadratic cost.

    Data layout: a matrix of examples holds one example per column, so a batch
    of inputs has shape (n_inputs, batch_size). ``biases[i]`` and ``weights[i]``
    belong to layer ``i + 1`` (layer 0 is the input layer).

    NOTE: declaring ``class Network`` in this cell replaces any earlier
    definition of the class in the kernel, so the constructor is included here
    to keep the class usable on its own.
    """

    def __init__(self, network_structure, input_matrix=None, output_matrix=None):
        """Allocate zero-initialised parameters.

        network_structure -> [2,3,4...5]: each element is the number of nodes
        in that layer and the length is the number of layers.
        ``input_matrix`` / ``output_matrix`` are optional; ``fit`` receives the
        training data directly.
        """
        self.input_matrix = input_matrix
        self.output_matrix = output_matrix
        self.number_of_training_examples = 0 if input_matrix is None else len(input_matrix)
        self.number_of_layers = len(network_structure)
        # NOTE(review): all-zero weights keep the nodes of a hidden layer
        # identical to each other throughout training; consider small random
        # initial weights for networks with hidden layers.
        self.biases = self._as_object_array(
            [np.zeros((num_of_nodes, 1)) for num_of_nodes in network_structure[1:]])
        self.weights = self._as_object_array(
            [np.zeros((current, previous))
             for previous, current in zip(network_structure[:-1], network_structure[1:])])

    @staticmethod
    def _as_object_array(arrays):
        """Pack per-layer arrays into a 1-D object array (one element per layer)."""
        packed = np.empty(len(arrays), dtype=object)
        for index, array in enumerate(arrays):
            packed[index] = array
        return packed

    @staticmethod
    def _as_column(values):
        """Return ``values`` as a float column vector of shape (n, 1)."""
        return np.asarray(values, dtype=float).reshape(-1, 1)

    def gradient_descent(self, original_value, learning_rate, gradient, batch_size=None):
        """Return the new value after one gradient-descent step.

        ``gradient`` is the gradient summed over ``batch_size`` examples; it is
        averaged before the step. When ``batch_size`` is omitted the stored
        ``number_of_training_examples`` is used. Raises ValueError if the
        resulting example count is zero.
        """
        if batch_size is None:
            batch_size = self.number_of_training_examples
        if not batch_size:
            raise ValueError("cannot average a gradient over zero examples")
        return original_value - learning_rate * gradient / float(batch_size)

    def sigmoid(self, z):
        """Element-wise logistic function."""
        return 1.0 / (1.0 + np.exp(-z))

    def sigmoid_prime(self, z):
        """Element-wise derivative of the logistic function at ``z``."""
        activation = self.sigmoid(z)
        return activation * (1 - activation)

    def layer_output(self, input_matrix, layer):
        """Return ``(z, activation)`` of ``layer`` (1-based) for the given input."""
        layer_weights = self.weights[layer - 1]
        layer_biases = self.biases[layer - 1]
        # The (n, 1) bias column broadcasts across every example column.
        z = np.dot(layer_weights, input_matrix) + layer_biases
        output_matrix = self.sigmoid(z)
        return (np.array(z), np.array(output_matrix))

    def cost_function_derivative(self, predicted_output, output_matrix=None):
        """Derivative of the quadratic cost w.r.t. the output activations.

        ``output_matrix`` defaults to the targets stored on the instance.
        """
        if output_matrix is None:
            output_matrix = self.output_matrix
        return predicted_output - output_matrix

    def feed_forward(self, input_matrix):
        """Propagate ``input_matrix`` through every layer.

        Returns ``(zs, activations)``: the weighted inputs of each non-input
        layer, and the activations of every layer starting with the input.
        A 1-D input is treated as a single example.
        """
        current_input = np.asarray(input_matrix, dtype=float)
        if current_input.ndim == 1:
            current_input = current_input.reshape(-1, 1)
        zs = []
        activations = [current_input]
        for layer in range(1, self.number_of_layers):
            z, output = self.layer_output(current_input, layer)
            activations.append(output)
            zs.append(z)
            current_input = output
        return (zs, activations)

    def back_propagation(self, input_matrix, output_matrix, zs, activations):
        """Return ``(delta_b, delta_w)``: per-layer gradients of the quadratic
        cost, summed over the examples (columns) of the batch.

        ``zs`` and ``activations`` must come from ``feed_forward`` on the same
        batch; ``input_matrix`` is kept for interface compatibility
        (``activations[0]`` already holds it).
        """
        delta_b = [np.zeros(bias.shape) for bias in self.biases]
        delta_w = [np.zeros(weights.shape) for weights in self.weights]
        # Reshape so a mismatched target fails loudly instead of broadcasting.
        target = np.asarray(output_matrix, dtype=float).reshape(activations[-1].shape)

        # Output layer error.
        delta = self.cost_function_derivative(activations[-1], target) * self.sigmoid_prime(zs[-1])
        delta_b[-1] = delta.sum(axis=1, keepdims=True)
        delta_w[-1] = np.dot(delta, activations[-2].transpose())

        for l in range(2, self.number_of_layers):
            # l counts layers backwards from the output; weights[-l+1] are the
            # weights of the layer just after the one being processed.
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * self.sigmoid_prime(zs[-l])
            delta_b[-l] = delta.sum(axis=1, keepdims=True)
            delta_w[-l] = np.dot(delta, activations[-l - 1].transpose())

        return (delta_b, delta_w)

    def evaluate(self, test_data):
        """Count the ``(x, y)`` pairs in ``test_data`` that are classified correctly.

        For multi-output networks the prediction is the index of the largest
        output, and ``y`` may be a class index or a one-hot vector. For a
        single-output network both prediction and ``y`` are thresholded at 0.5.
        """
        correctly_predicted = 0
        for x, y in test_data:
            _, activations = self.feed_forward(self._as_column(x))
            output = activations[-1].ravel()
            expected = np.asarray(y, dtype=float).ravel()
            if output.size == 1:
                predicted_label = int(output[0] >= 0.5)
                expected_label = int(expected[0] >= 0.5)
            else:
                predicted_label = int(np.argmax(output))
                expected_label = int(np.argmax(expected)) if expected.size > 1 else int(expected[0])
            if predicted_label == expected_label:
                correctly_predicted += 1
        return correctly_predicted

    def fit(self, train_data, learning_rate, epochs, mini_batch_size, test_data=None):
        """Train with mini-batch gradient descent.

        ``train_data`` and ``test_data`` are iterables of ``(x, y)`` pairs.
        After each epoch, if test data was given, the number of correctly
        classified test examples is printed. Raises ValueError for empty
        training data or a mini-batch size below 1.
        """
        train_pairs = list(train_data)
        test_pairs = [] if test_data is None else list(test_data)
        if not train_pairs:
            raise ValueError("train_data is empty")
        if mini_batch_size < 1:
            raise ValueError("mini_batch_size must be at least 1")

        self.epochs = epochs
        self.number_of_training_examples = len(train_pairs)
        # One example per column.
        input_train_matrix = np.hstack([self._as_column(x) for x, _ in train_pairs])
        output_train_matrix = np.hstack([self._as_column(y) for _, y in train_pairs])

        for epoch in range(self.epochs):
            for start in range(0, len(train_pairs), mini_batch_size):
                mini_batch_input = input_train_matrix[:, start:start + mini_batch_size]
                mini_batch_output = output_train_matrix[:, start:start + mini_batch_size]
                zs, activations = self.feed_forward(mini_batch_input)
                delta_b, delta_w = self.back_propagation(
                    mini_batch_input, mini_batch_output, zs, activations)
                # Average over this batch (the last one may be smaller).
                batch_count = mini_batch_input.shape[1]
                for index in range(len(self.weights)):
                    self.biases[index] = self.gradient_descent(
                        self.biases[index], learning_rate, delta_b[index], batch_count)
                    self.weights[index] = self.gradient_descent(
                        self.weights[index], learning_rate, delta_w[index], batch_count)

            if test_pairs:
                correctly_predicted = self.evaluate(test_pairs)
                print("Epoch {0}: {1}/{2}".format(epoch, correctly_predicted, len(test_pairs)))
            
        
                
        

In [91]:
import mnist_loader

In [92]:
# Load the MNIST training / validation / test splits through mnist_loader.
# Per the IOError shown below, the loader tries to open '../data/mnist.pkl.gz',
# so that file must be present (presumably relative to the notebook's working
# directory - confirm against mnist_loader) before this cell can run.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

IOError: [Errno 2] No such file or directory: '../data/mnist.pkl.gz'