# Implementing a neural network from scratch

Trying to take an object oriented approach and make this as intuitive as possible.

Testing this out on MNIST (the handwritten digit dataset), then will convert it into a Tensorflow NN. Current plan is to use Tensorflow.js and create a web app that shows the training process in real-time with D3 + React. Might also do a visualization in 3dsmax/cinema4d or something

In [189]:
import math
import numpy as np
from sklearn.datasets import fetch_mldata

In [223]:
"""
Load the MNIST dataset
Contains 70,000 examples of hand-written digits in 28x28 pixel form (784 item array)
So the shape is (70000, 784)
"""
mnist = fetch_mldata('MNIST original')

# The original data
# Stored as a 70,000 x 784 array
data = mnist["data"]

# Normalize the data because every grayscale value is out of 255
data = data/255

# Take first 90% for training, last 10% for testing
training_proportion = 0.9
training_index = math.floor(len(data)*training_proportion)

training_data = data[0 : training_index]
testing_data = data[training_index : len(data)]

print("Total data length:",len(data))
print("Training data length:",len(training_data))
print("Testing data length:",len(testing_data))

# The target labels for classifying the data
# These are used to actually train the network for a given input
targets = mnist["target"]
target_vals = list(set(targets))

# use the training targets to calculate output error during back propogation
training_targets = targets[0 : training_index]

# use the testing targets to predict accuracy
testing_targets = targets[training_index : len(targets)]

print("Targets: ", target_vals)

Total data length: 70000
Training data length: 63000
Testing data length: 7000
Targets:  [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]


In [292]:
class NeuralNetwork:
    """
    A basic 3 layer feed forward neural network
    
    The goal here is to abstract each component of the network (neuron, layer, synapses)
    to a high level to easily visualize how the network operates and how it trains
    
    The network records the state of the weights/neurons at each epoch and exports
    a JSON file representing the entire training process
    """
        
    def __init__(self, input_size, hidden_size, output_size):
        """
        Initializes the network given input layer,
        hidden layer, and output layer neuron size
        """
        
        # DEFINE LAYERS
        # Each layer is an array of Neuron objects
        self.input_layer = Layer(size=input_size, name="Input")
        self.hidden_layer = Layer(size=hidden_size, name="Hidden")
        self.output_layer = Layer(size=output_size, name="Output")
        
        # Define a larger object which contains all layers
        # for easy iteration
        self.layers = [self.input_layer,
                       self.hidden_layer,
                       self.output_layer]
        
        # DEFINE SYNAPSES
        # Represented by matrices
        # input -> hidden layer synapses
        self.input_to_hidden_synapses = Synapses(input_size, hidden_size)
        
        # hidden -> output synapses
        self.hidden_to_output_synapses = Synapses(hidden_size, output_size)
    
    def __str__(self):
        """
        Returns a string representation of the network
        Prints the layers with the synapses in between
        """
        delim = "\n"
        return delim.join([str(layer) for layer in self.layers])
        
    #########################################
    # MARK: Training and processing functions
    #########################################
    
    def train(self, data, targets, iterations, learning_rate):
        """
        Trains the network
        
        Data:
        The training data represented as 784-dimensional vectors
        
        Targets:
        The sample labels
        
        Iterations: how many times we train the network
            - on each iteration of training, predict an output and update
            the weights of the network via back propogation
        
        Learning rate: the size of the "steps" that gradient descent takes
            - higher learning rate means larger steps but also risks
              "overstepping" the minimum of the curve
            - low learning rate is more precise but calculating the gradient itself
              is computationally expensive so there is a good middle ground
        """
        for i in range(iterations):
            
            # grab the expected result from the targets/labels array
            expected = targets[i]
            
            # grab the relevant training sample to test against the expected label
            training_sample = np.asmatrix(data[i])
            
            # calculate the predicted value via forward propogation
            prediction = self.forward_propogate(training_sample)
            
            print("\nExpected: ", expected)
            print("Prediction: ", prediction)
            
            # update the weights of the network via backward propogation
            self.back_propogate(training_sample, expected, prediction)
                
    def forward_propogate(self, data, using_matrix_multiplication=True):
        """
        Passes an input data vector (an image grid, a list of tuples, etc) through
        the network and outputs a prediction from the output layer
        """
        
        # matrix multiplication allows us to compute weight x input calculations
        # in large batches and is super efficient
        if using_matrix_multiplication:
            
            # First, multiply the data and synapse matrices to sum up each
            # input / weight combination
            self.input_to_hidden_sum = np.dot(data,
                                              self.input_to_hidden_synapses.weights)
                        
            # Pass the entire summed matrix into the sigmoid function
            self.input_to_hidden_activated = self.activate(self.input_to_hidden_sum)
            
            # Multiply the activated input/hidden layer by
            # the second set of weights

            self.hidden_to_output_sum = np.dot(self.input_to_hidden_activated,
                                               self.hidden_to_output_synapses.weights)
            
            # Finally, pass the last sum of hidden -> output neurons
            # into the sigmoid
            final_prediction = self.activate(self.hidden_to_output_sum)
                    
        # otherwise, manually compute the sums of weights/inputs in each neuron
        
        # first pass the data from the input neurons to the hidden layer
        # multiply all of the inputs by their respective weights
        # add them up at each neuron
        # pass that through the activation function
            # you can call the function on an entire matrix
        
        #second pass that new number and multiply by second set of weights
        # add all of them up in the ouput neuron
        # apply the activation one more time
        # that's your prediction!
        
        return final_prediction
    
    def back_propogate(self, data, expected, output):
        """
        Updates the weights of the network based on training data to make the
        network more accurate
        """
        self.output_err = expected - output
        self.output_delta = self.output_err*self.activate(output, deriv=True)
        
        self.hidden_layer_err = self.output_delta.dot(self.hidden_to_output_synapses.weights.T)
        self.hidden_layer_delta = self.hidden_layer_err*self.activate(self.input_to_hidden_activated, deriv=True)
        
        self.input_to_hidden_synapses.update(data.T.dot(self.hidden_layer_delta))
        self.hidden_to_output_synapses.update(self.input_to_hidden_activated.T.dot(self.output_delta))
    
    def activate(self, x, deriv=False):
        """
        activation sigmoid function
        takes in the summed weights * inputs
        """
        if deriv:
            return x*(1-x)
        return 1/(1+np.exp(-x))
    
    ##########################################
    # MARK: Export and visualization functions
    ##########################################
    
    def export_network(self):
        """
        Exports the network as a list of objects that
        represent the entire state of the network
        
        Can use this for visualization purposes
        
        1. Network object: 
        {
            input_neurons: [],
            training_states: [
                { State 1 }, 
                { State 2 },
                { State 3 },
                ...
                { State n (n = number of epochs) }
            ],
            output_neurons: []
        }
        
            State object:
            (note: only needs to include the variables that change)
            {
                 input_to_hidden_synapses: [], -> matrix of weights
                 hidden_to_output_synapses: [], -> matrix of weights
                 hidden_layer: [] -> list of tuples: (sum, activated_sum)
            }
        """
        return {}

In [293]:
class Layer:
    """
    Stores a list of neurons
    Initialized with the number of neurons in the layer
    """
    def __init__(self, size, name):
        self.name = name
        self.neurons = [Neuron(neuron_id=i) for i in range(size)]
    
    def __str__(self):
        """
        Prints out the name of the layer and the neurons
        """
        s = "Layer: {}\n".format(self.name)
        delim = "\n"           
        return s + delim.join([str(n) for n in self.neurons])

In [294]:
class Neuron:
    """
    Takes in data and applies an activation function to pass to the next layer
    
    These functions are really only used if we don't use matrix multiplication to
    do all of the operations at once...
    
    But it's important to understand how this is happening on an individual-neuron level
    """
    
    # 'states' stores all of the previous values for visualization
    # Format: [(sum, activated_sum)] list of tuples containing the sum, then activated sum
    # This way we can visualize the process of the data actually being passed into the neuron
    # and then being processed
        
    def __init__(self, neuron_id):
        # Stores a single integer as the neuron's ID to identify which weights are relevant to it
        # during forward propogation
        self.neuron_id = neuron_id
        
        # Stores all of the previous values for visualization
        # Format: [(sum, activated_sum)] list of tuples containing the sum, then activated sum
        # This way we can visualize the process of the data actually being passed into the neuron
        # and then being processed
        self.states = []
    
    def process_input(self, weights, inputs):
        """
        performs matrix multiplication between the weights and the inputs
        then passes it to the activation function
        
        for example:
        for neuron 1:
        obtain all weights of form W11, W21, W31...WN1
        obtain all input neuron values I1, I2, I3...IN
        multiply each of them together and them add them up
        
        pass that sum to the activation function
        """
        return
    
    def activate(self, x, deriv=False):
        """
        activation sigmoid function
        takes in the summed weights * inputs
        """
        if deriv:
            return x*(1-x)
        return 1/(1+np.exp(-x))
    
    def save_current_state(self, processed, activated):
        """
        Saves the current state of the neuron into the
        states array given the processed and activated values
        """
        return
    
    def __str__(show_data=False):
        """
        Prints out the number of the neuron
        Shows the current sum and activated sum if specified
        """
        space = "    "
        return space + "Neuron"

In [295]:
class Synapses:
    """ 
    The connections in between layers
    Represented as a matrix of weights
    
    The synapses store all of its previous weights to visualize the training process
    """
    def __init__(self, rows, colums):
        # this initializes the weights to a random Column-d array
        self.weights = np.random.randn(rows, colums)
    
    def __str__():
        """
        Prints out the weights and the labels for each weight
        """
        return ""
        
    # updates the weights
    def update(new_weights):
        self.weights += new_weights

In [296]:
# Create an example network 
nn = NeuralNetwork(input_size=784,
                   hidden_size=15,
                   output_size=1)

In [297]:
# Train the network
nn.train(data=training_data,
         targets=training_targets,
         iterations=1000,
         learning_rate=0.01)


Expected:  0.0
Prediction:  [[0.26416604]]


ValueError: shapes (1,15) and (1,15) not aligned: 15 (dim 1) != 1 (dim 0)