# Implementing a neural network from scratch

Trying to take an object-oriented approach and make this as intuitive as possible.
Probably going to test this out on MNIST, then convert it into a TensorFlow NN.

Note: this is not a self-contained tutorial on how neural networks work, but I explain why certain functions are written the way they are for clarity's sake. I will assume anyone using this for self-education has already read about the basic structure of neural networks/perceptrons.

In [1]:
import math
import numpy as np
from sklearn.datasets import fetch_mldata

In [125]:
# Load the MNIST dataset:
# 70,000 examples of hand-written digits, each a 28x28 pixel image
# flattened into 784 values.
# NOTE(review): fetch_mldata appears to have been removed from recent
# scikit-learn releases (fetch_openml is the documented replacement) -
# confirm against the installed version.
mnist = fetch_mldata('MNIST original')

# Raw pixel data, stored as a 70,000 x 784 array
data = mnist["data"]

# Row at which the data is cut: everything before it (the first 80%) is
# used for training, everything from it onwards (the last 20%) for testing
split_index = math.floor(len(data)*0.8)
training_data = data[:split_index]
testing_data = data[split_index:]

print("Total data length:",len(data))
print("Training data length:",len(training_data))
print("Testing data length:",len(testing_data))

# Target labels used to classify the data (the digits 0 through 9)
targets = mnist["target"]

Total data length: 70000
Training data length: 56000
Testing data length: 14000


In [112]:
class NeuralNetwork:
    """
    A simple feed-forward neural network with one hidden layer

    Each component of the network (neuron, layer, synapses) is modelled as
    its own object so that it is easy to see how data moves through the
    network and how training changes it.

    Planned: record the state of the weights/neurons at every epoch and
    export the whole training process as JSON. The training, propagation
    and export methods below are still placeholders.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        Builds the three layers and the two sets of synapses between them

        input_size: number of neurons in the input layer
        hidden_size: number of neurons in the hidden layer
        output_size: number of neurons in the output layer
        """

        # LAYERS
        # One Layer (a list of Neuron objects) per size, created in
        # input -> hidden -> output order and kept together in one list
        self.layers = [Layer(neuron_count)
                       for neuron_count in (input_size, hidden_size, output_size)]

        # Named handles onto the same Layer objects stored in self.layers
        self.input_layer, self.hidden_layer, self.output_layer = self.layers

        # SYNAPSES
        # Weights connecting the input layer to the hidden layer
        self.input_to_hidden_synapses = Synapses(input_size, hidden_size)

        # Weights connecting the hidden layer to the output layer
        self.hidden_to_output_synapses = Synapses(hidden_size, output_size)


    #########################################
    # MARK: Training and processing functions
    #########################################

    def train(self, epochs, learning_rate):
        """
        Trains the network (placeholder: currently always returns 0)

        epochs: the number of training iterations
        learning_rate: the size of the "steps" gradient descent takes
            - a higher rate takes larger steps but risks "overstepping"
              the minimum of the curve
            - a lower rate is more precise, but every gradient calculation
              is computationally expensive, so a middle ground works best
        """
        return 0

    def forward_propogate(self, data, using_matrix_multiplication=True):
        """
        Passes an input data vector (an image grid, a list of tuples, etc)
        through the network to produce a prediction from the output layer

        Placeholder: returns 0 on the matrix multiplication path and
        None on the manual path.
        """
        if not using_matrix_multiplication:
            # Manual path: compute the weighted sums neuron by neuron.
            # Not implemented yet; the planned steps are:
            #   1. pass the data from the input neurons to the hidden layer
            #      - multiply every input by its respective weight
            #      - add the products up at each hidden neuron
            #      - pass each sum through the activation function
            #        (the function can be called on an entire matrix)
            #   2. multiply those activated values by the second set of weights
            #      - add them all up in the output neuron
            #      - apply the activation function one more time
            #      - that's the answer!
            #
            # Implement this with scalar multiplication first, then show
            # how the same thing is done with matrix multiplication.
            return None

        # Matrix multiplication path: computes the weight x input products
        # in large batches, which is far more efficient
        return 0

    def backward_propogate(self):
        """
        Updates the weights of the network from the training data so the
        network becomes more accurate (placeholder: currently returns 0)
        """
        return 0

    ##########################################
    # MARK: Export and visualization functions
    ##########################################

    def export_network(self):
        """
        Exports the network as a list of objects describing its entire
        state, for visualization purposes

        Placeholder: currently returns the network object itself.
        The planned export format is:

        1. Network object:
        {
            input_neurons: [], -> doesn't change with training
            training_states: [
                { State 1 },
                { State 2 },
                { State 3 },
                ...
                { State n (n = number of epochs) }
            ],
            output_neurons: [] -> doesn't change with training
        }

            State object:
            #note: only needs to include the variables that change
            {
                 input_to_hidden_synapses: [], -> matrix of weights
                 hidden_to_output_synapses: [], -> matrix of weights
                 hidden_layer: [] -> list of tuples: (sum, activated_sum)
            }
        """
        return self

In [106]:
class Layer:
    """
    One layer of the network, held as a list of Neuron objects

    size: the number of neurons to create for this layer
    """
    def __init__(self, size):
        # Build the full list first and attach it only once it is complete
        created = []
        for _ in range(size):
            created.append(Neuron())
        self.neurons = created

In [107]:
class Neuron:
    """
    Takes in data and applies an activation function to pass to the next layer

    These functions are really only used if we don't use matrix multiplication to
    do all of the operations at once...

    But it's important to understand how this is happening on an individual-neuron level
    """

    def __init__(self):
        # 'all_values' stores all of the previous values for visualization
        # Format: [(sum, activated_sum)] list of tuples containing the sum, then activated sum
        # This way we can visualize the process of the data actually being passed into the neuron
        # and then being processed
        #
        # Created per instance on purpose: a list defined on the class body
        # would be one shared object, so every neuron would append to (and
        # read from) the same history.
        self.all_values = []

    def process_input(self, weights, inputs):
        """
        Intended to perform matrix multiplication between the weights and
        the inputs, then pass the result to the activation function

        weights: the weights on the connections coming into this neuron
        inputs: the values arriving over those connections

        Placeholder: not implemented yet, currently always returns 0.
        """
        return 0

    def activate(self, x, deriv=False):
        """
        Sigmoid activation function

        x: the summed weights * inputs (a number or a numpy array)
        deriv: when True, return the sigmoid derivative instead

        Returns 1/(1+e^-x) by default.
        With deriv=True returns x*(1-x). This is the sigmoid derivative
        expressed in terms of the sigmoid's *output*, so x must then be a
        value that has already been passed through activate(), not the raw sum.
        """
        if deriv:
            return x*(1-x)
        return 1/(1+np.exp(-x))

In [108]:
class Synapses:
    """
    The connections in between two layers
    Represented as a matrix of weights

    Planned: store all previous weights so the training process can be
    visualized (not implemented yet; only the current weights are kept)
    """
    def __init__(self, rows, colums):
        """
        rows: number of neurons in the layer the connections start from
        colums: number of neurons in the layer the connections lead to
                (spelling kept as-is because callers may pass it by keyword)
        """
        # Random starting weights: a rows x colums array of samples drawn
        # from the standard normal distribution
        self.weights = np.random.randn(rows, colums)

    def update(self):
        """
        Updates the weights

        Placeholder: not implemented yet, currently always returns 0.
        """
        return 0

In [109]:
# Build a small demo network: 2 input neurons, 3 hidden neurons, 1 output neuron
nn = NeuralNetwork(input_size=2, hidden_size=3, output_size=1)