# Preface
A significant proportion of this project was inspired by videos from 3Blue1Brown. Along with that, this will come heavily inspired from Neural Networks and Deep Learning, the free online book. It can be reached [neuralnetworksanddeeplearning.com](http://neuralnetworksanddeeplearning.com/). This version is outdated so many components were changed into how I conceptually understand them.

In [25]:
# Imports
import random
import numpy as np
from itertools import permutations

In [50]:
class Network(object):
    """
    Creates a Neural Network in specified layers with implemented Neural Network Methods
    """
    def __init__(self, sizes : list[int]):
        """
        Initializes the Network with random, normally distributed, biases and weights. Uses Xavier initializatoin.

        Args:
            sizes (list): List of the number of neurons in each layer
        """
        self.sizes = sizes
        self.biases = [np.random.randn(layer, 1) for layer in sizes[1:]]
        self.weights = [np.random.randn(layer, prev) for prev, layer in zip(sizes[:-1], sizes[1:])]
    
    
    
    def feedForward(self, inputs : list[np.ndarray]):
        """
        Returns the output of the network given the input list

        Args:
            inputs (list): Input to the layer
        """
        for bias, weight in zip(self.biases, self.weights):
            inputs = np.tanh(np.dot(weight, inputs) + bias)
        return inputs
    
    
    
    def SGD(self, training_data : list[tuple[np.ndarray, np.ndarray]], epochs : int, mini_batch_size : int, learningRate : float, test_data : list[tuple[np.ndarray, np.ndarray]] =None) -> None:
        """
        Trains the network using Stochastic Gradient Descent. 

        Args:
            training_data (list of tuples): Training Data for the network
            epochs (int): number of iterations to adjust network
            mini_batch_size (int): size of each batch to pass through
            learningRate (float): Amount to shift layers in backpropogration
            test_data (list, optional): Passed through test data. Defaults to None.
        """
        
        for i in range(epochs):
            np.random.shuffle(training_data)
            mini_batches = [training_data[j:j + mini_batch_size] for j in range(0, len(training_data), mini_batch_size)]
            
            for batch in mini_batches:
                self.update_mini_batch(batch, learningRate)
                
            if test_data:
                print(f'Epoch {i + 1}: {self.evaluate(test_data)} / {len(test_data)}') 
            else:
                print(f'Epoch {i + 1} complete!')
        
        
        
    def update_mini_batch(self, mini_batch : list[tuple[np.ndarray, np.ndarray]], learningRate : float):
        """
        Updates the weights and biases using SGD and Back Propogation

        Args:
            mini_batch (list[tuple[np.ndarray, np.ndarray]]): list containing a subset of the training data
            learningRate (float): Amount by which to shift existing layers after backpropogration
        """
        new_biases = [np.zeros(b.shape) for b in self.biases] #array of zeros w/ same dimension [( 1, 2), (3, 4)] -> [(0, 0), (0, 0)] 
        new_weights = [np.zeros(w.shape) for w in self.weights]
        
        for x, y in mini_batch:
            change_new_biases, change_new_weights = self.backprop(x.reshape(-1, 1), y.reshape(-1, 1))
            new_biases = [cur + change for cur, change in zip(new_biases, change_new_biases)]
            new_weights = [cur + change for cur, change in zip(new_weights, change_new_weights)]
        # Move opposite of maximum gain (in error)
        self.weights = [weight - (learningRate / len(mini_batch)) * new_weight for weight, new_weight in zip(self.weights, new_weights)]
        self.biases = [bias - (learningRate / len(mini_batch)) * new_biases for bias, new_biases in zip(self.biases, new_biases)] 
        
        
    def backprop(self, x, y) -> tuple:
        """
        Return a tuple representing the gradient for the cost function. 

        Args:
            x (np.ndarray): Input array
            y (np.ndarray): Expected output array
        """
        new_biases = [np.zeros(b.shape) for b in self.biases] #array of zeros w/ same dimension [( 1, 2), (3, 4)] -> [(0, 0), (0, 0)] 
        new_weights = [np.zeros(w.shape) for w in self.weights]
        
        #feed forward
        activation = x
        activations = [x] #To store all activations in each layer
        zs = [] #To store all z vectors
        
        # Iterate across layers
        for bias, weight in zip(self.biases, self.weights):
            z = np.dot(weight, activation) + bias # Have to reshape to correct dimensionality
            zs.append(z)
            activation = np.tanh(z)
            activations.append(activation)
        
        # backwards pass
        delta = self.cost_derivative(activations[-1], y) * (1 / np.cosh(zs[-1])) ** 2 # Find the cost of our result (sech^2 from the derivative of tanh)
        
        new_biases[-1] = delta
        new_weights[-1] = np.dot(delta, activations[-2].transpose())
        
        for layer in range(2, len(self.sizes)): # Iterate backwards to update weights/biases
            z = zs[-layer]
            sp =  (1 / np.cosh(z)) ** 2
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) * sp
            new_biases[-layer] = delta
            new_weights[-layer] = np.dot(delta, activations[-layer - 1].transpose())
            
        return (new_biases, new_weights)
    
    def evaluate(self, test_data : list[tuple[np.ndarray, np.ndarray]]):
        """
        Return the number of correct outputs

        Args:
            test_data (list[tuple[np.ndarray, np.ndarray]]): test data in the form of ([input], [output])
        """
        test_results = [(np.argmax(self.feedForward(x.reshape(-1, 1))), np.argmax(y)) for (x,y) in test_data]
        return sum([int(x==y) for (x, y) in test_results])
        
    
    def cost_derivative(self, output_activations : np.ndarray, y : np.ndarray):
        """
        Return vector of partial derivatives of cost with respect to layer for the 

        Args:
            output_activations (np.ndarray): what we got
            y (np.ndarray): what the goal was
        """
        return (output_activations - y)

In [51]:
# Lets test the network!
myNet = Network([20,10, 2]) # have 2 neurons, 3 neurons, 2 neurons.

def createTestData(nums : tuple[int]):
    if 1 in nums:
        return [1, 0]
    return [0, 1]

def convertToInputs1(lst: list) -> list:
    retList = [0] * 20
    for i in range(len(lst)):
        retList[10*i + lst[i]] = 1
    return retList

assert(convertToInputs1([1, 9]) == [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

myData = [(np.array(convertToInputs1(x)), np.array(createTestData(x))) for x in [(np.array([int(10 * random.random()), int(10 * random.random())])) for _ in range(10000)]]
testData = [(np.array(convertToInputs1(x)), np.array(createTestData(x))) for x in [(np.array([int(10 * random.random()), int(10 * random.random())])) for _ in range(100)]]

myNet.SGD(myData, epochs=10, mini_batch_size=100, learningRate=0.1, test_data=testData)

Epoch 1: 86 / 100
Epoch 2: 87 / 100
Epoch 3: 90 / 100
Epoch 4: 91 / 100
Epoch 5: 91 / 100
Epoch 6: 91 / 100
Epoch 7: 93 / 100
Epoch 8: 94 / 100
Epoch 9: 94 / 100
Epoch 10: 95 / 100


In [56]:
# Lets test the network!
myNet = Network([16, 8, 4]) # have 4 neurons, 4 neurons, 2 neurons.

# Here is my rule: numbers must be in order. Output should be how many numbers are in order by the end. Simple O(n) 
def numsInPlace(nums : tuple[int]) -> int:
    return sum([1 if i + 1 == nums[i] else 0 for i in range(len(nums))])

assert(numsInPlace(tuple([1, 3, 2, 4])) == 2)

def convertToInputs2(lst: list) -> list:
    retList = [0] * 16
    for i in range(len(lst)):
        retList[4*i + lst[i] - 1] = 1
    return retList

assert(convertToInputs2([1, 2, 3, 4]) == [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1])

myData = [(np.array(convertToInputs2(x)), np.array([1 if numsInPlace(x) == i else 0 for i in range(4)])) for x in list(permutations([1, 2, 3, 4], 4))] * 5
testData = [(np.array(convertToInputs2(x)), numsInPlace(convertToInputs2(x)) - 1) for x in list(permutations([1, 2, 3, 4], 4)) if random.random() < (1/3)]
# #self, training_data : list[tuple[np.ndarray, np.ndarray]], epochs : int, mini_batch_size : int, learningRate : float, test_data : list[tuple[np.ndarray, int]] =None
myNet.SGD(myData, epochs=20, mini_batch_size=32, learningRate=1, test_data=testData)

Epoch 1: 1 / 7
Epoch 2: 3 / 7
Epoch 3: 4 / 7
Epoch 4: 1 / 7
Epoch 5: 3 / 7
Epoch 6: 2 / 7
Epoch 7: 2 / 7
Epoch 8: 2 / 7
Epoch 9: 5 / 7
Epoch 10: 5 / 7
Epoch 11: 5 / 7
Epoch 12: 6 / 7
Epoch 13: 5 / 7
Epoch 14: 5 / 7
Epoch 15: 5 / 7
Epoch 16: 5 / 7
Epoch 17: 5 / 7
Epoch 18: 5 / 7
Epoch 19: 5 / 7
Epoch 20: 5 / 7


# Analysis
It appears that this Neural Net can effectively predict all of these simple patterns. I'm still curious about it's capabilities: thus, I plan to use it in some of my future projects. Perhaps using different inputs that are outside of just numbers would do a better job. The model was originally designed to predict handwritten numbers as according to the book. I'm sure it can be reworked using more inputs and an adjusted learning rate to be more accurate in application.

I found that using more standardized inputs ([1, 0, 0, 0, 0] rather than [5]) allowed the network to perform better. I'm still curious about it's capabilities and interested to see how it can be applied. 