# Preface
A significant proportion of this project was inspired by videos from 3Blue1Brown. Along with that, this will come heavily inspired from Neural Networks and Deep Learning, the free online book. It can be reached [neuralnetworksanddeeplearning.com](http://neuralnetworksanddeeplearning.com/). This version is outdated so many components were changed into how I conceptually understand them.

In [2]:
# Imports
import random
import numpy as np
from itertools import permutations

In [65]:
class Network(object):
    """
    Creates a Neural Network in specified layers with implemented Neural Network Methods
    """
    def __init__(self, sizes : list[int]):
        """
        Initializes the Network with random, normally distributed, biases and weights.

        Args:
            sizes (list): List of the number of neurons in each layer
        """
        self.sizes = sizes
        self.biases = [np.random.randn(layer, 1) for layer in sizes[1:]]
        self.weights = [np.random.randn(layer, prev) for prev, layer in zip(sizes[:-1], sizes[1:])]
    
    
    
    def feedForward(self, inputs : list[np.ndarray]):
        """
        Returns the output of the network given the input list

        Args:
            inputs (list): Input to the lyaer
        """
        for bias, weight in zip(self.biases, self.weights):
            inputs = np.tanh(np.dot(weight, inputs) + bias)
        return inputs
    
    
    
    def SGD(self, training_data : list[tuple[np.ndarray, np.ndarray]], epochs : int, mini_batch_size : int, learningRate : float, test_data : list[tuple[np.ndarray, int]] =None) -> None:
        """
        Trains the network using Stoachastic Gradient Descent. 

        Args:
            training_data (list of tuples): Training Data for the network
            epochs (int): number of iterations to adjust network
            mini_batch_size (int): size of each batch to pass through
            learningRate (float): Amount to shift layers in backpropogration
            test_data (list, optional): Passed through test data. Defaults to None.
        """
        
        for i in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[j:j + mini_batch_size] for j in range(0, len(training_data), mini_batch_size)]
            mini_batches = [batch for batch in mini_batches if len(batch) == mini_batch_size]
            
            for batch in mini_batches:
                self.update_mini_batch(batch, learningRate)
                
            if test_data:
                print(f'Epoch {i}: {self.evaluate(test_data)} / {len(test_data)}') 
            else:
                print(f'Epoch {i} complete!')
        
        
        
    def update_mini_batch(self, mini_batch : list[tuple[np.ndarray, np.ndarray]], learningRate : float):
        """
        Updates the weights and biases using SGD and Back Propogation

        Args:
            mini_batch (list[tuple[np.ndarray, np.ndarray]]): list containing a subset of the training data
            learningRate (float): Amount by which to shift existing layers after backpropogration
        """
        new_biases = [np.zeros(b.shape) for b in self.biases] #array of zeros w/ same dimension [( 1, 2), (3, 4)] -> [(0, 0), (0, 0)] 
        new_weights = [np.zeros(w.shape) for w in self.weights]
        
        for x, y in mini_batch:
            change_new_biases, change_new_weights = self.backprop(x, y)
            new_biases = [cur + change for cur, change in zip(new_biases, change_new_biases)]
            new_weights = [cur + change for cur, change in zip(new_weights, change_new_weights)]
        
        self.weights = [weight - (learningRate / len(mini_batch) * new_weight) for weight, new_weight in zip(self.weights, new_weights)]
        self.biases = [bias - (learningRate / len(mini_batch) * new_biases) for bias, new_biases in zip(self.biases, new_biases)] 
        
        
    def backprop(self, x, y) -> tuple:
        """
        Return a tuple representing the gradient for the cost function. 

        Args:
            x (np.ndarray): Input array
            y (np.ndarray): Expected output array
        """
        new_biases = [np.zeros(b.shape) for b in self.biases] #array of zeros w/ same dimension [( 1, 2), (3, 4)] -> [(0, 0), (0, 0)] 
        new_weights = [np.zeros(w.shape) for w in self.weights]
        
        #feed forward
        activation = x
        activations = [x] #To store all activations in each layer
        zs = [] #To store all z vectors
        
        # Iterate across layers
        for bias, weight in zip(self.biases, self.weights):
            z = np.dot(weight, activation).reshape(-1, 1) + bias # Have to reshape to correct dimensionality
            zs.append(z)
            activation = np.tanh(z)
            activations.append(activation)
        
        # backwards pass
        delta = self.cost_derivative(activations[-1].transpose()[0], y).reshape(-1, 1) * (1 / np.cosh(zs[-1])) ** 2 # Find the cost of our result (sech^2 from the derivative of tanh)
        
        new_biases[-1] = delta
        new_weights[-1] = np.dot(delta, activations[-2].transpose())
        
        for layer in range(-2, -len(self.sizes), -1): # Using negative indexing
            z = zs[layer]
            sp = np.tanh(z)
            delta = np.dot(self.weights[layer + 1].transpose(), delta) * sp
            new_biases[layer] = delta
            print(activations[layer - 1])
            new_weights[layer] = np.dot(delta, [activations[layer - 1]])
            break
            
        return (new_biases, new_weights)
    
    def evaluate(self, test_data : list[tuple[np.ndarray, int]]):
        """
        Return the number of correct outputs

        Args:
            test_data (list[tuple[np.ndarray, np.ndarray]]): test data in the form of ([input], output)
        """
        test_results = [(np.argmax(self.feedForward(x)), y) for (x,y) in test_data]
        return sum(int(x + 0.01 > y and x - 0.01 < y) for (x, y) in test_results)
        
    
    def cost_derivative(self, output_activations, y):
        """
        Return vector of partial derivatives of cost with respect to layer for the 

        Args:
            output_activations (_type_): what we got
            y (_type_): what the goal was
        """
        return output_activations - y

In [75]:
# Lets test the network!
myNet = Network([2, 3, 2]) # have 2 neurons, 3 neurons, 2 neurons.

def createTestData(nums : tuple[int]):
    if 1 in nums:
        return [1, 0]
    return [0, 1]

myData = [(x, np.array(createTestData(x))) for x in [(np.array([int(10 * random.random()), int(10 * random.random())])) for _ in range(100)]]
testData = [(x, 1 if 1 in x else 0) for x in [(np.array([int(10 * random.random()), int(10 * random.random())])) for _ in range(5)]]

# #self, training_data : list[tuple[np.ndarray, np.ndarray]], epochs : int, mini_batch_size : int, learningRate : float, test_data : list[tuple[np.ndarray, int]] =None
myNet.SGD(myData, 5, 5, 1, test_data=testData)
# np.dot([1, 2, 3], [1, 2, 3])

[9 6]
[5 8]
[2 8]
[0 2]
[3 9]
[2 6]
[9 5]
[1 4]
[9 3]
[2 8]
[0 0]
[1 4]
[8 2]
[2 3]
[2 5]
[5 0]
[9 7]
[2 7]
[2 1]
[0 7]
[2 9]
[9 3]
[4 8]
[7 1]
[1 8]
[0 0]
[0 2]
[2 3]
[9 4]
[6 3]
[8 8]
[7 3]
[9 5]
[5 6]
[1 1]
[8 1]
[1 0]
[5 1]
[2 2]
[5 1]
[5 1]
[3 3]
[7 9]
[2 8]
[9 3]
[8 6]
[5 1]
[9 3]
[0 3]
[7 8]
[3 7]
[9 8]
[2 2]
[5 8]
[8 1]
[9 7]
[7 9]
[8 0]
[7 9]
[9 3]
[6 8]
[8 8]
[8 4]
[2 0]
[7 6]
[2 5]
[7 8]
[6 3]
[9 6]
[9 9]
[4 9]
[1 9]
[5 4]
[3 9]
[6 4]
[0 4]
[8 7]
[7 8]
[8 0]
[5 8]
[2 3]
[5 0]
[6 3]
[9 9]
[9 2]
[4 6]
[3 3]
[6 1]
[8 9]
[5 6]
[4 6]
[6 6]
[7 0]
[7 9]
[2 5]
[7 6]
[0 8]
[3 5]
[2 2]
[2 3]
Epoch 0: 0 / 5
[2 8]
[6 3]
[8 0]
[6 3]
[0 0]
[8 2]
[8 9]
[2 2]
[3 7]
[6 1]
[1 4]
[2 1]
[2 0]
[7 3]
[2 2]
[5 8]
[7 1]
[2 8]
[7 6]
[7 9]
[6 3]
[1 1]
[9 4]
[2 8]
[7 9]
[9 6]
[3 5]
[8 8]
[0 4]
[0 3]
[3 3]
[7 8]
[1 0]
[9 7]
[7 9]
[6 6]
[2 7]
[4 8]
[5 6]
[9 2]
[3 3]
[2 6]
[0 0]
[0 2]
[5 1]
[1 4]
[9 5]
[8 1]
[0 7]
[8 0]
[2 5]
[8 7]
[5 0]
[6 4]
[2 3]
[6 8]
[2 3]
[2 5]
[2 9]
[9 3]
[9 9]
[9 6]
[7 6]
[0 8]
[

In [76]:
# Lets test the network!
myNet = Network([4, 8, 4]) # have 4 neurons, 4 neurons, 2 neurons.

# Here is my rule: numbers must be in order. Output should be how many numbers are in order by the end. Simple O(n) 
def numsInPlace(nums : tuple[int]) -> int:
    return sum([1 if i + 1 == nums[i] else 0 for i in range(len(nums))])

assert(numsInPlace(tuple([1, 3, 2, 4])) == 2)

myData = [(np.array(x), np.array([1 if numsInPlace(x) == i else 0 for i in range(4)])) for x in list(permutations([1, 2, 3, 4], 4))]
testData = [(np.array(x), numsInPlace(x) - 1) for x in list(permutations([1, 2, 3, 4], 4)) if random.random() < 0.3]
# #self, training_data : list[tuple[np.ndarray, np.ndarray]], epochs : int, mini_batch_size : int, learningRate : float, test_data : list[tuple[np.ndarray, int]] =None
# len(myData)
myNet.SGD(myData, 5, 5, 1, test_data=testData)

# np.dot([1, 2, 3], [1, 2, 3])

[3 2 1 4]
[3 4 1 2]
[2 1 3 4]
[1 3 2 4]
[2 1 4 3]
[4 1 2 3]
[4 1 3 2]
[3 4 2 1]
[4 2 3 1]
[2 4 3 1]
[4 3 2 1]
[3 2 4 1]
[1 2 4 3]
[1 3 4 2]
[4 3 1 2]
[3 1 4 2]
[1 4 3 2]
[2 4 1 3]
[4 2 1 3]
[3 1 2 4]
Epoch 0: 0 / 10
[1 3 2 4]
[2 1 4 3]
[3 4 1 2]
[1 4 3 2]
[2 3 4 1]
[1 2 4 3]
[2 1 3 4]
[4 3 2 1]
[3 2 4 1]
[3 1 2 4]
[4 2 1 3]
[1 2 3 4]
[2 4 3 1]
[1 3 4 2]
[3 4 2 1]
[4 1 2 3]
[3 2 1 4]
[4 1 3 2]
[2 3 1 4]
[4 2 3 1]
Epoch 1: 0 / 10
[4 1 3 2]
[1 4 2 3]
[4 2 1 3]
[1 2 3 4]
[3 4 2 1]
[3 1 4 2]
[3 1 2 4]
[2 4 3 1]
[1 3 4 2]
[4 2 3 1]
[2 3 1 4]
[3 2 1 4]
[4 3 1 2]
[3 4 1 2]
[3 2 4 1]
[1 4 3 2]
[2 3 4 1]
[2 4 1 3]
[4 3 2 1]
[2 1 3 4]
Epoch 2: 0 / 10
[3 4 1 2]
[4 2 1 3]
[3 1 2 4]
[1 4 2 3]
[3 4 2 1]
[4 2 3 1]
[4 3 1 2]
[1 4 3 2]
[2 3 1 4]
[2 4 3 1]
[1 3 2 4]
[1 3 4 2]
[3 2 1 4]
[4 1 3 2]
[2 1 4 3]
[3 1 4 2]
[2 1 3 4]
[4 1 2 3]
[2 3 4 1]
[3 2 4 1]
Epoch 3: 0 / 10
[2 1 4 3]
[3 4 1 2]
[1 2 4 3]
[3 2 4 1]
[1 3 2 4]
[3 4 2 1]
[1 4 2 3]
[2 3 1 4]
[4 3 1 2]
[4 2 3 1]
[1 4 3 2]
[2 4 3 1]
[3 2 1 4]
[4 2 1