In [1]:
import random
import numpy as np

In [None]:
def sigmoid(z):
    """Sigmoid activation function.

    Computes ``1 / (1 + exp(-z))`` element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-z)))`` via
    ``np.logaddexp`` so that large negative inputs do not overflow inside
    ``np.exp`` (the direct formula does, and emits a RuntimeWarning).

    Args:
        z: Scalar or NumPy array of pre-activations.

    Returns:
        NumPy float or array with the same shape as ``z``.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)),
    # computed without ever forming a huge intermediate exp(-z).
    return np.exp(-np.logaddexp(0.0, -z))

In [33]:
def sigmoid_prime(z):
    """Derivative of the sigmoid activation function.

    Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``.

    Args:
        z: Scalar or NumPy array of pre-activations.

    Returns:
        The derivative evaluated element-wise at ``z``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(z)
    return s * (1 - s)

In [None]:
def quadratic_cost_derivative(output_activation, target):
    """Derivative of the quadratic cost with respect to the output activation.

    This is a module-level function, so it takes no ``self`` argument; it is
    called with exactly the network output and the desired output.

    Args:
        output_activation: Activation of the output layer (scalar or array).
        target: Desired output, broadcastable against ``output_activation``.

    Returns:
        ``output_activation - target``, element-wise.
    """
    return output_activation - target

In [None]:
class Network:
    """Fully connected feed-forward network with sigmoid activations.

    Relies on the module-level ``sigmoid`` and ``sigmoid_prime`` functions.
    """

    def __init__(self, net_size):
        """Create a network with randomly initialised weights and biases.

        Args:
            net_size: Sequence of layer widths, input layer first,
                e.g. ``[2, 3, 1]``.
        """
        # info
        self.net_size = net_size
        self.num_layers = len(self.net_size)

        # weight matrices: weights[i] has shape (net_size[i + 1], net_size[i])
        layer_pairs = zip(self.net_size[:-1], self.net_size[1:])
        self.weights = [np.random.randn(y, x) for x, y in layer_pairs]

        # bias matrices: biases[i] has shape (net_size[i + 1], 1)
        self.biases = [np.random.randn(y, 1) for y in self.net_size[1:]]

    @staticmethod
    def _as_column(values):
        """Return *values* as an array, reshaping 0-D/1-D input to a column."""
        arr = np.asarray(values)
        if arr.ndim < 2:
            # A flat vector would broadcast against the (n, 1) biases and
            # silently produce an (n, n) matrix, so make it a column first.
            arr = arr.reshape(-1, 1)
        return arr

    def forward_pass_fast(self, a):
        """Propagate ``a`` through the network and return only the output.

        Args:
            a: Input activation; a flat vector is treated as one column.

        Returns:
            Activation of the last layer.
        """
        a = self._as_column(a)
        for w, b in zip(self.weights, self.biases):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def forward_pass(self, input):
        """Propagate ``input`` through the network, recording every layer.

        w: single weight matrix
        b: single bias matrix
        z: single pre-activation matrix
        a: single activation matrix

        Args:
            input: Input activation; a flat vector is treated as one column.

        Returns:
            Tuple ``(pre_activations, activations)``. ``pre_activations``
            holds one ``z`` per weight layer; ``activations`` holds the
            input followed by one ``a`` per weight layer.
        """
        # set input as first activation
        a = self._as_column(input)

        # lists storing all pre-activation and activation matrices
        # produced during the feedforward pass
        pre_activations = []
        activations = [a]

        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, a) + b
            pre_activations.append(z)

            a = sigmoid(z)
            activations.append(a)

        return pre_activations, activations

    def backprop(self, activations, pre_activations, target):
        """Compute the gradient of the quadratic cost for every parameter.

        NOTE: the argument order is ``(activations, pre_activations)``,
        which is the reverse of the tuple returned by ``forward_pass``.

        Args:
            activations: Activation list as returned by ``forward_pass``.
            pre_activations: Pre-activation list as returned by
                ``forward_pass``.
            target: Desired output; a flat vector is treated as one column.

        Returns:
            Tuple ``(nabla_w, nabla_b)``: lists with the same layout and
            shapes as ``self.weights`` and ``self.biases``. When the
            activations hold several columns, the gradients are summed
            over those columns.
        """
        # one zero array per weight/bias matrix to hold the gradient of the
        # cost w.r.t. that specific weight/bias
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]

        target = self._as_column(target)

        # delta for last layer; (a - target) is the derivative of the
        # quadratic cost w.r.t. the output activation
        delta = (activations[-1] - target) * sigmoid_prime(pre_activations[-1])

        # gradients for last layer: the weight gradient pairs delta with the
        # *activation* feeding the layer, not its pre-activation
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        nabla_b[-1] = delta.sum(axis=1, keepdims=True)

        # walk backwards through the remaining layers; `layer` counts from
        # the end, so layer == 2 is the second-to-last weight layer
        for layer in range(2, self.num_layers):
            back_w = self.weights[-layer + 1].transpose()
            delta = np.dot(back_w, delta) * sigmoid_prime(pre_activations[-layer])
            nabla_w[-layer] = np.dot(delta, activations[-layer - 1].transpose())
            nabla_b[-layer] = delta.sum(axis=1, keepdims=True)

        return nabla_w, nabla_b




        










In [30]:
# Build a network with 2 inputs, one hidden layer of 3 units and 1 output.
neural_net = Network(net_size=[2, 3, 1])

In [31]:
# Feed one sample through the network. The sample is passed as a (2, 1)
# column vector so it lines up with the (n, 1) bias columns; a flat list
# would broadcast against the biases and produce (n, n) matrices.
pre_activations, activations = neural_net.forward_pass(np.array([[3], [3]]))