# Neural Network from Scratch With Gradient Descent

## Create the Base Layer class

In [1]:
class Layer:
    """Common interface shared by every layer in the network.

    Subclasses override ``forward`` and ``backward``; the versions defined
    here do nothing and return ``None``.
    """

    def __init__(self):
        # Placeholders for the most recent input and output of the layer.
        self.input = self.output = None

    def forward(self, input):
        """Take an input and return the layer's output (no-op in the base class)."""
        return None

    def backward(self, output_gradient, learning_rate):
        """Take dLoss/dOutput and return dLoss/dInput, updating any weights.

        The base class has no parameters and performs no computation, so it
        simply returns ``None``.
        """
        return None

## Create the Dense layer
### This layer will be fully connected, meaning every input neuron is connected to every output neuron.



In [2]:
import numpy as np


class Dense(Layer):
    """Fully connected layer computing ``weights @ input + bias``.

    The layer works on column vectors: ``weights`` has shape
    ``(output_size, input_size)`` and ``bias`` has shape ``(output_size, 1)``,
    so an input of shape ``(input_size, 1)`` yields an output of shape
    ``(output_size, 1)``.
    """

    def __init__(self, input_size, output_size):
        """Initialise weights and bias from a standard normal distribution.

        Args:
            input_size: Number of input neurons.
            output_size: Number of output neurons.
        """
        super().__init__()
        # One row per output neuron so that ``weights @ input`` lines up
        # with the (output_size, 1) bias.
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Store ``input`` for the backward pass and return ``W @ input + b``.

        Args:
            input: Column vector expected to have shape ``(input_size, 1)``.

        Returns:
            Array of shape ``(output_size, 1)``.
        """
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return dLoss/dInput.

        Args:
            output_gradient: dLoss/dOutput, same shape as the forward output.
            learning_rate: Step size used for the parameter update.

        Returns:
            dLoss/dInput, same shape as the stored input.
        """
        # dL/dW = dL/dY @ X^T  and  dL/dB = dL/dY for Y = W @ X + B.
        weight_gradient = np.dot(output_gradient, self.input.T)
        # The input gradient must use the weights from the forward pass, so
        # compute it before the weights are modified.
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weight_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient



## Activation functions

In [3]:
class Activation(Layer):
    """Layer that applies an element-wise activation function.

    The layer has no trainable parameters; it only needs the function itself
    and its derivative.
    """

    def __init__(self, activation_function, derivative_function):
        """Remember the activation and its derivative for later use."""
        self.activation_function, self.derivative_function = (
            activation_function,
            derivative_function,
        )

    def forward(self, input):
        """Store ``input`` and return the activation applied to it."""
        self.input = input
        activate = self.activation_function
        return activate(self.input)

    def backward(self, output_gradient, learning_rate):
        """Return dLoss/dInput via the chain rule.

        ``learning_rate`` is accepted for interface compatibility but is not
        used, since there is nothing to update.
        """
        local_slope = self.derivative_function(self.input)
        return np.multiply(output_gradient, local_slope)

## Hyperbolic Tangent Activation Function

In [4]:
class Tanh(Activation):
    """Activation layer using the hyperbolic tangent."""

    def __init__(self):
        def tanh(x):
            # Element-wise hyperbolic tangent.
            return np.tanh(x)

        def tanh_derivative(x):
            # d/dx tanh(x) = 1 - tanh(x)^2
            squashed = np.tanh(x)
            return 1 - squashed ** 2

        super().__init__(tanh, tanh_derivative)

## Loss Function - MSE

In [5]:
def mse_loss(predicted, actual):
    """Return the mean squared error between ``predicted`` and ``actual``."""
    residual = actual - predicted
    squared = np.power(residual, 2)
    return np.mean(squared)


def mse_loss_derivative(predicted, actual):
    """Return the gradient of the mean squared error w.r.t. ``predicted``."""
    residual = predicted - actual
    element_count = np.size(actual)
    return 2 * residual / element_count

## Time to use our layers to create a simple neural network

In [None]:
# but first, let's create functions to train and test the model

def predict(network, input):
    """Run ``input`` through every layer of ``network`` and return the result.

    Each layer's ``forward`` output becomes the next layer's input; an empty
    network returns ``input`` unchanged.
    """
    signal = input
    for stage in network:
        signal = stage.forward(signal)
    return signal


def train(network, loss_function, loss_derivative, X_train, y_train, epochs=10000, learning_rate=0.01, verbose=True):
    """Train ``network`` in place, updating the layers after every sample.

    Args:
        network: Sequence of layers, each exposing ``forward(input)`` and
            ``backward(output_gradient, learning_rate)``.
        loss_function: Callable ``(predicted, actual)`` returning a scalar loss.
        loss_derivative: Callable ``(predicted, actual)`` returning the
            gradient of the loss with respect to ``predicted``.
        X_train: Sized collection of training inputs.
        y_train: Targets, paired with ``X_train`` by position.
        epochs: Number of full passes over the training data.
        learning_rate: Step size handed to every layer's ``backward``.
        verbose: When true, print the mean error once per epoch.
    """
    sample_count = len(X_train)
    if sample_count == 0:
        # Nothing to learn from; returning here also keeps the per-epoch
        # average below from dividing by zero.
        return

    for epoch in range(epochs):
        error = 0
        for x, y in zip(X_train, y_train):
            # forward pass
            output = predict(network, x)

            # error calculation -- the loss functions take (predicted, actual)
            error += loss_function(output, y)

            # backward pass: propagate the gradient from the last layer to the first
            output_gradient = loss_derivative(output, y)
            for layer in reversed(network):
                output_gradient = layer.backward(output_gradient, learning_rate)

        # Average and report once per epoch, after every sample has been seen.
        error /= sample_count

        if verbose:
            print(f'Epoch {epoch + 1}/{epochs}, Error: {error:.4f}')