Okay, let's start over after we've understood some maths. 

In [51]:
# Imports:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Data Setup:
X = np.load('data/X-data.npy')
y = np.load('data/y-data.npy')

np.random.seed(0)

In [52]:
def split_test_train(X, y, rate=0.2):
    # First shuffle randomly 
    assert len(X) == len(y)
    p = np.random.permutation(len(X))
    X_shuffled, y_shuffled = X[p], y[p]
    
    # Split into test and train set
    i_test = round(len(X) * 0.2)
    X_test, X_train = X_shuffled[:i_test], X_shuffled[i_test:]
    y_test, y_train = y_shuffled[:i_test], y_shuffled[i_test:]
    
    return X_train, y_train, X_test, y_test
    
X_train, y_train, X_test, y_test = split_test_train(X, y)

In [53]:
# Abstract Class Definitions: 
class Layer():
    def __init__(self): 
        self.inputs = None  # The inputs into this layer. 
        self.outputs = None # The ouputs of this layer. 
        
    # Forward propagation method.
    def forward(self, inputs):
        pass
    
    # Backward propagation method.
    def backward(self):
        pass
    
class Activation():
    def activate():
        pass
    
    def prime():
        pass
    
class Loss():
    def calculate_loss():
        pass

In [54]:
class ReLU(Activation):
    def activate(self, x): 
        return np.maximum(x, 0.0)
    
    def derivative(self, x):
        return (x > 0) * 1  # * 1 to return a number.
    
class Sigmoid(Activation):
    def activate(self, x):
        return 1 / (1 + np.exp(-x))
    
    def derivative(self, x):
        s = self.activate(x)
        return s * (1 - s)

In [55]:
class CrossEntropyLoss():
    def calculate_loss(self, y_true, y_pred):
        return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))
    
    def derivative(self, y_true, y_pred): 
        r = ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)
        return r
    
class MSELoss():
    def calculate_loss(self, y_true, y_pred):
        return 1/2 * (y_true - y_pred) * (y_true - y_pred)
    
    def derivative(self, y_true, y_pred):
        return y_true - y_pred

In [56]:
# Dense (fully connected) Layer Class:
class Dense(Layer): 
    def __init__(self, input_size, output_size, activation_function='relu', name='unnamed'):
        self.name = name
        self.weights = np.random.randn(output_size, input_size)
        self.biases = np.zeros((output_size, 1))
        self.outputs = None
        
        if activation_function == 'relu':
            self.activation = ReLU()
        elif activation_function == 'sigmoid':
            self.activation = Sigmoid()
        else:
            self.activation = ReLU()  # Default to ReLU activation function.
        
    def print_weights(self):
        print('Weights:\n', pd.DataFrame(self.weights))
        
    def print_biases(self):
        print('Biases:\n', pd.DataFrame(self.biases))
    
    def forward(self, inputs):
        self.inputs = inputs
        self.outputs = self.activation.activate(np.dot(self.weights, inputs) + self.biases)
        return self.outputs
    
    def backward(self, delta_l, learning_rate):
        self.weights -= learning_rate * np.dot(self.outputs.T, delta_l)
        return delta_l

In [57]:
from operator import le


class Network:
    def __init__(self, layers, loss_function='cross_entropy'):
        self.layers = layers
        self.output = None
        
        if loss_function == 'cross_entropy':
            self.loss = CrossEntropyLoss()
        elif loss_function == 'mse':
            self.loss = MSELoss()
        else:
            self.loss = CrossEntropyLoss()  # Default to cross entropy loss. 
    
    def train(self, X_train, y_train, number_epochs, learning_rate=0.1):
        for epoch in range(number_epochs):
            error = 0
            
            for x, y in zip(X_train, y_train):
                # Process the forward pass. This goes through every layer.
                x = x.reshape(21, 1)
                self.predict(x)  # Create a matrix for dot product in forward()
                
                # Calculate the error after the forward pass. 
                error += self.loss.calculate_loss(y, self.output)
                # print('Prediction', self.output)
                
                # Error of the output layer, delta^L
                dC_da = self.loss.derivative(y, self.output)
                
                da_dz = self.layers[1].activation.derivative(self.layers[1].outputs)
                
                delta_L = np.multiply(dC_da, da_dz)  # delta of the final layer. 
                self.layers[-1].weights = self.layers[-1].weights - learning_rate * np.dot(self.layers[-1].outputs.T, delta_L)

                for layer in reversed(self.layers[:-1]):
                    layer.backward(delta_L, learning_rate)
                
                da_dz_l0 = self.layers[0].activation.derivative(self.layers[0].outputs)
                delta_l0 = np.multiply(np.dot(self.layers[1].weights.T, delta_L), da_dz_l0)
                
                self.layers[0].weights = self.layers[0].weights - learning_rate * np.dot(self.layers[0].outputs.T, delta_l0)
                
                   
    def predict(self, x):
        outputs = x
        for layer in self.layers:
            outputs = layer.forward(outputs)
        self.output = outputs
        return outputs
            

For backpropagation, the formula for the very last layer is: 
![last-layer-backprop](https://miro.medium.com/max/828/1*zRDMl-GxVO7qENH5dNrZ-g.png)

In [58]:
np.random.seed(42)  # To keep results consistent.

number_inputs = X.shape[1]
epochs = 1

layers = [
    Dense(number_inputs, 2, activation_function='relu', name='Layer 1'),
    Dense(2, 1, activation_function='sigmoid', name='Layer 2')
]

In [59]:
def print_layer_outputs(network):
    for layer in network.layers:
        print(layer.name, layer.outputs.shape, ':\n', layer.outputs)

In [60]:
X_practice = X[0:1]
X_practice.shape

(1, 21)

In [61]:
network = Network(layers, loss_function='mse')
network.train(X, y, number_epochs=epochs)
# print_layer_outputs(network)

[[-0.10981139 -0.2952668 ]]
[[-0.104131   -0.28958641]]
[[-0.09839394 -0.28384935]]
[[-0.09251884 -0.27797426]]
[[-0.08673864 -0.27219406]]
[[-0.08086355 -0.26631896]]
[[-0.07501285 -0.26046827]]
[[-0.06913776 -0.25459317]]
[[-0.06326267 -0.24871808]]
[[-0.05738757 -0.24284299]]
[[-0.05151248 -0.23696789]]
[[-0.04563739 -0.2310928 ]]
[[-0.0397623  -0.22521771]]
[[-0.0338872  -0.21934262]]
[[-0.02801211 -0.21346752]]
[[-0.02213702 -0.20759243]]
[[-0.01626192 -0.20171734]]
[[-0.01038683 -0.19584225]]
[[-0.00451174 -0.18996715]]
[[-0.01038683 -0.19584225]]
[[-0.01626192 -0.20171734]]
[[-0.02213702 -0.20759243]]
[[-0.01626192 -0.20171734]]
[[-0.01038683 -0.19584225]]
[[-0.00451174 -0.18996715]]
[[ 0.00135486 -0.18410056]]
[[ 0.00722995 -0.17822546]]
[[ 0.01310504 -0.17235037]]
[[ 0.01898013 -0.16647528]]
[[ 0.02488098 -0.16057444]]
[[ 0.03075607 -0.15469934]]
[[ 0.03663116 -0.14882425]]
[[ 0.04250625 -0.14294916]]
[[ 0.04838135 -0.13707407]]
[[ 0.05425644 -0.13119897]]
[[ 0.06013153 -0.125

In [62]:
network.output

array([[0.5]])

In [63]:
pred = []
for i in X:
    pred.append(network.predict(i.reshape(21, 1)))

In [64]:
res = pd.DataFrame()
res["predictions"] = pred
res["actual"] = y

In [65]:
res["predictions"] = res["predictions"].apply(lambda x: x[0][0])
res["predictions"] = res["predictions"].apply(lambda x: 0 if x < 0.5 else 1)

In [66]:
print("Accuracy:", sum(x == y for x, y in zip(res['predictions'], res['actual'])) / len(X) * 100)

Accuracy: 64.32337434094903
