In [2]:
import numpy as np

In [None]:
class SigmoidFunction:
    """Logistic sigmoid activation, sigma(z) = 1 / (1 + exp(-z))."""

    def __init__(self):
        """The activation is stateless; nothing to initialise."""
        pass

    def function(self, z):
        """Return sigma(z), applied element-wise when ``z`` is an array.

        The exponent must be ``-z``: with ``+z`` the curve is mirrored and
        decreases with ``z`` instead of increasing.
        """
        return 1 / (1 + np.exp(-z))

    def derivative(self, z):
        """Return d sigma / dz = sigma(z) * (1 - sigma(z))."""
        # Evaluate the sigmoid once and reuse it for both factors.
        s = self.function(z)
        return s * (1 - s)

In [None]:
class MeanSquaredCost:
    """Squared-error cost object exposing ``compute`` and ``final_derivative``.

    NOTE(review): ``compute`` performs no averaging and returns the matrix
    product ``residual.T @ residual``. For a column-vector residual that is a
    1x1 array holding the sum of squares; for a residual with several columns
    it is a square matrix rather than a scalar -- confirm the intended layout.
    """

    def __init__(self):
        # Stateless: nothing to set up.
        pass

    def compute(self, y_hat, y):
        """Return ``(y_hat - y).T`` dotted with ``(y_hat - y)``."""
        residual = y_hat - y
        return np.dot(residual.T, residual)

    def final_derivative(self, a, y):
        """Return the raw error ``a - y``."""
        error = a - y
        return error

In [None]:
class Dense:
    """Fully connected layer computing ``A = f(W . A_prev + b)``.

    Data is laid out as ``(features, samples)``: ``W`` has shape
    ``(layer_dims, input_dim)``, ``b`` has shape ``(layer_dims, 1)`` and is
    broadcast across the sample columns.

    NOTE(review): the weights start at zero, so every unit of the layer
    computes the same output; consider a random initialisation if the layer
    is used as a hidden layer.
    """

    def __init__(self, layer_dims, activation, input_dim=None):
        """Create the layer.

        Args:
            layer_dims: number of units (rows of ``W``).
            activation: object exposing ``function(z)`` and ``derivative(z)``.
            input_dim: number of input features (columns of ``W``).

        Raises:
            TypeError: if ``input_dim`` is not given; the weight matrix
                cannot be allocated without it.
        """
        if input_dim is None:
            raise TypeError("Dense requires input_dim to allocate its weights")

        # Layer activation function
        self.activation = activation

        # Layer parameters
        self.W = np.zeros((layer_dims, input_dim))
        self.b = np.zeros((layer_dims, 1))

        # Cache filled by forward() / backward()
        self.A_ = None
        self.Z = None
        self.dW = None
        self.db = None

    def forward(self, A):
        """Return the layer activation for input ``A`` and cache ``A`` and ``Z``.

        Z[l] = W[l] . A[l-1] + b[l]
        A[l] = f(Z[l])
        """
        self.A_ = A
        self.Z = np.dot(self.W, self.A_) + self.b
        return self.activation.function(self.Z)

    def backward(self, A_, dA=None, dZ=None):
        """Back-propagate through the layer and store ``dW`` / ``db``.

        Args:
            A_: input activation of this layer. Kept for interface
                compatibility; the value cached by ``forward`` is used so it
                always matches the cached ``Z``.
            dA: gradient of the cost w.r.t. this layer's output.
            dZ: gradient of the cost w.r.t. this layer's pre-activation.
                When given it takes precedence over ``dA``.

        Returns:
            Gradient of the cost w.r.t. this layer's input, ``W.T . dZ``.

        Raises:
            TypeError: if neither ``dA`` nor ``dZ`` is supplied.
        """
        # `is None`, not `== None`: comparing an ndarray with `==` is
        # element-wise and cannot be used as an `if` condition.
        if dZ is None:
            if dA is None:
                raise TypeError("backward() requires either dA or dZ")
            # dA already is dC/dA for this layer, so only the activation
            # derivative is applied here (no extra multiplication by W).
            dZ = dA * self.activation.derivative(self.Z)

        # Samples are the columns of the cached input.
        m = self.A_.shape[1]

        # Stored on the instance so update_weights() can consume them.
        self.dW = np.dot(dZ, self.A_.T) / m
        self.db = np.sum(dZ, axis=1, keepdims=True) / m

        return np.dot(self.W.T, dZ)

    def update_weights(self, learning_rate):
        """Apply one gradient-descent step using the stored gradients.

        W[l] = W[l] - alpha * dW[l]
        b[l] = b[l] - alpha * db[l]
        """
        self.W = self.W - learning_rate * self.dW
        self.b = self.b - learning_rate * self.db

In [None]:
class NeuralNetwork:
    """Sequential stack of layers trained with full-batch gradient descent.

    Each layer must expose ``forward(A)``, ``backward(A_, dA=None, dZ=None)``
    and ``update_weights(learning_rate)``.
    """

    def __init__(self, layers):
        """Store the ordered list of layers (first layer receives the input)."""
        self.layers = layers
        self.nb_layers = len(layers)

    def train(self, X, y, nb_epochs, learning_rate, cost):
        """Run ``nb_epochs`` forward/backward/update passes over ``(X, y)``.

        Args:
            X: network input, passed to the first layer's ``forward``.
            y: targets, compared with the last layer's output.
            nb_epochs: number of passes.
            learning_rate: step size forwarded to ``update_weights``.
            cost: object exposing ``compute(y_hat, y)`` and
                ``final_derivative(a, y)``.
        """
        last = self.nb_layers - 1

        for _ in range(nb_epochs):

            # Forward propagation: A[k] is the input of layer k and
            # A[k + 1] its output, so A[0] is X and A[-1] the prediction.
            A = [X]
            for layer in self.layers:
                A.append(layer.forward(A[-1]))

            # Compute cost (evaluated every epoch; the value is not reported)
            y_hat = A[-1]
            cost.compute(y_hat, y)

            # Backward propagation. The cost derivative is handed to the
            # output layer as its dZ; without it the layer has no gradient
            # to start from.
            # NOTE(review): treating final_derivative as dZ skips the output
            # activation's derivative -- confirm this matches the cost used.
            dZ = cost.final_derivative(y_hat, y)
            dA = self.layers[last].backward(A[last], dZ=dZ)
            for k in range(last - 1, -1, -1):
                dA = self.layers[k].backward(A[k], dA)

            # Update weights
            for layer in self.layers:
                layer.update_weights(learning_rate)

    def predict(self, X_test):
        """Return the output of the last layer for input ``X_test``."""
        a = X_test
        for layer in self.layers:
            a = layer.forward(a)
        return a