# Imports

In [37]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

# Data setup 

In [74]:
# Load the inputs (X) and the targets (y) from NumPy ``.npy`` files.
# The paths are relative, so the notebook must be started from the directory
# that contains the ``data/`` folder.
X = np.load("data/X-data.npy")
y = np.load("data/y-data.npy")

In [75]:
# Append a trailing axis of length 1 so that each X[i] has shape
# (X.shape[1], 1) -- the column layout the Dense layer multiplies against.
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

# Class definitions

In [62]:
class Layer:
    """Common interface shared by every stage of the network.

    Concrete layers override ``forward`` and ``backward``; the versions
    defined here are placeholders that do nothing and return ``None``.
    """

    def __init__(self):
        # Slots that concrete layers may use to cache values between the
        # forward and the backward pass.
        self.input = self.output = None

    def forward(self, input):
        """Placeholder forward pass: ignores ``input`` and returns ``None``."""
        return None

    def backward(self, output, learning_rate):
        """Placeholder backward pass: ignores its arguments, returns ``None``."""
        return None

In [63]:
class Dense(Layer):
    """Fully connected layer computing ``weights @ input + bias``.

    ``weights`` has shape ``(output_size, input_size)`` and ``bias`` has shape
    ``(output_size, 1)``; both are drawn from a standard normal distribution.
    """

    def __init__(self, input_size, output_size):
        # Run the base initialiser so ``self.input`` / ``self.output`` exist
        # even before the first forward pass.
        super().__init__()
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Cache ``input`` for the backward pass and return the affine map."""
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        ``output_gradient`` is the gradient of the loss with respect to this
        layer's output. The returned value is the gradient with respect to
        the layer's input, to be handed to the preceding layer.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        # Must be computed with the *old* weights, i.e. before the update.
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

In [64]:
class Activation(Layer):
    """Element-wise activation layer built from a function and its derivative.

    ``activation`` maps the layer input to its output; ``activation_prime``
    returns the derivative of ``activation`` evaluated at the same input.
    """

    def __init__(self, activation, activation_prime):
        # Run the base initialiser so ``self.input`` / ``self.output`` exist
        # even before the first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        """Cache ``input`` and return ``activation(input)``."""
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient, learning_rate):
        """Return the gradient with respect to the layer input.

        ``learning_rate`` is accepted for interface compatibility only; this
        layer has no trainable parameters.
        """
        return np.multiply(output_gradient, self.activation_prime(self.input))

In [65]:
class Tanh(Activation):
    """Hyperbolic-tangent activation layer."""

    def __init__(self):
        # d/dx tanh(x) = 1 - tanh(x)^2
        super().__init__(np.tanh, lambda x: 1 - np.tanh(x) ** 2)

class Sigmoid(Activation):
    """Logistic-sigmoid activation layer."""

    def __init__(self):
        def logistic(z):
            # 1 / (1 + e^-z)
            return 1 / (1 + np.exp(-z))

        def logistic_slope(z):
            # The derivative can be written in terms of the sigmoid itself.
            sig = logistic(z)
            return sig * (1 - sig)

        super().__init__(logistic, logistic_slope)

class Softmax(Layer):
    """Softmax layer normalising its input into values that sum to 1."""

    def forward(self, input):
        """Return ``exp(input) / sum(exp(input))`` and cache it for backward.

        The maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``np.exp`` from overflowing
        to ``inf`` (and the output from becoming ``nan``) for large inputs.
        """
        shifted = input - np.max(input)
        tmp = np.exp(shifted)
        self.output = tmp / np.sum(tmp)
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Return the gradient with respect to the layer input.

        ``learning_rate`` is unused; the layer has no trainable parameters.
        NOTE(review): the broadcasting below appears to assume ``self.output``
        is an ``(n, 1)`` column vector -- confirm against callers.
        """
        # This version is faster than the one presented in the video
        n = np.size(self.output)
        # Row i, column j holds output[i] * (delta_ij - output[j]).
        return np.dot((np.identity(n) - self.output.T) * self.output, output_gradient)
        # Original formula:
        # tmp = np.tile(self.output, n)
        # return np.dot(tmp * (np.identity(n) - np.transpose(tmp)), output_gradient)

In [66]:
def mse(y_true, y_pred):
    """Return the mean of the squared differences between target and prediction."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Return the gradient of ``mse`` with respect to ``y_pred``."""
    count = np.size(y_true)
    doubled_error = 2 * (y_pred - y_true)
    return doubled_error / count

def binary_cross_entropy(y_true, y_pred, eps=1e-15):
    """Return the mean binary cross-entropy between targets and predictions.

    ``y_pred`` is clipped to ``[eps, 1 - eps]`` before taking logarithms.
    Without the clip, a prediction of exactly 0 or 1 yields ``log(0)`` and the
    result becomes ``inf`` or ``nan`` (``0 * -inf``), even for a perfect
    prediction. Predictions strictly inside the interval are unaffected.
    """
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred, eps=1e-15):
    """Return the gradient of the mean binary cross-entropy w.r.t. ``y_pred``.

    ``y_pred`` is clipped to ``[eps, 1 - eps]`` first so that predictions of
    exactly 0 or 1 do not cause a division by zero (``inf`` / ``nan``
    gradients). Predictions strictly inside the interval are unaffected.
    """
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

In [67]:
def predict(network, input):
    """Feed ``input`` through every layer of ``network`` in order.

    Each layer's ``forward`` result becomes the next layer's input; the
    result of the last layer is returned. An empty ``network`` returns
    ``input`` unchanged.
    """
    signal = input
    for stage in network:
        signal = stage.forward(signal)
    return signal

def train(network, loss, loss_prime, x_train, y_train, epochs=1000, learning_rate=0.01, verbose=True):
    """Train ``network`` in place, updating after every single sample.

    For each epoch, every ``(x, y)`` pair is pushed forward through the
    network, its loss is accumulated, and the loss gradient is propagated
    backwards through the layers in reverse order so each layer can update
    itself with ``learning_rate``.

    Parameters:
        network: sequence of layers exposing ``forward`` and ``backward``.
        loss: callable ``loss(y_true, y_pred)`` returning the sample error.
        loss_prime: callable returning the gradient of ``loss`` w.r.t. ``y_pred``.
        x_train, y_train: paired training inputs and targets.
        epochs: number of passes over the training data.
        learning_rate: step size handed to every layer's ``backward``.
        verbose: when true, print the mean error after each epoch.
    """
    for e in range(epochs):
        error = 0
        for sample, target in zip(x_train, y_train):
            prediction = predict(network, sample)
            error += loss(target, prediction)

            # Back-propagate: each layer consumes the gradient w.r.t. its
            # output and hands back the gradient w.r.t. its input.
            upstream = loss_prime(target, prediction)
            for layer in reversed(network):
                upstream = layer.backward(upstream, learning_rate)

        # Turn the accumulated error into a per-sample average.
        error /= len(x_train)
        if not verbose:
            continue
        print(f"{e + 1}/{epochs}, error={error}")

In [76]:
# Alternative toy input kept for reference: the four 2-bit combinations,
# shaped (4, 2, 1). Disabled because X is loaded from disk in this notebook.
#X = np.reshape([[0, 0], [0, 1], [1, 0], [1, 1]], (4, 2, 1))
#X = X.reshape(len(X), X.shape[1],1)

# Architecture: X.shape[1] inputs -> 40 hidden units (tanh) -> 1 output (tanh).
# Dense weights are drawn from np.random.randn, so results differ between runs
# unless the NumPy random seed is fixed beforehand.
network = [
    Dense(X.shape[1], 40),
    Tanh(),
    Dense(40, 1),
    Tanh()
]

# train
# Per-sample gradient descent on the mean squared error: 500 epochs with a
# learning rate of 0.1. The mean error is printed after every epoch.
train(network, mse, mse_prime, X, y, epochs=500, learning_rate=0.1)

1/500, error=0.3712670224239242
2/500, error=0.3725804963974302
3/500, error=0.37258042020403637
4/500, error=0.37258033997839635
5/500, error=0.372580255390831
6/500, error=0.37258016607467387
7/500, error=0.3725800716209338
8/500, error=0.3725799715720035
9/500, error=0.3725798654142101
10/500, error=0.3725797525689552
11/500, error=0.3725796323821189
12/500, error=0.372579504111316
13/500, error=0.37257936691048393
14/500, error=0.37257921981112324
15/500, error=0.3725790616993113
16/500, error=0.3725788912873374
17/500, error=0.3725787070784246
18/500, error=0.37257850732249664
19/500, error=0.37257828996022396
20/500, error=0.3725780525515414
21/500, error=0.3725777921833906
22/500, error=0.3725775053492632
23/500, error=0.37257718778997495
24/500, error=0.37257683428026045
25/500, error=0.3725764383384317
26/500, error=0.37257599182464746
27/500, error=0.3725754843745717
28/500, error=0.3725749025839877
29/500, error=0.3725742288065426
30/500, error=0.37257343933221376
31/500, er

240/500, error=0.03895489330850707
241/500, error=0.038884410767984025
242/500, error=0.0388169967499387
243/500, error=0.03875250234581854
244/500, error=0.038690641286301126
245/500, error=0.038630995706926186
246/500, error=0.03857303491458112
247/500, error=0.03851614466123977
248/500, error=0.038459662289935755
249/500, error=0.03840291232522191
250/500, error=0.03834523778459633
251/500, error=0.03828602428680071
252/500, error=0.03822471625678
253/500, error=0.0381608265377613
254/500, error=0.03809394205904317
255/500, error=0.03802372857700162
256/500, error=0.037949936715123436
257/500, error=0.0378724095263363
258/500, error=0.0377910888733336
259/500, error=0.03770601497334615
260/500, error=0.03761731209444193
261/500, error=0.03752515541409178
262/500, error=0.037429720108455584
263/500, error=0.03733112192606579
264/500, error=0.03722936444199422
265/500, error=0.03712430762859351
266/500, error=0.03701566464356932
267/500, error=0.03690302275527381
268/500, error=0.0367

471/500, error=0.02042080930108187
472/500, error=0.020396130638998193
473/500, error=0.02037211258621678
474/500, error=0.020348741451229963
475/500, error=0.020325992809761734
476/500, error=0.02030383562118877
477/500, error=0.020282235271349928
478/500, error=0.02026115577842027
479/500, error=0.020240561350898943
480/500, error=0.02022041744773673
481/500, error=0.02020069145940367
482/500, error=0.02018135310394451
483/500, error=0.020162374612430388
484/500, error=0.02014373076256659
485/500, error=0.020125398806604876
486/500, error=0.02010735832949052
487/500, error=0.020089591064826374
488/500, error=0.02007208068940867
489/500, error=0.020054812611532028
490/500, error=0.020037773763802374
491/500, error=0.020020952407627392
492/500, error=0.02000433795383944
493/500, error=0.019987920801790994
494/500, error=0.019971692197785575
495/500, error=0.01995564411257802
496/500, error=0.019939769137026716
497/500, error=0.019924060394416264
498/500, error=0.0199085114678686
499/50

In [80]:
# Run the network on every sample in X and collect the outputs in order.
pred = [predict(network, sample) for sample in X]

In [81]:
pred

[array([[-0.01704453]]),
 array([[0.07731202]]),
 array([[0.1175094]]),
 array([[0.09595428]]),
 array([[0.06503201]]),
 array([[0.13185598]]),
 array([[0.12918777]]),
 array([[0.26742894]]),
 array([[0.1373634]]),
 array([[0.11749134]]),
 array([[0.22929267]]),
 array([[0.20591239]]),
 array([[0.09613761]]),
 array([[0.3454583]]),
 array([[0.20009867]]),
 array([[0.23123916]]),
 array([[0.15896636]]),
 array([[0.13213057]]),
 array([[0.10621882]]),
 array([[0.99993458]]),
 array([[0.99999985]]),
 array([[1.]]),
 array([[0.07601079]]),
 array([[0.11184531]]),
 array([[0.11767106]]),
 array([[0.0136506]]),
 array([[0.12356632]]),
 array([[0.19427272]]),
 array([[0.14299931]]),
 array([[0.38953364]]),
 array([[0.24169734]]),
 array([[0.15580668]]),
 array([[0.19916714]]),
 array([[0.28218239]]),
 array([[0.17954828]]),
 array([[0.21511883]]),
 array([[0.23736756]]),
 array([[0.99999999]]),
 array([[0.78491778]]),
 array([[0.23466406]]),
 array([[0.92034514]]),
 array([[0.63359185]]),
 ar