In [2]:
import numpy as np

In [3]:
#losses

def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2 * (y_pred - y_true) / np.size(y_true)

def binary_cross_entropy(y_true, y_pred):
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred):
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

In [4]:
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    def forward(self, input):
        # TODO: return output
        pass

    def backward(self, output_gradient, learning_rate):
        # TODO: update parameters and return input gradient
        pass

In [6]:
class Dense(Layer):
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient, learning_rate):
        weights_gradient = np.dot(output_gradient, self.input.T)
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

In [7]:
class Activation(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient, learning_rate):
        return np.multiply(output_gradient, self.activation_prime(self.input))

In [8]:
class Tanh(Activation):
    def __init__(self):
        def tanh(x):
            return np.tanh(x)

        def tanh_prime(x):
            return 1 - np.tanh(x) ** 2

        super().__init__(tanh, tanh_prime)

class Sigmoid(Activation):
    def __init__(self):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x))

        def sigmoid_prime(x):
            s = sigmoid(x)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_prime)

class Softmax(Layer):
    def forward(self, input):
        tmp = np.exp(input)
        self.output = tmp / np.sum(tmp)
        return self.output
    
    def backward(self, output_gradient, learning_rate):
        # This version is faster than the one presented in the video
        n = np.size(self.output)
        return np.dot((np.identity(n) - self.output.T) * self.output, output_gradient)
        # Original formula:
        # tmp = np.tile(self.output, n)
        # return np.dot(tmp * (np.identity(n) - np.transpose(tmp)), output_gradient)

In [12]:
def predict(network, input):
    output = input
    for layer in network:
        output = layer.forward(output)
    return output

def train(network, loss, loss_prime, x_train, y_train, epochs = 1000, learning_rate = 0.01, verbose = True):
    for e in range(epochs):
        error = 0
        for x, y in zip(x_train, y_train):
            # forward
            output = predict(network, x)

            # error
            error += loss(y, output)

            # backward
            grad = loss_prime(y, output)
            for layer in reversed(network):
                grad = layer.backward(grad, learning_rate)

        error /= len(x_train)
        if verbose:
            print(f"{e + 1}/{epochs}, error={error}")

In [24]:
sample_num = 1000 # number of sample

X = np.random.uniform(low = -1, high = 1, size = sample_num)
np.random.shuffle(X)
m = np.random.randint(low = -10, high = 10)
c = np.random.randint(low = -10, high = 10)

y = m*X + c # equation of straight line
Y = y + (0.6 * np.random.randn(X.shape[0])) # add noise
X = np.reshape([[0, 0], [0, 1], [1, 0], [1, 1]], (4, 2, 1))
Y = np.reshape([Y], (4, 2, 1))

ValueError: cannot reshape array of size 1000 into shape (1,8)

In [18]:
class Reshape(Layer):
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape

    def forward(self, input):
        return np.reshape(input, self.output_shape)

    def backward(self, output_gradient, learning_rate):
        return np.reshape(output_gradient, self.input_shape)

In [20]:
X = np.reshape([[0, 0], [0, 1], [1, 0], [1, 1]], (4, 2, 1))
Y = np.reshape([[0], [1], [1], [0]], (4, 1, 1))

In [21]:
network = [
    Dense(2, 3),
    Tanh(),
    Dense(3, 1),
    Tanh()
]

In [22]:
train(network, mse, mse_prime, X, Y, epochs=10000, learning_rate=0.1)

1/10000, error=1.0121110998532796
2/10000, error=0.38176605952068876
3/10000, error=0.3355927473545638
4/10000, error=0.3307717438400179
5/10000, error=0.32941217090929203
6/10000, error=0.3285713899313662
7/10000, error=0.3278360925891129
8/10000, error=0.32713074609318693
9/10000, error=0.32643973193313547
10/10000, error=0.3257588089913628
11/10000, error=0.32508595970497106
12/10000, error=0.324419585173497
13/10000, error=0.3237580990874791
14/10000, error=0.32309978913198434
15/10000, error=0.3224427299131002
16/10000, error=0.3217847077009648
17/10000, error=0.3211231519578554
18/10000, error=0.3204550758972514
19/10000, error=0.31977703012445785
20/10000, error=0.31908507392583507
21/10000, error=0.3183747687619555
22/10000, error=0.3176411980922209
23/10000, error=0.31687901682354086
24/10000, error=0.3160825324434189
25/10000, error=0.3152458182914331
26/10000, error=0.3143628574700875
27/10000, error=0.3134277136181882
28/10000, error=0.31243472218330126
29/10000, error=0.31