In [28]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

In [29]:
def one_hot_encode(Y):
    Y = Y.flatten()
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    return one_hot_Y

def softmax(Z):
    exp = Z  # np.exp(Z)
    A = exp / np.sum(exp)
    # print(np.sum(exp, axis=axis))
    return A

In [30]:
data = pd.read_csv("mnist_train.csv", header=None).to_numpy()
x, y = data[:, 1:]/255, one_hot_encode(data[:, 0:1])  # x.shape, y.shape = (60000, 784) (60000, output_neurons)
# plt.imshow(x[0].reshape(28, 28), cmap="gray")
# test  = pd.read_csv("mnist_test.csv", header=None).to_numpy()

In [31]:
# hidden = 8
# w0 = np.random.rand(hidden, 784)
# b0 = np.random.rand(hidden)
# w1 = np.random.rand(hidden, hidden)
# b1 = np.random.rand(hidden)
# w2 = np.random.rand(10, hidden)
# b2 = np.random.rand(10)
# def relu(x):
# 	return np.maximum(x, 0)
# def relu_(x):
# 	return (x > 0) * 1

In [32]:
# a1 = relu(x @ w0.T + b0.T)
# a2 = relu(a1 @ w1.T + b1.T)
# a3 = softmax(a2 @ w2.T + b2.T, axis=1)

In [33]:
class Layer:
    def __init__(self, input, output, alpha = 0.01) -> None:
        self.input = input
        self.output = output
        self.w = np.random.rand(output, input)  # w.shape = (neurons_in_this_layer, neurons_in_prev_layer)
        self.b = np.random.rand(output)  # b.shape = (neurons_in_this_layer,)
        self.alpha = alpha

    def relu(self, x):
        return np.maximum(x, 0)
    def relu_(self, x):
        return (x > 0) * 1

    def softmax(self, Z):
        A = np.exp(Z) / sum(np.exp(Z))
        return A
    def softmax_(self, Z):
        return Z

    def forward(self, x):
        self.x = x  # x.shape = (60000, neurons_in_prev_layer)
        self.z = x @ self.w.T + self.b    # z.shape = (60000, neurons_in_this_layer)
        self.a = self.activation(self.z)  # a.shape = (60000, neurons_in_this_layer)
        return self.a

    def update(self):
        # print(self.dw.shape, self.w.shape, self.db.shape, self.b.shape, f"{self.alpha = }", (self.alpha * self.dw).shape)
        # print(self.w.shape, self.dw.shape)
        self.w = self.w - self.alpha * self.dw
        self.b = self.b - self.alpha * self.db.mean(axis=0).T

class HiddenLayer(Layer):  # let this have 16 neurons
    def __init__(self, input, output, act_f="relu") -> None:
        super().__init__(input, output)
        self.activation = self.relu if act_f == "relu" else self.softmax
        self.activation_ = self.relu_ if act_f == "relu" else self.softmax_
    
    def backward(self, next_layer):
        dcdb_Lp1 = next_layer.db         # dcdb_Lp1.shape = (60000, neurons_in_next_layer)
        w_Lp1 = next_layer.w             # w_Lp1.shape    = (neurons_in_next_layer, neurons_in_this_layer)
        prev = dcdb_Lp1 @ w_Lp1      # equivalent to dcda = (60000, neurons_in_this_layer)
        dadz = self.activation_(self.z)  # dadz.shape     = (60000, neurons_in_this_layer)
        dzdw = self.x                    # dzdw.shape     = (60000, neurons_in_prev_layer)

        self.dw = (prev * dadz).T @ dzdw # dw.shape       = (output_neurons, neurons_in_prev_layer)
        self.db = (prev * dadz)            # db.shape       = (60000, output_neurons)

        return self

class OutputLayer(Layer):
    def __init__(self, input, output, act_f="softmax") -> None:
        super().__init__(input, output)
        self.activation = self.softmax if act_f == "softmax" else self.relu
        self.activation_ = self.softmax_ if act_f == "softmax" else self.relu_

    def backward(self, y):               # y.shape    = (60000, output_neurons)  Y is one hot encoded
        # C (error) = (self.a - y)^2
        dcda = self.a - y                # dcda.shape = (60000, output_neurons)
        dadz = self.activation_(self.z)  # dadz.shape = (60000, output_neurons)
        dzdw = self.x                    # dzdw.shape = (60000, neurons_in_prev_layer)

        self.dw = (dcda * dadz).T @ dzdw # dw.shape   = (output_neurons, neurons_in_prev_layer)
        self.db = (dcda * dadz)          # db.shape   = (60000, output_neurons)

        return self  # coz this is needed to calculate the error in the previous layer

class NeuralNetwork:
    def __init__(self, layers, alpha = 0.01, name = "Neural Network"):
        self.layers = layers
        self.name = name
        for layer in self.layers:
            layer.alpha = alpha

    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, y):
        for layer in reversed(self.layers):
            y = layer.backward(y)
        return y

    def update(self):
        for layer in reversed(self.layers):
            layer.update()
    
    def train(self, x, y, epochs=10):
        losses = []
        for epoch in tqdm(range(epochs)):
            y_pred = self.forward(x)
            print(y_pred[0], y[0])
            self.backward(y)
            self.update()
            
            loss = np.sum((y_pred - y)**2)
            losses.append(loss)
        return losses
            
    def __str__(self):
        return f"Neural Network: {self.name}"

In [40]:
hidden = 4
layers = [
            HiddenLayer(784, hidden),
            HiddenLayer(hidden, hidden),
            OutputLayer(hidden, 10),
        ]

nn = NeuralNetwork(layers, alpha=1e-10, name="trial_nn")

In [35]:
# y_pred = nn.forward(x)
# y = layers[4].backward(y_pred)
# y = layers[3].backward(y)
# y = layers[2].backward(y)
# y = layers[1].backward(y)
# y = layers[0].backward(y)


# layers[4].update()
# layers[3].update()
# layers[2].update()
# layers[1].update()
# layers[0].update()

In [41]:
J = nn.train(x, y, epochs=10)

  A = np.exp(Z) / sum(np.exp(Z))
  A = np.exp(Z) / sum(np.exp(Z))
 10%|█         | 1/10 [00:00<00:03,  2.85it/s]

[0.00000000e+000 0.00000000e+000 0.00000000e+000 3.69520835e-185
 2.42273494e-202 5.84752008e-116 0.00000000e+000 0.00000000e+000
 0.00000000e+000 1.12353057e-154] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


 20%|██        | 2/10 [00:00<00:02,  3.42it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


 30%|███       | 3/10 [00:00<00:02,  3.46it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


 40%|████      | 4/10 [00:01<00:01,  3.46it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


 50%|█████     | 5/10 [00:01<00:01,  3.24it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


 60%|██████    | 6/10 [00:01<00:01,  3.36it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


 70%|███████   | 7/10 [00:02<00:00,  3.42it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


 80%|████████  | 8/10 [00:02<00:00,  3.54it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


 90%|█████████ | 9/10 [00:02<00:00,  3.56it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


100%|██████████| 10/10 [00:02<00:00,  3.47it/s]

[nan nan nan nan nan nan nan nan nan nan] [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]





In [37]:
J

[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]