# MNIST Prediction with Convolutional Neural Network

- MNIST dataset: is a dataset of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images. More info can be found at the [MNIST homepage](http://yann.lecun.com/exdb/mnist/).
- Goal: build a simple artificial neural network to predict the digit in the images.
- Reference: [Oddly Satisfying Deep Learning](https://pythonandml.github.io/dlbook/content/convolutional_neural_networks/cnn_over_mlp.html)

#### Import libraries

In [39]:
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from keras.datasets import mnist

#### 1. Lib

##### 1.1. Base Layer class

In [40]:
class BaseLayer:
    def __init__(self) -> None:
        self.input = None
        self.output = None

    def forward(self, X: np.ndarray) -> np.ndarray:
        """
        :param X: input data

        TODO: return the output of the layer
        """
        pass

    def backpropagation(self, dZ: np.ndarray, lr: float) -> np.ndarray:
        """
        :param dZ: gradient of the loss with respect to the output of the layer
        :param lr: learning rate

        TODO: update parameters and return the gradient of the input
        """
        pass

##### 1.2. Weight Initialization

In [41]:
class WeightInitializer:
    def __init__(self, inp_size: int, out_size: int, act_type: str = None) -> None:
        if act_type is None:
            self.act_type = "random"
        else:
            self.act_type = act_type

        self.inp_size = inp_size
        self.out_size = out_size

    def zeros(self) -> np.ndarray:
        return np.zeros(shape=(self.out_size, self.inp_size))

    def ones(self) -> np.ndarray:
        return np.ones(shape=(self.out_size, self.inp_size))

    def random(self) -> np.ndarray:
        return np.random.randn(self.out_size, self.inp_size)

    def random_uniform(self) -> np.ndarray:
        return np.random.uniform(-1, 1, (self.out_size, self.inp_size))

    def get(self) -> np.ndarray:
        if self.act_type == "zeros":
            return self.zeros()
        elif self.act_type == "ones":
            return self.ones()
        elif self.act_type == "random":
            return self.random()
        elif self.act_type == "random_uniform":
            return self.random_uniform()
        else:
            raise ValueError("Invalid activation type")

##### 1.3. Cost Function

In [42]:
class Cost:
    def __init__(self, cost_type: str = "mse") -> None:
        """
        :param cost_type: type of cost function to use (mse, cross_entropy)
        """
        self.cost_type = cost_type

    def mse(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        return np.mean((y_true - y_pred) ** 2)

    def d_mse(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        return 2 * (y_pred - y_true) / np.size(y_true)

    def cross_entropy(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        return -np.mean(y_true * np.log(y_pred + 1e-15))

    def d_cross_entropy(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        return -y_true / (y_pred + 1e-15)

    def get_cost(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        if self.cost_type == "mse":
            return self.mse(y_true, y_pred)
        elif self.cost_type == "cross_entropy":
            return self.cross_entropy(y_true, y_pred)
        else:
            raise ValueError("Invalid cost type")

    def get_d_cost(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        if self.cost_type == "mse":
            return self.d_mse(y_true, y_pred)
        elif self.cost_type == "cross_entropy":
            return self.d_cross_entropy(y_true, y_pred)
        else:
            raise ValueError("Invalid cost type")

#### 2. Layer

##### 2.1. Convolutional Layer

##### 2.2. Dense Layer

In [43]:
class Dense(BaseLayer):
    def __init__(self, inp_size: int, out_size: int) -> None:
        self.weights = WeightInitializer(inp_size, out_size).get()
        self.bias = WeightInitializer(1, out_size).get()

    def forward(self, X: np.ndarray) -> np.ndarray:
        self.X = X
        return np.dot(self.weights, self.X) + self.bias

    def backpropagation(self, dZ: np.ndarray, lr: float) -> np.ndarray:
        dW = np.dot(dZ, self.X.T)
        self.weights -= lr * dW
        self.bias -= lr * dZ
        return np.dot(self.weights.T, dZ)

##### 2.3. Activation Layer

In [44]:
class Activation(BaseLayer):
    def __init__(self, act_type: str = "reLU") -> None:
        self.act_type = act_type

    def linear(self, X: np.ndarray) -> np.ndarray:
        return X

    def d_linear(self, X: np.ndarray) -> np.ndarray:
        return 1

    def reLU(self, X: np.ndarray) -> np.ndarray:
        return np.maximum(0, X)

    def d_reLU(self, X: np.ndarray) -> np.ndarray:
        return np.where(X > 0, 1, 0)

    def sigmoid(self, X: np.ndarray) -> np.ndarray:
        return 1 / (1 + np.exp(-X))

    def d_sigmoid(self, X: np.ndarray) -> np.ndarray:
        return self.sigmoid(X) * (1 - self.sigmoid(X))

    def tanh(self, X: np.ndarray) -> np.ndarray:
        return np.tanh(X)

    def d_tanh(self, X: np.ndarray) -> np.ndarray:
        return 1 - np.tanh(X) ** 2

    def get_activation(self, X: np.ndarray) -> np.ndarray:
        if self.act_type == "linear":
            return self.linear(X)
        elif self.act_type == "reLU":
            return self.reLU(X)
        elif self.act_type == "sigmoid":
            return self.sigmoid(X)
        elif self.act_type == "tanh":
            return self.tanh(X)
        else:
            raise ValueError("Invalid activation type")

    def get_d_activation(self, X: np.ndarray) -> np.ndarray:
        if self.act_type == "linear":
            return self.d_linear(X)
        elif self.act_type == "reLU":
            return self.d_reLU(X)
        elif self.act_type == "sigmoid":
            return self.d_sigmoid(X)
        elif self.act_type == "tanh":
            return self.d_tanh(X)
        else:
            raise ValueError("Invalid activation type")

    def forward(self, X: np.ndarray) -> np.ndarray:
        self.X = X
        return self.get_activation(self.X)

    def backpropagation(self, dZ: np.ndarray, lr: float) -> np.ndarray:
        return np.multiply(dZ, self.get_d_activation(self.X))

#### 3. Convolutional Neural Network (CNN) Model class

#### 4. Training the model

In [45]:
X = np.reshape([[0, 0], [0, 1], [1, 0], [1, 1]], (4, 2, 1))
Y = np.reshape([[0], [1], [1], [0]], (4, 1, 1))

In [46]:
network = [Dense(2, 3), Activation("tanh"), Dense(3, 1), Activation("tanh")]

In [47]:
epochs = 1000
lr = 0.1

In [50]:
for e in range(epochs):
    error = 0
    for x, y in zip(X, Y):
        for layer in network:
            x = layer.forward(x)

        error += Cost().get_cost(y, x)

        dZ = Cost().get_d_cost(y, x)
        for layer in network[::-1]:
            dZ = layer.backpropagation(dZ, lr)

    error /= len(X)
    print(f"Epoch {e + 1}/{epochs} - Error: {error}")

Epoch 1/1000 - Error: 0.0004408531663266601
Epoch 2/1000 - Error: 0.00044132807362769124
Epoch 3/1000 - Error: 0.0004383127110737105
Epoch 4/1000 - Error: 0.0004387732086667791
Epoch 5/1000 - Error: 0.00043583384362402903
Epoch 6/1000 - Error: 0.00043628439994462066
Epoch 7/1000 - Error: 0.0004334124260921905
Epoch 8/1000 - Error: 0.0004338575134598741
Epoch 9/1000 - Error: 0.000431045168268228
Epoch 10/1000 - Error: 0.0004314892692395029
Epoch 11/1000 - Error: 0.0004287295117716031
Epoch 12/1000 - Error: 0.00042917713448864617
Epoch 13/1000 - Error: 0.0004264635431734209
Epoch 14/1000 - Error: 0.00042691924408367677
Epoch 15/1000 - Error: 0.00042424593135643275
Epoch 16/1000 - Error: 0.00042471434428457245
Epoch 17/1000 - Error: 0.000422075885691768
Epoch 18/1000 - Error: 0.00042256175672600625
Epoch 19/1000 - Error: 0.0004199531326828663
Epoch 20/1000 - Error: 0.0004204613607314121
Epoch 21/1000 - Error: 0.00041787790963227547
Epoch 22/1000 - Error: 0.00041841359284367294
Epoch 23/10

In [52]:
for x, y in zip(X, Y):
    for layer in network:
        x = layer.forward(x)

    print(
        f"Input: {np.reshape(x, (1, 1))[0][0]} - Prediction: {np.reshape(y, (1, 1))[0][0]}"
    )

Input: -0.005486787307183797 - Prediction: 0
Input: 0.9837989355905562 - Prediction: 1
Input: 0.984093789813408 - Prediction: 1
Input: -0.02292253188105641 - Prediction: 0
