In [1]:
import pandas as pd
import numpy as np
import numpy.typing as npt
from typing import NamedTuple, Callable
from enum import Enum

In [2]:
Images = npt.NDArray[np.int8]
# Layer = NamedTuple('Layer', [
#     ('weight', npt.NDArray[np.float32]),
#     ('bias', npt.NDArray[np.float32]),
#     ('activation', Callable),
#     ('back_prop', Callable)
# ]
# )

# NN (Perceptron) implementation
- 2 layers (with 10 neurons each)
    - Hidden layer (Size of the amount of pixel it image has )
    - Output layer (indicates one digit from 0 to 9)
- 784 inputs (1 image of 28x28 pixels)


In [3]:
class ActivationFunctions(Enum):

    ReLU = 1
    SOFTMAX = 2


class Layer:

    weight: npt.NDArray[np.float32]
    bias: npt.NDArray[np.float32]
    activation: Callable
    derivate: Callable
    predict: np.ndarray | None

    def __init__(
        self,
        weight: npt.NDArray[np.float32],
        bias: npt.NDArray[np.float32],
        act: ActivationFunctions,
    ) -> None:

        self.weight = weight
        self.bias = bias

        self.predict = None
        self.act_name = act

        if act == ActivationFunctions.ReLU:
            self.activation = self._ReLU
            self.derivate = self._ReLU_derive

        elif act == ActivationFunctions.SOFTMAX:
            self.activation = self._softmax

    def __repr__(self) -> str:
        return f"Layer(neurons={self.weight.shape}, activation={self.act_name})"

    def _ReLU(self) -> np.ndarray:

        return np.maximum(self.z, 0)

    def _ReLU_derive(self):
        return self.z > 0

    def _softmax(self):
        calc = np.exp(self.z)

        return calc / np.sum(calc)

    def foward_propagation(self, input: np.ndarray):
        self._finput = input
        self.z = self.weight.dot(input) + self.bias
        self.predict = self.activation()

    def back_propagation(
        self,
        input: np.ndarray,
        size: int,
        prev_layer: "Layer | None" = None,
    ):
        if self.predict is None:
            raise ValueError("Wrong usage! Run foward_prop before")

        if prev_layer is None:
            self.dz = self.predict - input

        else:
            self.dz = prev_layer.weight.T.dot(prev_layer.dz) * self.derivate(self.z)

        self.dw = (1 / size) * self.dz.dot(self._finput.T)

        self.db = (1 / size) * np.sum(self.dz)

    def update_weights(self, alpha: float):
        self.weight = self.weight - alpha * self.dw
        self.bias = self.bias - alpha * self.db

In [12]:
class NeuralNetwork:

    _layers: list[Layer]
    _inputs: np.ndarray
    _bias: np.ndarray
    _targets_size: int

    def _encode_target(self, target: Images) -> npt.NDArray[np.int8]:
        """One hot encoding of the target predictions"""
        enc = np.zeros((target.size, target.max() + 1))  # array of N labels
        # insert 1 in the position that represents the label
        enc[np.arange(target.size), target] = 1
        return enc.astype(np.int8).T  # As coluns

    def __init__(self, inputs: npt.NDArray[np.float32], target: Images) -> None:

        self._inputs = inputs
        self._targets = self._encode_target(target)
        self._targets_size = target.size
        self._layers = []

    def add_layer(self, neurons_size: int, inputs_size: int, act: ActivationFunctions):

        weight = (np.random.rand(neurons_size, inputs_size) - 0.5).astype("float32")

        bias = (np.random.rand(neurons_size, 1) - 0.5).astype("float32")

        layer = Layer(weight, bias, act)
        self._layers.append(layer)

    def get_predictions(self):
        a = self._layers[-1].predict
        if a is None:
            raise Exception("Not trained")
        return np.argmax(a, 0)

    def accuracy(self) -> int:

        return np.sum(self.get_predictions() == self._targets) / self._targets_size

    def predict(self, input: np.ndarray):
        for layer in self._layers:
            layer.foward_propagation(input)
            predictions = self.get_predictions()
        return predictions

    def fit(self, iterations: int, alpha: float, logs_step: int = 0):
        self.data = [("Iteration", "Accuracy")]

        for i in range(iterations):
            _input = self._inputs
            for layer in self._layers:
                layer.foward_propagation(_input)  # type: ignore
                _input = layer.predict

            prev_layer = None
            layer_input = self._targets
            for layer in reversed(self._layers):
                prev_layer = layer.back_propagation(
                    input=layer_input,
                    size=self._targets_size,
                    prev_layer=prev_layer,  # type: ignore
                )

            for layer in self._layers:
                layer.update_weights(alpha)
            if logs_step != 0 and i % logs_step == 0:
                print(self.accuracy())
                self.data.append((i, self.accuracy()))

# Aux Functions

In [5]:
def get_train_test_data(
    data: Images, test_size: int
) -> tuple[tuple[Images, Images], tuple[Images, Images]]:
    """Split the data randomly into train and test data"""

    data = data.copy()

    rows, cols = data.shape

    np.random.shuffle(data)

    data_test = data[0:test_size].T

    y_test = data_test[0]
    x_test = data_test[1:cols]
    x_test = (x_test / 255.0).astype("int16")

    data_train = data[1000:rows].T

    x_train = (data_train[1:cols] / 255.0).astype("int16")

    y_train = data_train[0]

    # Split data into labels and input data

    return (x_train, y_train), (x_test, y_test)

# Import dataset's with the pixels

In [6]:
data = np.array(pd.read_csv("./digit-recognizer/train.csv", sep=",").astype("int16"))



test = pd.read_csv("./digit-recognizer/test.csv")

# Transponding dataset so each column is a example image

- Also shuffling the dataset (Prevent overfitting while spliting between train and test)

In [13]:
(x_train, y_train), (x_test, y_test) = get_train_test_data(data, 1000)

In [14]:
model = NeuralNetwork(x_train, y_train)  # type: ignore
model.add_layer(10, 784, ActivationFunctions.ReLU)
model.add_layer(10, 10, ActivationFunctions.SOFTMAX)

0.01624390243902439
0.019317073170731707
0.02129268292682927
0.022609756097560975
0.026341463414634145
0.028317073170731708


In [18]:
import plotly.express as px

df = pd.DataFrame(model.data[1:], columns=model.data[0])
px.line(x=df["Iteration"], y=df["Accuracy"])