# Tugas Besar IF4074 - Pembelajaran Mesin Lanjut
# Implementasi Convolutional Neural Network

# Simple CNN
**Simple CNN** is a convolutional neural network implemented in Python and trained using the backpropagation algorithm.

## Setup
Assuming you've installed the latest version of Python (if not, guides for it are widely available),
1. ensure pip is installed by running `python -m ensurepip --upgrade`;
2. install the Python dependencies by running `pip install -r requirements.txt`.

## Contribution (Milestone 1)
| NIM      | Name                   | Contribution(s)                                                       |
|----------|------------------------|-----------------------------------------------------------------------|
| 13520041 | Ilham Pratama          | Dataset handling; Detector, Pooling, Dense, and Flatten layer; Report |
| 13520042 | Jeremy S.O.N. Simbolon | Class model; Convolutional layer; Report                              |


### Library Import

In [1]:
import math
import os

from typing import Any

import cv2
import jsonpickle
import jsonpickle.ext.numpy
import numpy as np
import numpy.typing as npt

from scipy.special import expit

### Dataset Loading

In [2]:
class Utils:
    """
    Module related utility functions.

    This class is used to prepare the image dataset for the CNN model. In
    addition, this class is also used to save and load the CNN model.
    """

    @staticmethod
    def load_dataset(dataset_path: str) -> tuple[npt.NDArray, npt.NDArray, dict]:
        """
        Preprocess the dataset and return useful information for further processing.

        Every sub-directory of ``dataset_path`` is treated as one class; the
        class index is the position of the directory name in sorted order.
        Entries of ``dataset_path`` that are not directories are skipped.

        :param dataset_path: A string representation of the path pointing to
                             the dataset.
        :return: A tuple consisted of an ndarray of dataset image path, an
                 ndarray of image labels (``int16``), and a dictionary that
                 maps class labels (as strings) to folder name.
        """
        # Only directories can be class folders; stray files in the dataset
        # root would otherwise make os.listdir() below raise.
        class_folders = sorted(
            entry for entry in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, entry))
        )

        image_path = []
        image_label = []
        image_dictionary = {}
        for class_index, folder_name in enumerate(class_folders):
            class_folder_path = os.path.join(dataset_path, folder_name)
            image_names = sorted(os.listdir(class_folder_path))

            image_path.extend(os.path.join(class_folder_path, image_name) for image_name in image_names)
            image_label.extend([class_index] * len(image_names))
            image_dictionary[str(class_index)] = folder_name

        # Convert once at the end instead of concatenating arrays per class.
        return np.asarray(image_path), np.asarray(image_label, dtype=np.int16), image_dictionary

    @staticmethod
    def convert_image_to_matrix(path: npt.NDArray) -> npt.NDArray:
        """
        Convert the image dataset into an ndarray of image matrices.

        Each image is read in colour, converted to grayscale, and resized to
        256 x 256 pixels.

        :param path: An ndarray of string representation of the path pointing
                     to each image entry in the dataset.
        :return: An ndarray stacking the grayscale matrix of every image in
                 the dataset.
        :raises ValueError: If an entry of ``path`` cannot be read as an image.
        """
        list_of_image_matrix = []
        size = (256, 256)

        for file_img in path:
            image = cv2.imread(file_img, 1)
            if image is None:
                # cv2.imread signals failure by returning None rather than
                # raising, which would surface later as an opaque OpenCV error.
                raise ValueError(f"Unable to read image: {file_img}")
            matrix = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            matrix = cv2.resize(matrix, size)
            list_of_image_matrix.append(matrix)

        return np.array(list_of_image_matrix)

    @staticmethod
    def save_model(model_object: "Model", file_name: str = "model.json") -> None:
        """
        Serialize the model into a JSON file using jsonpickle.

        :param model_object: The model to be saved.
        :param file_name: A string representation of the path of the output
                          file.
        """
        # The numpy handlers let jsonpickle encode the ndarray attributes.
        jsonpickle.ext.numpy.register_handlers()
        encoded_model = jsonpickle.encode(model_object, indent=4)
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(encoded_model)

    @staticmethod
    def load_model(file_name: str = "model.json") -> "Model":
        """
        Restore a model previously stored with ``save_model``.

        :param file_name: A string representation of the path of the file to
                          be read.
        :return: The object decoded from the file.
        """
        jsonpickle.ext.numpy.register_handlers()
        with open(file_name, "r", encoding="utf-8") as file:
            encoded_model = file.read()
        # SECURITY: jsonpickle.decode can instantiate arbitrary Python objects;
        # only load model files that come from a trusted source.
        return jsonpickle.decode(encoded_model)

### Model Representation

In [3]:
class Model:
    """
    The convolutional neural network model used to classify images.

    Layers are kept in the order they were added and are applied in that
    order during forward propagation.
    """

    def __init__(self) -> None:
        """
        Instantiate the convolutional neural network model.
        """
        self._layers = []
        self._result = []

    class Layer:
        """
        Base representation of the layer used as part of the convolutional
        neural network architecture.
        """

        def __init__(self, name) -> None:
            """
            Instantiate the base layer.

            :param name: Name of the layer.
            """
            self._name = name

        def feed_forward(self) -> None:
            """Indicate the forward propagation is being performed."""
            print(f"Performing feed forward on {self._name} layer...")
            print()

    class ConvolutionLayer(Layer):
        """
        The convolutional layer in convolutional neural network.

        This class is inherited from the ``Layer`` class. This layer is used
        to perform the convolution operation on the input weights.
        """

        def __init__(
            self,
            filter_count: int,
            filter_size: tuple[int, int] = (32, 32),
            padding_size: int = 0,
            stride_size: tuple[int, int] = (1, 1),
        ) -> None:
            """
            Instantiate the convolutional layer.

            The filter weights and biases are created lazily on the first
            call to ``convolute`` because their shapes depend on the input.

            :param filter_count: An integer specifying the amount of feature
                                 to be extracted in the form of the amount of
                                 filters.
            :param filter_size: A tuple of two integers specifying the height
                                and width of the convolution filter.
            :param padding_size: An integer specifying the dimension of 0's to
                                 be added around the weight.
            :param stride_size: A tuple of two integers specifying the pixel
                                step size along the height and width of the
                                input weight.
            """
            super().__init__("convolution")
            self._filter_count = filter_count
            self._filter_dimension = 0
            self._filter_height, self._filter_width = filter_size
            self._filter_weights = None
            self._padding_size = padding_size
            self._stride_height, self._stride_width = stride_size
            self._output_height = 0
            self._output_width = 0
            self._weight_dimension = 0
            self._weight_height = 0
            self._weight_width = 0
            self._biases = None

        def _pad_weights(
            self,
            weights: npt.NDArray,
            padding_size: int,
        ) -> npt.NDArray:
            """
            Pad the specified weights with 0's around it.

            Only the height and width axes are padded; the leading (channel)
            axis is left untouched. The padded dimensions are recorded on the
            layer as a side effect.

            :param weights: The ndarray of weights to be padded with 0's,
                            indexed as (channel, height, width).
            :param padding_size: An integer specifying the dimension of 0's to
                                 be added around the weight.
            :return: An ndarray of weights padded with 0's.
            """
            weights = np.asarray(weights, dtype=float)
            border = (padding_size, padding_size)
            padded_weights = np.pad(weights, ((0, 0), border, border), mode="constant", constant_values=0.0)

            self._weight_dimension, self._weight_height, self._weight_width = padded_weights.shape
            return padded_weights

        def convolute(
            self,
            weights: npt.NDArray,
        ) -> npt.NDArray:
            """
            Perform the convolution operation on the input weights.

            :param weights: An ndarray of input weights, indexed as
                            (channel, height, width).
            :return: An ndarray of features extracted from the weights, indexed
                     as (filter, output height, output width).
            :raises ValueError: If the filter is larger than the padded input.
            """
            padded_weights = self._pad_weights(weights, self._padding_size)
            self._filter_dimension = self._weight_dimension

            height_span = self._weight_height - self._filter_height
            width_span = self._weight_width - self._filter_width
            if height_span < 0 or width_span < 0:
                raise ValueError("The convolution filter is larger than the padded input.")

            # Floor division: a window that would run past the edge is dropped.
            self._output_height = height_span // self._stride_height + 1
            self._output_width = width_span // self._stride_width + 1

            if self._filter_weights is None:
                self._filter_weights = np.random.rand(
                    self._filter_count,
                    self._filter_dimension,
                    self._filter_height,
                    self._filter_width,
                )
            if self._biases is None:
                self._biases = np.random.rand(self._filter_count, self._output_height, self._output_width)

            # Start from a copy so the stored biases are not modified.
            feature_maps = np.copy(self._biases)
            for row in range(self._output_height):
                top = row * self._stride_height
                for column in range(self._output_width):
                    left = column * self._stride_width
                    field = padded_weights[:, top : top + self._filter_height, left : left + self._filter_width]
                    # Contract (channel, height, width) for every filter at once.
                    feature_maps[:, row, column] += np.tensordot(self._filter_weights, field, axes=3)
            return feature_maps

        def feed_forward(self, weights: npt.NDArray) -> npt.NDArray:
            """
            Indicate and perform the convolution process on the input weights.

            :param weights: The ndarray of weights to be convoluted.
            :return: An ndarray of convoluted weights.
            """
            super().feed_forward()
            result = self.convolute(weights)
            print("Convolution result: ")
            print(result)
            print()
            return result

    class DetectorLayer(Layer):
        """
        The detector layer in convolutional neural network.

        This class is inherited from the ``Layer`` class. This layer is used to
        introduce non-linearity to the learning process using the reLU
        activation function.
        """

        def __init__(self) -> None:
            """Instantiate the detector layer."""
            super().__init__("detector")

        @staticmethod
        def detect(feature: npt.NDArray) -> npt.NDArray:
            """
            Apply the reLU activation function on the input weights.

            :param feature: An ndarray of input weights.
            :return: An ndarray of weights on which the reLU function has been
                     applied.
            """
            return np.maximum(feature, 0)

        def feed_forward(self, feature: npt.NDArray) -> npt.NDArray:
            """
            Indicate and perform the detector process on the input weights.

            :param feature: The ndarray of weights on which reLU function is
                            to be applied.
            :return: An ndarray of activated weights.
            """
            super().feed_forward()
            result = self.detect(feature)
            print("Detector result: ")
            print(result)
            print()
            return result

    class PoolingLayer(Layer):
        """
        The pooling layer in convolutional neural network.

        This class is inherited from the ``Layer`` class. This layer is used to
        down-sample the input weights according to the specified pooling
        operation.
        """

        def __init__(self, filter_size: int, stride_size: int, mode: str = "max") -> None:
            """
            Instantiate the pooling layer.

            :param filter_size: An integer specifying the dimension of the
                                pooling window.
            :param stride_size: An integer specifying the pixel step size along
                                the height and width of the input weight.
            :param mode: A string specifying the preferred pooling operation.
                         Must either be ``average`` or ``max``.
            :raises ValueError: If ``mode`` is not a supported operation.
            """
            super().__init__("pooling")
            if mode not in ("average", "max"):
                raise ValueError(f"Unsupported pooling mode: {mode!r}")
            self._filter_size = filter_size
            self._stride_size = stride_size
            self._mode = mode

        def _window(self, input_matrix: npt.NDArray, d: int, h: int, w: int) -> npt.NDArray:
            """Return the pooling window at output location (d, h, w)."""
            h_start = h * self._stride_size
            w_start = w * self._stride_size
            return input_matrix[
                d,
                h_start : h_start + self._filter_size,
                w_start : w_start + self._filter_size,
            ]

        def average(self, input_matrix: npt.NDArray, d: int, h: int, w: int) -> float:
            """
            Take the average of the input values over the pooling window.

            :param input_matrix: The ndarray of weights on which the operation
                                 is applied.
            :param d: An integer specifying the depth location of the pooling
                      window.
            :param h: An integer specifying the height location of the pooling
                      window.
            :param w: An integer specifying the width location of the pooling
                      window.
            :return: The average of the input values.
            """
            return np.average(self._window(input_matrix, d, h, w))

        def max(self, input_matrix: npt.NDArray, d: int, h: int, w: int) -> float:
            """
            Take the maximum of the input values over the pooling window.

            :param input_matrix: The ndarray of weights on which the operation
                                 is applied.
            :param d: An integer specifying the depth location of the pooling
                      window.
            :param h: An integer specifying the height location of the pooling
                      window.
            :param w: An integer specifying the width location of the pooling
                      window.
            :return: The maximum of the input values.
            """
            return np.max(self._window(input_matrix, d, h, w))

        def pool(self, input_matrix: npt.NDArray) -> npt.NDArray:
            """
            Perform the pooling operation on the input weights.

            :param input_matrix: An ndarray of input weights, indexed as
                                 (depth, height, width).
            :return: An ndarray of down-sampled weights.
            :raises ValueError: If the layer's mode is not a supported
                                operation.
            """
            # Resolve the operation once; an unknown mode must not silently
            # yield an all-zero result.
            if self._mode == "average":
                reduce_window = self.average
            elif self._mode == "max":
                reduce_window = self.max
            else:
                raise ValueError(f"Unsupported pooling mode: {self._mode!r}")

            depth, height, width = input_matrix.shape
            pooled_height = (height - self._filter_size) // self._stride_size + 1
            pooled_width = (width - self._filter_size) // self._stride_size + 1
            pooled = np.zeros([depth, pooled_height, pooled_width], dtype=np.double)
            for d in range(depth):
                for h in range(pooled_height):
                    for w in range(pooled_width):
                        pooled[d, h, w] = reduce_window(input_matrix, d, h, w)
            return pooled

        def feed_forward(self, input_matrix: npt.NDArray) -> npt.NDArray:
            """
            Indicate and perform the pooling operation on the input weights.

            :param input_matrix: An ndarray of input weights.
            :return: An ndarray of down-sampled weights.
            """
            super().feed_forward()
            result = self.pool(input_matrix)
            print("Pooling result: ")
            print(result)
            print()
            return result

    class DenseLayer(Layer):
        """
        The dense layer in convolutional neural network.

        This class is inherited from the ``Layer`` class. This layer is used to
        abstractly represent the input data using its weights.
        """

        def __init__(self, unit_count: int, activation: str = "sigmoid") -> None:
            """
            Instantiate the dense layer.

            :param unit_count: An integer specifying the dimension of the
                               output space.
            :param activation: The activation function to be applied to each
                               node. Must either be ``sigmoid`` or ``relu``.
            :raises ValueError: If ``activation`` is not a supported function.
            """
            super().__init__("dense")
            if activation not in ("sigmoid", "relu"):
                raise ValueError(f"Unsupported activation function: {activation!r}")
            self._unit_count = unit_count
            self._activation = activation
            self._bias = np.zeros(unit_count)
            # NOTE(review): each unit holds a single scalar weight shared by
            # every input value. A conventional dense layer keeps one weight
            # per (unit, input) pair - confirm this simplification is intended.
            self._weight = np.random.randn(unit_count)

        def dense(self, input_matrix: npt.NDArray) -> npt.NDArray:
            """
            Perform the linear combination and activation of the input weights
            using the layer's weights.

            For unit ``i`` the pre-activation value is
            ``weight[i] * sum(input_matrix) + bias[i]``.

            :param input_matrix: An ndarray of input weights.
            :return: An ndarray of linearly-combined and activated weights.
            :raises ValueError: If the layer's activation is not a supported
                                function.
            """
            result = self._weight * np.sum(input_matrix) + self._bias

            if self._activation == "sigmoid":
                return expit(result)
            if self._activation == "relu":
                return np.maximum(result, 0)
            raise ValueError(f"Unsupported activation function: {self._activation!r}")

        def feed_forward(self, input_matrix: npt.NDArray) -> npt.NDArray:
            """
            Indicate and perform the linear combination and activation of the
            input weights using the layer's weights.

            :param input_matrix: An ndarray of input weights.
            :return: An ndarray of linearly-combined and activated weights.
            """
            super().feed_forward()
            result = self.dense(input_matrix)
            print("Dense result: ")
            print(result)
            print()
            return result

    class FlattenLayer(Layer):
        """
        The flatten layer in convolutional neural network.

        This class is inherited from the ``Layer`` class. This layer is used to
        flatten the input weights.
        """

        def __init__(self) -> None:
            """Instantiate the flatten layer."""
            super().__init__("flatten")

        @staticmethod
        def flatten(input_matrix: npt.NDArray) -> npt.NDArray:
            """
            Perform the flatten operation on the input weights.

            :param input_matrix: An ndarray of input weights.
            :return: A one-dimensional copy of the input weights.
            """
            return input_matrix.flatten()

        def feed_forward(self, input_matrix: npt.NDArray) -> npt.NDArray:
            """
            Indicate and perform the flatten operation on the input weights.

            :param input_matrix: An ndarray of input weights.
            :return: An ndarray of flatten weights.
            """
            super().feed_forward()
            result = self.flatten(input_matrix)
            print("Flatten result: ")
            print(result)
            print()
            return result

    def add_layer(self, name: str, **kwargs: Any) -> None:
        """
        Sequentially add the specified layer into the model.

        :param name: A string representation of the layer to be added. Must be
                     one of ``convolution``, ``detector``, ``pooling``,
                     ``dense``, or ``flatten``.
        :param kwargs: Layer-related parameters in the form of key-value pairs.
                       They are ignored by the ``detector`` and ``flatten``
                       layers, which take no parameters.
        :raises ValueError: If ``name`` is not a known layer.
        """
        layer_factories = {
            "convolution": lambda: self.ConvolutionLayer(**kwargs),
            "detector": lambda: self.DetectorLayer(),
            "pooling": lambda: self.PoolingLayer(**kwargs),
            "dense": lambda: self.DenseLayer(**kwargs),
            "flatten": lambda: self.FlattenLayer(),
        }
        create_layer = layer_factories.get(name)
        if create_layer is None:
            # A misspelt layer name must not silently shorten the network.
            raise ValueError(f"Unknown layer: {name!r}")
        self._layers.append(create_layer())

    def forward_propagate(self, tensor: npt.NDArray) -> None:
        """
        Indicate and perform the forward propagation operation on the model.

        The output of the last layer is stored on the model.

        :param tensor: An ndarray of input weights representing the input
                       pictures.
        """
        for layer in self._layers:
            tensor = layer.feed_forward(tensor)
        print("Forward propagation result: ")
        print(tensor)
        self._result = tensor

    def backward_propagate(self) -> None:
        """
        Indicate and perform the backward propagation operation on the model.

        Not implemented yet; currently does nothing.
        """
        pass

    def train(self, tensor: npt.NDArray) -> None:
        """
        Train the model on the given input.

        Not implemented yet; currently does nothing.

        :param tensor: An ndarray of representations of the input pictures to
                       be fed into the model.
        """
        pass

### Test result

In [4]:
# Load the dataset index and keep only the first image for this demonstration.
folder_path, class_label, class_dictionary = Utils.load_dataset("./dataset")
image_number = 0
# Wrapping the selected image in a list gives the input a leading axis of
# length one, which the first layer iterates over.
image_matrix = [Utils.convert_image_to_matrix(folder_path)[image_number]]

# The network architecture, listed in the order the layers are applied.
layer_specs = [
    (
        "convolution",
        {
            "filter_count": 32,
            "filter_size": (3, 3),
            "padding_size": 0,
            "stride_size": (1, 1),
        },
    ),
    ("detector", {}),
    ("pooling", {"filter_size": 3, "stride_size": 2, "mode": "average"}),
    ("flatten", {}),
    ("dense", {"unit_count": 8, "activation": "relu"}),
    ("dense", {"unit_count": 1, "activation": "sigmoid"}),
]

model = Model()
for layer_name, layer_kwargs in layer_specs:
    model.add_layer(layer_name, **layer_kwargs)
model.forward_propagate(image_matrix)

Performing feed forward on convolution layer...
Convolution result: 
[[[ 648.49656359  641.52144541  638.99898134 ...  614.39773912
    612.05264008  615.96808925]
  [ 640.2958566   636.21464776  637.14061266 ...  615.46199162
    612.83096313  616.60636706]
  [ 637.26780274  635.15181697  637.95385063 ...  616.28592164
    613.84294571  616.46747929]
  ...
  [ 247.46409385  191.63659062  137.63907593 ...   38.20088444
     73.84022579  199.50167162]
  [ 196.17667191  118.69304054  138.57464835 ...   80.74233723
     84.21460189  244.83939581]
  [ 142.671394     62.25614926   86.21873602 ...   99.7036421
     91.93258199  219.9353509 ]]

 [[ 977.5944704   965.87805988  959.96179834 ...  920.38347878
    917.51781425  922.34268052]
  [ 965.41419635  958.12757317  957.58488995 ...  923.71278104
    920.56192325  924.49532705]
  [ 959.53156334  954.97527866  956.81634506 ...  925.94677319
    922.600966    926.264062  ]
  ...
  [ 425.86931094  317.51873983  194.59234257 ...   56.12594987
