In [53]:
import numpy as np
from numpy import ndarray
from typing import List

In [54]:
# The Building Blocks of Neural Networks
def assert_same_shape(array: ndarray, array_grad: ndarray):
    """Assert that an array and its gradient have identical shapes.

    Args:
        array: The reference array.
        array_grad: The gradient array that should mirror ``array``'s shape.

    Raises:
        AssertionError: If the two ``.shape`` tuples differ; the message
            reports both shapes.
    """
    shapes_match = array.shape == array_grad.shape
    assert shapes_match, f"array and grad shapes do not match:  {array.shape} != {array_grad.shape}"

# Two sample arrays with deliberately different shapes: (3,) versus (2, 1).
a = np.array([1, 2, 3])
b = np.array([[1], [2]])
# Uncomment the call below to see the shape-mismatch AssertionError:
# assert_same_shape(a, b)

In [55]:
# Operations
class Operation:
    """Base class for a single differentiable step.

    Subclasses supply ``_output`` (the forward computation) and
    ``_input_grad`` (the gradient with respect to the input). ``forward``
    and ``backward`` wrap those hooks, caching intermediate values on the
    instance and checking gradient shapes.
    """

    def forward(self, input_: ndarray):
        """Store ``input_``, compute the output via ``_output`` and return it.

        The input is kept on ``self.input_`` and the result on
        ``self.output`` so that ``backward`` can use them later.
        """
        self.input_ = input_
        self.output = self._output()
        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        """Turn the gradient w.r.t. the output into the gradient w.r.t. the input.

        Args:
            output_grad: Gradient with respect to ``self.output``; must have
                the same shape as ``self.output``.

        Returns:
            The gradient with respect to ``self.input_`` (also stored on
            ``self.input_grad``), with the same shape as ``self.input_``.

        Raises:
            AssertionError: If either shape check fails.
        """
        assert_same_shape(self.output, output_grad)
        grad_wrt_input = self._input_grad(output_grad)
        # Stored before the shape check, so it is available even if the check fails.
        self.input_grad = grad_wrt_input
        assert_same_shape(self.input_, self.input_grad)
        return self.input_grad

    def _output(self) -> ndarray:
        """Forward computation hook; subclasses must override."""
        raise NotImplementedError

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        """Input-gradient hook; subclasses must override."""
        raise NotImplementedError

class ParamOperation(Operation):
    """An ``Operation`` that also carries a parameter array.

    In addition to the input gradient, the backward pass computes the
    gradient with respect to ``self.param`` through the ``_param_grad`` hook.
    """

    def __init__(self, param: ndarray):
        """Keep ``param`` on the instance as ``self.param``."""
        self.param = param

    def backward(self, output_grad: ndarray) -> ndarray:
        """Compute both the input gradient and the parameter gradient.

        Args:
            output_grad: Gradient with respect to this operation's output.

        Returns:
            The gradient with respect to the input (``self.input_grad``).
            The parameter gradient is stored on ``self.param_grad`` and is
            checked to have the same shape as ``self.param``.
        """
        # The parent pass checks shapes and fills in self.input_grad.
        super().backward(output_grad)
        grad_wrt_param = self._param_grad(output_grad)
        self.param_grad = grad_wrt_param
        assert_same_shape(self.param, self.param_grad)
        return self.input_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        """Parameter-gradient hook; subclasses must override."""
        raise NotImplementedError



In [62]:
# Layers
class WeightMultiply(ParamOperation):
    """Multiplies the input by a weight array held in ``self.param``."""

    def _output(self) -> ndarray:
        """Return ``np.dot(input, weights)``."""
        inputs, weights = self.input_, self.param
        return np.dot(inputs, weights)

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        """Return the gradient w.r.t. the input: ``output_grad . weights^T``."""
        weights_t = np.transpose(self.param)
        return np.dot(output_grad, weights_t)

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        """Return the gradient w.r.t. the weights: ``input^T . output_grad``."""
        inputs_t = np.transpose(self.input_)
        return np.dot(inputs_t, output_grad)

class BiasAdd(ParamOperation):
    """Adds a bias array, held in ``self.param``, to the input.

    The bias must have exactly one row so that it broadcasts across the
    rows of the input.
    """

    def __init__(self, param: ndarray):
        """Validate the bias shape and store it as ``self.param``.

        The argument is named ``param`` to match ``ParamOperation.__init__``,
        which is the constructor callers were effectively using while this
        method was misspelled.

        Args:
            param: Bias array whose first dimension must be 1.

        Raises:
            AssertionError: If ``param.shape[0]`` is not 1.
        """
        assert param.shape[0] == 1, f"bias must have a single row, got shape {param.shape}"
        super().__init__(param)

    def _output(self) -> ndarray:
        """Return the input plus the bias (numpy broadcasting applies)."""
        return self.input_ + self.param

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        """Return the gradient w.r.t. the input.

        Addition passes the gradient through unchanged, so this is
        ``output_grad`` multiplied element-wise by ones shaped like the input.
        """
        return np.ones_like(self.input_) * output_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        """Return the gradient w.r.t. the bias, shaped ``(1, n_columns)``.

        The bias is added to every row, so each row's gradient is summed
        along axis 0 and the result is reshaped back to a single row.
        """
        param_grad = np.ones_like(self.param) * output_grad
        return np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])

class Sigmoid(Operation):
    """Element-wise sigmoid activation, ``1 / (1 + exp(-x))``."""

    def _output(self) -> ndarray:
        """Apply the sigmoid function to ``self.input_``."""
        return 1 / (1 + np.exp(-1 * self.input_))

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        """Return the gradient w.r.t. the input.

        Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))`` and
        scales it element-wise by ``output_grad``.
        """
        # Evaluate the sigmoid once and reuse it rather than recomputing
        # the exponential for each factor.
        sigmoid = self._output()
        return sigmoid * (1 - sigmoid) * output_grad

class Linear(Operation):
    """Identity operation: the input passes through unchanged."""

    def _output(self) -> ndarray:
        """Return ``self.input_`` as-is."""
        return self.input_

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        """Return ``output_grad`` as-is; the identity has a gradient of one."""
        return output_grad