# Hidden Layer

In [11]:
from abc import abstractmethod, ABC
import numpy as np

In [12]:
class Tensor:
    """A numpy array bundled with the bookkeeping needed for backpropagation.

    Attributes set by the constructor:
        data: the wrapped values, converted with ``np.array``.
        grad: gradient slot, ``None`` until some ``gradient_fn`` fills it in.
        gradient_fn: zero-argument callable run by :meth:`gradient`; a no-op
            by default.
        parents: set of tensors :meth:`gradient` recurses into.
    """

    def __init__(self, data):
        self.data = np.array(data)

        # Backpropagation state; operations that produce a tensor are
        # expected to overwrite ``gradient_fn`` and ``parents`` afterwards.
        self.grad = None
        self.gradient_fn = lambda: None
        self.parents = set()

    def gradient(self):
        """Run this tensor's ``gradient_fn``, then recurse into every parent.

        The node's own function runs *before* its parents are visited, so a
        ``gradient_fn`` can populate the parents' ``grad`` before their own
        functions execute.
        """
        local_step = self.gradient_fn
        if local_step:
            local_step()

        for parent in self.parents:
            parent.gradient()

    def size(self):
        """Return the length of the last axis of ``data``."""
        last_axis = -1
        return self.data.shape[last_axis]

In [13]:
class DataLoader:
    """Serve a small hard-coded dataset in fixed-size slices.

    Args:
        batch_size: number of consecutive rows returned by each lookup.
    """

    def __init__(self, batch_size):
        self.batch_size = batch_size

        # Five samples with two input values each ...
        self.features = [
            [28.1, 58.0],
            [22.5, 72.0],
            [31.4, 45.0],
            [19.8, 85.0],
            [27.6, 63],
        ]
        # ... and one target value per sample, in the same order.
        self.labels = [
            [165],
            [95],
            [210],
            [70],
            [155],
        ]

    def __len__(self):
        """Return the number of samples (not the number of batches)."""
        return len(self.features)

    def __getitem__(self, index):
        """Return ``(features, labels)`` tensors for rows ``index`` onwards.

        At most ``batch_size`` rows are returned; the slice is simply shorter
        when fewer rows remain.
        """
        window = slice(index, index + self.batch_size)
        batch_features = Tensor(self.features[window])
        batch_labels = Tensor(self.labels[window])
        return (batch_features, batch_labels)

In [14]:
class Layer(ABC):
    """Abstract base class for anything that maps one tensor to another."""

    def __call__(self, x: Tensor):
        """Make the layer callable; ``layer(x)`` delegates to ``forward``."""
        return self.forward(x)

    @abstractmethod
    def forward(self, x: Tensor):
        """Compute the layer's output for ``x``; subclasses must override."""

    def parameters(self):
        """Return the layer's trainable tensors; none by default."""
        return list()

In [15]:
# Seed numpy's global RNG so the random weight initialisation is repeatable.
np.random.seed(99)

class Linear(Layer):
    """Affine layer computing ``x @ weight.T + bias``.

    Args:
        in_size: number of input values per sample.
        out_size: number of output values per sample.
    """

    def __init__(self, in_size, out_size):
        self.in_size = in_size
        self.out_size = out_size

        # Uniform [0, 1) weights of shape (out_size, in_size), scaled down by
        # the fan-in; the bias starts at zero.
        initial_weight = np.random.rand(out_size, in_size) / in_size
        self.weight = Tensor(initial_weight)
        self.bias = Tensor(np.zeros(out_size))

    def forward(self, x: Tensor):
        """Apply the layer to ``x`` and wire the result for backpropagation.

        ``x.data`` is presumably 2-D, (batch, in_size) -- the backward step
        transposes the output gradient and divides by ``len(x.data)``.
        """
        out = Tensor(x.data @ self.weight.data.T + self.bias.data)

        def gradient_fn():
            # Evaluated at backward time on purpose: reads whatever x.data
            # and weight.data hold when the gradient is requested.
            upstream = out.grad
            batch = len(x.data)
            # Parameter gradients are averaged over the batch here ...
            self.weight.grad = upstream.T @ x.data / batch
            self.bias.grad = np.sum(upstream, axis=0) / batch
            # ... while the gradient handed to the input stays per-sample.
            x.grad = upstream @ self.weight.data

        out.gradient_fn = gradient_fn
        out.parents = {self.weight, self.bias, x}
        return out

    def parameters(self):
        """Return ``[weight, bias]``."""
        return [self.weight, self.bias]

In [16]:
class Sequential(Layer):
    """Container that applies a list of layers one after another.

    Args:
        layers: the layers, in the order they should be applied.
    """

    def __init__(self, layers):
        self.layers = layers

    def forward(self, x: Tensor):
        """Feed ``x`` through every layer in order and return the result."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def parameters(self):
        """Return one flat list with every layer's parameters, in order."""
        collected = []
        for layer in self.layers:
            collected.extend(layer.parameters())
        return collected

In [17]:
class MSELoss:
    """Mean-squared-error loss between a prediction and a target tensor."""

    def __call__(self, p: Tensor, y: Tensor):
        """Return a scalar tensor holding ``mean((p - y) ** 2)``.

        The returned tensor has ``p`` as its only parent; its gradient
        function stores ``2 * (p - y)`` in ``p.grad``. No division by the
        number of elements happens here.
        """
        residual = p.data - y.data
        squared = residual ** 2
        mse = Tensor(squared.mean())

        def gradient_fn():
            # Recomputed from the tensors at backward time rather than
            # reusing the forward-pass residual.
            p.grad = (p.data - y.data) * 2

        mse.gradient_fn = gradient_fn
        mse.parents = {p}
        return mse

In [18]:
class SGD:
    """Plain stochastic gradient descent over a fixed list of parameters.

    Args:
        parameters: iterable of tensor-like objects exposing a numpy ``data``
            array and a ``grad`` attribute.
        lr: learning rate that scales every gradient before it is applied.
    """

    def __init__(self, parameters, lr):
        self.parameters = parameters
        self.lr = lr

    def backward(self):
        """Apply one update step, ``data -= grad * lr``, to each parameter.

        Parameters whose ``grad`` is still ``None`` (no gradient has been
        computed for them yet) are left untouched instead of raising a
        ``TypeError``.
        """
        for p in self.parameters:
            if p.grad is None:
                # Nothing to apply for this parameter.
                continue
            # In-place so every other reference to the array sees the update.
            p.data -= p.grad * self.lr

In [19]:
# Step size handed to the SGD optimiser; each gradient is multiplied by it.
LEARNING_RATE = 0.00001
# Despite the name, this is the batch *size*: it is passed to DataLoader as
# batch_size, i.e. the number of samples per mini-batch.
BATCHES = 2
# Number of full passes the training loop makes over the dataset.
EPOCHS = 100

In [20]:
dataset = DataLoader(BATCHES)

# Peek at the first batch only to read the input and output widths.
feature, label = dataset[0]
hidden_layer = Linear(feature.size(), 4)
output_layer = Linear(4, label.size())
model = Sequential([hidden_layer, output_layer])
loss = MSELoss()
sgd = SGD(model.parameters(), LEARNING_RATE)

for epoch in range(EPOCHS):
    print(f"epoch: {epoch}")

    # i is the index of the first sample of each mini-batch.
    for i in range(0, len(dataset), dataset.batch_size):
        feature, label = dataset[i]

        # Forward pass.
        prediction = model(feature)
        print(f"prediction: {prediction.data}")

        error = loss(prediction, label)
        print(f"error: {error.data}")

        # Backward pass, then one optimiser step.
        error.gradient()
        sgd.backward()

        # Report the updated parameters, output layer first.
        print(f"output weight: {output_layer.weight.data}")
        print(f"output bias: {output_layer.bias.data}")
        print(f"hidden weight: {hidden_layer.weight.data}")
        print(f"hidden bias: {hidden_layer.bias.data}")

epoch: 0
prediction: [[12.24107503]
 [12.84944226]]
error: 15042.001647559144
output weight: [[0.30435532 0.02937598 0.25904367 0.19927976]]
output bias: [0.00234909]
hidden weight: [[0.35134767 0.28063015]
 [0.41285238 0.01597532]
 [0.41584306 0.31124267]
 [0.16027583 0.0509313 ]]
hidden bias: [5.81769425e-04 4.00857355e-06 4.52079205e-04 4.38556683e-04]
prediction: [[16.07615691]
 [20.14118528]]
error: 20046.17916320456
output weight: [[0.36560205 0.06066227 0.32882282 0.21722563]]
output bias: [0.00478692]
hidden weight: [[0.37288511 0.32008851]
 [0.41493114 0.01978379]
 [0.43417406 0.34482657]
 [0.17437769 0.07676707]]
hidden bias: [1.32373490e-03 7.56221291e-05 1.08358274e-03 9.24366175e-04]
prediction: [[25.09113252]]
error: 16876.313848797945
output weight: [[0.44473871 0.09365536 0.41640312 0.24229824]]
output bias: [0.0073851]
hidden weight: [[0.39910232 0.37993215]
 [0.41928122 0.0297133 ]
 [0.45775385 0.39864999]
 [0.18995487 0.11232369]]
hidden bias: [0.00227363 0.00023323 