# Activation Function

In [17]:
import numpy as np

In [18]:
# Five samples, two float features per sample -> array of shape (5, 2).
features = np.array([
    [28.1, 58.0],
    [22.5, 72.0],
    [31.4, 45.0],
    [19.8, 85.0],
    [27.6, 63.0],
])
# One integer target per sample, stored as a column -> array of shape (5, 1).
labels = np.array([[target] for target in (165, 95, 210, 70, 155)])

In [19]:
# Seed NumPy's legacy global generator so the initial weights are reproducible.
np.random.seed(99)

# Hidden layer: 4 units, each with 2 input weights drawn uniformly from [0, 0.5).
hidden_weight = np.random.random_sample((4, 2)) / 2
hidden_bias = np.zeros((4,))
# Output layer: 1 unit with 4 weights drawn uniformly from [0, 0.25).
# Drawn after the hidden weights, so the order of these two draws matters.
output_weight = np.random.random_sample((1, 4)) / 4
output_bias = np.zeros((1,))

In [20]:
def forward(x, w, b):
    """Apply an affine layer: ``x @ w.T + b``.

    Args:
        x: Input batch, one row per sample.
        w: Weight matrix with one row per output unit (it is transposed
            before the product).
        b: Bias added to every row of the product via broadcasting.

    Returns:
        The layer's pre-activation output, one row per sample.
    """
    projected = x @ w.T
    return projected + b

def backward(x, d, w, b, lr):
    """Take one gradient-descent step on a layer's weights and bias.

    Args:
        x: Inputs that were fed to the layer, one row per sample.
        d: Delta at the layer's output, one row per sample.
        w: Current weight matrix.
        b: Current bias vector.
        lr: Learning rate.

    Returns:
        A ``(new_w, new_b)`` tuple. The inputs are not modified in place.
    """
    batch_size = len(x)
    # d.T @ x sums the per-sample outer products; dividing by the batch size
    # turns that sum into an average.
    weight_step = d.T @ x * lr / batch_size
    bias_step = d.sum(axis=0) * lr / batch_size
    return w - weight_step, b - bias_step

In [21]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)

def relu_backward(y, d):
    """Propagate a delta backwards through a ReLU.

    Args:
        y: Values whose sign decides which entries pass; the training loop
            in this notebook passes the ReLU output here.
        d: Incoming delta, same shape as ``y`` (or broadcastable to it).

    Returns:
        ``d`` where ``y > 0`` and zero elsewhere.
    """
    active = y > 0
    return active * d

In [22]:
def mse_loss(p, y):
    """Mean squared error between predictions ``p`` and targets ``y``.

    The mean is taken over every element of ``(p - y) ** 2``.
    """
    residual = p - y
    squared = residual ** 2
    return squared.mean()

def gradient(p, y):
    """Element-wise derivative of the squared error ``(p - y) ** 2`` w.r.t. ``p``.

    The result is not divided by the number of samples; in this notebook
    ``backward`` applies the ``1 / len(x)`` factor when it updates parameters.
    """
    return 2 * (p - y)

def gradient_backward(d, w):
    """Carry a layer's output delta back to its inputs.

    Args:
        d: Delta at the layer's output, one row per sample.
        w: The layer's weight matrix, one row per output unit.

    Returns:
        ``d @ w``, the delta with respect to the layer's inputs.
    """
    propagated = d @ w
    return propagated

In [23]:
# Step size handed to backward() for every parameter update.
LEARNING_RATE = 1e-5
# Rows per mini-batch: the training loop uses this as both stride and slice width.
BATCHES = 2
# Number of full passes over `features`.
EPOCHS = 100

In [24]:
# Mini-batch gradient descent on a two-layer network (ReLU hidden layer,
# linear output). The loop rebinds the module-level weights and biases, so
# re-running this cell continues from their current values rather than from
# the initial ones.
# NOTE(review): every batch prints predictions, loss and all parameters, so
# the output grows with EPOCHS * number of batches.
for epoch in range(EPOCHS):
    print(f"epoch: {epoch}")

    # Walk the data in slices of BATCHES rows; the final slice is shorter
    # when len(features) is not a multiple of BATCHES.
    for i in range(0, len(features), BATCHES):
        feature = features[i: i + BATCHES]
        label = labels[i: i + BATCHES]

        # Forward pass: affine -> ReLU -> affine.
        hidden = relu(forward(feature, hidden_weight, hidden_bias))
        prediction = forward(hidden, output_weight, output_bias)
        print(f'prediction: {prediction}')

        # The loss is only reported; the updates below use the deltas.
        error = mse_loss(prediction, label)
        print(f'error: {error}')

        # Both deltas are computed before any parameter is reassigned, so
        # hidden_delta is based on the pre-update output_weight.
        output_delta = gradient(prediction, label)
        hidden_delta = relu_backward(hidden, gradient_backward(output_delta, output_weight))

        # Output layer step: its input was the hidden activation.
        output_weight, output_bias = backward(hidden, output_delta, output_weight, output_bias, LEARNING_RATE)
        print(f"output weight: {output_weight}")
        print(f"output bias: {output_bias}")

        # Hidden layer step: its input was the raw feature slice.
        hidden_weight, hidden_bias = backward(feature, hidden_delta, hidden_weight, hidden_bias, LEARNING_RATE)
        print(f"hidden weight: {hidden_weight}")
        print(f"hidden bias: {hidden_bias}")

epoch: 0
prediction: [[12.24107503]
 [12.84944226]]
error: 15042.001647559144
output weight: [[0.30435532 0.02937598 0.25904367 0.19927976]]
output bias: [0.00234909]
hidden weight: [[0.35134767 0.28063015]
 [0.41285238 0.01597532]
 [0.41584306 0.31124267]
 [0.16027583 0.0509313 ]]
hidden bias: [5.81769425e-04 4.00857355e-06 4.52079205e-04 4.38556683e-04]
prediction: [[16.07615691]
 [20.14118528]]
error: 20046.17916320456
output weight: [[0.36560205 0.06066227 0.32882282 0.21722563]]
output bias: [0.00478692]
hidden weight: [[0.37288511 0.32008851]
 [0.41493114 0.01978379]
 [0.43417406 0.34482657]
 [0.17437769 0.07676707]]
hidden bias: [1.32373490e-03 7.56221291e-05 1.08358274e-03 9.24366175e-04]
prediction: [[25.09113252]]
error: 16876.313848797945
output weight: [[0.44473871 0.09365536 0.41640312 0.24229824]]
output bias: [0.0073851]
hidden weight: [[0.39910232 0.37993215]
 [0.41928122 0.0297133 ]
 [0.45775385 0.39864999]
 [0.18995487 0.11232369]]
hidden bias: [0.00227363 0.00023323 