In [66]:
import numpy as np

# Seed NumPy's global RNG so every np.random.* draw made later in this notebook
# (synthetic data, initial weights) is identical on each Restart & Run All.
np.random.seed(0)

## Problem

In [67]:
# Synthetic regression problem: 1000 samples with 2 features each, drawn
# uniformly from [0, 1) by np.random.rand.
X = np.random.rand(1000, 2)
# The target is computed element-wise, so y has the same (1000, 2) shape as X
# and column k of y depends only on column k of X.
y = 50 * np.exp(np.sin(X))

In [68]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the samples for testing; the fixed random_state makes the
# split itself reproducible, independently of NumPy's global seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [69]:
# Sanity check: inputs and targets must have the same number of rows, and both
# are 2 columns wide (the network's input and output sizes).
X_train.shape, y_train.shape

((670, 2), (670, 2))

In [70]:
# Network architecture: number of neurons in each layer, listed from input to
# output. in_size and out_size are both 2, matching the 2 columns of X and y.
in_size = 2
hid_1_size = 3
hid_2_size = 4
hid_3_size = 3
out_size = 2

# `sizes` drives everything downstream: one weight matrix is created per
# consecutive pair of entries.
sizes = [in_size, hid_1_size, hid_2_size, hid_3_size, out_size]

# Disabled alternative: a shallower network with two hidden layers (5 and 3
# neurons). Uncomment this block *instead of* the one above to try it.
# in_size = 2
# hid_2_size = 5
# hid_3_size = 3
# out_size = 2

# sizes = [in_size, hid_2_size, hid_3_size, out_size]

In [71]:
# One weight matrix per pair of consecutive layers. W[i-1] maps layer i-1 to
# layer i, so it has sizes[i] rows (receiving neurons) and sizes[i-1] columns
# (sending neurons). Entries are drawn uniformly from [0, 1); there are no
# bias terms.
W = [np.random.rand(sizes[i], sizes[i-1]) for i in range(1, len(sizes))]

# Show every freshly initialised matrix together with its 0-based list index.
for i, weights in enumerate(W, start=1):
    print(f"W_l{i} | Layer {i}, with index {i-1}")
    print(weights)
    print("\n")


W_l1 | Layer 1, with index 0
[[0.81151847 0.47608399]
 [0.52315599 0.25052059]
 [0.60504302 0.30290481]]


W_l2 | Layer 2, with index 1
[[0.57728401 0.16967812 0.15946909]
 [0.41702974 0.42681952 0.26810926]
 [0.13159685 0.03921054 0.02523183]
 [0.27155029 0.46185344 0.72624328]]


W_l3 | Layer 3, with index 2
[[0.4748717  0.90405082 0.0352198  0.18066062]
 [0.33851449 0.57749619 0.85273616 0.35020195]
 [0.26798868 0.06188917 0.82130348 0.37966644]]


W_l4 | Layer 4, with index 3
[[0.5715502  0.98355542 0.00159457]
 [0.14545014 0.77911099 0.80512749]]




In [72]:
# Per-layer caches, one entry per non-input layer: z holds the pre-activations
# and a the activations. They start as zero vectors and are overwritten by
# forward() on every call.
z = [np.zeros(width) for width in sizes[1:]]
a = [np.zeros(width) for width in sizes[1:]]


# Show the initial (all-zero) caches together with their 0-based list index.
for i, (z_i, a_i) in enumerate(zip(z, a), start=1):
    print(f"z_l{i} | Layer {i}, with index {i-1}:")
    print(z_i)
    print(f"a_l{i} | Layer {i}, with index {i-1}:")
    print(a_i)
    print("\n")

z_l1 | Layer 1, with index 0:
[0. 0. 0.]
a_l1 | Layer 1, with index 0:
[0. 0. 0.]


z_l2 | Layer 2, with index 1:
[0. 0. 0. 0.]
a_l2 | Layer 2, with index 1:
[0. 0. 0. 0.]


z_l3 | Layer 3, with index 2:
[0. 0. 0.]
a_l3 | Layer 3, with index 2:
[0. 0. 0.]


z_l4 | Layer 4, with index 3:
[0. 0.]
a_l4 | Layer 4, with index 3:
[0. 0.]




In [73]:
def forward(xi, log=False):
    """Propagate one sample through the network and return its output.

    Reads the module-level weight list ``W`` and layer sizes ``sizes``, and
    overwrites the module-level caches ``z`` (pre-activations) and ``a``
    (activations) for every layer.

    The first layer and every intermediate layer use a sigmoid activation;
    the last layer handled by the loop is linear (its activation is its
    pre-activation).

    Parameters
    ----------
    xi : sequence of numbers
        A single input sample; it is turned into a column vector internally.
    log : bool, optional
        When True, print the input and each layer's weights, pre-activations
        and activations.

    Returns
    -------
    numpy.ndarray
        ``a[len(sizes) - 2]``, a column vector with one row per neuron.
    """
    # Single switch for all tracing output: a real print or a no-op.
    emit = print if log else (lambda *_: None)
    last = len(sizes) - 2  # index of the final layer in W / z / a

    # Input
    emit("Input x:")
    x = np.array([xi]).T  # shape (n_features, 1)
    emit(x, "\n")

    # First layer: fed by the raw input, always sigmoid.
    z[0] = W[0] @ x
    a[0] = 1 / (1 + np.exp(-z[0]))
    emit(f"W_l{1}", "\n", W[0], "\n")
    emit(f"z_l{1}", "\n", z[0], "\n")
    emit(f"a_l{1}", "\n", a[0], "\n")

    # Remaining hidden layers, then the output layer.
    for i in range(1, last + 1):
        emit(f"Layer {i+1}:")
        z[i] = W[i] @ a[i-1]

        is_output = i == last
        # The output layer is linear: its activation is the very same array as z[i].
        a[i] = z[i] if is_output else 1 / (1 + np.exp(-z[i]))

        emit(f"W_l{i+1}", "\n", W[i], "\n")
        emit(f"z_l{i+1}", "\n", z[i], "\n")
        emit("y" if is_output else f"a_l{i+1}", "\n", a[i], "\n")

    return a[last]


def loss_MSE(x, t, log=False):
    """Half sum-of-squares error between a prediction and its target.

    Computes ``0.5 * sum_i (t[i] - x[i]) ** 2`` over *all* output components
    (the previous version only looked at the first two). Despite the name,
    the sum is not divided by the number of components.

    Parameters
    ----------
    x : sequence or numpy.ndarray
        Network output, indexable per output neuron. May be a flat vector or
        a column vector as returned by ``forward``.
    t : sequence or numpy.ndarray
        Target values, one per output neuron.
    log : bool, optional
        When True, print the target before computing the loss.

    Returns
    -------
    number or numpy.ndarray
        The loss, with the shape of a single ``t[i] - x[i]`` term: a scalar
        for flat inputs, a length-1 array when ``x`` is a column vector.

    Raises
    ------
    ValueError
        If ``x`` and ``t`` do not have the same number of components.
        Errors are raised rather than printed and swallowed, so a bad call
        can no longer hand ``None`` back to the caller.
    """
    if log: print("t\n", t, "\n")

    if len(x) != len(t):
        raise ValueError(
            f"loss_MSE: prediction has {len(x)} components but target has {len(t)}"
        )

    # Built-in sum starts from 0, so the per-component terms keep their own
    # shape (scalar or length-1 array) exactly as in the two-term formula.
    return 0.5 * sum((t[i] - x[i]) ** 2 for i in range(len(t)))


def backward(x, t, mu=0.0001):
    """Backpropagate one sample's error and take a gradient-descent step on ``W``.

    Reads the activations cached in the module-level list ``a``, so it must
    be called after ``forward`` has been run on the same sample ``x``.
    Assumes the network built by ``forward``: sigmoid activations on every
    layer except the last, a linear last layer, and the half sum-of-squares
    loss. With ``a^0 = x`` this gives, for each layer ``l``:

        delta^L = y - t
        delta^l = (W^{l+1}.T @ delta^{l+1}) * a^l * (1 - a^l)
        dLoss/dW^l = delta^l @ (a^{l-1}).T

    All gradients are computed from the current weights first; only then is
    every matrix in ``W`` replaced by ``W - mu * gradient``.

    Everything is kept as 2-D column vectors, so no size-1 array is ever
    written into a scalar slot (an implicit conversion NumPy has deprecated).

    Parameters
    ----------
    x : array-like, 1-D
        The input sample that was fed to ``forward``.
    t : array-like, 1-D
        Target vector, one value per output neuron.
    mu : float, optional
        Learning rate. Defaults to 0.0001, the value that used to be
        hard-coded in this function.

    Returns
    -------
    None
        The entries of the module-level list ``W`` are rebound to the
        updated matrices.

    Raises
    ------
    ValueError
        If ``x`` does not match the first layer's input width or ``t`` does
        not match the number of outputs.
    """
    last = len(W) - 1  # index of the output layer in W / a

    x_col = np.asarray(x, dtype=float).reshape(-1, 1)
    t_col = np.asarray(t, dtype=float).reshape(-1, 1)
    y_col = np.asarray(a[last], dtype=float).reshape(-1, 1)

    if x_col.shape[0] != W[0].shape[1]:
        raise ValueError(
            f"backward: input has {x_col.shape[0]} features, first layer expects {W[0].shape[1]}"
        )
    if t_col.shape != y_col.shape:
        raise ValueError(
            f"backward: target has {t_col.shape[0]} values, network has {y_col.shape[0]} outputs"
        )

    # layer_inputs[l] is the column vector layer l was multiplied with in
    # forward(): the sample itself for l == 0, otherwise a[l-1].
    layer_inputs = [x_col] + [
        np.asarray(a[l], dtype=float).reshape(-1, 1) for l in range(last)
    ]

    W_grad = [None] * len(W)

    # -------------------------------
    # Final layer (linear): delta^L_i = y_i - t_i
    delta = y_col - t_col
    W_grad[last] = delta @ layer_inputs[last].T

    # -------------------------------
    # All other layers, walking from the output back to the input.
    for l in range(last, 0, -1):
        act = layer_inputs[l]  # activation of the layer whose delta we compute
        # Sigmoid derivative expressed through the cached activation: a * (1 - a).
        delta = (W[l].T @ delta) * act * (1 - act)
        W_grad[l - 1] = delta @ layer_inputs[l - 1].T

    # Update weights only after every gradient has been computed.
    for idx, w_grad in enumerate(W_grad):
        W[idx] = W[idx] - mu * w_grad

In [74]:
EPOCHS = 200


def mean_loss(inputs, targets):
    """Average per-sample loss of the current network over a dataset.

    Runs ``forward`` on every row of ``inputs``, scores it against the
    matching row of ``targets`` with ``loss_MSE`` and divides the running
    total by the number of rows. The result has whatever shape ``loss_MSE``
    returns for a single sample.
    """
    total = 0
    for sample, target in zip(inputs, targets):
        total += loss_MSE(forward(sample), target)
    return total / inputs.shape[0]


for ep in range(EPOCHS):
    # One epoch of online gradient descent: the weights are updated after
    # every single training sample.
    for i in range(X_train.shape[0]):
        # forward() refreshes the cached activations that backward() reads.
        forward(X_train[i], False)
        backward(X_train[i], y_train[i])

    # Every 10th epoch, report the average loss on both splits.
    if ep%10==0:
        l_train = mean_loss(X_train, y_train)
        l_test = mean_loss(X_test, y_test)

        print(f"Epoch: {ep}, \t Test loss: {l_test}, \t Train loss: {l_train}")

Epoch: 0, 	 Test loss: [4847.52288109], 	 Train loss: [4878.71278938]
Epoch: 10, 	 Test loss: [490.64253482], 	 Train loss: [474.52705404]
Epoch: 20, 	 Test loss: [418.30279043], 	 Train loss: [395.84457943]
Epoch: 30, 	 Test loss: [417.74046541], 	 Train loss: [394.43608822]
Epoch: 40, 	 Test loss: [417.78257187], 	 Train loss: [394.36965218]
Epoch: 50, 	 Test loss: [417.7199754], 	 Train loss: [394.29993958]
Epoch: 60, 	 Test loss: [417.59226099], 	 Train loss: [394.18419922]
Epoch: 70, 	 Test loss: [417.3413959], 	 Train loss: [393.96182856]
Epoch: 80, 	 Test loss: [416.71129507], 	 Train loss: [393.41174527]
Epoch: 90, 	 Test loss: [414.42539062], 	 Train loss: [391.48224441]
Epoch: 100, 	 Test loss: [410.25953906], 	 Train loss: [387.88317087]
Epoch: 110, 	 Test loss: [407.03241988], 	 Train loss: [384.87217515]
Epoch: 120, 	 Test loss: [404.83911817], 	 Train loss: [382.79803839]
Epoch: 130, 	 Test loss: [403.28506391], 	 Train loss: [381.34094458]
Epoch: 140, 	 Test loss: [402.1

In [76]:
# Prediction
# Compare the trained network's output with the ground truth on the first five
# held-out samples, one four-line report per sample.
for i in range(5):
    out = forward(X_test[i])
    loss = loss_MSE(out, y_test[i])
    report = (
        f"input \t{X_test[i][0]} {X_test[i][1]}",
        f"target \t{y_test[i][0]} {y_test[i][1]}",
        f"out \t{out[0][0]} {out[1][0]}",
        f"loss \t{loss} \n",
    )
    print(*report, sep="\n")

input 	0.6267064759591049 0.7275436095907498
target 	89.88250476831203 97.22814149547783
out 	82.06237339078395 81.53286394019605
loss 	[153.74809615] 

input 	0.32147293085420525 0.47418481226362175
target 	68.57906974666787 78.93591811610015
out 	81.29021082183004 80.76568449634547
loss 	[82.46057622] 

input 	0.05790927689626024 0.29138882053690274
target 	52.97922848956347 66.64005232873065
out 	80.22248897216187 79.70485419222098
loss 	[456.44214473] 

input 	0.6147698861442988 0.037129603891447815
target 	89.01399117162639 51.89093326749542
out 	81.04846741619382 80.52550237411454
loss 	[441.69405831] 

input 	0.4254017253550547 0.06355377483615843
target 	75.54358249126678 53.27856036812152
out 	80.65259633515664 80.13218626624881
loss 	[373.60962317] 

