In [1]:
import numpy as np
from sklearn.datasets import load_iris

In [2]:
# Load the Iris dataset bundle; later cells read its "data" and "target" entries.
iris = load_iris()

In [33]:
# Pull the feature matrix and the labels out of the dataset bundle.
raw = iris["data"]
target = iris["target"]
# Standardize every column: subtract its mean and divide by its standard
# deviation, both computed along axis 0.
col_mean = raw.mean(axis=0)
col_std = raw.std(axis=0)
data = (raw - col_mean) / col_std
data.shape, target.shape

((150, 4), (150,))

In [43]:
# Seed NumPy's global RNG so the initial parameters (and every later
# np.random call, e.g. the per-epoch shuffling) are reproducible after a
# kernel restart.
SEED = 0
np.random.seed(SEED)

# Two-layer network parameters drawn from a standard normal distribution:
# 4 inputs -> 16 hidden units -> 3 outputs.
w1 = np.random.randn(4,16)
b1 = np.random.randn(16)
w2 = np.random.randn(16,3)
b2 = np.random.randn(3)

In [44]:
def relu(x):
    """Rectified linear unit: replace negative entries with 0.0, keep the rest."""
    is_negative = x < 0
    return np.where(is_negative, 0.0, x)
    
def softmax(x):
    """Numerically stable softmax along the last axis.

    Args:
        x: array of logits; each slice along the last axis is normalized
            independently, so a stack of rows yields one distribution per row.

    Returns:
        Array of the same shape as ``x`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing on large logits.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    # Normalize per row rather than over the whole array.
    return exps / np.sum(exps, axis=-1, keepdims=True)
    
def forward(x):
    """Run the two-layer network on ``x`` and keep every intermediate value.

    Reads the module-level parameters ``w1``, ``b1``, ``w2`` and ``b2``.

    Returns:
        list: ``[x, a, b, c, d]`` -- the input, the hidden pre-activation,
        the hidden ReLU output, the output logits and the softmax output,
        in the order ``backward`` unpacks them.
    """
    hidden_pre = x @ w1 + b1
    hidden = relu(hidden_pre)
    logits = hidden @ w2 + b2
    probs = softmax(logits)
    return [x, hidden_pre, hidden, logits, probs]
    
def backward(l,fp,target):
    """Back-propagate the softmax cross-entropy loss for a single sample.

    Args:
        l: loss value; accepted for symmetry with the caller but not used.
        fp: the five-element list returned by ``forward``.
        target: class index of the sample; written into row 0 of the
            one-hot vector, so ``fp`` must describe exactly one sample.

    Returns:
        list: gradients ``[dw1, db1, dw2, db2]`` in the same order as the
        parameters ``[w1, b1, w2, b2]``; the bias gradients are flattened.
    """
    inputs, hidden_pre, hidden, _logits, probs = fp

    one_hot = np.zeros(probs.shape)
    one_hot[0, target] = 1.0

    # Combined softmax + cross-entropy derivative w.r.t. the logits: (1,3)
    grad_logits = probs - one_hot

    # Output layer.
    grad_w2 = hidden.T @ grad_logits          # (16,3)
    grad_hidden = grad_logits @ w2.T          # (1,16)

    # ReLU passes gradient only where its input was not negative.
    relu_mask = np.where(hidden_pre < 0, 0.0, 1.0)
    grad_hidden_pre = relu_mask * grad_hidden  # (1,16)

    # Input layer.
    grad_w1 = inputs.T @ grad_hidden_pre      # (4,16)

    return [
        grad_w1,
        grad_hidden_pre.reshape(-1),
        grad_w2,
        grad_logits.reshape(-1),
    ]
    
def loss(preds,target):
    """Cross-entropy (negative log-likelihood) of the true class for one sample.

    Args:
        preds: 2-D array of class probabilities; only row 0 is read.
        target: index of the true class.

    Returns:
        ``-log(preds[0, target])``, with the probability floored at the
        smallest positive normal float64 so the result is always finite.
    """
    # Only the true-class probability contributes, so take the log of that
    # single entry instead of the whole array.
    prob = preds[0, target]
    # A probability that underflowed to exactly 0 would give log(0) = -inf
    # (and a runtime warning); flooring it keeps the reported loss finite.
    floor = np.finfo(np.float64).tiny
    return -np.log(np.maximum(prob, floor))

In [45]:
# Step size applied to every gradient update.
learning_rate = 0.01

# Per-sample stochastic gradient descent.
for epoch in range(50):
    # Fresh random visiting order for each epoch.
    idxs = np.random.permutation(len(data))
    losses = []
    for i in idxs:
        # forward/backward work on a single row with a leading axis of 1.
        x, y = data[i][None, :], target[i]
        fp = forward(x)
        l = loss(fp[-1], y)
        losses.append(l)
        gradients = backward(l, fp, y)
        # `-=` updates each array in place, so the module-level parameters
        # themselves are modified.
        for w, g in zip((w1, b1, w2, b2), gradients):
            w -= learning_rate * g
    print(f"Epoch {epoch} Loss: {np.mean(losses)}")

Epoch 0 Loss: 1.0496839349693763
Epoch 1 Loss: 0.3353357169062118
Epoch 2 Loss: 0.24412064284321208
Epoch 3 Loss: 0.20134505177339826
Epoch 4 Loss: 0.17480800940283306
Epoch 5 Loss: 0.15734366276824588
Epoch 6 Loss: 0.1433637234463169
Epoch 7 Loss: 0.12657954970485585
Epoch 8 Loss: 0.12366277648440777
Epoch 9 Loss: 0.11345174161002594
Epoch 10 Loss: 0.10262063516472147
Epoch 11 Loss: 0.10322570287004523
Epoch 12 Loss: 0.09790190259784919
Epoch 13 Loss: 0.09160199773170576
Epoch 14 Loss: 0.08595214952041667
Epoch 15 Loss: 0.0832177238935504
Epoch 16 Loss: 0.07502691103079856
Epoch 17 Loss: 0.08026540334414702
Epoch 18 Loss: 0.07408943792047112
Epoch 19 Loss: 0.07374967195786264
Epoch 20 Loss: 0.07003339490674274
Epoch 21 Loss: 0.06904640383811927
Epoch 22 Loss: 0.06490929980311731
Epoch 23 Loss: 0.0625697292832957
Epoch 24 Loss: 0.06607975250505337
Epoch 25 Loss: 0.06151986770267728
Epoch 26 Loss: 0.06062778089654536
Epoch 27 Loss: 0.06164601370692373
Epoch 28 Loss: 0.06005840897024825


In [47]:
# Predict a class for every sample, feeding forward() one row at a time
# exactly as during training, and take the arg-max of the softmax output.
preds = np.concatenate(
    [np.argmax(forward(row[None, :])[-1], axis=1) for row in data]
)
# The mean of the boolean match array is already the accuracy fraction; the
# former `.astype(np.float)` fails on NumPy >= 1.24, where the `np.float`
# alias no longer exists.
acc = (preds == target).mean()
print(f"Training Accuracy: {acc}")

Training Accuracy: 0.98
