In [47]:
from sklearn.datasets import load_iris
import numpy as np

iris=load_iris()
X=iris.data
y=iris.target


def one_hot(y, num_classes):
    return np.eye(num_classes)[y]

y_oh = one_hot(y, 3)




In [48]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(
    X, y_oh, test_size=0.2, random_state=42, stratify=y
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [49]:
def relu(z):
    return np.maximum(0, z)

def relu_derivative(z):
    return (z > 0).astype(float)

def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def cross_entropy(y_true, y_pred):
    eps = 1e-9
    return -np.mean(np.sum(y_true * np.log(y_pred + eps), axis=1))

In [50]:
np.random.seed(42)

input_dim = 4
hidden1 = 16
hidden2 = 8
output_dim = 3

W1 = np.random.randn(input_dim, hidden1) * 0.1
b1 = np.zeros((1, hidden1))

W2 = np.random.randn(hidden1, hidden2) * 0.1
b2 = np.zeros((1, hidden2))

W3 = np.random.randn(hidden2, output_dim) * 0.1
b3 = np.zeros((1, output_dim))

W1

array([[ 0.04967142, -0.01382643,  0.06476885,  0.15230299, -0.02341534,
        -0.0234137 ,  0.15792128,  0.07674347, -0.04694744,  0.054256  ,
        -0.04634177, -0.04657298,  0.02419623, -0.19132802, -0.17249178,
        -0.05622875],
       [-0.10128311,  0.03142473, -0.09080241, -0.14123037,  0.14656488,
        -0.02257763,  0.00675282, -0.14247482, -0.05443827,  0.01109226,
        -0.11509936,  0.0375698 , -0.06006387, -0.02916937, -0.06017066,
         0.18522782],
       [-0.00134972, -0.10577109,  0.08225449, -0.12208436,  0.02088636,
        -0.19596701, -0.1328186 ,  0.01968612,  0.07384666,  0.01713683,
        -0.01156483, -0.03011037, -0.1478522 , -0.07198442, -0.04606388,
         0.10571222],
       [ 0.03436183, -0.17630402,  0.0324084 , -0.03850823, -0.0676922 ,
         0.06116763,  0.10309995,  0.09312801, -0.08392175, -0.03092124,
         0.03312634,  0.09755451, -0.04791742, -0.0185659 , -0.1106335 ,
        -0.11962066]])

In [None]:

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

plt.ion()

learning_rate = 0.01
epochs = 300
plot_every = 5

train_loss, val_loss = [], []
train_acc, val_acc = [], []

for epoch in range(1):
    # Forward Pass
    z1 = X_train @ W1 + b1
    a1 = relu(z1)

    z2 = a1 @ W2 + b2
    a2 = relu(z2)

    z3 = a2 @ W3 + b3
    y_pred = softmax(z3)
    
    # Metrics
    loss = cross_entropy(y_train, y_pred)
    acc = accuracy_score(
        np.argmax(y_train, axis=1),
        np.argmax(y_pred, axis=1)
    )

    train_loss.append(loss)
    train_acc.append(acc)
    

    # Backpropagation
    dz3 = y_pred - y_train
    dW3 = a2.T @ dz3 / len(X_train)
    db3 = dz3.mean(axis=0, keepdims=True)

    da2 = dz3 @ W3.T
    dz2 = da2 * relu_derivative(z2)
    dW2 = a1.T @ dz2 / len(X_train)
    db2 = dz2.mean(axis=0, keepdims=True)

    da1 = dz2 @ W2.T
    dz1 = da1 * relu_derivative(z1)
    dW1 = X_train.T @ dz1 / len(X_train)
    db1 = dz1.mean(axis=0, keepdims=True)
    
    # Update Weights
    W3 -= learning_rate * dW3
    b3 -= learning_rate * db3
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1

In [52]:
dW3

array([[ 0.00255991, -0.00011185, -0.00244806],
       [-0.0219671 ,  0.01280887,  0.00915823],
       [ 0.00789026,  0.00225529, -0.01014555],
       [-0.01716208,  0.00812048,  0.0090416 ],
       [-0.00080115,  0.00091305, -0.00011191],
       [ 0.00420402, -0.0008654 , -0.00333862],
       [ 0.00046855,  0.00026701, -0.00073556],
       [ 0.00192515, -0.00097366, -0.00095149]])