In [87]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [88]:
from sklearn import datasets
iris=datasets.load_iris()
X=iris.data
y=iris.target

In [89]:
num_classes = len(np.unique(y))
y = np.eye(num_classes)[y]

In [90]:
x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
print("x_train shape:", x_train.shape)

x_train shape: (120, 4)


In [91]:
x_train= np.array(x_train)
y_train= np.array(y_train)  

In [92]:
y_test= np.array(y_val)
x_test= np.array(x_val)

In [93]:
input_size = x_train.shape[1]
hidden1=32
hidden2=16
output_size=3

In [94]:
#initialize weights and biases

In [95]:
#input-->hidden1-->hidden2-->output
W1=np.random.randn(input_size, hidden1)*0.01
b1=np.zeros((1, hidden1))

W2=np.random.randn(hidden1, hidden2)*0.01
b2=np.zeros((1, hidden2))

W3=np.random.randn(hidden2, output_size)*0.01
b3=np.zeros((1, output_size))

relu
relu_derivative
softmax
compute_loss_and_acc
forward
backward
update_params
predict

In [96]:
# ReLU Activation
def relu(Z):
    return np.maximum(0, Z)

In [97]:
def relu_derivative(Z):
    return (Z > 0).astype(float)

In [98]:
def softmax(Z):
    exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

In [99]:
def compute_loss_and_accuracy(y_true, y_pred):
    eps = 1e-12
    y_pred = np.clip(y_pred, eps, 1 - eps)  # prevent log(0)
    m = y_true.shape[0]
    log_likelihood = -np.sum(y_true * np.log(y_pred), axis=1)
    loss = np.mean(log_likelihood)
    accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_true, axis=1))
    return loss, accuracy


In [100]:
def forward(X):
    # X: (m, input_size)
    Z1 = X.dot(W1) + b1            # (m, hidden_size)
    A1 = relu(Z1)

    Z2 = A1.dot(W2) + b2           # (m, hidden_size)
    A2 = relu(Z2)

    Z3 = A2.dot(W3) + b3           # (m, output_size)
    A3 = softmax(Z3)

    cache = (X, Z1, A1, Z2, A2, Z3, A3)
    return A3, cache

In [101]:
def backward(cache, Y):
    X, Z1, A1, Z2, A2, Z3, A3 = cache
    m = X.shape[0]

    # dL/dZ3
    dZ3 = (A3 - Y) / m                      # (m, output_size)
    dW3 = A2.T.dot(dZ3)                     # (hidden_size, output_size)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    dA2 = dZ3.dot(W3.T)                     # (m, hidden_size)
    dZ2 = dA2 * relu_derivative(Z2)         # (m, hidden_size)
    dW2 = A1.T.dot(dZ2)                     # (hidden_size, hidden_size)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    dA1 = dZ2.dot(W2.T)                     # (m, hidden_size)
    dZ1 = dA1 * relu_derivative(Z1)         # (m, hidden_size)
    dW1 = X.T.dot(dZ1)                      # (input_size, hidden_size)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    grads = (dW1, db1, dW2, db2, dW3, db3)
    return grads

In [102]:
def upgrade_parameters(grads, learning_rate):
    global W1, b1, W2, b2, W3, b3
    dW1, db1, dW2, db2, dW3, db3 = grads
    
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    W3 -= learning_rate * dW3
    b3 -= learning_rate * db3

In [103]:
def predict(X):
    A3, _ = forward_propagation(X)
    return np.argmax(A3, axis=1)

In [107]:
hidden_size = 32
epochs = 50
lr = 0.05
batch_size = 32
seed = 42
np.random.seed(seed)

In [108]:
n_samples = x_train.shape[0]
num_batches = int(np.ceil(n_samples / batch_size))

for epoch in range(1, epochs + 1):
    # shuffle
    perm = np.random.permutation(n_samples)
    X_shuf = x_train[perm]
    Y_shuf = y_train[perm]

    epoch_loss = 0.0
    epoch_acc = 0.0

    for i in range(num_batches):
        start = i * batch_size
        end = start + batch_size
        X_batch = X_shuf[start:end]
        Y_batch = Y_shuf[start:end]

        # forward
        Y_hat, cache = forward(X_batch)

        # loss & acc
        loss, acc = compute_loss_and_accuracy(Y_batch, Y_hat)
        epoch_loss += loss * X_batch.shape[0]
        epoch_acc += acc * X_batch.shape[0]

        # backward
        grads = backward(cache,Y_batch)

        # update
        upgrade_parameters(grads, lr)

    epoch_loss /= n_samples
    epoch_acc /= n_samples

    if epoch % 5 == 0 or epoch == 1:
        print(f"Epoch {epoch:3d}/{epochs}  Loss: {epoch_loss:.4f}  Acc: {epoch_acc:.4f}")


Epoch   1/50  Loss: 1.0920  Acc: 0.3417
Epoch   5/50  Loss: 1.0887  Acc: 0.3417
Epoch  10/50  Loss: 1.0808  Acc: 0.3583
Epoch  15/50  Loss: 1.0631  Acc: 0.8083
Epoch  20/50  Loss: 1.0155  Acc: 0.6750
Epoch  25/50  Loss: 0.9052  Acc: 0.6583
Epoch  30/50  Loss: 0.7642  Acc: 0.6583
Epoch  35/50  Loss: 0.6223  Acc: 0.6917
Epoch  40/50  Loss: 0.5024  Acc: 0.7833
Epoch  45/50  Loss: 0.4391  Acc: 0.8750
Epoch  50/50  Loss: 0.3664  Acc: 0.9750


In [110]:
Y_hat_all, _ = forward(x_train)
final_loss, final_acc = compute_loss_and_accuracy(Y_hat_all, y_train)
print(f"\nTraining finished. Final loss: {final_loss:.4f}, final accuracy: {final_acc:.4f}")

def predict(X):
    Y_hat, _ = forward(X)
    return np.argmax(Y_hat, axis=1)


Training finished. Final loss: 7.7337, final accuracy: 0.9167
