Rawan Reda

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler


In [None]:
# Fetch the Iris dataset; the returned object is kept around as `iris`.
iris = load_iris()

# X: feature matrix. y: integer class labels reshaped into a single column,
# i.e. shape (n_samples, 1), which is the 2-D layout passed to the encoder.
X, y = iris.data, iris.target.reshape(-1, 1)

In [None]:
# One-hot encode the class labels (one column per class).
encoder = OneHotEncoder()
y_encoded = encoder.fit_transform(y)

# OneHotEncoder returns a SciPy sparse matrix by default. Mixing that with plain
# ndarrays (e.g. `predictions - targets`) produces np.matrix intermediates, so
# convert to a regular dense ndarray here. The targets are tiny, so sparsity
# buys nothing.
y_onehot = y_encoded.toarray() if hasattr(y_encoded, "toarray") else np.asarray(y_encoded)

In [None]:
# Standardise the features using statistics from the TRAINING rows only.
# Fitting the scaler on the full dataset would leak test-set mean/variance into
# preprocessing and make the test accuracy optimistic.
#
# The index split below uses the same `test_size` and `random_state` as the
# train/test split performed on (X, y_onehot), so it selects the same training
# rows. Keep the two calls in sync if either parameter changes.
train_rows = train_test_split(np.arange(X.shape[0]), test_size=0.2, random_state=42)[0]

scaler = StandardScaler()
scaler.fit(X[train_rows])

# Apply the training-set scaling to every row (train and test alike).
X = scaler.transform(X)

In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_onehot,
    test_size=0.2,
    random_state=42,
)

In [None]:
def softmax(z):
    """Row-wise softmax of a 2-D array of logits.

    The per-row maximum is subtracted before exponentiating so that large
    logits do not overflow; this does not change the result.

    Args:
        z: 2-D array of scores, one row per sample.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    unnormalised = np.exp(z - row_peak)
    partition = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / partition

def relu(z):
    """Rectified linear unit: element-wise max(0, z)."""
    floor = 0
    return np.maximum(floor, z)

def relu_derivative(z):
    """Gradient of ReLU with respect to its input.

    Returns 1.0 where ``z`` is strictly positive and 0.0 elsewhere
    (including exactly zero), as a float array.
    """
    active = z > 0
    return active.astype(float)

def cross_entropy_loss(y_true, y_pred):
    """Mean categorical cross-entropy between one-hot targets and predictions.

    Args:
        y_true: One-hot targets, one row per sample. May be a dense array or
            any object exposing ``toarray()`` (e.g. a sparse matrix).
        y_pred: Predicted class probabilities, same shape as ``y_true``.

    Returns:
        The summed cross-entropy divided by the number of rows in ``y_true``.
    """
    n_samples = y_true.shape[0]

    # Sparse inputs are densified so the product below is element-wise.
    if hasattr(y_true, 'toarray'):
        targets = y_true.toarray()
    else:
        targets = y_true

    # The small constant keeps log() finite when a probability is exactly 0.
    log_probs = np.log(y_pred + 1e-9)
    total = np.sum(targets * log_probs)
    return -total / n_samples

def accuracy(y_true, y_pred):
    """Fraction of rows whose predicted class matches the true class.

    Args:
        y_true: One-hot targets, dense or exposing ``toarray()``.
        y_pred: Per-class scores/probabilities, one row per sample.

    Returns:
        Mean of the row-wise matches between the arg-max of ``y_true`` and
        the arg-max of ``y_pred``.
    """
    # Sparse inputs are densified before taking the arg-max.
    if hasattr(y_true, 'toarray'):
        targets = y_true.toarray()
    else:
        targets = y_true

    true_classes = np.argmax(targets, axis=1)
    predicted_classes = np.argmax(y_pred, axis=1)
    return np.mean(true_classes == predicted_classes)

In [None]:
# Seed NumPy's global RNG so the weight initialisation is repeatable.
np.random.seed(42)

# Layer widths: the input width comes from the feature columns, the output
# width from the one-hot target columns; the hidden width is chosen by hand.
input_size, hidden_size, output_size = X_train.shape[1], 8, y_train.shape[1]

# Small random weights and zero biases. Biases are stored as row vectors so
# they broadcast across the batch dimension. W1 is drawn before W2.
W1 = 0.01 * np.random.randn(input_size, hidden_size)
b1 = np.zeros(shape=(1, hidden_size))
W2 = 0.01 * np.random.randn(hidden_size, output_size)
b2 = np.zeros(shape=(1, output_size))



In [None]:
# Hyperparameters for full-batch gradient descent.
# NOTE(review): `epochs` and `learning_rate` were used by this cell without being
# defined anywhere in the visible notebook, so "Restart Kernel -> Run All" failed
# with a NameError. The values below are assumed defaults; tune as needed.
epochs = 1000
learning_rate = 0.1

# The one-hot targets may be a SciPy sparse matrix. Mixing that with ndarrays
# yields np.matrix intermediates in the gradients, so densify once up front
# rather than patching each expression with np.asarray.
y_train_dense = y_train.toarray() if hasattr(y_train, "toarray") else np.asarray(y_train)
m = y_train_dense.shape[0]  # number of training samples

for epoch in range(epochs):
    # ---- Forward pass ----
    z1 = np.dot(X_train, W1) + b1
    a1 = relu(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = softmax(z2)

    # ---- Compute loss ----
    loss = cross_entropy_loss(y_train_dense, a2)

    # ---- Backpropagation ----
    # For softmax + cross-entropy the gradient w.r.t. the logits is simply
    # (predicted probabilities - one-hot targets).
    dz2 = a2 - y_train_dense
    dW2 = np.dot(a1.T, dz2) / m
    db2 = np.sum(dz2, axis=0, keepdims=True) / m

    # Propagate through the hidden layer; ReLU passes gradient only where z1 > 0.
    da1 = np.dot(dz2, W2.T)
    dz1 = da1 * relu_derivative(z1)
    dW1 = np.dot(X_train.T, dz1) / m
    db1 = np.sum(dz1, axis=0, keepdims=True) / m

    # ---- Update weights ----
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1

    # ---- Print progress ----
    # Accuracy here is measured on the activations computed before this
    # epoch's weight update.
    if (epoch + 1) % 50 == 0:
        train_acc = accuracy(y_train_dense, a2)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}, Accuracy: {train_acc:.4f}")

In [None]:
# Run the held-out samples through the trained network.
# Hidden layer: affine transform followed by ReLU.
z1_test = X_test @ W1 + b1
a1_test = relu(z1_test)

# Output layer: affine transform followed by softmax.
z2_test = a1_test @ W2 + b2
a2_test = softmax(z2_test)

# Fraction of test samples classified correctly; the bare name on the last
# line makes the notebook display the value.
test_acc = accuracy(y_test, a2_test)
test_acc