In [1]:
import numpy as np
import pandas as pd

In [None]:
# -----------------------------
# Manual MLP Components
# -----------------------------

# Activation Functions
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Element-wise ReLU slope: 1.0 where ``x`` is strictly positive, else 0.0."""
    positive_mask = x > 0
    return positive_mask.astype(float)

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    x : np.ndarray
        Scores with samples along axis 0 and classes along axis 1.

    Returns
    -------
    np.ndarray
        Array of the same shape as ``x`` in which every row is
        non-negative and sums to 1.
    """
    # Shift each row by its OWN maximum. Subtracting the global maximum
    # (as before) lets a row that sits far below it underflow to all
    # zeros in exp(), which turns the normalisation into 0/0 = NaN.
    row_max = np.max(x, axis=1, keepdims=True)
    e_x = np.exp(x - row_max)
    return e_x / e_x.sum(axis=1, keepdims=True)

def cross_entropy_loss(y_pred, y_true):
    """Mean negative log-probability assigned to the target class.

    ``y_true`` must be one-hot encoded; the target column of each row is
    recovered with ``argmax``. A small constant (1e-9) is added before the
    logarithm so a predicted probability of exactly 0 does not give ``inf``.
    """
    n_rows = y_true.shape[0]
    target_cols = y_true.argmax(axis=1)
    # Probability each sample assigns to its own target class.
    picked = y_pred[range(n_rows), target_cols]
    per_sample = -np.log(picked + 1e-9)
    return np.sum(per_sample) / n_rows


In [None]:

# -----------------------------
# MLP Initialization
# -----------------------------
def init_mlp(input_dim, hidden_dim, output_dim):
    """Create the parameter dict for a one-hidden-layer MLP.

    Weights are standard-normal draws from NumPy's global RNG scaled by
    0.01; biases start at zero with shape ``(1, width)``.

    Returns a dict with keys ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``.
    """
    weight_scale = 0.01
    # W1 is drawn before W2 so a seeded run yields the same values.
    w_hidden = np.random.randn(input_dim, hidden_dim) * weight_scale
    w_output = np.random.randn(hidden_dim, output_dim) * weight_scale
    return {
        "W1": w_hidden,
        "b1": np.zeros((1, hidden_dim)),
        "W2": w_output,
        "b2": np.zeros((1, output_dim)),
    }

# -----------------------------
# Forward Pass
# -----------------------------
def forward_pass(X, params):
    """Run the network forward: affine -> ReLU -> affine -> softmax.

    Returns the softmax output together with a cache holding the
    pre-activations (``"Z1"``, ``"Z2"``) and activations (``"A1"``,
    ``"A2"``) of both layers.
    """
    w_hidden, b_hidden = params["W1"], params["b1"]
    w_output, b_output = params["W2"], params["b2"]

    hidden_pre = np.dot(X, w_hidden) + b_hidden
    hidden_act = relu(hidden_pre)
    logits = np.dot(hidden_act, w_output) + b_output
    probs = softmax(logits)

    return probs, {"Z1": hidden_pre, "A1": hidden_act, "Z2": logits, "A2": probs}

# -----------------------------
# Backward Pass (Backprop)
# -----------------------------
def backward_pass(X, y_true, params, cache):
    """Compute batch-averaged gradients for both layers.

    ``cache`` must hold the ``"Z1"``, ``"A1"`` and ``"A2"`` entries of a
    forward pass on the same ``X``. Returns a dict with keys ``"dW1"``,
    ``"db1"``, ``"dW2"`` and ``"db2"``.
    """
    batch = X.shape[0]

    # Output layer: the error signal is (predicted probabilities - targets).
    delta_out = cache["A2"] - y_true
    grad_w2 = np.dot(cache["A1"].T, delta_out) / batch
    grad_b2 = np.sum(delta_out, axis=0, keepdims=True) / batch

    # Hidden layer: push the error back through W2, then gate it by the
    # ReLU slope at the hidden pre-activation.
    back_signal = np.dot(delta_out, params["W2"].T)
    delta_hidden = back_signal * relu_derivative(cache["Z1"])
    grad_w1 = np.dot(X.T, delta_hidden) / batch
    grad_b1 = np.sum(delta_hidden, axis=0, keepdims=True) / batch

    return {"dW1": grad_w1, "db1": grad_b1, "dW2": grad_w2, "db2": grad_b2}

# -----------------------------
# Update Parameters
# -----------------------------
def update_params(params, grads, lr):
    """Apply one gradient-descent step to every parameter, in place.

    Each entry of ``params`` is decreased by ``lr`` times the matching
    entry of ``grads``. The same (mutated) ``params`` dict is returned.
    """
    key_pairs = (("W1", "dW1"), ("b1", "db1"), ("W2", "dW2"), ("b2", "db2"))
    for param_key, grad_key in key_pairs:
        params[param_key] -= lr * grads[grad_key]
    return params

# -----------------------------
# One Training Step (Batch)
# -----------------------------
def train_step(X, y_true, params, lr=0.01):
    """Run one full-batch training step.

    Does a forward pass, evaluates the loss on those predictions, computes
    gradients and applies the update. The returned loss is therefore the
    value measured BEFORE the parameters were updated.

    Returns ``(loss, params)``.
    """
    probs, cache = forward_pass(X, params)
    batch_loss = cross_entropy_loss(probs, y_true)
    gradients = backward_pass(X, y_true, params, cache)
    return batch_loss, update_params(params, gradients, lr)

In [None]:

# -----------------------------
# Simulate Training with Dummy Data
# -----------------------------
np.random.seed(42)  # fixed seed so the demo prints the same losses each run
n_samples, n_features, n_classes = 10, 6, 3

# Synthetic inputs: uniform values in [0, 1), one row per sample
X_dummy = np.random.rand(n_samples, n_features)

# Synthetic integer class labels, expanded to one-hot rows
y_dummy_labels = np.random.randint(0, n_classes, size=n_samples)
y_dummy = np.zeros((n_samples, n_classes))
y_dummy[np.arange(n_samples), y_dummy_labels] = 1.0

# Fresh network with an 8-unit hidden layer
params = init_mlp(input_dim=n_features, hidden_dim=8, output_dim=n_classes)

# Ten full-batch updates; every "epoch" sees the whole dummy dataset once
for epoch in range(10):
    loss, params = train_step(X_dummy, y_dummy, params, lr=0.1)
    print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

