<a href="https://colab.research.google.com/github/p20230445-bits/crux-inductions-2025/blob/main/notebooks/Task1_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
# Create the project directories; -p makes this a no-op for any that already exist
!mkdir -p kan mamba_sentiment scripts data

In [21]:
%%writefile kan/iris_loader.py

Overwriting kan/iris_loader.py


Loading and preprocessing of the Iris Dataset

In [22]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [23]:
# Fetch the Iris data bundled with scikit-learn: features in X, class labels in y
iris = load_iris()
X, y = iris.data, iris.target

In [24]:
# Hold out 20% of the samples for testing (80% train); the fixed
# random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [25]:
# Standardize features: the scaling statistics are learned from the training
# split only and then applied unchanged to both splits, so nothing about the
# test split influences the preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Implementing MLP from scratch using NumPy

In [26]:
import numpy as np

In [27]:
class MLP:
    """Two-layer perceptron: ReLU hidden layer followed by a softmax output.

    Training is full-batch gradient descent on the mean cross-entropy loss;
    every call to ``backward`` applies one parameter update in place.

    Parameters
    ----------
    input_dim : int
        Number of input features (columns of ``X``).
    hidden_dim : int
        Number of hidden units.
    output_dim : int
        Number of classes.
    lr : float, default 0.01
        Learning rate used for the gradient-descent step.
    seed : int or None, default None
        Seed for the weight initialisation. ``None`` draws from NumPy's
        global random state, so the result then depends on whatever
        ``np.random.seed`` call (if any) preceded construction.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, lr=0.01, seed=None):
        # A private RandomState keeps a seeded model independent of the global
        # generator; without a seed we fall back to the global one.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Initialize weights (random small numbers) and biases (zeros)
        self.W1 = rng.randn(input_dim, hidden_dim) * 0.01
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = rng.randn(hidden_dim, output_dim) * 0.01
        self.b2 = np.zeros((1, output_dim))
        self.lr = lr  # learning rate

    # Hidden layer activation
    def relu(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    # Softmax activation for output
    def softmax(self, z):
        """Row-wise softmax of a 2-D array of scores."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large scores.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    # Forward pass
    def forward(self, X):
        """Return class probabilities for ``X`` of shape (n_samples, input_dim).

        The intermediate values ``z1``, ``a1``, ``z2`` and ``a2`` are stored on
        the instance because ``backward`` reads them.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.relu(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def compute_loss(self, y_true, y_pred):
        """Mean cross-entropy of ``y_pred`` against integer labels ``y_true``.

        ``y_pred`` holds one row of class probabilities per sample and
        ``y_true`` the index of the correct class for each row.
        """
        m = y_true.shape[0]  # number of samples
        # Probability the model assigned to the correct class of each sample
        true_class_prob = y_pred[np.arange(m), y_true]
        # avoid log(0): a probability that underflowed to exactly 0 would
        # otherwise turn the whole loss into inf
        true_class_prob = np.clip(true_class_prob, 1e-12, 1.0)
        log_likelihood = -np.log(true_class_prob)
        loss = np.sum(log_likelihood) / m
        return loss

    # backward pass
    def backward(self, X, y):
        """Backpropagate from the last ``forward`` call and update parameters.

        ``forward(X)`` must have been called with the same ``X`` beforehand,
        since the cached activations are reused here. ``y`` holds the integer
        class index of each row of ``X``.
        """
        m = X.shape[0]  # batch size

        # One-hot encode labels
        y_onehot = np.zeros_like(self.a2)
        y_onehot[np.arange(m), y] = 1

        # Gradient for output layer (softmax + cross-entropy collapses to
        # "probabilities minus one-hot targets")
        dz2 = self.a2 - y_onehot             # (batch_size, output_dim)
        dW2 = np.dot(self.a1.T, dz2) / m     # (hidden_dim, output_dim)
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Gradient for hidden layer
        da1 = np.dot(dz2, self.W2.T)         # (batch_size, hidden_dim)
        dz1 = da1 * (self.z1 > 0)            # derivative of ReLU
        dW1 = np.dot(X.T, dz1) / m           # (input_dim, hidden_dim)
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update weights
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2

    # training loop
    def train(self, X, y, epochs=1000, log_every=100):
        """Run ``epochs`` full-batch gradient-descent steps on ``(X, y)``.

        The loss is printed every ``log_every`` epochs, starting with epoch 0;
        pass ``0`` or ``None`` to train silently.
        """
        for epoch in range(epochs):
            # Forward
            y_pred = self.forward(X)

            # Compute loss (measured before this epoch's update is applied)
            loss = self.compute_loss(y, y_pred)

            # Backward
            self.backward(X, y)

            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    # testing predictions
    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        y_pred = self.forward(X)
        return np.argmax(y_pred, axis=1)

In [28]:
# --- Train and Test ---
# The MLP draws its initial weights from NumPy's random generator, so seed it
# first: otherwise the loss curve and accuracy below differ on every run.
np.random.seed(42)

mlp = MLP(input_dim=4, hidden_dim=10, output_dim=3, lr=0.1)
mlp.train(X_train, y_train, epochs=1000)

# Accuracy = fraction of held-out samples whose predicted class matches the label
y_pred = mlp.predict(X_test)
accuracy = np.mean(y_pred == y_test)
print("Test Accuracy:", accuracy)

Epoch 0, Loss: 1.0985
Epoch 100, Loss: 0.4990
Epoch 200, Loss: 0.2756
Epoch 300, Loss: 0.1798
Epoch 400, Loss: 0.1268
Epoch 500, Loss: 0.0996
Epoch 600, Loss: 0.0847
Epoch 700, Loss: 0.0755
Epoch 800, Loss: 0.0694
Epoch 900, Loss: 0.0650
Test Accuracy: 1.0


In [29]:
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the iris dataset again: cross-validation needs the raw features so
# that a fresh scaler can be fitted inside every fold.
iris = load_iris()
X = iris.data
y = iris.target

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Seed the weight initialisation once, so the whole sequence of per-fold
# models (and therefore every reported accuracy) is reproducible.
np.random.seed(42)

scores = []

for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
    # Split into train/test
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Scale, fitting on this fold's training part only so the held-out part
    # does not leak into the preprocessing
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train a fresh MLP so no weights carry over between folds
    mlp = MLP(input_dim=4, hidden_dim=10, output_dim=3, lr=0.1)
    mlp.train(X_train, y_train, epochs=1000)

    # Evaluate
    y_pred = mlp.predict(X_test)
    acc = np.mean(y_pred == y_test)
    scores.append(acc)
    print(f"Fold {fold}: Accuracy = {acc:.4f}")

print("\nCross-validation accuracies:", scores)
print("Mean accuracy:", np.mean(scores))


Epoch 0, Loss: 1.0985
Epoch 100, Loss: 0.4911
Epoch 200, Loss: 0.2690
Epoch 300, Loss: 0.1718
Epoch 400, Loss: 0.1204
Epoch 500, Loss: 0.0950
Epoch 600, Loss: 0.0812
Epoch 700, Loss: 0.0728
Epoch 800, Loss: 0.0672
Epoch 900, Loss: 0.0633
Fold 1: Accuracy = 1.0000
Epoch 0, Loss: 1.0989
Epoch 100, Loss: 0.6325
Epoch 200, Loss: 0.3073
Epoch 300, Loss: 0.1850
Epoch 400, Loss: 0.1267
Epoch 500, Loss: 0.0983
Epoch 600, Loss: 0.0824
Epoch 700, Loss: 0.0722
Epoch 800, Loss: 0.0652
Epoch 900, Loss: 0.0600
Fold 2: Accuracy = 0.9667
Epoch 0, Loss: 1.0988
Epoch 100, Loss: 0.5923
Epoch 200, Loss: 0.2940
Epoch 300, Loss: 0.1775
Epoch 400, Loss: 0.1171
Epoch 500, Loss: 0.0879
Epoch 600, Loss: 0.0722
Epoch 700, Loss: 0.0626
Epoch 800, Loss: 0.0562
Epoch 900, Loss: 0.0519
Fold 3: Accuracy = 0.9333
Epoch 0, Loss: 1.0985
Epoch 100, Loss: 0.4986
Epoch 200, Loss: 0.2639
Epoch 300, Loss: 0.1718
Epoch 400, Loss: 0.1189
Epoch 500, Loss: 0.0903
Epoch 600, Loss: 0.0743
Epoch 700, Loss: 0.0643
Epoch 800, Loss: 0