# <center>Lab Sheet-5</center>
# <center>Multilayer Perceptron & XOR Problem</center>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
np.random.seed(42)
os.makedirs("lab5_plots", exist_ok=True)

# Print the name and roll number
print("Name: Somesh Singh")
print("Roll Number: 233025921")

Name: Somesh Singh
Roll Number: 233025921


In [2]:
# Utility functions
def sigmoid(x): return 1 / (1 + np.exp(-x))
def dsigmoid(x): return sigmoid(x) * (1 - sigmoid(x))

def relu(x): return np.maximum(0, x)
def drelu(x): return (x > 0).astype(float)

def tanh(x): return np.tanh(x)
def dtanh(x): return 1 - np.tanh(x)**2

# Print the name and roll number
print("Name: Somesh Singh")
print("Roll Number: 233025921")

# Dataset: XOR truth table
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[1],[1],[0]])   # target

Name: Somesh Singh
Roll Number: 233025921


**1. Build a Multilayer Perceptron (MLP) with one hidden layer in NumPy.** 

**2. Train the MLP to solve the XOR problem.** 

In [3]:
class MLP:
    def __init__(self, n_in, n_hidden, n_out, activation="sigmoid", lr=0.1):
        self.n_in, self.n_hidden, self.n_out = n_in, n_hidden, n_out
        self.lr = lr
        # weight init small random
        self.W1 = np.random.randn(n_in, n_hidden) * 0.5
        self.b1 = np.zeros((1, n_hidden))
        self.W2 = np.random.randn(n_hidden, n_out) * 0.5
        self.b2 = np.zeros((1, n_out))

        if activation == "sigmoid":
            self.act, self.dact = sigmoid, dsigmoid
        elif activation == "relu":
            self.act, self.dact = relu, drelu
        elif activation == "tanh":
            self.act, self.dact = tanh, dtanh
        else:
            raise ValueError("unknown activation")

    def forward(self, X):
        z1 = X @ self.W1 + self.b1
        a1 = self.act(z1)
        z2 = a1 @ self.W2 + self.b2
        a2 = sigmoid(z2)   # output always sigmoid for binary classification
        return z1,a1,z2,a2

    def train(self, X, y, epochs=5000, log_interval=500):
        loss_hist = []
        log_data = {"W1":[], "W2":[], "a1":[], "out":[]}

        for ep in range(epochs):
            z1,a1,z2,a2 = self.forward(X)
            # binary cross-entropy loss
            loss = -np.mean(y*np.log(a2+1e-9) + (1-y)*np.log(1-a2+1e-9))
            loss_hist.append(loss)

            # backprop
            dz2 = a2 - y                    # derivative of BCE wrt z2
            dW2 = a1.T @ dz2 / len(X)
            db2 = np.mean(dz2,axis=0,keepdims=True)

            da1 = dz2 @ self.W2.T
            dz1 = da1 * self.dact(z1)
            dW1 = X.T @ dz1 / len(X)
            db1 = np.mean(dz1,axis=0,keepdims=True)

            # update
            self.W2 -= self.lr * dW2; self.b2 -= self.lr * db2
            self.W1 -= self.lr * dW1; self.b1 -= self.lr * db1

            # logging
            if ep % log_interval == 0:
                print(f"[{ep}] loss={loss:.4f}")
                log_data["W1"].append(self.W1.copy())
                log_data["W2"].append(self.W2.copy())
                log_data["a1"].append(a1.copy())
                log_data["out"].append(a2.copy())

        return loss_hist, log_data

    def predict(self, X, decision_rule="round"):
        _,_,_,a2 = self.forward(X)
        if decision_rule=="round":   # threshold 0.5
            return (a2>=0.5).astype(int)
        elif decision_rule=="max":   # multi-class style: argmax
            return (a2 == np.max(a2,axis=1,keepdims=True)).astype(int)
        else:
            return a2

# Print the name and roll number
print("Name: Somesh Singh")
print("Roll Number: 233025921")

# Train with Sigmoid activation (default)
mlp = MLP(2, 2, 1, activation="sigmoid", lr=0.5)
loss_hist, logs = mlp.train(X,y,epochs=5000,log_interval=1000)

# plot loss
plt.figure()
plt.plot(loss_hist)
plt.title("MLP on XOR (Sigmoid hidden)")
plt.xlabel("epoch"); plt.ylabel("Loss (BCE)")
plt.savefig("lab5_plots/loss_sigmoid.png",dpi=150,bbox_inches="tight"); plt.close()

preds = mlp.predict(X)
print("Final predictions (sigmoid hidden):")
print(preds.flatten())

Name: Somesh Singh
Roll Number: 233025921
[0] loss=0.6955
[1000] loss=0.6928
[2000] loss=0.4331
[3000] loss=0.3586
[4000] loss=0.3526
Final predictions (sigmoid hidden):
[0 0 1 1]


**3. Add logging for weights, activations, and errors over epochs.** 

In [4]:
# Print the name and roll number
print("Name: Somesh Singh")
print("Roll Number: 233025921")

print("Logged sample W1 at 0, mid, final epoch:")
print("W1[0]:\n", logs["W1"][0])
print("W1[last]:\n", logs["W1"][-1])

Name: Somesh Singh
Roll Number: 233025921
Logged sample W1 at 0, mid, final epoch:
W1[0]:
 [[ 0.24800434 -0.06956843]
 [ 0.32348934  0.76131248]]
W1[last]:
 [[ 4.0567694  -4.50912523]
 [ 7.65546018  7.92441388]]


**4. Compare model accuracy with different activation functions (ReLU, Sigmoid).** 

In [5]:
acts = ["sigmoid","relu","tanh"]
losses = {}
final_preds = {}

for act in acts:
    mlp = MLP(2, 2, 1, activation=act, lr=0.5)
    loss_hist,_ = mlp.train(X,y,epochs=4000,log_interval=2000)
    losses[act] = loss_hist
    final_preds[act] = mlp.predict(X)

plt.figure()
for act in acts:
    plt.plot(losses[act],label=act)
plt.title("Activation comparison (loss curves)")
plt.xlabel("epoch"); plt.ylabel("loss")
plt.legend()
plt.savefig("lab5_plots/activation_compare.png",dpi=150,bbox_inches="tight"); plt.close()

# Print the name and roll number
print("Name: Somesh Singh")
print("Roll Number: 233025921")

print("Final XOR predictions by activation:")
for act in acts:
    print(act, "->", final_preds[act].flatten())

[0] loss=0.7021
[2000] loss=0.3716
[0] loss=0.7009
[2000] loss=0.6931
[0] loss=0.7025
[2000] loss=0.3481
Name: Somesh Singh
Roll Number: 233025921
Final XOR predictions by activation:
sigmoid -> [0 1 0 1]
relu -> [1 1 1 1]
tanh -> [0 0 1 1]


**5. Implement a custom decision rule for multi-class output handling.**

In [6]:
def softmax(x): return np.exp(x)/np.sum(np.exp(x),axis=1,keepdims=True)

class Simple3Class:
    def __init__(self):
        self.W = np.array([[1,-1,0.5],
                           [0.5,1,-1]])
    def forward(self,X):
        return softmax(X @ self.W)

    def predict(self,X,rule="argmax"):
        probs = self.forward(X)
        if rule=="argmax":
            return np.argmax(probs,axis=1)
        elif rule=="threshold":
            return (probs>0.4).astype(int) # multi-label
        else:
            return probs

# Print the name and roll number
print("Name: Somesh Singh")
print("Roll Number: 233025921")

# sample
X3 = np.array([[1,0],[0,1],[1,1]])
model3 = Simple3Class()
print("decision rule argmax:", model3.predict(X3,"argmax"))
print("decision rule threshold:", model3.predict(X3,"threshold"))

Name: Somesh Singh
Roll Number: 233025921
decision rule argmax: [0 1 0]
decision rule threshold: [[1 0 0]
 [0 1 0]
 [1 0 0]]
