In [5]:
import numpy as np

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    The naive formula calls ``np.exp(-z)``, which overflows to ``inf`` (and
    emits a RuntimeWarning) once ``z`` is a large negative number. Here the
    same value is computed as ``exp(-log(1 + exp(-z)))``, with
    ``np.logaddexp(0, -z)`` supplying the log term in a form that never
    exponentiates a large positive number.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    NumPy float scalar or ndarray
        Element-wise sigmoid of ``z``, same shape as ``z``.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0.0, np.negative(z)))

def sigmoid_derivative(a):
    """Derivative of the sigmoid, written in terms of its output.

    ``a`` is expected to already be ``sigmoid(z)``; the slope with respect to
    ``z`` is then ``a * (1 - a)``, so the pre-activation is not needed.
    Works element-wise on scalars and NumPy arrays.
    """
    complement = 1.0 - a
    return a * complement



In [9]:
class MLP_XOR:
    """Small sigmoid MLP (2 inputs -> ``n_hidden`` units -> 1 output) for XOR.

    Both layers use the sigmoid activation. Training is full-batch gradient
    descent on the binary cross-entropy loss.

    Parameters
    ----------
    lr : float
        Gradient-descent step size.
    epochs : int
        Number of full-batch updates performed by ``fit``.
    seed : int
        Seed for ``np.random.default_rng`` used to draw the initial weights.
    n_hidden : int
        Width of the hidden layer. The default of 2 reproduces the original
        fixed-size network (and the same initial weights for a given seed).

    Attributes
    ----------
    W1, b1 : ndarray
        Hidden-layer weights ``(2, n_hidden)`` and bias ``(1, n_hidden)``.
    W2, b2 : ndarray
        Output-layer weights ``(n_hidden, 1)`` and bias ``(1, 1)``.
    z1, a1, z2, a2 : ndarray
        Pre-activations / activations cached by the latest ``forward`` call;
        ``fit`` reads ``a1`` during backpropagation.
    """

    def __init__(self, lr=0.1, epochs=10000, seed=42, n_hidden=2):
        if n_hidden < 1:
            raise ValueError("n_hidden must be a positive integer")

        self.lr = lr
        self.epochs = epochs
        self.n_hidden = n_hidden
        rng = np.random.default_rng(seed)

        # Standard-normal draws scaled by 0.5; biases start at zero.
        self.W1 = rng.normal(0, 1, (2, n_hidden)) * 0.5
        self.b1 = np.zeros((1, n_hidden))

        self.W2 = rng.normal(0, 1, (n_hidden, 1)) * 0.5
        self.b2 = np.zeros((1, 1))

    def forward(self, X):
        """Run the network on ``X`` (n, 2) and return probabilities (n, 1).

        Intermediate values are stored on the instance (``z1``, ``a1``,
        ``z2``, ``a2``) so that ``fit`` can reuse them for the backward pass.
        """
        self.z1 = X @ self.W1 + self.b1
        self.a1 = sigmoid(self.z1)

        self.z2 = self.a1 @ self.W2 + self.b2
        self.a2 = sigmoid(self.z2)  # prediction probability
        return self.a2

    def compute_loss(self, y_pred, y_true):
        """Mean binary cross-entropy between ``y_pred`` and ``y_true``.

        Predictions are clipped to ``[1e-9, 1 - 1e-9]`` so the logarithms
        stay finite. If ``y_true`` has the same number of elements as
        ``y_pred`` but a different shape (e.g. ``(n,)`` vs ``(n, 1)``), it is
        reshaped to match; otherwise the two would broadcast to an ``(n, n)``
        matrix and the mean would be taken over the wrong set of terms.
        """
        eps = 1e-9
        y_pred = np.clip(y_pred, eps, 1 - eps)
        y_true = np.asarray(y_true, dtype=float)
        if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
            y_true = y_true.reshape(y_pred.shape)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    def fit(self, X, y, print_every=2000):
        """Train with full-batch gradient descent for ``self.epochs`` epochs.

        Parameters
        ----------
        X : array_like, shape (n, 2)
            Input samples.
        y : array_like, shape (n,) or (n, 1)
            Binary targets. A flat vector is accepted and converted to a
            column so it lines up with the (n, 1) network output.
        print_every : int or None
            Log the loss at epoch 1 and at every multiple of this value.
            ``0`` or ``None`` disables logging.

        Returns
        -------
        MLP_XOR
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` disagree on the sample count.
        """
        X = np.asarray(X, dtype=float)
        # Force a column vector: (n,1) - (n,) would broadcast to (n,n).
        y = np.asarray(y, dtype=float).reshape(-1, 1)

        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        for epoch in range(1, self.epochs + 1):
            y_pred = self.forward(X)
            loss = self.compute_loss(y_pred, y)

            # Backprop (vectorized)
            # For sigmoid + BCE, output gradient simplifies:
            dz2 = (y_pred - y)  # shape (n,1)
            dW2 = (self.a1.T @ dz2) / n_samples
            db2 = np.mean(dz2, axis=0, keepdims=True)

            # Push the error back through W2, then through the hidden sigmoid.
            da1 = dz2 @ self.W2.T
            dz1 = da1 * sigmoid_derivative(self.a1)
            dW1 = (X.T @ dz1) / n_samples
            db1 = np.mean(dz1, axis=0, keepdims=True)

            # Gradient descent update
            self.W2 -= self.lr * dW2
            self.b2 -= self.lr * db2
            self.W1 -= self.lr * dW1
            self.b1 -= self.lr * db1

            # Print occasionally (so it doesn't spam)
            if print_every and (epoch == 1 or epoch % print_every == 0):
                print(f"Epoch {epoch:5d} , Loss: {loss:.6f}")

        return self

    def predict(self, X):
        """Return hard 0/1 labels (int array, shape (n, 1)) by thresholding
        the forward-pass probabilities at 0.5 (inclusive)."""
        probs = self.forward(X)
        return (probs >= 0.5).astype(int)



In [10]:
if __name__ == "__main__":
    # The four XOR input pairs, one per row.
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)

    # Targets as an (n, 1) float column: 1 exactly when the two inputs differ.
    y = np.logical_xor(X[:, 0], X[:, 1]).astype(float).reshape(-1, 1)

    # fit() returns the model itself, so construction and training chain.
    model = MLP_XOR(lr=0.5, epochs=10000, seed=1).fit(X, y)

    probs = model.forward(X)
    preds = model.predict(X)

    # Report probabilities, thresholded predictions and the ground truth.
    print("\nProbabilities:\n", np.round(probs, 4))
    print("Predictions:\n", preds.ravel())
    print("True labels:\n", y.astype(int).ravel())


Epoch     1 , Loss: 0.708344
Epoch  2000 , Loss: 0.038465
Epoch  4000 , Loss: 0.007765
Epoch  6000 , Loss: 0.004254
Epoch  8000 , Loss: 0.002921
Epoch 10000 , Loss: 0.002221

Probabilities:
 [[0.0026]
 [0.998 ]
 [0.998 ]
 [0.0023]]
Predictions:
 [0 1 1 0]
True labels:
 [0 1 1 0]
