In [3]:
import matplotlib.pyplot as plt
import numpy as np

## 1. Prepare the XOR Dataset 

In [None]:

# XOR truth table: each row of X is one input pair (x1, x2) and the matching
# row of Y holds x1 XOR x2, stored as a column vector of shape (4, 1).
X = np.array([(0, 0), (0, 1), (1, 0), (1, 1)])
Y = np.array([0, 1, 1, 0]).reshape(-1, 1)


## 2. Activation Functions, Their Derivatives, and MSE Utilities

In [None]:


def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``, holding values in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for large negative x.
    e = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook formula; for x < 0 it is the
    # algebraically equal form exp(x) / (1 + exp(x)).
    out = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with an empty tuple unwraps a 0-d result into a NumPy scalar
    # and leaves higher-dimensional arrays unchanged.
    return out[()]

def d_sigmoid(x):
    """Derivative of the sigmoid, evaluated at pre-activation ``x``.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)).
    """
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    zero_floor = 0
    return np.maximum(zero_floor, x)

def d_relu(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Float NumPy scalar for scalar input, otherwise a float array with
        the same shape as ``x``.
    """
    # np.greater always returns a NumPy bool (scalar or array), so .astype
    # is available even when x is a plain Python number or a list; the bare
    # comparison `x > 0` would yield a Python bool without .astype.
    return np.greater(x, 0).astype(float)

def mse(ws, model, X, Y):
    """Mean squared error of ``model`` over the samples in ``X``.

    The model is called once per sample as ``model(ws, X[i])``; the squared
    difference to ``Y[i]`` is accumulated and the total is divided by
    ``len(X)``.
    """
    sample_count = len(X)
    squared_total = 0.0
    for idx, sample in enumerate(X):
        prediction = model(ws, sample)
        residual = Y[idx] - prediction
        squared_total += residual ** 2
    return squared_total / sample_count

def grad_desc_mse(K, ws, learning_eps, loss_fn, grad_loss_fn, verbose=False):
    """
    Run K steps of plain gradient descent and record the loss after each one.

    Args:
        K (int): Number of update steps to perform.
        ws (np.ndarray): Starting weights; must provide ``.copy()``.
        learning_eps (float): Step size multiplying the gradient.
        loss_fn (callable): loss_fn(ws) → loss value for the given weights.
        grad_loss_fn (callable): grad_loss_fn(ws) → gradient shaped like ws.
        verbose (bool): If True, each step draws a '-k' line segment with
            ``plt.plot`` from the previous to the new (ws[0], ws[1]) point,
            so ws needs at least two components.

    Returns:
        ws (np.ndarray): Weights after the last step.
        history (list): Loss at the start plus after every step (length K+1).
    """
    loss_trace = [loss_fn(ws)]
    for _ in range(K):
        gradient = grad_loss_fn(ws)
        previous = ws.copy()
        ws = previous - learning_eps * gradient

        if verbose:
            # Trace the step in the plane of the first two weight components.
            xs = [previous[0], ws[0]]
            ys = [previous[1], ws[1]]
            plt.plot(xs, ys, '-k')

        loss_trace.append(loss_fn(ws))
    return ws, loss_trace


def accuracy(X, Y, f):
    """
    Fraction of samples for which ``f`` reproduces the expected label.

    Every row of X is unpacked into two values and passed to ``f``; the
    result is compared with the label at the same index. Each mismatch is
    reported on stdout.

    Args:
        X (np.ndarray): Input rows, each holding exactly two values.
        Y (np.ndarray): Labels, indexed in step with the rows of X.
        f (callable): f(x1, x2) → predicted label.

    Returns:
        float: number of matching predictions divided by len(X).
    """
    hits = 0
    for i, (x1, x2) in enumerate(X):
        y_pred = f(x1, x2)
        if y_pred == Y[i]:
            hits += 1
            continue
        print(f"f({x1},{x2}) = {y_pred}, but should be {Y[i]}")
    return hits / len(X)


## 3. Simple Two-Layer Neural Network Model (ReLU hidden layer, sigmoid output)

In [None]:
def forward(ws, x):
    """Forward pass: a ReLU hidden layer followed by a sigmoid output layer.

    Args:
        ws: Sequence ``(w1, b1, w2, b2)`` of hidden-layer weights and bias
            followed by output-layer weights and bias.
        x: Input sample or batch; combined with ``w1`` via ``np.dot``.

    Returns:
        The sigmoid activation of the output layer.
    """
    hidden_w, hidden_b, out_w, out_b = ws

    hidden = relu(np.dot(x, hidden_w) + hidden_b)
    return sigmoid(np.dot(hidden, out_w) + out_b)
def backward(ws, x, y):
    """Backpropagate the squared-error loss through the two-layer network.

    The forward pass is recomputed here so that the pre-activations are
    available. The loss gradient at the output is ``2 * (a2 - y) / len(y)``,
    i.e. the squared error averaged over ``len(y)``.

    Args:
        ws: Sequence ``(w1, b1, w2, b2)``.
        x: Batch of inputs (transposed via ``x.T``, so a 2-D array is expected).
        y: Targets matching the network output.

    Returns:
        list: ``[d_w1, d_b1, d_w2, d_b2]``, in the same order as ``ws``.
    """
    hidden_w, hidden_b, out_w, out_b = ws

    # Forward pass, keeping the intermediates needed for the chain rule.
    pre_hidden = np.dot(x, hidden_w) + hidden_b
    hidden = relu(pre_hidden)
    pre_out = np.dot(hidden, out_w) + out_b
    out = sigmoid(pre_out)

    # Output layer: loss gradient scaled by the sigmoid slope.
    loss_grad = 2 * (out - y) / len(y)
    delta_out = loss_grad * d_sigmoid(pre_out)
    grad_out_w = np.dot(hidden.T, delta_out)
    grad_out_b = np.sum(delta_out, axis=0)

    # Hidden layer: push the error back through w2 and the ReLU gate.
    upstream = np.dot(delta_out, out_w.T)
    delta_hidden = upstream * d_relu(pre_hidden)
    grad_hidden_w = np.dot(x.T, delta_hidden)
    grad_hidden_b = np.sum(delta_hidden, axis=0)

    return [grad_hidden_w, grad_hidden_b, grad_out_w, grad_out_b]
def update_weights(ws, grads, lr):
    """Apply one gradient-descent step and return the updated parameters.

    Each parameter is replaced by ``param - lr * grad``. The inputs are not
    modified: new arrays are returned, so a caller that kept a reference to
    the previous weights (e.g. the initial ones) still sees the old values,
    and integer-typed parameters are promoted rather than failing on an
    in-place float update.

    Args:
        ws: Sequence of parameter arrays, e.g. ``[w1, b1, w2, b2]``.
        grads: Sequence of gradients, one per parameter and in the same order.
        lr (float): Learning rate.

    Returns:
        list: Updated parameters, in the same order as ``ws``.

    Raises:
        ValueError: If ``ws`` and ``grads`` differ in length.
    """
    ws = list(ws)
    grads = list(grads)
    if len(ws) != len(grads):
        raise ValueError(
            f"expected one gradient per parameter, got {len(ws)} parameters "
            f"and {len(grads)} gradients"
        )
    return [param - lr * grad for param, grad in zip(ws, grads)]
def train(X, Y, epochs=10000, lr=0.01, seed=None, log_every=1000):
    """Train the 2-2-1 network with full-batch gradient descent.

    Args:
        X: Batch of inputs with two features per row.
        Y: Targets matching the network output.
        epochs (int): Number of gradient-descent steps.
        lr (float): Learning rate passed to ``update_weights``.
        seed (int | None): If given, weights are drawn from a dedicated
            ``np.random.RandomState(seed)`` so runs are reproducible. If
            None, the global ``np.random`` state is used.
        log_every (int | None): Print the loss every this many epochs;
            a falsy value disables logging.

    Returns:
        list: Trained parameters ``[w1, b1, w2, b2]``.
    """
    # Both np.random and RandomState expose .rand, so one code path serves
    # the seeded and the unseeded case.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Weights and biases start uniformly in [0, 1).
    w1 = rng.rand(2, 2)
    b1 = rng.rand(2)
    w2 = rng.rand(2, 1)
    b2 = rng.rand(1)

    ws = [w1, b1, w2, b2]

    for epoch in range(epochs):
        # The loss is only needed for reporting, so compute it only on the
        # epochs that print it; it is evaluated before this epoch's update.
        if log_every and epoch % log_every == 0:
            loss = mse(ws, forward, X, Y)
            print(f'Epoch {epoch}, Loss: {loss}')

        # backward() performs its own forward pass, so no separate one is
        # needed before computing gradients.
        grads = backward(ws, X, Y)
        ws = update_weights(ws, grads, lr)

    return ws
def predict(ws, x):
    """Return the network's output for input ``x``.

    Inference is the same computation as the forward pass, so this delegates
    to ``forward`` instead of duplicating it. The raw sigmoid activation is
    returned; no thresholding to 0/1 is applied.

    Args:
        ws: Sequence ``(w1, b1, w2, b2)`` of network parameters.
        x: Input sample or batch.

    Returns:
        The sigmoid activation of the output layer.
    """
    return forward(ws, x)
if __name__ == "__main__":
    # Fit the network on the four XOR samples.
    ws = train(X, Y, epochs=10000, lr=0.01)

    # Show the prediction next to the expected label for every sample.
    for x, y in zip(X, Y):
        y_pred = predict(ws, x)
        print(f'Input: {x}, Predicted: {y_pred}, Actual: {y}')

    # Run the trained network on a separately constructed input array
    # (the same four input pairs, without labels).
    new_data = np.array([(0, 0), (0, 1), (1, 0), (1, 1)])
    for x in new_data:
        y_pred = predict(ws, x)
        print(f'Input: {x}, Predicted: {y_pred}')
   