In [58]:
import numpy as np

In [59]:
# Layer widths of the network: input features, hidden units, output units.
input_size, hidden_size, output_size = 2, 2, 1

def init(input_size, hidden_size, output_size, seed=42, scale=0.01):
    """Create the initial parameters of a one-hidden-layer network.

    Weight matrices are drawn from a standard normal distribution and
    multiplied by ``scale``; bias vectors start at zero.

    Args:
        input_size: number of input features (columns of ``W1``).
        hidden_size: number of hidden units (rows of ``W1``, columns of ``W2``).
        output_size: number of output units (rows of ``W2``).
        seed: seed of the private random generator. The default (42) yields
            the same weights as seeding NumPy's global generator with 42.
        scale: multiplier applied to the random weights.

    Returns:
        dict with keys ``'W1'`` of shape (hidden_size, input_size), ``'b1'``
        of shape (hidden_size, 1), ``'W2'`` of shape (output_size,
        hidden_size) and ``'b2'`` of shape (output_size, 1).
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by np.random.randn, but leaves NumPy's global generator
    # untouched, so calling init() does not reset randomness used elsewhere.
    rng = np.random.RandomState(seed)
    weights = {
        'W1': rng.randn(hidden_size, input_size) * scale,
        'b1': np.zeros((hidden_size, 1)),
        'W2': rng.randn(output_size, hidden_size) * scale,
        'b2': np.zeros((output_size, 1))
    }
    return weights

In [60]:
# weights = init(input_size, hidden_size, output_size)

In [61]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Args:
        z: scalar or array-like of real numbers.

    Returns:
        Element-wise sigmoid of ``z`` with the same shape as the input
        (a NumPy scalar for scalar input).
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer/bool input: promote so the exponentials are computed in float.
        z = z.astype(float)

    # exp(-|z|) always lies in (0, 1], so it cannot overflow, whereas the
    # naive exp(-z) overflows for large negative z.
    e = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 the algebraically
    # equivalent exp(z) / (1 + exp(z)) is used instead.
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # np.where returns a 0-d array for scalar input; [()] unwraps it to a
    # NumPy scalar and is a no-op view for real arrays.
    return out[()]

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    floor = 0
    rectified = np.maximum(floor, z)
    return rectified

In [62]:
def forward_propagation(X, weights):
    """Run one forward pass through the two-layer network.

    The hidden layer applies ``relu`` to ``W1 . X + b1``; the output layer
    applies ``sigmoid`` to ``W2 . A1 + b2``.

    Args:
        X: input array that ``W1`` is multiplied with via ``np.dot(W1, X)``.
        weights: dict holding the parameters under the keys ``"W1"``,
            ``"b1"``, ``"W2"`` and ``"b2"``.

    Returns:
        Tuple ``(A2, cache)`` where ``A2`` is the sigmoid output and
        ``cache`` is the tuple ``(Z1, A1, W1, b1, Z2, A2, W2, b2)`` of
        intermediates and parameters.
    """
    W1 = weights["W1"]
    b1 = weights["b1"]
    W2 = weights["W2"]
    b2 = weights["b2"]

    # Hidden layer: affine map followed by ReLU.
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    # Output layer: affine map followed by sigmoid.
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    return A2, (Z1, A1, W1, b1, Z2, A2, W2, b2)

In [63]:
def compute_loss(Y, A2, eps=1e-15):
    """Mean binary cross-entropy between labels ``Y`` and predictions ``A2``.

    Args:
        Y: array of 0/1 labels; the number of examples is read from
            ``Y.shape[1]``.
        A2: predicted probabilities, broadcast-compatible with ``Y``.
        eps: predictions are clipped to ``[eps, 1 - eps]`` before taking
            logarithms so that probabilities of exactly 0 or 1 yield a
            finite loss instead of NaN/inf.

    Returns:
        The summed cross-entropy divided by the number of examples.
    """
    m = Y.shape[1]
    # Without clipping, A2 == 0 or A2 == 1 makes log() return -inf, and
    # 0 * -inf then poisons the whole sum with NaN.
    A2 = np.clip(A2, eps, 1 - eps)
    loss = -1/m * np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2))
    return loss

In [64]:
def backpropagation(X, Y, cache):
    """Compute parameter gradients for the two-layer network.

    Args:
        X: input array; the number of examples is read from ``X.shape[1]``.
        Y: labels, subtracted element-wise from the output activation.
        cache: tuple ``(Z1, A1, W1, b1, Z2, A2, W2, b2)``; only ``A1``,
            ``A2`` and ``W2`` are used here.

    Returns:
        dict with the gradients under the keys ``'dW1'``, ``'db1'``,
        ``'dW2'`` and ``'db2'``.
    """
    _, A1, _, _, _, A2, W2, _ = cache
    inv_m = 1 / X.shape[1]

    # Output layer: the error term is the difference between output and label.
    out_err = A2 - Y
    grad_W2 = inv_m * np.dot(out_err, A1.T)
    grad_b2 = inv_m * np.sum(out_err, axis=1, keepdims=True)

    # Hidden layer: push the error back through W2, then zero it wherever
    # the hidden activation is not positive.
    active = A1 > 0
    hidden_err = np.dot(W2.T, out_err) * active
    grad_W1 = inv_m * np.dot(hidden_err, X.T)
    grad_b1 = inv_m * np.sum(hidden_err, axis=1, keepdims=True)

    return {
        'dW1': grad_W1,
        'db1': grad_b1,
        'dW2': grad_W2,
        'db2': grad_b2,
    }

In [65]:
def update_parameters(weights, gradients, lr):
    """Apply one gradient-descent step to the parameters, in place.

    Args:
        weights: dict with keys ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``;
            its entries are modified in place.
        gradients: dict with the matching keys ``"dW1"``, ``"db1"``,
            ``"dW2"``, ``"db2"``.
        lr: learning rate multiplying each gradient.

    Returns:
        The same ``weights`` dict that was passed in.
    """
    for name in ("W1", "b1", "W2", "b2"):
        # Augmented assignment keeps the update in place on the stored array.
        weights[name] -= lr * gradients["d" + name]

    return weights

In [66]:
# training

def train(X, Y, input_size, hidden_size, output_size, epochs, lr):
    """Train the two-layer network with full-batch gradient descent.

    Parameters are created with ``init`` and then, for ``epochs``
    iterations, a forward pass, loss evaluation, backward pass and
    parameter update are performed on the whole of ``X``/``Y``. The loss
    is printed on every tenth iteration, starting with iteration 0.

    Args:
        X: inputs handed to ``forward_propagation`` and ``backpropagation``.
        Y: labels handed to ``compute_loss`` and ``backpropagation``.
        input_size, hidden_size, output_size: layer widths passed to ``init``.
        epochs: number of gradient-descent iterations.
        lr: learning rate passed to ``update_parameters``.

    Returns:
        The parameter dict after the final update.
    """
    weights = init(input_size, hidden_size, output_size)

    for i in range(epochs):
        # The loss is measured on the pre-update parameters of this iteration.
        A2, cache = forward_propagation(X, weights)
        loss = compute_loss(Y, A2)

        weights = update_parameters(weights, backpropagation(X, Y, cache), lr)

        report_now = (i % 10 == 0)
        if report_now:
            print(f"Epoch {i}, Loss: {loss:.4f}")

    return weights

# Example usage: the XOR truth table.
# Each column of X is one example and each row one input feature
# (4 examples, 2 features).
X = np.array([
    [0, 0, 1, 1],
    [0, 1, 0, 1],
])
Y = np.array([[0, 1, 1, 0]])  # XOR of the two rows of X

# Train the network.
# NOTE(review): the recorded output of this cell shows the loss staying close
# to 0.6931 for the first several hundred epochs — the initialisation scale,
# learning rate or epoch count may need tuning for XOR; confirm.
trained_weights = train(
    X, Y, input_size, hidden_size, output_size, epochs=1000, lr=0.1
)

Epoch 0, Loss: 0.6931
Epoch 10, Loss: 0.6931
Epoch 20, Loss: 0.6931
Epoch 30, Loss: 0.6931
Epoch 40, Loss: 0.6931
Epoch 50, Loss: 0.6931
Epoch 60, Loss: 0.6931
Epoch 70, Loss: 0.6931
Epoch 80, Loss: 0.6931
Epoch 90, Loss: 0.6931
Epoch 100, Loss: 0.6931
Epoch 110, Loss: 0.6931
Epoch 120, Loss: 0.6931
Epoch 130, Loss: 0.6931
Epoch 140, Loss: 0.6931
Epoch 150, Loss: 0.6931
Epoch 160, Loss: 0.6931
Epoch 170, Loss: 0.6931
Epoch 180, Loss: 0.6931
Epoch 190, Loss: 0.6931
Epoch 200, Loss: 0.6931
Epoch 210, Loss: 0.6931
Epoch 220, Loss: 0.6931
Epoch 230, Loss: 0.6931
Epoch 240, Loss: 0.6931
Epoch 250, Loss: 0.6931
Epoch 260, Loss: 0.6931
Epoch 270, Loss: 0.6931
Epoch 280, Loss: 0.6931
Epoch 290, Loss: 0.6931
Epoch 300, Loss: 0.6931
Epoch 310, Loss: 0.6930
Epoch 320, Loss: 0.6930
Epoch 330, Loss: 0.6930
Epoch 340, Loss: 0.6930
Epoch 350, Loss: 0.6930
Epoch 360, Loss: 0.6929
Epoch 370, Loss: 0.6929
Epoch 380, Loss: 0.6929
Epoch 390, Loss: 0.6928
Epoch 400, Loss: 0.6928
Epoch 410, Loss: 0.6927
Epo

In [67]:
def predict(X, weights):
    """Classify inputs by thresholding the network output at 0.5.

    Args:
        X: inputs handed to ``forward_propagation``.
        weights: parameter dict handed to ``forward_propagation``.

    Returns:
        Boolean array that is True wherever the output activation is
        strictly greater than 0.5.
    """
    probabilities = forward_propagation(X, weights)[0]
    return probabilities > 0.5

# Classify the training inputs with the trained parameters and show the result.
predictions = predict(X, weights=trained_weights)
print(f"Predictions: {predictions}")

Predictions: [[ True  True  True False]]
