In [4]:
import numpy as np

# Linear layer
def linear(x, W, b):
    return x @ W.T + b

# ReLU activation
def relu(x):
    return np.maximum(0, x)

# Sigmoid activation for binary output
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Binary cross-entropy loss
def binary_loss(y_pred, y_true):
    eps = 1e-8  # prevent log(0)
    return -np.mean(y_true * np.log(y_pred + eps) + (1 - y_true) * np.log(1 - y_pred + eps))

# Forward pass
def forward(x, params):
    z1 = linear(x, params['W1'], params['b1'])
    a1 = relu(z1)
    z2 = linear(a1, params['W2'], params['b2'])
    y_pred = sigmoid(z2)  # probability of class 1
    return y_pred, a1

# Backward pass (gradient descent)
def backward(x, y, params, a1, y_pred, lr=0.1):
    m = y.shape[0]
    dz2 = y_pred - y.reshape(-1, 1)
    dW2 = dz2.T @ a1 / m
    db2 = np.mean(dz2, axis=0)
    
    da1 = dz2 @ params['W2']
    dz1 = da1 * (a1 > 0)  # ReLU derivative
    dW1 = dz1.T @ x / m
    db1 = np.mean(dz1, axis=0)
    
    # Gradient descent update
    params['W1'] -= lr * dW1
    params['b1'] -= lr * db1
    params['W2'] -= lr * dW2
    params['b2'] -= lr * db2

# Example dataset (XOR-like)
x = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
y = np.array([0, 1, 1, 0])  # labels

# Initialize parameters
hidden_size = 4
input_size = 2
output_size = 1

params = {
    'W1': np.random.randn(hidden_size, input_size) * 0.01,
    'b1': np.zeros(hidden_size),
    'W2': np.random.randn(output_size, hidden_size) * 0.01,
    'b2': np.zeros(output_size)
}

# Training loop
for epoch in range(1000):
    y_pred, a1 = forward(x, params)
    loss = binary_loss(y_pred, y)
    backward(x, y, params, a1, y_pred, lr=0.5)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Predictions
y_pred, _ = forward(x, params)
print("Predicted probabilities:\n", y_pred)
print("Predicted classes:\n", (y_pred > 0.5).astype(int))


Epoch 0, Loss: 0.6931
Epoch 100, Loss: 0.6932
Epoch 200, Loss: 0.9480
Epoch 300, Loss: 1.3369
Epoch 400, Loss: 1.5615
Epoch 500, Loss: 1.6917
Epoch 600, Loss: 1.7819
Epoch 700, Loss: 1.7834
Epoch 800, Loss: 1.9027
Epoch 900, Loss: 1.9213
Predicted probabilities:
 [[0.49967872]
 [0.99881337]
 [0.49967872]
 [0.00173151]]
Predicted classes:
 [[0]
 [1]
 [0]
 [0]]
