In [None]:
import numpy as np

# ReLU activation
def relu(x):
    """Apply the rectified linear unit element-wise.

    Every entry of ``x`` below zero is replaced by zero; all other
    entries pass through unchanged.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Return the element-wise derivative of ReLU evaluated at ``x``.

    The result is a float array holding 1.0 where ``x`` is strictly
    positive and 0.0 everywhere else (including at exactly zero).
    """
    is_positive = x > 0
    return is_positive.astype(float)

# Huber loss
def huber_loss(y_true, y_pred, delta=1.0):
    """Return the mean Huber loss between ``y_true`` and ``y_pred``.

    For each element the residual ``r = y_true - y_pred`` contributes
    ``0.5 * r**2`` when ``|r| <= delta`` and
    ``delta * (|r| - 0.5 * delta)`` otherwise; the per-element values
    are then averaged over all elements.
    """
    residual = y_true - y_pred
    magnitude = np.abs(residual)

    # Quadratic branch for small residuals, linear branch for large ones.
    quadratic = 0.5 * residual**2
    linear = delta * (magnitude - 0.5 * delta)

    per_element = np.where(magnitude <= delta, quadratic, linear)
    return np.mean(per_element)

# Sample data: four points on the line y = 2x, stored as column vectors
X = np.array([[1], [2], [3], [4]], dtype=float)
y = np.array([[2], [4], [6], [8]], dtype=float)

# Initialize weights (fixed seed so the run is reproducible)
np.random.seed(1)
W1 = np.random.randn(1, 3)  # input (1 feature) -> hidden (3 units)
b1 = np.zeros((1, 3))
W2 = np.random.randn(3, 1)  # hidden (3 units) -> output (1 value)
b2 = np.zeros((1, 1))

lr = 0.01    # step size for plain gradient descent
delta = 1.0  # Huber threshold, shared by the loss and its gradient
n_samples = len(X)

# Training loop
for epoch in range(500):
    # Forward pass
    z1 = X @ W1 + b1
    a1 = relu(z1)
    y_pred = a1 @ W2 + b2

    # Loss
    loss = huber_loss(y, y_pred, delta)

    # Backpropagation
    error = y_pred - y
    # Derivative of the Huber loss w.r.t. y_pred (the 1/n factor is applied
    # below): the residual itself while |error| <= delta, and +/-delta
    # beyond it. Propagating the unclipped residual would instead minimise
    # the squared error, not the Huber loss computed above.
    grad_pred = np.clip(error, -delta, delta)

    dW2 = a1.T @ grad_pred / n_samples
    db2 = np.mean(grad_pred, axis=0, keepdims=True)

    # Uses W2 from before this epoch's update.
    da1 = grad_pred @ W2.T
    dz1 = da1 * relu_derivative(z1)
    dW1 = X.T @ dz1 / n_samples
    db1 = np.mean(dz1, axis=0, keepdims=True)

    # Update weights
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

Epoch 0, Loss: 8.8572
Epoch 100, Loss: 0.0138
Epoch 200, Loss: 0.0063
Epoch 300, Loss: 0.0028
Epoch 400, Loss: 0.0013
