In [15]:
import numpy as np

np.random.seed(42)

# 1. Synthetic Dataset
def generate_data(n_samples=200):
    X = np.random.randn(n_samples, 10)
    true_weights = np.random.randn(10, 1)
    y = X @ true_weights + np.random.randn(n_samples, 1) * 0.1
    return X, y

# 2. BatchNorm Layer
class BatchNorm:
    def __init__(self, dim, eps=1e-5):
        self.gamma = np.ones((1, dim))
        self.beta = np.zeros((1, dim))
        self.eps = eps

    def forward(self, X):
        self.mean = np.mean(X, axis=0, keepdims=True)
        self.variance = np.var(X, axis=0, keepdims=True)
        self.X_norm = (X - self.mean) / np.sqrt(self.variance + self.eps)
        return self.gamma * self.X_norm + self.beta

# 3. Neural Network
class NeuralNetwork:
    def __init__(self, input_dim, hidden_dim, use_bn=True, dropout_rate=0.2, lambda_l1=0.0, lambda_l2=0.0):
        self.W1 = np.random.randn(input_dim, hidden_dim) * 0.1
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = np.random.randn(hidden_dim, 1) * 0.1
        self.b2 = np.zeros((1, 1))

        self.use_bn = use_bn
        self.dropout_rate = dropout_rate
        self.lambda_l1 = lambda_l1
        self.lambda_l2 = lambda_l2

        if self.use_bn:
            self.bn1 = BatchNorm(hidden_dim)

    def relu(self, X):
        return np.maximum(0, X)

    def dropout(self, X):
        self.dropout_mask = (np.random.rand(*X.shape) > self.dropout_rate).astype(float)
        return X * self.dropout_mask / (1.0 - self.dropout_rate)

    def forward(self, X, training=True):
        z1 = X @ self.W1 + self.b1
        if self.use_bn:
            z1 = self.bn1.forward(z1)
        a1 = self.relu(z1)
        if training:
            a1 = self.dropout(a1)
        self.a1 = a1
        self.output = a1 @ self.W2 + self.b2
        return self.output

    def compute_loss(self, y_true, y_pred):
        mse = np.mean((y_true - y_pred) ** 2)
        l1 = self.lambda_l1 * (np.sum(np.abs(self.W1)) + np.sum(np.abs(self.W2)))
        l2 = self.lambda_l2 * (np.sum(self.W1 ** 2) + np.sum(self.W2 ** 2))
        return mse + l1 + l2

    def backward(self, X, y, lr=0.01):
        m = y.shape[0]
        d_output = 2 * (self.output - y) / m

        dW2 = self.a1.T @ d_output + self.lambda_l1 * np.sign(self.W2) + 2 * self.lambda_l2 * self.W2
        db2 = np.sum(d_output, axis=0, keepdims=True)

        da1 = d_output @ self.W2.T
        da1[self.a1 <= 0] = 0  # ReLU gradient
        da1 *= self.dropout_mask / (1.0 - self.dropout_rate)  # Dropout gradient

        dW1 = X.T @ da1 + self.lambda_l1 * np.sign(self.W1) + 2 * self.lambda_l2 * self.W1
        db1 = np.sum(da1, axis=0, keepdims=True)

        # Update
        self.W1 -= lr * dW1
        self.b1 -= lr * db1
        self.W2 -= lr * dW2
        self.b2 -= lr * db2

# 4. Early Stopping
def early_stopping(val_losses, patience):
    if len(val_losses) < patience:
        return False
    best_loss = min(val_losses[:-patience+1])
    return all(loss >= best_loss for loss in val_losses[-patience:])

# 5. Training Loop
def train(X_train, y_train, X_val, y_val, epochs=1000, patience=10, **nn_kwargs):
    model = NeuralNetwork(input_dim=X_train.shape[1], hidden_dim=32, **nn_kwargs)
    val_losses = []
    best_weights = None
    best_loss = float('inf')

    for epoch in range(epochs):
        y_pred = model.forward(X_train, training=True)
        loss = model.compute_loss(y_train, y_pred)
        model.backward(X_train, y_train, lr=0.01)

        # Validation
        y_val_pred = model.forward(X_val, training=False)
        val_loss = model.compute_loss(y_val, y_val_pred)
        val_losses.append(val_loss)

        print(f"Epoch {epoch}, Train Loss: {loss:.4f}, Val Loss: {val_loss:.4f}")

        # Save best model
        if val_loss < best_loss:
            best_loss = val_loss
            best_weights = (model.W1.copy(), model.b1.copy(), model.W2.copy(), model.b2.copy())

        # Early stopping
        if early_stopping(val_losses, patience):
            print("Early stopping triggered.")
            break

    # Load best weights
    model.W1, model.b1, model.W2, model.b2 = best_weights
    return model

# 6. Run Everything
X, y = generate_data()
X_train, X_val = X[:150], X[150:]
y_train, y_val = y[:150], y[150:]

model = train(
    X_train, y_train, X_val, y_val,
    lambda_l1=0.001,
    lambda_l2=0.001,
    dropout_rate=0.2,
    use_bn=True,
    patience=10
)

# Test model
y_test_pred = model.forward(X_val, training=False)
print("Final Validation Loss:", model.compute_loss(y_val, y_test_pred))


Epoch 0, Train Loss: 6.7069, Val Loss: 5.0102
Epoch 1, Train Loss: 6.5053, Val Loss: 4.7570
Epoch 2, Train Loss: 6.1274, Val Loss: 4.5262
Epoch 3, Train Loss: 5.8693, Val Loss: 4.3067
Epoch 4, Train Loss: 5.5021, Val Loss: 4.0971
Epoch 5, Train Loss: 5.3790, Val Loss: 3.9013
Epoch 6, Train Loss: 4.9178, Val Loss: 3.7222
Epoch 7, Train Loss: 4.8988, Val Loss: 3.5420
Epoch 8, Train Loss: 4.4178, Val Loss: 3.3831
Epoch 9, Train Loss: 4.2595, Val Loss: 3.2180
Epoch 10, Train Loss: 4.1767, Val Loss: 3.0583
Epoch 11, Train Loss: 4.0214, Val Loss: 2.9098
Epoch 12, Train Loss: 3.7574, Val Loss: 2.7732
Epoch 13, Train Loss: 3.6414, Val Loss: 2.6385
Epoch 14, Train Loss: 3.4068, Val Loss: 2.5080
Epoch 15, Train Loss: 3.1310, Val Loss: 2.3756
Epoch 16, Train Loss: 3.1123, Val Loss: 2.2555
Epoch 17, Train Loss: 3.0098, Val Loss: 2.1400
Epoch 18, Train Loss: 2.8519, Val Loss: 2.0299
Epoch 19, Train Loss: 2.6951, Val Loss: 1.9272
Epoch 20, Train Loss: 2.4628, Val Loss: 1.8239
Epoch 21, Train Loss: 2