In [None]:
import numpy as np
import matplotlib.pyplot as plt

**Gradient Checking**

In [2]:
def gradient_check(model, x, y, epsilon=1e-7):
    """Compare the model's analytic weight gradients with finite differences.

    For every scalar entry of every array in ``model.weights`` the entry is
    shifted by ``+epsilon`` and by ``-epsilon``, the loss
    ``np.mean((model.forward(x)[-1] - y) ** 2)`` is re-evaluated on the same
    ``x``/``y``, and the centred difference
    ``(loss_plus - loss_minus) / (2 * epsilon)`` is recorded. For each layer
    the largest absolute deviation from the weight gradient returned by
    ``model.backward`` is printed.

    Parameters
    ----------
    model : object
        Must expose ``weights`` (an indexable, assignable sequence of NumPy
        arrays), ``forward(x)`` (returning a sequence whose last item is the
        network output) and ``backward(activations, y)`` (returning
        ``(grad_w, grad_b)``).
    x : array-like
        Inputs handed to ``model.forward``.
    y : array-like
        Targets; must broadcast against the last activation.
    epsilon : float, optional
        Half-width of the finite-difference stencil.

    Returns
    -------
    list of numpy.ndarray
        One numerical gradient per entry of ``model.weights``, each with the
        same shape as the corresponding weight array.

    Notes
    -----
    * The comparison is only meaningful if ``model.backward`` differentiates
      this same mean-squared-error loss -- TODO confirm against the model.
    * Only weight gradients are checked; bias gradients are not.
    * Two forward passes are run per scalar weight, so keep the model and the
      batch small.
    * ``model.weights[i]`` is put back to its original array object after each
      layer, even if a forward pass raises.
    """
    activations = model.forward(x)
    grad_w, _ = model.backward(activations, y)

    def _loss():
        # Loss of the model in its current (possibly perturbed) state.
        return np.mean((model.forward(x)[-1] - y) ** 2)

    # Compute numerical gradients
    numerical_grad_w = []
    for i, w in enumerate(model.weights):
        num_grad = np.zeros_like(w)
        try:
            for j in range(w.size):
                # flatten() always copies, so the original array `w` is never
                # modified; each perturbation starts from the pristine values.
                w_plus = w.flatten()
                w_plus[j] += epsilon
                model.weights[i] = w_plus.reshape(w.shape)
                loss_plus = _loss()

                w_minus = w.flatten()
                w_minus[j] -= epsilon
                model.weights[i] = w_minus.reshape(w.shape)
                loss_minus = _loss()

                # Centred finite difference
                num_grad.flat[j] = (loss_plus - loss_minus) / (2 * epsilon)
        finally:
            # Restore the exact original array instead of undoing the shift
            # arithmetically, which would leave floating-point residue.
            model.weights[i] = w

        numerical_grad_w.append(num_grad)

        # Compare
        diff = np.abs(grad_w[i] - num_grad)
        print(f"Layer {i} gradient difference: {np.max(diff):.2e}")

    return numerical_grad_w

**Monitoring Training**

In [None]:
def monitor_training(model, X_train, y_train, X_val, y_val, epochs=1000):
    """Train ``model`` with plain gradient steps and chart the loss curves.

    Each epoch runs ``model.forward`` / ``model.backward`` on the training
    set, subtracts ``model.learning_rate`` times the returned gradients from
    every entry of ``model.weights`` and ``model.biases``, and then records
    the mean squared error of ``model.predict`` on both the training and the
    validation data. A progress line is printed every 100 epochs, and a
    matplotlib figure with both curves is shown once training finishes.

    Parameters
    ----------
    model : object
        Provides ``forward``, ``backward``, ``predict``, ``weights``,
        ``biases`` and ``learning_rate``. Its parameters are updated in place.
    X_train, y_train : array-like
        Data used for the gradient steps and the training loss.
    X_val, y_val : array-like
        Data used only for the validation loss.
    epochs : int, optional
        Number of update steps to perform.

    Returns
    -------
    tuple of (list, list)
        Per-epoch training losses and per-epoch validation losses.
    """
    def _mse(pred, target):
        # Mean of the squared residuals over every element.
        return np.mean((pred - target) ** 2)

    train_curve, val_curve = [], []

    for step in range(epochs):
        # One update: forward pass, gradients, then step every layer.
        grads_w, grads_b = model.backward(model.forward(X_train), y_train)
        for layer in range(len(model.weights)):
            model.weights[layer] -= model.learning_rate * grads_w[layer]
            model.biases[layer] -= model.learning_rate * grads_b[layer]

        # Losses are measured after the update, via model.predict.
        pred_train, pred_val = model.predict(X_train), model.predict(X_val)
        loss_train = _mse(pred_train, y_train)
        loss_val = _mse(pred_val, y_val)
        train_curve.append(loss_train)
        val_curve.append(loss_val)

        if not step % 100:
            print(f"Epoch {step}: Train Loss = {loss_train:.4f}, Val Loss = {loss_val:.4f}")

    # Draw both curves on a single figure.
    plt.figure(figsize=(10, 6))
    for curve, name in ((train_curve, 'Training Loss'), (val_curve, 'Validation Loss')):
        plt.plot(curve, label=name, linewidth=2)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.title('Training Progress', fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    return train_curve, val_curve