# In [None]:
"""
Notebook para verificar que backpropagation está bien implementado
usando gradient checking numérico
"""
import sys
sys.path.append('../')

import numpy as np
from src.neural_network import NeuralNetwork
from src.utils import one_hot_encode

def numerical_gradient(model, X, y, epsilon=1e-7):
    """Estimate the loss gradient of every model parameter numerically.

    Each scalar entry ``p`` of each parameter array is perturbed in place to
    ``p + epsilon`` and ``p - epsilon``. The loss is re-evaluated through
    ``model.forward`` and ``model.compute_loss`` for both values, and the
    central-difference slope ``(loss_plus - loss_minus) / (2 * epsilon)`` is
    stored as the gradient estimate for that entry.

    Args:
        model: Object exposing ``get_params()``, ``forward(X)`` and
            ``compute_loss(y_pred, y)``. The arrays returned by
            ``get_params()`` are mutated in place, so they must be the live
            arrays that ``forward`` reads (not copies); otherwise both loss
            evaluations are identical and every estimate is zero.
        X: Input batch, passed unchanged to ``model.forward``.
        y: Targets, passed unchanged to ``model.compute_loss``.
        epsilon: Half-width of the finite-difference step.

    Returns:
        A list with one array per parameter, in ``get_params()`` order, each
        shaped like its parameter.

    Note:
        Two forward passes are run per scalar parameter, so this is only
        practical for small models and small batches.
    """
    numerical_grads = []

    for param in model.get_params():
        grad = np.zeros_like(param)

        # np.ndindex enumerates every multi-index of the array and, unlike a
        # default np.nditer, also accepts zero-sized arrays (it yields nothing).
        for idx in np.ndindex(*np.shape(param)):
            old_value = param[idx]
            try:
                # f(x + epsilon)
                param[idx] = old_value + epsilon
                loss_plus = model.compute_loss(model.forward(X), y)

                # f(x - epsilon)
                param[idx] = old_value - epsilon
                loss_minus = model.compute_loss(model.forward(X), y)
            finally:
                # Always undo the perturbation, even if the model raises, so
                # the caller's parameters are never left modified.
                param[idx] = old_value

            # Central difference: (f(x + eps) - f(x - eps)) / (2 * eps)
            grad[idx] = (loss_plus - loss_minus) / (2 * epsilon)

        numerical_grads.append(grad)

    return numerical_grads

def gradient_check(model, X, y, epsilon=1e-7, threshold=1e-7):
    """Compare analytical (backprop) gradients against numerical estimates.

    Runs one forward and one backward pass, reads the analytical gradients
    from ``model.get_grads()``, estimates the same gradients with
    ``numerical_gradient`` and prints, per parameter, the relative error
    ``||a - n|| / (||a|| + ||n|| + 1e-10)``.

    Args:
        model: Object exposing ``forward``, ``backward``, ``get_grads``,
            ``layers`` and whatever ``numerical_gradient`` requires.
            Assumes ``get_grads()`` and ``get_params()`` list their arrays in
            the same order, two per layer (W then b) -- TODO confirm against
            the model implementation.
        X: Input batch.
        y: Targets.
        epsilon: Finite-difference step forwarded to ``numerical_gradient``.
        threshold: A parameter passes when its relative error is strictly
            below this value.

    Returns:
        True only if at least one gradient was compared, the analytical and
        numerical gradient lists have the same length, and every compared
        pair has a finite relative error below ``threshold``.
    """
    # The forward return value is not needed here; the pass presumably caches
    # the activations that backward consumes -- verify against the model.
    model.forward(X)
    model.backward(y)
    analytical_grads = list(model.get_grads())

    # Numerical gradients
    print("Calculando gradientes numéricos (puede tardar)...")
    numerical_grads = list(numerical_gradient(model, X, y, epsilon))

    # Compare
    print("\nComparación de gradientes:")
    print("="*60)

    param_names = []
    for i, _layer in enumerate(model.layers):
        param_names.extend([f"Layer {i} W", f"Layer {i} b"])

    all_close = True

    # zip() stops at the shortest input, so a length mismatch would otherwise
    # leave some gradients unverified while still reporting success.
    n_analytical = len(analytical_grads)
    n_numerical = len(numerical_grads)
    if n_analytical != n_numerical:
        print(f"✗ Número de gradientes distinto: {n_analytical} analíticos "
              f"vs {n_numerical} numéricos")
        all_close = False
    if min(n_analytical, n_numerical) == 0:
        # Nothing was compared, so correctness cannot be claimed.
        print("✗ No hay gradientes que comparar")
        all_close = False

    for k, (analytical, numerical) in enumerate(
            zip(analytical_grads, numerical_grads)):
        # Fall back to a generic label instead of dropping gradients when the
        # layer-derived names do not cover every parameter.
        name = param_names[k] if k < len(param_names) else f"Param {k}"

        analytical = np.asarray(analytical)
        numerical = np.asarray(numerical)
        if analytical.size != numerical.size:
            print(f"{name:20s} - Tamaños incompatibles: "
                  f"{analytical.shape} vs {numerical.shape} ✗ ERROR")
            all_close = False
            continue

        # Compare flattened arrays so that e.g. (n, 1) vs (n,) cannot
        # broadcast into an (n, n) difference and distort the norm.
        flat_a = analytical.ravel()
        flat_n = numerical.ravel()

        # Relative difference
        numerator = np.linalg.norm(flat_a - flat_n)
        denominator = np.linalg.norm(flat_a) + np.linalg.norm(flat_n)
        relative_error = numerator / (denominator + 1e-10)

        # Evaluate the pass condition once: with a NaN error both
        # `< threshold` and `>= threshold` are False, so testing the failure
        # condition separately would let NaN gradients count as a pass.
        passed = bool(relative_error < threshold)
        status = "✓ OK" if passed else "✗ ERROR"
        print(f"{name:20s} - Error relativo: {relative_error:.2e} {status}")

        if not passed:
            all_close = False

    print("="*60)
    if all_close:
        print("✓ Backpropagation implementado correctamente")
    else:
        print("✗ Hay errores en backpropagation")

    return all_close

# Test 1: XOR inputs with a 2-4-2 sigmoid/softmax network.
print("TEST 1: XOR")
# Seed before building the model so a failing check can be reproduced, as
# TEST 2 already does. Presumably NeuralNetwork draws its initial weights
# from the global np.random state -- TODO confirm.
np.random.seed(0)
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = one_hot_encode(np.array([0, 1, 1, 0]), 2)

architecture = [(2, None), (4, 'sigmoid'), (2, 'softmax')]
model = NeuralNetwork(architecture)

gradient_check(model, X, y)

print("\n" + "="*60 + "\n")

# Test 2: random inputs and labels with a deeper network mixing activations.
print("TEST 2: Datos aleatorios (3 capas)")
np.random.seed(42)
X = np.random.randn(10, 5)
y = one_hot_encode(np.random.randint(0, 3, 10), 3)

architecture = [(5, None), (8, 'relu'), (6, 'sigmoid'), (3, 'softmax')]
model = NeuralNetwork(architecture)

gradient_check(model, X, y)