# Práctica 0: Implementación de Perceptrón Multicapa desde Cero

Implementación de un perceptrón multicapa (MLP) sin usar frameworks de deep learning, solo NumPy.

**Objetivos:**
- Implementar forward pass y backpropagation manualmente
- Probar con XOR (problema no linealmente separable)
- Aplicar el MLP a un problema de regresión

In [None]:
# Configuración inicial
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Fix the global NumPy seed so weight initialization and synthetic data are
# reproducible across runs.
np.random.seed(42)
# The seaborn style sheet is registered as 'seaborn-v0_8' on recent Matplotlib
# releases and as 'seaborn' on older ones. Use whichever is available instead
# of failing at import time; if neither exists the default style is kept.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break


In [None]:
# Implementación del Perceptrón Multicapa
class MultiLayerPerceptron:
    """Perceptron with one hidden layer, trained by full-batch gradient descent.

    The network computes ``output = g(f(X @ W1 + b1) @ W2 + b2)`` where ``f``
    is the hidden activation (``'relu'``, ``'sigmoid'`` or ``'tanh'``) and
    ``g`` is a sigmoid when ``task == 'classification'`` and the identity for
    any other ``task`` value.

    Args:
        input_size: Number of input features (rows of ``W1``).
        hidden_size: Number of hidden units.
        output_size: Number of output units (columns of ``W2``).
        learning_rate: Step size applied to every parameter update.
        epochs: Number of full-batch iterations performed by ``fit``.
        activation: Hidden-layer activation name.
        task: ``'classification'`` selects a sigmoid output and thresholded
            predictions; anything else is treated as regression.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    _SUPPORTED_ACTIVATIONS = ('relu', 'sigmoid', 'tanh')

    def __init__(self, input_size, hidden_size, output_size,
                 learning_rate=0.01, epochs=5000, activation='relu', task='classification'):
        # Fail fast: an unknown name would otherwise make the activation
        # helpers return None and break much later inside forward().
        if activation not in self._SUPPORTED_ACTIVATIONS:
            raise ValueError(
                f"Unsupported activation {activation!r}; "
                f"expected one of {self._SUPPORTED_ACTIVATIONS}"
            )

        # Xavier-style initialization: zero-mean normal weights with
        # std = sqrt(2 / (fan_in + fan_out)); biases start at zero.
        xavier_std = np.sqrt(2.0 / (input_size + hidden_size))
        self.W1 = np.random.normal(0, xavier_std, (input_size, hidden_size))
        self.b1 = np.zeros((1, hidden_size))

        xavier_std2 = np.sqrt(2.0 / (hidden_size + output_size))
        self.W2 = np.random.normal(0, xavier_std2, (hidden_size, output_size))
        self.b2 = np.zeros((1, output_size))

        self.learning_rate = learning_rate
        self.epochs = epochs
        self.activation = activation
        self.task = task

        # Per-epoch MSE histories, appended to by fit().
        self.train_errors = []
        self.validation_errors = []

    @staticmethod
    def _sigmoid(x):
        """Return the logistic sigmoid of ``x``, clipping inputs to [-250, 250]."""
        # Clipping keeps np.exp from overflowing for very negative inputs.
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))

    def _activation_function(self, x):
        """Apply the configured hidden activation to the pre-activation ``x``."""
        if self.activation == 'relu':
            return np.maximum(0, x)
        elif self.activation == 'sigmoid':
            return self._sigmoid(x)
        elif self.activation == 'tanh':
            return np.tanh(x)
        raise ValueError(f"Unsupported activation {self.activation!r}")

    def _activation_derivative(self, x):
        """Return the hidden activation's derivative.

        ``x`` is the activation *output* (``backward`` passes ``self.a1``), so
        the sigmoid and tanh derivatives are written in terms of that output.
        """
        if self.activation == 'relu':
            return (x > 0).astype(float)
        elif self.activation == 'sigmoid':
            return x * (1 - x)
        elif self.activation == 'tanh':
            return 1 - x ** 2
        raise ValueError(f"Unsupported activation {self.activation!r}")

    def _output_function(self, x):
        """Map output pre-activations to outputs: sigmoid for classification, identity otherwise."""
        if self.task == 'classification':
            return self._sigmoid(x)
        else:
            return x

    def _output_derivative(self, x):
        """Return the output function's derivative, given the output value ``x``."""
        if self.task == 'classification':
            return x * (1 - x)
        else:
            return np.ones_like(x)

    def forward(self, X):
        """Run a forward pass and return the network output.

        Intermediate values are cached on the instance (``z1``, ``a1``, ``z2``,
        ``output``) because ``backward`` reads them.
        """
        # Hidden layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self._activation_function(self.z1)

        # Output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.output = self._output_function(self.z2)

        return self.output

    def backward(self, X, y):
        """Update all weights and biases in place with one gradient-descent step.

        Must be called right after ``forward(X)``: it relies on the cached
        ``self.a1`` and ``self.output`` from that pass.
        """
        m = X.shape[0]
        # Output-layer error signal. error is (target - prediction), so adding
        # the resulting terms below moves the parameters downhill on the
        # squared error (constant factors are absorbed by the learning rate).
        error = y - self.output
        d_output = error * self._output_derivative(self.output)
        # Hidden-layer error signal, propagated through W2 before it is updated.
        error_hidden = d_output.dot(self.W2.T)
        d_hidden = error_hidden * self._activation_derivative(self.a1)
        # Parameter updates, averaged over the m samples of the batch.
        self.W2 += (self.a1.T.dot(d_output) / m) * self.learning_rate
        self.b2 += np.mean(d_output, axis=0, keepdims=True) * self.learning_rate
        self.W1 += (X.T.dot(d_hidden) / m) * self.learning_rate
        self.b1 += np.mean(d_hidden, axis=0, keepdims=True) * self.learning_rate

    def fit(self, X, y, X_val=None, y_val=None, verbose=True):
        """Train for ``self.epochs`` full-batch iterations.

        Each epoch appends the training MSE to ``self.train_errors`` and, when
        both ``X_val`` and ``y_val`` are given, the validation MSE to
        ``self.validation_errors``.

        Args:
            X: Training inputs.
            y: Training targets, shaped like the network output.
            X_val: Optional validation inputs.
            y_val: Optional validation targets.
            verbose: Accepted for interface compatibility; currently unused.

        Returns:
            Elapsed training time in seconds.
        """
        start_time = time.time()
        for epoch in range(self.epochs):
            self.forward(X)
            self.backward(X, y)
            # self.output still holds the predictions made before this epoch's
            # weight update, so this is the pre-update training error.
            mse_train = np.mean((y - self.output) ** 2)
            self.train_errors.append(mse_train)
            if X_val is not None and y_val is not None:
                # This forward pass overwrites the cached activations; the next
                # epoch recomputes them from X before calling backward().
                val_pred = self.forward(X_val)
                mse_val = np.mean((y_val - val_pred) ** 2)
                self.validation_errors.append(mse_val)
        return time.time() - start_time

    def predict(self, X):
        """Return predictions for ``X``.

        For classification the sigmoid output is thresholded at 0.5 and
        returned as an integer array of 0/1; otherwise the raw network output
        is returned.
        """
        output = self.forward(X)
        if self.task == 'classification':
            return (output > 0.5).astype(int)
        else:
            return output


In [None]:
# XOR problem - sanity check for the MLP
# XOR truth table: the four input pairs and their expected outputs
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([[0], [1], [1], [0]])
# Hyperparameter configurations to try
configs_xor = [
    {'hidden_size': 2, 'learning_rate': 0.1, 'epochs': 1000, 'activation': 'sigmoid'},
    {'hidden_size': 4, 'learning_rate': 0.1, 'epochs': 1000, 'activation': 'sigmoid'},
    {'hidden_size': 8, 'learning_rate': 0.05, 'epochs': 2000, 'activation': 'relu'},
    {'hidden_size': 10, 'learning_rate': 0.01, 'epochs': 5000, 'activation': 'tanh'}
]
# Start below any reachable accuracy so the first trained model is always
# kept. Starting at 0 with a strict '>' would leave mejor_modelo_xor as None
# whenever every configuration scores 0, breaking the plotting cell.
mejor_accuracy = -float('inf')
mejor_modelo_xor = None
for config in configs_xor:
    mlp = MultiLayerPerceptron(input_size=2, output_size=1, task='classification', **config)
    mlp.fit(X_xor, y_xor, verbose=False)
    pred = mlp.predict(X_xor)
    # Fraction of the four XOR rows predicted correctly
    acc = np.mean(y_xor == pred)
    # Strict '>' keeps the earliest configuration on ties
    if acc > mejor_accuracy:
        mejor_accuracy = acc
        mejor_modelo_xor = mlp


In [None]:
# Visualización frontera XOR y curva de aprendizaje
def plot_decision_boundary(model, X, y, title="Frontera de Decisión",
                           x_range=(-0.1, 1.1), y_range=(-0.1, 1.1), step=0.01):
    """Plot a model's output over a 2-D grid together with the labelled points.

    Args:
        model: Object whose ``forward(points)`` accepts an ``(n, 2)`` array and
            returns one value per row (the result is reshaped to the grid).
        X: Array of 2-D points; column 0 is drawn on the horizontal axis and
            column 1 on the vertical axis.
        y: Labels for ``X``; points labelled 0 are drawn red, all others blue.
        title: Figure title.
        x_range: ``(min, max)`` of the grid along the horizontal axis.
        y_range: ``(min, max)`` of the grid along the vertical axis.
        step: Grid spacing along both axes.

    The defaults cover the unit square with a small margin, which suits the
    XOR inputs; pass other ranges for data on a different scale.
    """
    x_min, x_max = x_range
    y_min, y_max = y_range
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
    # Flatten the grid into a list of (x1, x2) points for a single forward pass
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    # forward() is used instead of predict() so the continuous output is shown
    Z = model.forward(mesh_points)
    Z = Z.reshape(xx.shape)
    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, levels=50, alpha=0.8, cmap='RdBu')
    plt.colorbar(label='Salida')
    colors = ['red' if label == 0 else 'blue' for label in y.flatten()]
    plt.scatter(X[:, 0], X[:, 1], c=colors, s=200, edgecolors='black', linewidth=2)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.title(title)
    plt.grid(True, alpha=0.3)
    plt.show()
# Learning curve of the best XOR model: training MSE per epoch on a log scale
fig_xor, ax_xor = plt.subplots(figsize=(10, 4))
ax_xor.plot(mejor_modelo_xor.train_errors, linewidth=2)
ax_xor.set_xlabel('Época')
ax_xor.set_ylabel('MSE')
ax_xor.set_title('Curva de Aprendizaje - XOR')
ax_xor.set_yscale('log')
ax_xor.grid(True, alpha=0.3)
plt.show()
# Decision surface of that same model over the XOR inputs
plot_decision_boundary(mejor_modelo_xor, X_xor, y_xor, "Frontera de Decisión XOR")

In [None]:
# Regression problem: load the dataset, or fall back to synthetic data
try:
    data_train = pd.read_parquet('data_train.parquet')
    data_test = pd.read_parquet('data_test.parquet')
except Exception as exc:
    # Best-effort fallback, but without a bare 'except': KeyboardInterrupt and
    # SystemExit must still propagate, and the reason for the fallback should
    # be visible instead of silently switching datasets.
    print(f"No se pudieron cargar los archivos parquet ({exc!r}); se generan datos de ejemplo.")
    X_range = np.linspace(-5, 5, 1000)
    # Small cubic trend plus a sine wave plus Gaussian noise (std 0.1)
    y_function = X_range ** 3 * 0.001 + np.sin(X_range * 2) * 0.5 + np.random.normal(0, 0.1, 1000)
    # First 80% of the (ordered) range is train, the last 20% is test
    split = int(0.8 * len(X_range))
    data_train = pd.DataFrame({'X': X_range[:split], 'Y': y_function[:split]})
    data_test = pd.DataFrame({'X': X_range[split:], 'Y': y_function[split:]})
# Double brackets keep the arrays two-dimensional (one column each)
X_train = data_train[['X']].values
y_train = data_train[['Y']].values
X_test = data_test[['X']].values
y_test = data_test[['Y']].values
# Standardize with statistics from the training split only, and reuse them
# for the test split
X_mean, X_std = X_train.mean(), X_train.std()
y_mean, y_std = y_train.mean(), y_train.std()
X_train_norm = (X_train - X_mean) / X_std
X_test_norm = (X_test - X_mean) / X_std
y_train_norm = (y_train - y_mean) / y_std
y_test_norm = (y_test - y_mean) / y_std


In [None]:
# Regression hyperparameter search
configs_regression = [
    dict(hidden_size=8, learning_rate=0.01, epochs=3000, activation='relu'),
    dict(hidden_size=16, learning_rate=0.005, epochs=4000, activation='relu'),
    dict(hidden_size=32, learning_rate=0.003, epochs=5000, activation='relu'),
    dict(hidden_size=16, learning_rate=0.01, epochs=3000, activation='tanh'),
    dict(hidden_size=24, learning_rate=0.008, epochs=4000, activation='tanh'),
    dict(hidden_size=12, learning_rate=0.02, epochs=2500, activation='sigmoid'),
]
# Track the configuration with the highest R^2; -inf guarantees the first
# trained model is accepted.
mejor_r2 = float('-inf')
mejor_modelo_reg = None
for config in configs_regression:
    mlp = MultiLayerPerceptron(input_size=1, output_size=1, task='regression', **config)
    mlp.fit(X_train_norm, y_train_norm, verbose=False)
    # Predict in normalized space, then map back to the original target scale
    y_pred_norm = mlp.predict(X_test_norm)
    y_pred = y_pred_norm * y_std + y_mean
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    # NOTE(review): the winner is chosen by its score on the test split.
    if r2 > mejor_r2:
        mejor_r2, mejor_modelo_reg = r2, mlp


In [None]:
# Regression plots
# 1) Learning curve of the selected model: training MSE per epoch, log scale
fig_curve, ax_curve = plt.subplots(figsize=(10, 4))
ax_curve.plot(mejor_modelo_reg.train_errors, linewidth=2)
ax_curve.set_xlabel('Época')
ax_curve.set_ylabel('MSE')
ax_curve.set_title('Curva de Aprendizaje - Regresión')
ax_curve.set_yscale('log')
ax_curve.grid(True, alpha=0.3)
plt.show()
# 2) Train/test points with the model's predictions on the test inputs,
#    mapped back from normalized space to the original target scale
fig_fit, ax_fit = plt.subplots(figsize=(12, 6))
ax_fit.plot(X_train, y_train, '.', label='Train')
ax_fit.plot(X_test, y_test, '.', label='Test')
pred_test = mejor_modelo_reg.predict(X_test_norm) * y_std + y_mean
ax_fit.plot(X_test, pred_test, linewidth=2, label='Predicción MLP')
ax_fit.set_xlabel('X')
ax_fit.set_ylabel('Y')
ax_fit.set_title('Regresión: Predicción del MLP')
ax_fit.legend()
ax_fit.grid(True, alpha=0.3)
plt.show()
