# Práctica 0: Implementación de Perceptrón Multicapa desde Cero

Implementación de un perceptrón multicapa (MLP) sin usar frameworks de deep learning, solo NumPy.

Objetivos:
- Implementar forward pass y backpropagation manualmente
- Probar con XOR (problema no linealmente separable)
- Aplicar el modelo a un problema de regresión


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Seed NumPy's global random generator so every np.random draw made after
# this point is reproducible from run to run.
np.random.seed(42)
# Select the matplotlib style sheet used for the plots.
# NOTE(review): the 'seaborn-v0_8' style name is only available in recent
# matplotlib releases (3.6+, as far as I know) — confirm the installed version.
plt.style.use('seaborn-v0_8')
print('OK setup')


In [None]:
class MultiLayerPerceptron:
    """Perceptron with one hidden layer, trained by full-batch gradient descent.

    The network computes ``out = g(act(X @ W1 + b1) @ W2 + b2)`` where ``act``
    is the hidden activation and ``g`` is the identity when
    ``task == 'regression'`` and a sigmoid for any other ``task`` value.
    The quantity minimised (and recorded in ``train_errors``) is the mean
    squared error between targets and outputs.

    Args:
        input_size: Number of input features (columns of ``X``).
        hidden_size: Number of hidden units.
        output_size: Number of output units (columns of ``y``).
        learning_rate: Step size applied to every parameter update.
        epochs: Number of full-batch updates performed by ``fit``.
        activation: ``'tanh'`` or ``'sigmoid'``; any other value selects ReLU.
        task: ``'regression'`` for a linear output layer; any other value
            applies a sigmoid to the output layer.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1, epochs=2000, activation='tanh', task='regression'):
        # Xavier/Glorot-style initialisation: std = sqrt(2 / (fan_in + fan_out)).
        xavier_std = np.sqrt(2.0 / (input_size + hidden_size))
        self.W1 = np.random.normal(0, xavier_std, (input_size, hidden_size))
        self.b1 = np.zeros((1, hidden_size))
        xavier_std2 = np.sqrt(2.0 / (hidden_size + output_size))
        self.W2 = np.random.normal(0, xavier_std2, (hidden_size, output_size))
        self.b2 = np.zeros((1, output_size))
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.activation = activation
        self.task = task
        # One MSE value per epoch, appended by fit(); it is never reset, so
        # repeated fit() calls extend the same history.
        self.train_errors = []

    @staticmethod
    def _sigmoid(x):
        """Logistic sigmoid; the input is clipped to [-250, 250] so exp() cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))

    def _act(self, x):
        """Apply the hidden activation to the pre-activations ``x``."""
        if self.activation == 'tanh':
            return np.tanh(x)
        if self.activation == 'sigmoid':
            return self._sigmoid(x)
        # Any other activation name falls through to ReLU.
        return np.maximum(0, x)

    def _act_d(self, a):
        """Derivative of the hidden activation, expressed in terms of its output ``a``."""
        if self.activation == 'tanh':
            return 1 - a * a
        if self.activation == 'sigmoid':
            return a * (1 - a)
        # ReLU: the output is positive exactly where the pre-activation was.
        return (a > 0).astype(float)

    def _as_target(self, y):
        """Return ``y`` as a float array shaped to match the network output.

        A 1-D target of shape (n,) subtracted from an (n, 1) output would
        broadcast to (n, n); for single-output networks it is therefore
        reshaped to a column. Targets of any other shape are left unchanged.
        """
        target = np.asarray(y, dtype=float)
        if target.ndim == 1 and self.b2.shape[1] == 1:
            target = target.reshape(-1, 1)
        return target

    def forward(self, X):
        """Run a forward pass, cache the intermediates for ``backward``, and return the output."""
        self.z1 = X @ self.W1 + self.b1
        self.a1 = self._act(self.z1)
        self.z2 = self.a1 @ self.W2 + self.b2
        self.out = self.z2 if self.task == 'regression' else self._sigmoid(self.z2)
        return self.out

    def backward(self, X, y):
        """Apply one gradient-descent update using the activations cached by ``forward``.

        ``forward(X)`` must have been called with the same ``X`` immediately
        before, because this method reads ``self.out`` and ``self.a1``.
        """
        y = self._as_target(y)
        m = X.shape[0]
        # err is (target - prediction), i.e. the negative MSE gradient up to a
        # constant factor, which is why the parameters are updated with "+=".
        err = y - self.out
        # For the sigmoid output layer, chain through its derivative out * (1 - out).
        d_out = err if self.task == 'regression' else err * self.out * (1 - self.out)
        # Back-propagate to the hidden layer before W2 is modified.
        d_h = (d_out @ self.W2.T) * self._act_d(self.a1)
        self.W2 += (self.a1.T @ d_out) / m * self.learning_rate
        self.b2 += d_out.mean(0, keepdims=True) * self.learning_rate
        self.W1 += (X.T @ d_h) / m * self.learning_rate
        self.b1 += d_h.mean(0, keepdims=True) * self.learning_rate

    def fit(self, X, y, verbose=False):
        """Train for ``self.epochs`` full-batch epochs, appending each epoch's MSE to ``train_errors``.

        Args:
            X: Input array of shape (n_samples, input_size).
            y: Targets of shape (n_samples, output_size); for single-output
                networks a 1-D array of length n_samples is also accepted.
            verbose: When true, print the MSE on the first epoch and then
                roughly every 10% of the epochs.
        """
        y = self._as_target(y)
        report_every = max(1, self.epochs // 10)
        for epoch in range(self.epochs):
            self.forward(X)
            self.backward(X, y)
            # self.out still holds the pre-update forward pass, so this is the
            # loss of the parameters as they were at the start of the epoch.
            loss = np.mean((y - self.out) ** 2)
            self.train_errors.append(loss)
            if verbose and (epoch == 0 or (epoch + 1) % report_every == 0):
                print(f'Epoch {epoch + 1}/{self.epochs} - MSE: {loss:.6f}')

    def predict(self, X):
        """Return the network output for ``X`` (this also overwrites the cached activations)."""
        return self.forward(X)


In [None]:
# Simple synthetic regression data: a noisy quadratic-plus-sine curve on [-2, 2].
X = np.linspace(-2, 2, 600).reshape(-1, 1)
y = 0.8*X[:,0]**2 + 0.6*np.sin(2*X[:,0]) + 0.2*X[:,0] + np.random.normal(0, 0.15, 600)
y = y.reshape(-1, 1)

# X is sorted, so slicing off the last 20% would put every test point in the
# x > 1.2 region the model never trained on, and R2 would measure
# extrapolation instead of generalisation. Shuffle the indices first.
# A dedicated generator is used so the global np.random stream (and therefore
# the weight initialisation below) is not perturbed by the shuffle.
split = int(0.8 * len(X))
perm = np.random.default_rng(42).permutation(len(X))
train_idx, test_idx = perm[:split], perm[split:]
Xtr, Xte = X[train_idx], X[test_idx]
ytr, yte = y[train_idx], y[test_idx]

# Standardise inputs and targets using statistics from the training split only.
Xmu, Xsd = Xtr.mean(), Xtr.std()
Ymu, Ysd = ytr.mean(), ytr.std()
XtrN = (Xtr - Xmu) / Xsd
XteN = (Xte - Xmu) / Xsd
ytrN = (ytr - Ymu) / Ysd

# Train on the normalised data.
mlp = MultiLayerPerceptron(1, 32, 1, learning_rate=0.1, epochs=2500, activation='tanh', task='regression')
mlp.fit(XtrN, ytrN)

# Predict in normalised space, then map back to the original target scale.
ypN = mlp.predict(XteN)
yp = ypN * Ysd + Ymu
print('R2:', r2_score(yte, yp))
