## NOTEBOOK 1 ##
IMPLEMENTACIÓN DE UNA RED NEURONAL FEEDFORWARD DESDE CERO (NumPy)

El objetivo es construir una red con 2+ capas ocultas, activaciones Sigmoid/ReLU/Tanh, inicialización Xavier/He, forward, backward y entrenamiento con gradiente descendente.

El notebook es reutilizable para el pipeline de clasificación de sentimientos del Proyecto 2 (se conectará en el Notebook 02 con TF-IDF).

1. Imports y utilidades

In [None]:
import numpy as np
import time


2. Activaciones y derivadas

In [None]:
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    The value is computed as exp(-log(1 + exp(-z))) using ``np.logaddexp``,
    which never evaluates exp() on a large positive argument. The naive
    formula overflows (and emits a RuntimeWarning) for very negative ``z``.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0, -z))
def dsigmoid(a):
    """Return the sigmoid derivative expressed through its output ``a``.

    ``a`` is the already-activated value (sigmoid output), not the
    pre-activation; the result is a * (1 - a).
    """
    complement = 1 - a
    return a * complement

def tanh(z):
    """Return the element-wise hyperbolic tangent of ``z``."""
    activated = np.tanh(z)
    return activated
def dtanh(a):
    """Return the tanh derivative expressed through its output ``a``.

    ``a`` is the already-activated value (tanh output); the result is
    1 - a**2.
    """
    squared = a ** 2
    return 1 - squared

def relu(z):
    """Return max(0, z) element-wise (rectified linear unit)."""
    rectified = np.maximum(0, z)
    return rectified
def drelu(z):
    """Return the ReLU derivative for pre-activation ``z``.

    The result is 1.0 where z > 0 and 0.0 elsewhere (including z == 0),
    as a float array.
    """
    positive_mask = z > 0
    return positive_mask.astype(float)

def softmax(z):
    """Return the row-wise softmax of a 2-D array of scores.

    Each row has its maximum subtracted before exponentiation so that
    exp() never receives a positive argument; this leaves the result
    unchanged mathematically and avoids overflow.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    shifted_exp = np.exp(z - row_max)
    row_totals = np.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / row_totals



3. Inicialización Xavier/He

In [None]:
def init_weights(fan_in, fan_out, init="he"):
    """Draw a (fan_in, fan_out) weight matrix from a scaled standard normal.

    fan_in:  number of inputs to the layer (rows of the matrix).
    fan_out: number of units in the layer (columns of the matrix).
    init:    "he" uses a standard deviation of sqrt(2 / fan_in); any other
             value (the "xavier" option) uses sqrt(1 / fan_in).

    Samples come from NumPy's global random state (np.random.randn).
    """
    numerator = 2.0 if init == "he" else 1.0
    std = np.sqrt(numerator / fan_in)
    return np.random.randn(fan_in, fan_out) * std


4. Clase NeuralNetwork (2+ capas ocultas)

In [None]:
class NeuralNetwork:
    """Fully connected feed-forward classifier trained with mini-batch SGD.

    All hidden layers share one activation function; the output layer is a
    softmax and the loss is categorical cross-entropy.
    """

    def __init__(self, layers, activation="relu", init="he", seed=42):
        """
        layers: list of layer widths, e.g. [n_in, 128, 64, n_out]
        activation: "relu" | "tanh" | "sigmoid" (any other value falls back
            to sigmoid)
        init: "he" | "xavier", forwarded to init_weights
        seed: passed to np.random.seed, i.e. reseeds NumPy's global RNG
        """
        np.random.seed(seed)
        self.layers = layers
        self.activation_name = activation
        self.init = init

        # One weight matrix and one bias row per pair of consecutive layers.
        self.W = []
        self.b = []
        for i in range(len(layers)-1):
            self.W.append(init_weights(layers[i], layers[i+1], init=init))
            self.b.append(np.zeros((1, layers[i+1])))

    def _act(self, z):
        """Apply the configured hidden-layer activation to pre-activation z."""
        if self.activation_name == "relu": return relu(z)
        if self.activation_name == "tanh": return tanh(z)
        return sigmoid(z)

    def _dact(self, z, a):
        """Return the activation derivative.

        ReLU's derivative is computed from the pre-activation ``z``; the
        tanh and sigmoid derivatives are computed from the output ``a``.
        """
        if self.activation_name == "relu": return drelu(z)
        if self.activation_name == "tanh": return dtanh(a)
        return dsigmoid(a)

    def forward(self, X):
        """Run a forward pass and return the softmax output.

        Every layer's pre-activation (Z) and activation (A) is stored in
        self.cache for use by backward(); cache["A"][0] is the input X.
        """
        A = X
        self.cache = {"A": [X], "Z": []}
        # hidden layers
        for i in range(len(self.W)-1):
            Z = A @ self.W[i] + self.b[i]
            A = self._act(Z)
            self.cache["Z"].append(Z)
            self.cache["A"].append(A)
        # output layer (softmax)
        ZL = A @ self.W[-1] + self.b[-1]
        AL = softmax(ZL)
        self.cache["Z"].append(ZL)
        self.cache["A"].append(AL)
        return AL

    def compute_loss(self, y_true, y_pred):
        """Return the mean categorical cross-entropy.

        y_true is one-hot encoded; y_pred holds the predicted probabilities.
        """
        eps = 1e-9  # keeps log() finite when a predicted probability is 0
        return -np.mean(np.sum(y_true * np.log(y_pred + eps), axis=1))

    def backward(self, X, y_true):
        """
        Backprop for softmax + cross-entropy:
        dZL = (y_pred - y_true)/m

        Uses the activations cached by the most recent forward() call and
        returns (dW, db), lists of gradients aligned with self.W / self.b.
        X is only used for the batch size m.
        """
        m = X.shape[0]
        A_list = self.cache["A"]
        Z_list = self.cache["Z"]
        y_pred = A_list[-1]

        dW = [None]*len(self.W)
        db = [None]*len(self.b)

        dZ = (y_pred - y_true) / m  # output layer

        # last layer
        A_prev = A_list[-2]
        dW[-1] = A_prev.T @ dZ
        db[-1] = np.sum(dZ, axis=0, keepdims=True)

        # hidden layers, walking backwards
        dA_prev = dZ @ self.W[-1].T

        for i in reversed(range(len(self.W)-1)):
            Z = Z_list[i]
            A = A_list[i+1]  # A_list is offset by one because A_list[0] is X
            dZ = dA_prev * self._dact(Z, A)
            A_prev = A_list[i]
            dW[i] = A_prev.T @ dZ
            db[i] = np.sum(dZ, axis=0, keepdims=True)
            if i != 0:
                # no gradient w.r.t. the input is needed for the first layer
                dA_prev = dZ @ self.W[i].T

        return dW, db

    def train(self, X, y, epochs=20, lr=0.01, batch_size=32, verbose=True):
        """Train with mini-batch gradient descent and return the history.

        X: feature matrix, one row per sample.
        y: integer class labels in [0, layers[-1]).
        epochs, lr, batch_size: number of passes, learning rate, batch size.
        verbose: print metrics on epoch 1 and every 5th epoch.

        Returns a dict with per-epoch "loss" and "acc" lists, both measured
        on the full training set after the epoch's updates.

        Raises ValueError when a label falls outside [0, layers[-1]).
        """
        y = np.asarray(y)

        # The one-hot width must equal the softmax width. Deriving it from
        # the labels actually present gives the wrong shape whenever a
        # class is missing from y.
        n_classes = self.layers[-1]
        if y.size and (y.min() < 0 or y.max() >= n_classes):
            raise ValueError(
                f"labels must be integers in [0, {n_classes}) to match the "
                f"output layer; got values in [{y.min()}, {y.max()}]"
            )
        y_onehot = np.eye(n_classes)[y]

        history = {"loss": [], "acc": []}
        n_samples = len(X)
        for epoch in range(1, epochs+1):
            # reshuffle the samples at the start of every epoch
            idx = np.random.permutation(n_samples)
            Xs, ys = X[idx], y_onehot[idx]

            t0 = time.time()
            for start in range(0, n_samples, batch_size):
                Xb = Xs[start:start+batch_size]
                yb = ys[start:start+batch_size]

                # forward() fills self.cache, which backward() consumes
                self.forward(Xb)
                dW, db = self.backward(Xb, yb)

                # gradient descent step (in place on the stored arrays)
                for W, b, grad_W, grad_b in zip(self.W, self.b, dW, db):
                    W -= lr * grad_W
                    b -= lr * grad_b

            # metrics on the full training set
            y_pred_full = self.forward(X)
            loss = self.compute_loss(y_onehot, y_pred_full)
            acc = np.mean(np.argmax(y_pred_full, axis=1) == y)

            history["loss"].append(loss)
            history["acc"].append(acc)

            if verbose and (epoch == 1 or epoch % 5 == 0):
                print(f"Epoch {epoch:02d} | loss={loss:.4f} | acc={acc:.4f} | {time.time()-t0:.1f}s")
        return history

    def predict(self, X):
        """Return (predicted class indices, class probabilities) for X."""
        probs = self.forward(X)
        return np.argmax(probs, axis=1), probs


5. Mini prueba rápida

In [None]:
# Synthetic data: 300 samples, 20 features, labels drawn at random from 3 classes
np.random.seed(0)
X_demo = np.random.randn(300, 20)
y_demo = np.random.choice([0, 1, 2], size=300)

# Two hidden layers (64 and 32 units) between the 20 inputs and 3 outputs
nn = NeuralNetwork(
    layers=[20, 64, 32, 3],
    activation="relu",
    init="he",
)
hist = nn.train(
    X_demo,
    y_demo,
    epochs=10,
    lr=0.01,
    batch_size=32,
)


Epoch 01 | loss=1.3654 | acc=0.3333 | 0.0s
Epoch 05 | loss=1.1918 | acc=0.3533 | 0.0s
Epoch 10 | loss=1.0990 | acc=0.4200 | 0.0s
