# **1. CONFIGURACIÓN GENERAL**

In [1]:
import itertools
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [4]:
# Default hyperparameters for the notebook.
BATCH_SIZE, LR, EPOCHS = 64, 1e-3, 5

# Seed PyTorch's global random number generator so runs are repeatable.
torch.manual_seed(42)

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# **2. CARGA DE DATOS**

In [5]:
# Convert each image to a tensor, then standardize it with mean 0.1307 and
# standard deviation 0.3081.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))]
)

# Build the training split first and the test split second; both share the
# same root folder, preprocessing and download-if-missing setting.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root="./data", train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

# Training batches are reshuffled; test batches keep the dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






# **3. DEFINICIÓN DE MODELOS**

In [11]:
class MLP(nn.Module):
    """Fully connected classifier.

    The network flattens each sample and applies, for every entry of
    ``hidden_layers``, ``Linear -> activation -> (Dropout)``, followed by a
    final ``Linear`` layer that produces ``output_size`` raw logits.

    Args:
        input_size: Number of features per sample after flattening
            (784 = 28 x 28).
        hidden_layers: Width of each hidden layer, in order. An empty
            sequence yields a single linear layer.
        output_size: Number of output logits.
        activation: ``'relu'`` or ``'tanh'`` (case-insensitive).
        dropout: Dropout probability inserted after each hidden activation;
            values ``<= 0`` insert no dropout layer.

    Raises:
        ValueError: If ``activation`` is not a supported name.
    """

    def __init__(self,
                 input_size=784,   # 28 x 28
                 hidden_layers=(128, 64),
                 output_size=10,
                 activation='relu',
                 dropout=0.0):
        super().__init__()

        activation_classes = {'relu': nn.ReLU, 'tanh': nn.Tanh}
        activation_cls = activation_classes.get(activation.lower())
        if activation_cls is None:
            raise ValueError("Error en la función de activación.")

        layers = []
        in_features = input_size
        for h in hidden_layers:
            layers.append(nn.Linear(in_features, h))
            # A fresh activation module per layer: sharing one instance makes
            # hooks and module listings treat all hidden layers as one module.
            layers.append(activation_cls())
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            in_features = h

        layers.append(nn.Linear(in_features, output_size))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return logits of shape ``(batch, output_size)`` for the batch ``x``."""
        # flatten() copies when needed, so non-contiguous inputs (e.g. a
        # transposed batch) work; view() would raise on them.
        x = x.flatten(start_dim=1)
        return self.net(x)

class SimpleCNN(nn.Module):
    """Small convolutional classifier for square images.

    Each entry of ``num_filters`` adds a block
    ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, stride 2) -> (Dropout2d)``.
    The 3x3 convolution with padding 1 keeps the spatial size and each pooling
    step halves it (rounding down). The flattened features then go through
    ``Linear -> ReLU -> (Dropout)`` for every entry of ``fc_sizes`` and a
    final ``Linear`` layer producing ``output_size`` raw logits.

    Args:
        num_filters: Output channels of each convolutional block, in order.
            May be empty, in which case the raw image is fed to the
            fully connected head.
        fc_sizes: Width of each hidden fully connected layer, in order.
        output_size: Number of output logits.
        dropout: Dropout probability used after every block; values ``<= 0``
            insert no dropout layers.
        in_channels: Number of channels of the input images (1 = grayscale).
        input_size: Height/width in pixels of the square input images.

    Attributes:
        flatten_size: Number of features entering the fully connected head.

    Raises:
        ValueError: If there are so many pooling steps that the feature map
            would shrink to zero pixels.
    """

    def __init__(self,
                 num_filters=(32, 64),
                 fc_sizes=(128,),
                 output_size=10,
                 dropout=0.0,
                 in_channels=1,
                 input_size=28):
        super().__init__()

        num_filters = list(num_filters)

        # Repeated floor-halving equals one floor division by 2**k.
        pooled_size = input_size // 2 ** len(num_filters)
        if pooled_size < 1:
            raise ValueError(
                "Demasiadas capas convolucionales para el tamaño de entrada."
            )

        conv_layers = []
        channels = in_channels
        for nf in num_filters:
            conv_layers.append(nn.Conv2d(channels, nf, kernel_size=3, padding=1))
            conv_layers.append(nn.ReLU())
            conv_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            if dropout > 0:
                conv_layers.append(nn.Dropout2d(dropout))
            channels = nf

        self.conv = nn.Sequential(*conv_layers)
        # `channels` is the last conv width, or `in_channels` when there are
        # no conv blocks (the original indexed num_filters[-1] and crashed).
        self.flatten_size = channels * pooled_size ** 2

        fc_layers = []
        in_features = self.flatten_size
        for fs in fc_sizes:
            fc_layers.append(nn.Linear(in_features, fs))
            fc_layers.append(nn.ReLU())
            if dropout > 0:
                fc_layers.append(nn.Dropout(dropout))
            in_features = fs

        fc_layers.append(nn.Linear(in_features, output_size))

        self.fc = nn.Sequential(*fc_layers)

    def forward(self, x):
        """Return logits of shape ``(batch, output_size)`` for the image batch ``x``."""
        x = self.conv(x)
        x = x.flatten(start_dim=1)
        return self.fc(x)

# **4. FUNCIONES DE ENTRENAMIENTO Y VALIDACIÓN**

In [12]:
def train_one_epoch(model, dataloader, criterion, optimizer, device=None):
    """Run one optimisation pass over ``dataloader`` and report its metrics.

    Args:
        model: Network to train; it is switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The running
            loss is weighted by batch size, which assumes the criterion
            returns the batch mean (the default reduction of the losses used
            in this notebook).
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if it has none), so the
            function does not depend on a notebook-level global.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the fraction of correctly classified samples, both measured on the
        batches as they were seen during training.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Undo the per-batch mean so that a smaller last batch is not
        # over-weighted in the epoch average.
        running_loss += loss.item() * images.size(0)
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    if total == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("El dataloader no produjo ninguna muestra.")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def evaluate(model, dataloader, criterion, device=None):
    """Measure loss and accuracy of ``model`` on ``dataloader`` without training.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The running
            loss is weighted by batch size, which assumes the criterion
            returns the batch mean (the default reduction of the losses used
            in this notebook).
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if it has none), so the
            function does not depend on a notebook-level global.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the fraction of correctly classified samples.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("El dataloader no produjo ninguna muestra.")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

# **5. EXPERIMENTACIÓN CON MLP**

In [13]:
def run_mlp_experiment(hidden_layers_list, activations, dropouts, optimizers, lrs, batch_sizes, num_epochs):
    """Train an MLP for every hyperparameter combination and print the best one.

    For each combination a fresh ``MLP`` is trained for ``num_epochs`` epochs
    on the notebook-level ``train_dataset`` and evaluated after every epoch on
    ``test_dataset``; per-epoch metrics and the wall-clock time are printed.
    The best configuration is the one with the highest accuracy after the
    final epoch (ties keep the earliest combination).

    NOTE(review): the "validation" metrics are computed on the test split, so
    the selected configuration is tuned on test data. Holding out part of the
    training set for selection would give an unbiased final test number.

    Args:
        hidden_layers_list: Candidate hidden-layer layouts (each a list of widths).
        activations: Candidate activation names accepted by ``MLP``.
        dropouts: Candidate dropout probabilities.
        optimizers: Candidate optimizer names, ``'sgd'`` (momentum 0.9) or ``'adam'``.
        lrs: Candidate learning rates.
        batch_sizes: Candidate batch sizes.
        num_epochs: Epochs to train each configuration.

    Returns:
        None. Results are reported through ``print``.

    Raises:
        ValueError: If an optimizer name is not supported.
    """
    best_acc = 0.0
    best_config = None

    # Same visiting order as six nested loops: the rightmost list varies fastest.
    grid = itertools.product(hidden_layers_list, activations, dropouts,
                             optimizers, lrs, batch_sizes)

    for hl, act, dr, opt_name, lr_, bs in grid:
        train_loader_tmp = DataLoader(train_dataset, batch_size=bs, shuffle=True)
        test_loader_tmp = DataLoader(test_dataset, batch_size=bs, shuffle=False)

        model = MLP(hidden_layers=hl, activation=act, dropout=dr).to(device)
        criterion = nn.CrossEntropyLoss()

        if opt_name == 'sgd':
            optimizer = optim.SGD(model.parameters(), lr=lr_, momentum=0.9)
        elif opt_name == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=lr_)
        else:
            raise ValueError("Optimizador no soportado.")

        print(f"\nEntrenando MLP con config: {hl}, act={act}, dropout={dr}, "
              f"opt={opt_name}, lr={lr_}, batch_size={bs}")

        # Stays None when num_epochs is 0, so an untrained configuration is
        # never compared (the original hit an unbound `val_acc` here).
        val_acc = None

        start_time = time.time()
        for epoch in range(num_epochs):
            train_loss, train_acc = train_one_epoch(model, train_loader_tmp, criterion, optimizer)
            val_loss, val_acc = evaluate(model, test_loader_tmp, criterion)
            print(f"[Epoch {epoch+1}/{num_epochs}] "
                  f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        elapsed = time.time() - start_time
        print(f"Tiempo de entrenamiento: {elapsed:.2f} seg\n")

        if val_acc is not None and val_acc > best_acc:
            best_acc = val_acc
            best_config = (hl, act, dr, opt_name, lr_, bs, val_acc, elapsed)

    print("Mejor configuración MLP encontrada:")
    if best_config is not None:
        hl, act, dr, opt_name, lr_, bs, val_acc, time_ = best_config
        print(f"  hidden_layers={hl}, activation={act}, dropout={dr}, optimizer={opt_name}, "
              f"  lr={lr_}, batch_size={bs}, best_val_acc={val_acc:.4f}, time={time_:.2f}s")
    else:
        print("  No se encontraron configuraciones válidas (revisa tus parámetros).")

# **6. EXPERIMENTACIÓN CON CNN**

In [14]:
def run_cnn_experiment(filters_list, fc_sizes_list, dropouts, optimizers, lrs, batch_sizes, num_epochs):
    """Train a SimpleCNN for every hyperparameter combination and print the best one.

    For each combination a fresh ``SimpleCNN`` is trained for ``num_epochs``
    epochs on the notebook-level ``train_dataset`` and evaluated after every
    epoch on ``test_dataset``; per-epoch metrics and the wall-clock time are
    printed. The best configuration is the one with the highest accuracy after
    the final epoch (ties keep the earliest combination).

    NOTE(review): the "validation" metrics are computed on the test split, so
    the selected configuration is tuned on test data. Holding out part of the
    training set for selection would give an unbiased final test number.

    Args:
        filters_list: Candidate convolutional layouts (each a list of channel counts).
        fc_sizes_list: Candidate fully connected layouts (each a list of widths).
        dropouts: Candidate dropout probabilities.
        optimizers: Candidate optimizer names, ``'sgd'`` (momentum 0.9) or ``'adam'``.
        lrs: Candidate learning rates.
        batch_sizes: Candidate batch sizes.
        num_epochs: Epochs to train each configuration.

    Returns:
        None. Results are reported through ``print``.

    Raises:
        ValueError: If an optimizer name is not supported.
    """
    best_acc = 0.0
    best_config = None

    # Same visiting order as six nested loops: the rightmost list varies fastest.
    grid = itertools.product(filters_list, fc_sizes_list, dropouts,
                             optimizers, lrs, batch_sizes)

    for filt, fc, dr, opt_name, lr_, bs in grid:
        train_loader_tmp = DataLoader(train_dataset, batch_size=bs, shuffle=True)
        test_loader_tmp = DataLoader(test_dataset, batch_size=bs, shuffle=False)

        model = SimpleCNN(num_filters=filt, fc_sizes=fc, dropout=dr).to(device)
        criterion = nn.CrossEntropyLoss()

        if opt_name == 'sgd':
            optimizer = optim.SGD(model.parameters(), lr=lr_, momentum=0.9)
        elif opt_name == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=lr_)
        else:
            raise ValueError("Optimizador no soportado.")

        print(f"\nEntrenando CNN con config: filtros={filt}, fc={fc}, dropout={dr}, "
              f"opt={opt_name}, lr={lr_}, batch_size={bs}")

        # Stays None when num_epochs is 0, so an untrained configuration is
        # never compared (the original hit an unbound `val_acc` here).
        val_acc = None

        start_time = time.time()
        for epoch in range(num_epochs):
            train_loss, train_acc = train_one_epoch(model, train_loader_tmp, criterion, optimizer)
            val_loss, val_acc = evaluate(model, test_loader_tmp, criterion)
            print(f"[Epoch {epoch+1}/{num_epochs}] "
                  f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        elapsed = time.time() - start_time
        print(f"Tiempo de entrenamiento: {elapsed:.2f} seg\n")

        if val_acc is not None and val_acc > best_acc:
            best_acc = val_acc
            best_config = (filt, fc, dr, opt_name, lr_, bs, val_acc, elapsed)

    print("Mejor configuración CNN encontrada:")
    if best_config is not None:
        filt, fc, dr, opt_name, lr_, bs, val_acc, time_ = best_config
        print(f"  filtros={filt}, fc={fc}, dropout={dr}, optimizer={opt_name}, "
              f"  lr={lr_}, batch_size={bs}, best_val_acc={val_acc:.4f}, time={time_:.2f}s")
    else:
        print("  No se encontraron configuraciones válidas (revisa tus parámetros).")


# **7. COMPARACIÓN DE RESULTADOS**

In [17]:
if __name__ == "__main__":
    # Settings shared by the MLP and CNN sweeps.
    optimizers_list = ['sgd', 'adam']
    lrs = [1e-3]
    batch_sizes = [64]
    num_epochs = 3

    # ---- 7.1 MLP sweep: one architecture, with and without dropout ----
    hidden_layers_list = [[128, 64]]
    activations = ['relu']
    dropouts = [0.0, 0.2]

    run_mlp_experiment(
        hidden_layers_list=hidden_layers_list, activations=activations,
        dropouts=dropouts, optimizers=optimizers_list,
        lrs=lrs, batch_sizes=batch_sizes, num_epochs=num_epochs,
    )

    # ---- 7.2 CNN sweep: two filter layouts, no dropout ----
    filters_list = [[32, 64], [64, 128]]
    fc_sizes_list = [[128]]
    # Rebinds the MLP dropout grid; the CNN sweep runs without dropout.
    dropouts = [0.0]

    run_cnn_experiment(
        filters_list=filters_list, fc_sizes_list=fc_sizes_list,
        dropouts=dropouts, optimizers=optimizers_list,
        lrs=lrs, batch_sizes=batch_sizes, num_epochs=num_epochs,
    )



Entrenando MLP con config: [128, 64], act=relu, dropout=0.0, opt=sgd, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.8595, Train Acc: 0.7761 | Val Loss: 0.3468, Val Acc: 0.9009
[Epoch 2/3] Train Loss: 0.3152, Train Acc: 0.9091 | Val Loss: 0.2679, Val Acc: 0.9233
[Epoch 3/3] Train Loss: 0.2577, Train Acc: 0.9254 | Val Loss: 0.2284, Val Acc: 0.9334
Tiempo de entrenamiento: 78.87 seg


Entrenando MLP con config: [128, 64], act=relu, dropout=0.0, opt=adam, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.2708, Train Acc: 0.9207 | Val Loss: 0.1317, Val Acc: 0.9593
[Epoch 2/3] Train Loss: 0.1139, Train Acc: 0.9654 | Val Loss: 0.0974, Val Acc: 0.9715
[Epoch 3/3] Train Loss: 0.0794, Train Acc: 0.9748 | Val Loss: 0.1138, Val Acc: 0.9647
Tiempo de entrenamiento: 64.90 seg


Entrenando MLP con config: [128, 64], act=relu, dropout=0.2, opt=sgd, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 1.0137, Train Acc: 0.7016 | Val Loss: 0.3722, Val Acc: 0.8983
[Epoch 2/3] Train Loss: 0.4242, Train Ac

## **RESULTADOS COMPLETOS (EN CASO DE QUE NO SE VEA ARRIBA)**

Entrenando MLP con config: [128, 64], act=relu, dropout=0.0, opt=sgd, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.8595, Train Acc: 0.7761 | Val Loss: 0.3468, Val Acc: 0.9009
[Epoch 2/3] Train Loss: 0.3152, Train Acc: 0.9091 | Val Loss: 0.2679, Val Acc: 0.9233
[Epoch 3/3] Train Loss: 0.2577, Train Acc: 0.9254 | Val Loss: 0.2284, Val Acc: 0.9334
Tiempo de entrenamiento: 78.87 seg


Entrenando MLP con config: [128, 64], act=relu, dropout=0.0, opt=adam, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.2708, Train Acc: 0.9207 | Val Loss: 0.1317, Val Acc: 0.9593
[Epoch 2/3] Train Loss: 0.1139, Train Acc: 0.9654 | Val Loss: 0.0974, Val Acc: 0.9715
[Epoch 3/3] Train Loss: 0.0794, Train Acc: 0.9748 | Val Loss: 0.1138, Val Acc: 0.9647
Tiempo de entrenamiento: 64.90 seg


Entrenando MLP con config: [128, 64], act=relu, dropout=0.2, opt=sgd, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 1.0137, Train Acc: 0.7016 | Val Loss: 0.3722, Val Acc: 0.8983
[Epoch 2/3] Train Loss: 0.4242, Train Acc: 0.8755 | Val Loss: 0.2757, Val Acc: 0.9189
[Epoch 3/3] Train Loss: 0.3358, Train Acc: 0.9026 | Val Loss: 0.2283, Val Acc: 0.9321
Tiempo de entrenamiento: 69.94 seg


Entrenando MLP con config: [128, 64], act=relu, dropout=0.2, opt=adam, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.3520, Train Acc: 0.8935 | Val Loss: 0.1380, Val Acc: 0.9570
[Epoch 2/3] Train Loss: 0.1689, Train Acc: 0.9493 | Val Loss: 0.1066, Val Acc: 0.9684
[Epoch 3/3] Train Loss: 0.1315, Train Acc: 0.9602 | Val Loss: 0.0958, Val Acc: 0.9708
Tiempo de entrenamiento: 68.81 seg

Mejor configuración MLP encontrada:
  hidden_layers=[128, 64], activation=relu, dropout=0.2, optimizer=adam,   lr=0.001, batch_size=64, best_val_acc=0.9708, time=68.81s

Entrenando CNN con config: filtros=[32, 64], fc=[128], dropout=0.0, opt=sgd, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.5691, Train Acc: 0.8423 | Val Loss: 0.1958, Val Acc: 0.9410
[Epoch 2/3] Train Loss: 0.1582, Train Acc: 0.9532 | Val Loss: 0.1184, Val Acc: 0.9625
[Epoch 3/3] Train Loss: 0.1008, Train Acc: 0.9700 | Val Loss: 0.0837, Val Acc: 0.9753
Tiempo de entrenamiento: 192.52 seg


Entrenando CNN con config: filtros=[32, 64], fc=[128], dropout=0.0, opt=adam, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.1360, Train Acc: 0.9581 | Val Loss: 0.0381, Val Acc: 0.9875
[Epoch 2/3] Train Loss: 0.0420, Train Acc: 0.9867 | Val Loss: 0.0395, Val Acc: 0.9871
[Epoch 3/3] Train Loss: 0.0286, Train Acc: 0.9911 | Val Loss: 0.0303, Val Acc: 0.9898
Tiempo de entrenamiento: 180.03 seg


Entrenando CNN con config: filtros=[64, 128], fc=[128], dropout=0.0, opt=sgd, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.4442, Train Acc: 0.8799 | Val Loss: 0.1640, Val Acc: 0.9529
[Epoch 2/3] Train Loss: 0.1304, Train Acc: 0.9614 | Val Loss: 0.0888, Val Acc: 0.9735
[Epoch 3/3] Train Loss: 0.0879, Train Acc: 0.9740 | Val Loss: 0.0665, Val Acc: 0.9804
Tiempo de entrenamiento: 369.55 seg


Entrenando CNN con config: filtros=[64, 128], fc=[128], dropout=0.0, opt=adam, lr=0.001, batch_size=64
[Epoch 1/3] Train Loss: 0.1183, Train Acc: 0.9636 | Val Loss: 0.0410, Val Acc: 0.9861
[Epoch 2/3] Train Loss: 0.0401, Train Acc: 0.9871 | Val Loss: 0.0353, Val Acc: 0.9883
[Epoch 3/3] Train Loss: 0.0267, Train Acc: 0.9915 | Val Loss: 0.0290, Val Acc: 0.9891
Tiempo de entrenamiento: 399.08 seg

Mejor configuración CNN encontrada:
  filtros=[32, 64], fc=[128], dropout=0.0, optimizer=adam,   lr=0.001, batch_size=64, best_val_acc=0.9898, time=180.03s

##### **MLP (Perceptrón Multicapa):**
Mejor configuración encontrada:

* hidden_layers: [128, 64]
* activation: relu
* dropout: 0.2
* optimizer: adam
* learning_rate: 0.001
* batch_size: 64
* Validation Accuracy: 97.08% (medida sobre el conjunto de prueba de MNIST)
* Training Time: 68.81 segundos

Observaciones:

* Con Adam, incluir dropout (0.2) dio la mejor precisión de validación al final del entrenamiento (97.08% frente a 96.47% sin dropout), con una penalización mínima en tiempo (68.81 s frente a 64.90 s). La diferencia es pequeña y proviene de una sola ejecución de 3 épocas, por lo que debe interpretarse con cautela.
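* Adam mostró un mejor desempeño que SGD con la misma tasa de aprendizaje (0.001) y solo 3 épocas (96.47–97.08% frente a 93.21–93.34% en validación), lo que es coherente con su ajuste adaptativo de la tasa de aprendizaje por parámetro; SGD probablemente necesitaría una tasa mayor o más épocas para acercarse.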
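* Con Adam y dropout=0.0, la precisión de validación bajó de 97.15% a 96.47% entre las épocas 2 y 3 mientras la pérdida de entrenamiento seguía disminuyendo (indicio de sobreajuste), algo que no ocurrió con dropout=0.2. Con SGD, en cambio, el dropout no mejoró la validación (93.21% frente a 93.34%).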

##### **CNN (Red Neuronal Convolucional):**
Mejor configuración encontrada:

* filtros: [32, 64]
* fc_layers: [128]
* dropout: 0.0
* optimizer: adam
* learning_rate: 0.001
* batch_size: 64
* Validation Accuracy: 98.98% (medida sobre el conjunto de prueba de MNIST)
* Training Time: 180.03 segundos

Observaciones:

* La arquitectura más simple ([32, 64] filtros) combinada con Adam tuvo un tiempo de entrenamiento razonable (180.03 s) y alcanzó la mayor precisión de validación (98.98%).
* Al aumentar la cantidad de filtros ([64, 128]) con Adam, la precisión de entrenamiento apenas mejoró (99.15% frente a 99.11%) y la de validación no mejoró (98.91% frente a 98.98%), mientras que el tiempo de entrenamiento aumentó a más del doble (399.08 s frente a 180.03 s).
* Adam superó nuevamente a SGD en ambas arquitecturas (98.98% y 98.91% frente a 97.53% y 98.04% en validación); con lr=0.001 y solo 3 épocas, SGD con momentum converge más lentamente.

### **Comparación MLP vs. CNN**

##### **Precisión:**

* La CNN supera al MLP en validación por aproximadamente 1.9 puntos porcentuales (98.98% frente a 97.08%), una diferencia relevante en tareas con imágenes u otros datos con estructura espacial.

##### **Tiempo de Entrenamiento:**

* El MLP tiene una ventaja significativa en tiempo de entrenamiento: su mejor configuración entrenó unas 2.6 veces más rápido que la mejor CNN (68.81 s frente a 180.03 s).
* Esto sugiere que para casos donde el tiempo es crítico y una precisión ligeramente menor es aceptable, MLP podría ser preferible.

##### **Robustez y Complejidad:**

* CNN maneja características espaciales mejor que MLP, lo que explica su mejor desempeño en validación.
* MLP es más simple y menos costoso computacionalmente, lo que lo hace más adecuado para conjuntos de datos más pequeños o tabulares.