In [5]:
!pip install scipy opencv-python



In [6]:
import os
import pandas as pd
from PIL import Image
import torch
from kagglehub import dataset_download
import numpy as np
import cv2
from scipy.ndimage import gaussian_filter
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import kagglehub

In [11]:
# Download latest version
path = kagglehub.dataset_download("agungpambudi/mnist-multiple-dataset-comprehensive-analysis")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/mnist-multiple-dataset-comprehensive-analysis


In [12]:
dataset_path = "/kaggle/input/mnist-multiple-dataset-comprehensive-analysis"
base_path = os.path.join(dataset_path, "PolyMNIST", "MMNIST")

data = []

for split in ["train", "test"]:
    split_path = os.path.join(base_path, split)
    if not os.path.exists(split_path):
        continue
    for modality in os.listdir(split_path):
        modality_path = os.path.join(split_path, modality)
        if not os.path.isdir(modality_path):
            continue
        for file in os.listdir(modality_path):
            if file.endswith(".png"):
                try:
                    label = int(file.split(".")[1])  # Ej: 1234.5.png → 5
                    full_path = os.path.join(modality_path, file)
                    data.append({
                        "file_path": full_path,
                        "label": label,
                        "modality": modality,
                        "split": split
                    })
                except Exception as e:
                    print(f"Error con archivo: {file} → {e}")

df = pd.DataFrame(data)
print("DataFrame generado con", len(df), "registros.")
print(df.head())

DataFrame generado con 350000 registros.
                                           file_path  label modality  split
0  /kaggle/input/mnist-multiple-dataset-comprehen...      9       m4  train
1  /kaggle/input/mnist-multiple-dataset-comprehen...      0       m4  train
2  /kaggle/input/mnist-multiple-dataset-comprehen...      0       m4  train
3  /kaggle/input/mnist-multiple-dataset-comprehen...      9       m4  train
4  /kaggle/input/mnist-multiple-dataset-comprehen...      9       m4  train


In [19]:
class ElasticTransform:
    def __init__(self, alpha=36, sigma=6):
        self.alpha = alpha
        self.sigma = sigma

    def __call__(self, img: Image.Image) -> Image.Image:
        arr = np.array(img)
        shape = arr.shape[:2]
        # campos de desplazamiento
        dx = (np.random.rand(*shape)*2 - 1)
        dy = (np.random.rand(*shape)*2 - 1)
        dx = gaussian_filter(dx, self.sigma) * self.alpha
        dy = gaussian_filter(dy, self.sigma) * self.alpha
        # mallas
        x, y = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]))
        map_x = (x + dx).astype(np.float32)
        map_y = (y + dy).astype(np.float32)
        # aplica remap canal por canal
        warped = np.stack([
            cv2.remap(arr[...,c], map_x, map_y,
                      interpolation=cv2.INTER_LINEAR,
                      borderMode=cv2.BORDER_REFLECT_101)
            for c in range(3)
        ], axis=2)
        return Image.fromarray(warped)

# Transforms para train / test
transform_train = transforms.Compose([
    transforms.Resize((28,28)),
    ElasticTransform(alpha=36, sigma=6),
    transforms.ToTensor(),                                # [0,1]
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))     # [-1,1]
])
transform_test = transforms.Compose([
    transforms.Resize((28,28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

In [23]:
class RGBMNISTDataset(Dataset):
    def __init__(self, df, split, transform):
        self.sub = df[df['split']==split].reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.sub)

    def __getitem__(self, i):
        row = self.sub.loc[i]
        img = Image.open(row.file_path).convert("RGB")
        return self.transform(img), row.label

train_ds = RGBMNISTDataset(df, 'train', transform_train)
test_ds  = RGBMNISTDataset(df, 'test',  transform_test)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True,  num_workers=8)
test_loader  = DataLoader(test_ds,  batch_size=64, shuffle=False, num_workers=8)



In [24]:
class TwoLayerNet(nn.Module):
    def __init__(self, input_dim=3*28*28, hidden=800, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden)
        self.fc2 = nn.Linear(hidden, num_classes)

    def forward(self, x):
        # x: [B,3,28,28]
        x = x.view(x.size(0), -1)    # aplanar → [B,3*28*28]
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [25]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TwoLayerNet().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

def evaluate(loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x,y in loader:
            x,y = x.to(device), y.to(device)
            preds = model(x).argmax(dim=1)
            correct += (preds==y).sum().item()
            total   += y.size(0)
    return 100*correct/total

# Bucle de entrenamiento
epochs = 5
for epoch in range(1, epochs+1):
    model.train()
    total_loss = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out  = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * y.size(0)

    train_acc = evaluate(train_loader)
    test_acc  = evaluate(test_loader)
    avg_loss  = total_loss / len(train_loader.dataset)

    print(f"Epoch {epoch}/{epochs} | "
          f"Loss: {avg_loss:.4f} | "
          f"Train Acc: {train_acc:.2f}% | "
          f"Test Acc: {test_acc:.2f}%")

Epoch 1/5 | Loss: 0.9284 | Train Acc: 77.19% | Test Acc: 85.29%
Epoch 2/5 | Loss: 0.7175 | Train Acc: 80.32% | Test Acc: 87.60%
Epoch 3/5 | Loss: 0.6560 | Train Acc: 81.60% | Test Acc: 88.28%
Epoch 4/5 | Loss: 0.6225 | Train Acc: 82.70% | Test Acc: 89.22%
Epoch 5/5 | Loss: 0.6000 | Train Acc: 81.12% | Test Acc: 87.69%
