# Intel Image Classification — PyTorch CNN with Augmentations

Este cuaderno implementa **todas** las partes del ejercicio:
- Carga de imágenes con `ImageFolder`.
- Visualización de muestras originales y transformadas.
- **≥5** técnicas de *data augmentation* con `torchvision.transforms`.
- CNN con ≥2 capas convolucionales, pooling y capa totalmente conectada.
- Entrenamiento (30 épocas) con imágenes **aumentadas**.
- Evaluación en imágenes **sin aumento** (precisión y pérdida).

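**Nota:** Antes de ejecutar, descarga y descomprime el dataset *Intel Image Classification* de Kaggle.
Normalmente la estructura es `.../seg_train/` y `.../seg_test/`, cada una con una subcarpeta por clase (6 clases: `buildings`, `forest`, `glacier`, `mountain`, `sea`, `street`). En la descarga de Kaggle estas carpetas suelen venir anidadas (p. ej. `seg_train/seg_train/`).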


In [None]:
# ==== Imports y configuración ====

import os

import time

import math

from pathlib import Path

import numpy as np

import torch

import torch.nn as nn

import torch.optim as optim

from torch.utils.data import DataLoader, Subset

import torchvision

from torchvision import transforms

from torchvision.datasets import ImageFolder

import matplotlib.pyplot as plt



# Sanity check: this notebook uses the classic ``torchvision.transforms`` API
# (not ``transforms.v2``), so fail early with a clear message if the
# ``Compose`` entry point is missing.
try:
    _ = transforms.Compose
except AttributeError as e:
    # Chain the original error so the traceback still shows the real cause.
    raise RuntimeError("Se requiere torchvision con 'transforms'.") from e

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Bare expression: shown as the cell output when run inside a notebook.
DEVICE


## 1) Rutas del dataset

In [None]:
# Set this to the folder where you unpacked the dataset.
# Examples of supported layouts:
# - DATA_DIR/seg_train, DATA_DIR/seg_test
# - DATA_DIR/train, DATA_DIR/test
DATA_DIR = Path('/path/a/tu/Intel')  # <-- CHANGE THIS PATH



def _descend_if_nested(split_dir: Path) -> Path:
    """Return ``split_dir/<same name>`` when that is a directory, else ``split_dir``."""
    nested = split_dir / split_dir.name
    return nested if nested.is_dir() else split_dir


def find_split_dirs(root: Path):
    """Locate the training and test image folders under ``root``.

    Several naming conventions are tried in order and the first pair in which
    both entries are existing directories wins. If a matched folder contains a
    sub-folder with its own name (e.g. ``seg_train/seg_train``), that inner
    folder is returned instead, so the class folders end up directly below the
    returned path.

    Args:
        root: Folder where the dataset was unpacked.

    Returns:
        A ``(train_dir, test_dir)`` tuple of ``Path`` objects.

    Raises:
        FileNotFoundError: If none of the known layouts exists under ``root``.
    """
    candidates = [
        (root / 'seg_train', root / 'seg_test'),
        (root / 'train', root / 'test'),
        (root / 'segmentation_data' / 'seg_train', root / 'segmentation_data' / 'seg_test'),
    ]
    for tr, te in candidates:
        # is_dir() rather than exists(): a stray file with a matching name
        # must not be accepted as a dataset split.
        if tr.is_dir() and te.is_dir():
            return _descend_if_nested(tr), _descend_if_nested(te)
    raise FileNotFoundError(
        f"No se encontraron carpetas de train/test en {root}. Asegúrate de que existan, p.ej. seg_train y seg_test.")



# Resolve the split folders once and echo them so a wrong DATA_DIR is easy to spot.
train_dir, test_dir = find_split_dirs(DATA_DIR)

print('Train dir:', train_dir)

print('Test dir :', test_dir)


## 2) Transformaciones
Se definen dos *pipelines*:
- **`tfm_plain`** (sin aumento): *Resize(150×150) → ToTensor → Normalize*.
- **`tfm_aug`** (con ≥5 aumentos):
  1. `RandomResizedCrop(150)`
  2. `RandomHorizontalFlip(0.5)`
  3. `RandomRotation(15°)`
  4. `ColorJitter` (brillo/contraste/saturación/tono)
  5. `RandomGrayscale(p=0.1)`
  6. (opcional) `GaussianBlur` si está disponible en tu versión de torchvision


In [None]:
# Side length (pixels) every image is resized/cropped to.
IMG_SIZE = 150

# Per-channel statistics used by Normalize (these appear to be the standard
# ImageNet values - confirm if you swap datasets).
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

# No augmentation (for the test set and for displaying the 'original' images).
tfm_plain = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Augmentations (training only).
aug_list = [
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    transforms.RandomGrayscale(p=0.1)
]

# GaussianBlur is optional: add it only when this torchvision version provides
# it. An explicit feature check keeps genuine errors (e.g. a bad argument)
# visible instead of silently swallowing every exception.
if hasattr(transforms, 'GaussianBlur'):
    aug_list.append(transforms.GaussianBlur(kernel_size=3))

# Tensor conversion and normalisation always come last, after the PIL-level augmentations.
tfm_aug = transforms.Compose(aug_list + [
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Datasets: the training folder is exposed twice (augmented / plain) so the
# same images can be viewed with and without augmentation.
train_ds_aug   = ImageFolder(train_dir, transform=tfm_aug)
train_ds_plain = ImageFolder(train_dir, transform=tfm_plain)
test_ds        = ImageFolder(test_dir,  transform=tfm_plain)

class_names = train_ds_aug.classes
num_classes = len(class_names)

# Bare expression: shown as the cell output when run inside a notebook.
class_names, num_classes


## 3) DataLoaders

In [None]:
BATCH_SIZE = 4  # required by the assignment for the visualisation
NUM_WORKERS = 2  # tune for your environment (on Windows this may need to be 0)

# Training batches are reshuffled every epoch; the test order stays fixed.
train_loader = DataLoader(
    dataset=train_ds_aug,
    shuffle=True,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)
test_loader = DataLoader(
    dataset=test_ds,
    shuffle=False,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)

# Bare expression: number of batches per loader, shown as the cell output.
len(train_loader), len(test_loader)


## 4) Visualización: originales vs aumentadas

In [None]:
# To compare 'original' vs 'augmented' versions of the same base images,
# both subsets share one fixed list of indices from the start of the training set.
indices = [*range(min(len(train_ds_plain), 8))]

subset_plain, subset_aug = (
    Subset(ds, indices) for ds in (train_ds_plain, train_ds_aug)
)

# No shuffling, so both loaders yield the images in the same order.
plain_loader, aug_loader = (
    DataLoader(sub, batch_size=4, shuffle=False)
    for sub in (subset_plain, subset_aug)
)



def denormalize(img_tensor, mean, std):
    """Undo a per-channel ``Normalize(mean, std)`` so the image can be displayed.

    Args:
        img_tensor: Tensor whose channel axis is third from last, e.g.
            ``(C, H, W)`` or ``(B, C, H, W)``.
        mean: Per-channel means used for normalisation (one value per channel).
        std: Per-channel standard deviations used for normalisation.

    Returns:
        ``img_tensor * std + mean``, broadcast per channel.
    """
    # Create the statistics on the image's device so GPU tensors work too, and
    # let view(-1, 1, 1) follow the number of channels instead of assuming 3.
    m = torch.tensor(mean, device=img_tensor.device).view(-1, 1, 1)
    s = torch.tensor(std, device=img_tensor.device).view(-1, 1, 1)
    return img_tensor * s + m



# Take the first batch from each loader: the same base images, without and
# with augmentation.
images_p, labels_p = next(iter(plain_loader))
images_a, labels_a = next(iter(aug_loader))

fig = plt.figure(figsize=(10, 5))
fig.suptitle('Originales (arriba) vs Aumentadas (abajo)')

# Row 1: originals, each titled with its class name.
for col, (picture, label) in enumerate(zip(images_p, labels_p), start=1):
    ax = plt.subplot(2, len(images_p), col)
    # Undo the normalisation and move channels last, as imshow expects (H, W, C).
    rgb = denormalize(picture, mean, std).permute(1, 2, 0).numpy()
    ax.imshow(np.clip(rgb, 0, 1))
    ax.set_title(class_names[label.item()])
    ax.axis('off')

# Row 2: augmented versions, placed after the first row of the grid.
for col, picture in enumerate(images_a, start=1):
    ax = plt.subplot(2, len(images_a), len(images_a) + col)
    rgb = denormalize(picture, mean, std).permute(1, 2, 0).numpy()
    ax.imshow(np.clip(rgb, 0, 1))
    ax.axis('off')

plt.tight_layout()
plt.show()


## 5) Definición de la CNN

In [None]:
class SimpleCNN(nn.Module):
    """Small convolutional image classifier.

    Three 3x3 conv + ReLU stages (the first two followed by 2x2 max pooling),
    an adaptive average pool to a fixed 4x4 grid, and a two-layer fully
    connected head with dropout. Thanks to the adaptive pooling the head does
    not depend on the exact input resolution; the notebook feeds batches of
    shape (B, 3, 150, 150).

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        conv_stack = [
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 150 -> 75
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 75 -> 37
            # Optional extra block for more capacity (no pooling afterwards).
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Collapse the spatial map to a fixed 4x4 grid so the linear layer's
        # input size is independent of the image resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))

        flat_features = 128 * 4 * 4
        head = [
            nn.Flatten(),
            nn.Linear(flat_features, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.3),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.classifier(self.avgpool(self.features(x)))



# Build the network with one output per class and move it to DEVICE.
model = SimpleCNN(num_classes).to(DEVICE)

# Bare expression: shows the module structure as the cell output in a notebook.
model


## 6) Entrenamiento (30 épocas) con aumentos y evaluación en test sin aumentos

In [None]:
# Multi-class classification loss; it is applied to the model's raw outputs.
criterion = nn.CrossEntropyLoss()

# Adam with a small learning rate and weight decay.
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

# Number of full passes over the training loader.
EPOCHS = 30



def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and report its metrics.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the sample-weighted average of
        the batch losses and ``accuracy`` is the fraction of correct
        predictions, both over everything ``loader`` yielded.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight each batch loss by its size so a smaller final batch is
        # averaged correctly (assumes the criterion returns a batch mean).
        loss_sum += batch_loss.item() * batch_x.size(0)
        # Predicted class = index of the largest logit in each row.
        n_correct += (logits.max(dim=1).indices == batch_y).sum().item()
        n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen



@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating it.

    Runs with gradient tracking disabled and the model in evaluation mode.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the sample-weighted average of
        the batch losses and ``accuracy`` is the fraction of correct
        predictions, both over everything ``loader`` yielded.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        logits = model(batch_x)
        # Weight each batch loss by its size so a smaller final batch is
        # averaged correctly (assumes the criterion returns a batch mean).
        loss_sum += criterion(logits, batch_y).item() * batch_x.size(0)
        # Predicted class = index of the largest logit in each row.
        n_correct += (logits.max(dim=1).indices == batch_y).sum().item()
        n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen



# Per-epoch metrics, kept so the learning curves can be plotted afterwards.
history = { 'train_loss': [], 'train_acc': [], 'test_loss': [], 'test_acc': [] }

# perf_counter() is meant for measuring durations and is not affected by
# adjustments of the system clock, unlike time.time().
start = time.perf_counter()

for epoch in range(1, EPOCHS+1):
    # Train on the augmented images, then score on the un-augmented test split.
    tr_loss, tr_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    te_loss, te_acc = evaluate(model, test_loader, criterion, DEVICE)

    history['train_loss'].append(tr_loss)
    history['train_acc'].append(tr_acc)
    history['test_loss'].append(te_loss)
    history['test_acc'].append(te_acc)

    print(f"Epoch {epoch:02d}/{EPOCHS} | Train Loss: {tr_loss:.4f} Acc: {tr_acc:.4f} | Test Loss: {te_loss:.4f} Acc: {te_acc:.4f}")

end = time.perf_counter()

print(f"Tiempo total: {end-start:.1f}s")


## 7) Curvas de pérdida y precisión

In [None]:
# Plots (no specific styles or colours).
# The x axis is derived from the number of recorded epochs rather than from
# EPOCHS, so it always matches the data, e.g. after an interrupted run.
epochs = range(1, len(history['train_loss']) + 1)

# One figure per metric: (history key suffix, legend suffix, y label, title).
for key, legend, ylabel, title in (
    ('loss', 'Loss', 'Pérdida', 'Pérdida durante entrenamiento/evaluación'),
    ('acc', 'Acc', 'Precisión', 'Precisión durante entrenamiento/evaluación'),
):
    plt.figure(figsize=(8,4))
    plt.plot(epochs, history['train_' + key], label='Train ' + legend)
    plt.plot(epochs, history['test_' + key],  label='Test ' + legend)
    plt.xlabel('Época')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.show()


## 8) Precisión final

In [None]:
# Final pass over the un-augmented test set with the trained weights.
final_test_loss, final_test_acc = evaluate(model, test_loader, criterion, DEVICE)

# Report both metrics rounded to four decimals.
print(dict(
    final_test_loss=round(final_test_loss, 4),
    final_test_acc=round(final_test_acc, 4),
))
