
# Taller 04: Tareas de Pretexto

* María Sofía Uribe
* Javier Daza Olivella


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset
import numpy as np
from PIL import Image
from tqdm import tqdm


Implementar un modelo CNN o ViT.

• Puede ser un modelo de alguna librería (Keras) o una implementación “vanilla” de una CNN

• Inventar una tarea de pretexto

• Entrenar en ImageNet


Definimos la tarea de pretexto: rotación.
Cada imagen se rota en uno de 4 ángulos (0°, 90°, 180° o 270°) y el modelo debe predecir cuál se aplicó.


Construimos la clase Dataset, que será la encargada de implementar `__getitem__`, el método que luego usará el DataLoader para obtener cada muestra.

In [None]:

class RotationDataset(Dataset):
    """Rotation-prediction (pretext task) dataset built on an ``ImageFolder``.

    Every image found under ``root`` is exposed once per angle in ``ANGLES``.
    The label returned with each sample is the index of the rotation that was
    applied; the folder (class) label from ``ImageFolder`` is discarded.

    Args:
        root: Directory handed to ``datasets.ImageFolder``.
        transform: Optional callable applied to the image after rotating it.
    """

    # Rotation angles in degrees; the position in this tuple is the class label.
    ANGLES = (0, 90, 180, 270)

    def __init__(self, root, transform=None):
        self.dataset = datasets.ImageFolder(root=root)
        self.transform = transform

    def __len__(self):
        """Return the number of (image, rotation) samples."""
        return len(self.dataset) * len(self.ANGLES)

    def __getitem__(self, idx):
        """Return ``(rotated_image, rotation_class)`` for the flat index ``idx``.

        Consecutive indices walk through all rotations of one image before
        moving on to the next image.
        """
        n_rotations = len(self.ANGLES)
        img_idx = idx // n_rotations
        rot_class = idx % n_rotations
        img, _ = self.dataset[img_idx]  # the folder label is not used

        # expand=True grows the output canvas to hold the whole rotated image.
        # Without it a non-square image rotated by 90/270 degrees is cropped to
        # the original canvas and padded with black corners. For square images
        # the result is the same as before.
        img = img.rotate(self.ANGLES[rot_class], expand=True)

        if self.transform:
            img = self.transform(img)

        return img, rot_class



Con el compose buscamos transformar cada imagen que entra al dataloader:

- Hacemos un Resize

- Luego hacemos un crop al centro de la imagen 

- Luego convertimos a tensor

- Finalmente normalizamos usando la media y la desviación estándar por canal de ImageNet (los valores estándar usados con los modelos preentrenados de torchvision)

In [None]:

# Preprocessing applied by RotationDataset to every image after it is rotated:
# bring it to 64x64, convert it to a tensor and normalize each channel with the
# standard ImageNet mean / std.
preprocessing_steps = [
    transforms.Resize(64),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)

# Tiny ImageNet training split wrapped in the rotation pretext dataset.
train_dir = "./data/tiny-imagenet-200/train"
dataset = RotationDataset(train_dir, transform)

# Shuffled loader: batches of 128 samples, loaded by 4 worker processes.
dataloader = DataLoader(
    dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
)


Usaremos el modelo resnet18

Congelamos el modelo, descongelamos las últimas 2 capas (layer3 y layer4) y agregamos una nueva capa lineal que nos dará la predicción final: la clase de rotación.


In [None]:
# Load an ImageNet-pretrained ResNet-18. The `pretrained` flag is deprecated
# since torchvision 0.13 in favor of `weights`; IMAGENET1K_V1 is the checkpoint
# that `pretrained=True` maps to. Older torchvision releases have no weights
# enum, so fall back to the legacy flag there.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Freeze the whole model
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the last two stages (layer3 and layer4) so they can be fine-tuned
for stage in (model.layer3, model.layer4):
    for param in stage.parameters():
        param.requires_grad = True

# Replace the classification head with a new linear layer that outputs the
# 4 rotation classes. It is created after the freeze loop above, so that loop
# never touched its parameters.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 4)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)



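Construimos nuestro criterion (`CrossEntropyLoss`), que será el encargado de calcular la loss; los gradientes se obtienen a partir de ella con `loss.backward()`,

y seleccionamos el optimizer Adam, que ajustará la dirección y el tamaño de los pasos basándose en el gradiente. Solo recibe los parámetros que dejamos entrenables.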

In [None]:

# Loss for the 4-way rotation classification.
criterion = nn.CrossEntropyLoss()

# Adam only receives the parameters that were left trainable.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-4)

# Training
EPOCHS = 10
for epoch in range(EPOCHS):
    model.train()

    # Per-epoch accumulators: sample-weighted loss, correct predictions and
    # number of samples seen so far.
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{EPOCHS}", unit="batch")
    for imgs, labels in pbar:
        imgs = imgs.to(device)
        labels = labels.to(device)

        # One optimization step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(imgs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size before accumulating it, so
        # the running value is an average per sample.
        batch_loss = loss.item()
        loss_sum += batch_loss * imgs.size(0)
        preds = logits.argmax(dim=1)
        n_correct += (preds == labels).sum().item()
        n_seen += labels.size(0)

        running_loss = loss_sum / n_seen
        running_acc = 100 * n_correct / n_seen
        pbar.set_postfix(
            {
                "Batch Loss": f"{batch_loss:.4f}",
                "Running Loss": f"{running_loss:.4f}",
                "Running Acc": f"{running_acc:.2f}%",
            }
        )

    # Metrics over the whole epoch (training data only).
    epoch_loss = loss_sum / n_seen
    epoch_acc = 100 * n_correct / n_seen
    print(f"Epoch {epoch+1} completed: Loss={epoch_loss:.4f}, Acc={epoch_acc:.2f}%")


Epoch 1/10: 100%|██████████| 3125/3125 [02:13<00:00, 23.43batch/s, Batch Loss=0.6643, Running Loss=0.7818, Running Acc=68.23%]


Epoch 1 completed: Loss=0.7818, Acc=68.23%


Epoch 2/10: 100%|██████████| 3125/3125 [02:37<00:00, 19.90batch/s, Batch Loss=0.5236, Running Loss=0.5988, Running Acc=76.41%]


Epoch 2 completed: Loss=0.5988, Acc=76.41%


Epoch 3/10: 100%|██████████| 3125/3125 [02:32<00:00, 20.48batch/s, Batch Loss=0.3857, Running Loss=0.4639, Running Acc=81.99%]


Epoch 3 completed: Loss=0.4639, Acc=81.99%


Epoch 4/10: 100%|██████████| 3125/3125 [02:37<00:00, 19.88batch/s, Batch Loss=0.2470, Running Loss=0.3235, Running Acc=87.60%]


Epoch 4 completed: Loss=0.3235, Acc=87.60%


Epoch 5/10: 100%|██████████| 3125/3125 [02:14<00:00, 23.21batch/s, Batch Loss=0.1997, Running Loss=0.2136, Running Acc=91.93%]


Epoch 5 completed: Loss=0.2136, Acc=91.93%


Epoch 6/10: 100%|██████████| 3125/3125 [01:40<00:00, 31.08batch/s, Batch Loss=0.1441, Running Loss=0.1498, Running Acc=94.44%]


Epoch 6 completed: Loss=0.1498, Acc=94.44%


Epoch 7/10: 100%|██████████| 3125/3125 [01:40<00:00, 31.05batch/s, Batch Loss=0.1127, Running Loss=0.1179, Running Acc=95.67%]


Epoch 7 completed: Loss=0.1179, Acc=95.67%


Epoch 8/10: 100%|██████████| 3125/3125 [01:53<00:00, 27.64batch/s, Batch Loss=0.1064, Running Loss=0.0983, Running Acc=96.41%]


Epoch 8 completed: Loss=0.0983, Acc=96.41%


Epoch 9/10: 100%|██████████| 3125/3125 [01:40<00:00, 31.16batch/s, Batch Loss=0.1027, Running Loss=0.0874, Running Acc=96.83%]


Epoch 9 completed: Loss=0.0874, Acc=96.83%


Epoch 10/10: 100%|██████████| 3125/3125 [01:40<00:00, 31.11batch/s, Batch Loss=0.0657, Running Loss=0.0783, Running Acc=97.19%]

Epoch 10 completed: Loss=0.0783, Acc=97.19%





## Resultados

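- Logramos un accuracy de 97% en la tarea de pretexto, medido sobre el conjunto de entrenamiento (no se evaluó en un conjunto de validación)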
- Usamos un backbone bastante simple: Resnet18 
- 10 Epochs



In [None]:
# Save only the model's state_dict (its parameters and buffers) to disk.
checkpoint_path = "resnet18_rotation_pretext.pth"
torch.save(model.state_dict(), checkpoint_path)