# Les imports

In [1]:
from torch.utils.data import DataLoader
import torchvision.transforms as T
from torch.utils.data import Dataset
from PIL import Image
import os
import matplotlib.pyplot as plt

import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks import ModelCheckpoint

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

ModuleNotFoundError: No module named 'torchvision'

In [2]:
# Report whether PyTorch can see a usable CUDA device (the cell displays the boolean result).
torch.cuda.is_available()

False

Check GPU stats:

In [None]:
# Run the `nvidia-smi` shell command through IPython's `!` escape.
!nvidia-smi

# Los geht's !

### Definition des classes d'objets

#### Classe de nos datas

In [None]:
class GeoEye1(Dataset):
    """Image-classification dataset read from a ``<root_dir>/<split>/<class_name>/`` folder tree.

    Every file in a class folder whose name ends with one of ``extensions``
    (case-insensitive) becomes one sample. Samples are stored in ``self.data``
    as ``(image_path, label_index)`` tuples, sorted so the order is the same
    on every run.

    Args:
        root_dir: Folder that contains one sub-folder per split.
        split: Name of the split sub-folder to read (e.g. ``'train'``).
        transforms: Optional callable applied to the RGB PIL image in
            ``__getitem__``; its return value is what the dataset yields.
        extensions: File-name suffixes to accept. Defaults to ``(".jpeg",)``.
            A single string is accepted as well.
    """

    # mapping between label class names and indices
    LABEL_CLASSES = {
        'no_damage': 0,
        'damage': 1,
    }

    def __init__(self, root_dir="data/ipeo_hurricane_for_students", split='train', transforms=None,
                 extensions=(".jpeg",)):
        self.transforms = transforms

        # Normalise the accepted suffixes to a lower-case tuple for str.endswith.
        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)

        # Folder of the requested split (train / test / validation)
        split_dir = os.path.join(root_dir, split)

        # List of (image_path, label) tuples
        self.data = []

        # One sub-folder per class (damage / no_damage)
        for class_name, class_idx in self.LABEL_CLASSES.items():
            class_dir = os.path.join(split_dir, class_name)

            # A missing class folder is skipped rather than treated as an error.
            if not os.path.isdir(class_dir):
                continue

            # Collect every matching image of the folder
            for img_name in os.listdir(class_dir):
                if img_name.lower().endswith(suffixes):
                    img_path = os.path.join(class_dir, img_name)
                    self.data.append((img_path, class_idx))

        # os.listdir order is arbitrary; sort for reproducibility
        self.data.sort()

    def __len__(self):
        """Return the number of samples found at construction time."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened, converted to RGB and, when ``self.transforms`` is
        set, passed through it.
        """
        img_path, label = self.data[idx]

        # The context manager closes the file even if decoding fails; the
        # converted image is a separate object and stays usable afterwards.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        if self.transforms is not None:
            img = self.transforms(img)

        return img, label


NameError: name 'Dataset' is not defined

##### Calculs préliminaires de la mean et std des train data

In [3]:
# Dataset without augmentation and without Normalize, used only to compute mean/std
transforms_for_stats = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor()
])

train_dataset_raw = GeoEye1(
    root_dir="data/ipeo_hurricane_for_students",
    split="train",
    transforms=transforms_for_stats
)

# GeoEye1 silently skips missing folders, so fail here with a clear message
# instead of dividing by zero further down.
if len(train_dataset_raw) == 0:
    raise RuntimeError(
        "No training images found under data/ipeo_hurricane_for_students/train"
    )

loader = DataLoader(train_dataset_raw, batch_size=32, shuffle=False, num_workers=4)

# Per-channel mean and std over ALL training pixels.
# Averaging per-image standard deviations does not give the dataset standard
# deviation, so accumulate sum(x) and sum(x^2) per channel and derive
# std = sqrt(E[x^2] - E[x]^2) once at the end.
channel_sum = 0.
channel_sq_sum = 0.
total_images = 0
total_pixels = 0

for imgs, _ in loader:
    batch_size = imgs.size(0)
    # Flatten H and W -> (batch, channels, pixels); float64 keeps the running
    # sums accurate over the whole dataset.
    imgs = imgs.view(batch_size, imgs.size(1), -1).double()
    channel_sum += imgs.sum(dim=(0, 2))
    channel_sq_sum += (imgs ** 2).sum(dim=(0, 2))
    total_images += batch_size
    total_pixels += batch_size * imgs.size(2)

mean = channel_sum / total_pixels
# clamp guards against a tiny negative variance caused by rounding
std = (channel_sq_sum / total_pixels - mean ** 2).clamp(min=0).sqrt()

# Back to float32 so the statistics match the dtype of the image tensors.
mean = mean.float()
std = std.float()

print("Mean =", mean)
print("Std  =", std)


NameError: name 'T' is not defined

#### preprocess des images, data augmentation...

In [None]:
# Per-channel standardisation with the statistics computed on the training split.
normalize = T.Normalize(mean=mean, std=std)

# Inverse of `normalize` (for display): x = x_norm * std + mean, written as a
# Normalize with mean' = -mean / std and std' = 1 / std. The epsilon avoids a
# division by zero if a channel has zero std.
std_inv = 1 / (std + 1e-7)
unnormalize = T.Normalize(mean=-mean * std_inv, std=std_inv)

# Random augmentations, applied to the PIL image in this exact order.
augmentation_steps = [
    T.RandomResizedCrop(size=(224, 224), antialias=True),
    T.RandomGrayscale(p=0.7),
    T.RandomHorizontalFlip(p=0.8),
    T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0)),
    T.RandomPosterize(bits=6, p=0.5),
    T.RandomVerticalFlip(p=0.5),
    T.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.3),
]

# Training pipeline: augmentations, then resize -> tensor -> normalize.
transforms_train = T.Compose(
    augmentation_steps + [T.Resize((224, 224)), T.ToTensor(), normalize]
)

# Validation pipeline: no augmentation, only resize -> tensor -> normalize.
transforms_val = T.Compose(
    [T.Resize((224, 224)), T.ToTensor(), normalize]
)


#### Visualiser quelques images du dataset

In [None]:
# dataset original (sans augmentations)
raw_ds = GeoEye1("data/ipeo_hurricane_for_students", "train",
                 transforms=T.Compose([T.Resize((224,224)), T.ToTensor()]))

# dataset avec augmentations
aug_ds = GeoEye1("data/ipeo_hurricane_for_students", "train",
                 transforms=transforms_train)

def show_tensor_image(t):
    img = unnormalize(t).clamp(0,1).permute(1,2,0).cpu().numpy()
    plt.imshow(img)
    plt.axis("off")

plt.figure(figsize=(10,6))
for i in range(4):
    raw_img, _ = raw_ds[i]
    aug_img, _ = aug_ds[i]

    plt.subplot(4,2,2*i+1)
    plt.title("Brute")
    show_tensor_image(raw_img)

    plt.subplot(4,2,2*i+2)
    plt.title("Transformée")
    show_tensor_image(aug_img)

plt.tight_layout()
plt.show()

A `torch.utils.data.DataLoader` is a wrapper around a `torch.utils.data.Dataset` that loads multiple images per batch **in parallel** (on CPU and RAM), which speeds up training by loading data faster.

In [None]:
# NOTE: this whole cell is a bare triple-quoted string, so nothing in it is executed.
# It holds a disabled definition of LightningClassifierModelWrapper; remove the
# surrounding quotes to define the class.
"""class LightningClassifierModelWrapper(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)
        acc = (y_hat.argmax(1) == y).float().mean()

        self.log("val_loss", loss, prog_bar=True)
        self.log("val_accuracy", acc, prog_bar=True)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01, momentum=0.9)"""

### Instanciation des objets

In [None]:
# NOTE: this whole cell is a bare triple-quoted string, so nothing in it is executed.
# The disabled code builds the train/validation datasets and loaders, a ResNet-18
# with a 2-output final layer, the Lightning wrapper and the Trainer.
"""train_dataset = GeoEye1("data/ipeo_hurricane_for_students", "train", transforms_train)
val_dataset = GeoEye1("data/ipeo_hurricane_for_students", "validation", transforms_val)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=8)
val_loader   = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

model = torchvision.models.resnet18(weights="IMAGENET1K_V1")
model.fc = torch.nn.Linear(model.fc.in_features, 2)

lightning_model = LightningClassifierModelWrapper(model)

trainer = pl.Trainer(
    max_epochs=50,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    logger=pl.loggers.TensorBoardLogger("logs/", name="hurricane"),
    callbacks=[
        ModelCheckpoint(
            dirpath="checkpoints",
            filename="resnet18-{epoch}-{val_accuracy:.2f}",
            monitor="val_accuracy",
            mode="max"
        )
    ]
)
"""

'model = torchvision.models.resnet18(num_classes=2)\npytorch_lightning_model = LightningClassifierModelWrapper(model)\n\ntb_logger = pl_loggers.TensorBoardLogger(save_dir="logs/", name="experimentname")\n\ncheckpoint_callback = ModelCheckpoint(\n    dirpath=\'checkpoints\',\n    filename=\'alexnet-{epoch}-{val_accuracy:.2f}\',\n    monitor="val_accuracy",\n    mode="max"\n    )\n\ntrainer = pl.Trainer(max_epochs=100, accelerator="cpu", devices=1,        # replace by accelerator="gpu", devices=[0]\n                     logger=tb_logger, callbacks=[checkpoint_callback])'

### Appel des fonctions

In [None]:
# NOTE: the call below is wrapped in a string literal, so no training is started by this cell.
"""trainer.fit(lightning_model, train_loader, val_loader)"""