In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt

# -----------------------------
# 1. Data augmentation
# -----------------------------
# Normalisation statistics, one entry per RGB channel, shared by both pipelines.
# (The previous single-element tuples only worked on 3-channel images through
# implicit broadcasting; the resulting values are identical.)
NORM_MEAN = (0.5, 0.5, 0.5)
NORM_STD = (0.5, 0.5, 0.5)

# Training pipeline: random geometric / photometric augmentations on the PIL
# image, then tensor conversion + normalisation, then RandomErasing (which
# operates on the tensor and therefore has to come after ToTensor).
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(32, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
    transforms.RandomErasing(p=0.2)
])

# Evaluation pipeline: no augmentation, same normalisation as training.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD)
])

# -----------------------------
# 2. Load the CIFAR-10 data
# -----------------------------
# NOTE(review): `val_loader` below wraps the CIFAR-10 *test* split (train=False).
# It is later handed to train_model, which uses it for LR scheduling and early
# stopping, so the reported "val" accuracy is not an untouched test estimate.
cifar_kwargs = dict(root='./data', download=True)

train_dataset = datasets.CIFAR10(train=True, transform=train_transforms, **cifar_kwargs)
test_dataset = datasets.CIFAR10(train=False, transform=test_transforms, **cifar_kwargs)

batch_size = 128
# Only the training loader is shuffled; evaluation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# -----------------------------
# 3. Définition du modèle
# -----------------------------
def get_model_improved(device='cpu'):
    """Build the CNN classifier together with its loss, optimizer and LR scheduler.

    The network is a flat ``nn.Sequential`` of three convolutional blocks
    (Conv -> ReLU -> BatchNorm units), a global average pool and a small
    two-layer classifier producing 10 logits.

    Args:
        device: device (string or ``torch.device``) the model is moved to.

    Returns:
        Tuple ``(model, loss_fn, optimizer, scheduler)``:
        the model on ``device``, a label-smoothed cross-entropy loss, an Adam
        optimizer over the model parameters, and a ``ReduceLROnPlateau``
        scheduler (mode ``'min'``) attached to that optimizer.
    """

    def conv_unit(c_in, c_out, kernel=4, pad=1):
        # One Conv -> ReLU -> BatchNorm unit (activation comes before the norm).
        # With the default 4x4 kernel and padding 1 each unit shrinks H and W by 1.
        return [
            nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad, bias=True),
            nn.ReLU(),
            nn.BatchNorm2d(c_out),
        ]

    # The layers are collected in a flat list, in the exact order they appear in
    # the network, so the Sequential indices (and state_dict keys) stay stable.
    layers = []

    # Block 1
    layers += conv_unit(3, 32)
    layers += conv_unit(32, 32)
    layers += [nn.MaxPool2d(2, 2), nn.Dropout2d(0.2)]

    # Block 2
    layers += conv_unit(32, 64)
    layers += conv_unit(64, 64)
    layers += [nn.MaxPool2d(2, 2), nn.Dropout2d(0.3)]

    # Block 3: one 4x4 conv followed by a 1x1 "bottleneck" conv, then global pooling
    layers += conv_unit(64, 128)
    layers += conv_unit(128, 128, kernel=1, pad=0)
    layers += [nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten()]

    # Compact classifier head
    layers += [
        nn.Linear(128, 128),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(128, 10),
    ]

    network = nn.Sequential(*layers).to(device)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    adam = torch.optim.Adam(network.parameters(), lr=5e-4, weight_decay=1e-4)
    # Halve the learning rate when the monitored value stops decreasing for 3 steps.
    plateau = optim.lr_scheduler.ReduceLROnPlateau(adam, mode='min', factor=0.5, patience=3)

    return network, criterion, adam, plateau


# -----------------------------
# 4. Entraînement + Early stopping + Tracking
# -----------------------------
def _run_epoch(model, loss_fn, loader, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy_pct)``.

    If ``optimizer`` is given, the model is put in training mode and updated
    after every batch. Otherwise the model is put in eval mode and gradient
    tracking is disabled.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for X, y in loader:
            X, y = X.to(device), y.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(X)
            loss = loss_fn(outputs, y)
            if training:
                loss.backward()
                optimizer.step()

            # The loss is weighted by the batch size so that the epoch average
            # is taken per sample, even when the last batch is smaller.
            loss_sum += loss.item() * X.size(0)
            n_seen += y.size(0)
            n_correct += (outputs.argmax(dim=1) == y).sum().item()

    if n_seen == 0:
        raise ValueError("loader yielded no samples; cannot compute epoch metrics")
    return loss_sum / n_seen, 100 * n_correct / n_seen


def _plot_history(history):
    """Plot train/validation loss and accuracy curves side by side."""
    epochs_range = range(1, len(history['train_loss']) + 1)

    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, history['train_loss'], label='Train Loss')
    plt.plot(epochs_range, history['val_loss'], label='Val Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss évolution')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, history['train_acc'], label='Train Accuracy')
    plt.plot(epochs_range, history['val_acc'], label='Val Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy ')
    plt.title('Accuracy évolution')
    plt.legend()

    plt.tight_layout()
    plt.show()


def train_model(model, loss_fn, optimizer, scheduler, train_loader, val_loader, device='cpu', epochs=30, patience=5, restore_best=True, plot=True):
    """Train ``model`` with early stopping and per-epoch metric tracking.

    Each epoch runs one training pass and one validation pass, steps
    ``scheduler`` with the validation loss, records the metrics and prints a
    summary line. Training stops early once the validation loss has failed to
    improve by more than 1e-3 for ``patience`` consecutive epochs.

    Args:
        model: the network to train (updated in place).
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: scheduler whose ``step`` accepts the validation loss.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        val_loader: iterable of ``(inputs, targets)`` validation batches.
        device: device the batches are moved to.
        epochs: maximum number of epochs.
        patience: number of consecutive non-improving epochs before stopping.
        restore_best: if True, reload the weights of the best validation epoch
            before returning, so the model is not left at the last (possibly
            worse) epoch.
        plot: if True, display the loss / accuracy curves at the end.

    Returns:
        dict with keys ``'train_loss'``, ``'val_loss'``, ``'train_acc'``,
        ``'val_acc'``, each a list with one entry per completed epoch
        (accuracies are percentages).

    Raises:
        ValueError: if a loader yields no samples.
    """
    min_delta = 1e-3  # minimum decrease in validation loss that counts as an improvement
    best_val_loss = float("inf")
    best_state = None
    patience_counter = 0

    # History for the curves
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(model, loss_fn, train_loader, device, optimizer)
        val_loss, val_acc = _run_epoch(model, loss_fn, val_loader, device)

        scheduler.step(val_loss)

        # Bookkeeping
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(val_acc)

        print(f"Epoch [{epoch+1}/{epochs}] - "
              f"Train loss: {train_loss:.4f} | Train acc: {train_acc:.2f}% | "
              f"Val loss: {val_loss:.4f} | Val acc: {val_acc:.2f}%")

        # Early stopping. The test is written as "improved?" so that a NaN
        # validation loss counts as no improvement instead of resetting patience.
        if val_loss < best_val_loss - min_delta:
            patience_counter = 0
            best_val_loss = val_loss
            if restore_best:
                # Snapshot (cloned, so later updates do not alias it) of the best weights.
                best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    # Leave the model at its best validation epoch rather than at the last one.
    if best_state is not None:
        model.load_state_dict(best_state)

    if plot:
        _plot_history(history)

    return history


# -----------------------------
# 6. Launch
# -----------------------------
# Seed torch's global RNG before the model is built so that weight
# initialisation and torch-driven shuffling are repeatable from run to run
# (GPU kernels may still introduce small non-deterministic differences).
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, loss_fn, optimizer, scheduler = get_model_improved(device)
# Keep whatever train_model returns (if anything) for later inspection instead of discarding it.
history = train_model(model, loss_fn, optimizer, scheduler, train_loader, val_loader, device)

In [None]:
# Once the model has been trained, save its weights and offer the file for download.
# NOTE(review): `files` is not imported in the visible cells; presumably it is
# `from google.colab import files`. Confirm, otherwise the last line raises NameError.
weights_path = "model_weights.pth"
torch.save(model.state_dict(), weights_path)
files.download(weights_path)