In [None]:
# Chapitre 8 – Architectures de réseaux de neurones profonds

In [None]:
# Objectif : découvrir les principales architectures classiques en deep learning  
# Distinguer architecture (brique réutilisable) et modèle (assemblage pour une tâche)  
# Lien utile : https://www.asimovinstitute.org/neural-network-zoo/

In [None]:
## 1. Architectures vs Modèles

# - Architecture = bloc réutilisable (ex: bloc convolutif, bloc résiduel, couche d'attention)  
# - Modèle       = assemblage organisé pour une tâche précise (ex: ResNet-50, BERT, YOLOv8)

In [None]:
# 2.1 Multi-Layer Perceptron (MLP) – bloc de base feed-forward
import torch
import torch.nn as nn

class MLPBlock(nn.Module):
    """Two-layer feed-forward block: Linear -> ReLU -> Linear.

    Args:
        in_features: size of the last dimension of the input.
        hidden_features: width of the hidden layer.
        out_features: size of the last dimension of the output.
    """

    def __init__(self, in_features, hidden_features, out_features):
        super().__init__()
        # Attribute names are kept stable: they determine the state_dict keys.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map ``x`` to the hidden width, apply ReLU, then map to the output width."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        return self.fc2(activated)

# Usage typique : données tabulaires ou tête de classification/régression

In [None]:
# 2.2 Bloc convolutif – brique de base des CNN
class ConvBlock(nn.Module):
    """Basic CNN building block: Conv2d -> BatchNorm2d -> ReLU.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: convolution kernel size (default 3).
        stride: convolution stride (default 1).
        padding: zero-padding passed to the convolution (default 1).
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        # The convolution is created without a bias term; it is followed
        # directly by BatchNorm2d.
        conv_options = dict(stride=stride, padding=padding, bias=False)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, **conv_options)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply convolution, batch normalization and ReLU, in that order."""
        feature_map = self.conv(x)
        normalized = self.bn(feature_map)
        return self.relu(normalized)

# Ordre classique : Conv → BatchNorm → ReLU  (parfois + MaxPool)

In [None]:
# 2.3 Bloc résiduel (Residual Block) – clé de ResNet
class ResidualBlock(nn.Module):
    """Residual block with an identity skip connection: y = ReLU(F(x) + x).

    F is Conv3x3 -> BatchNorm -> ReLU -> Conv3x3 -> BatchNorm. Both
    convolutions keep the channel count and use padding=1 with the default
    stride, so F(x) can be added to x directly.

    Args:
        channels: number of input (and output) channels.
    """

    def __init__(self, channels):
        super().__init__()

        def conv3x3():
            # Channel-preserving 3x3 convolution, no bias.
            return nn.Conv2d(channels, channels, 3, padding=1, bias=False)

        self.conv1 = conv3x3()
        self.bn1 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3()
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        """Return ReLU(F(x) + x); the input tensor itself is not modified."""
        branch = self.conv1(x)
        branch = self.bn1(branch)
        branch = self.relu(branch)
        branch = self.bn2(self.conv2(branch))
        branch += x  # skip connection (added in place onto the branch output)
        return self.relu(branch)

# Formule clé : y = F(x) + x   → permet d'entraîner des réseaux très profonds

In [None]:
## 2.4 Mécanisme d’auto-attention (cœur des Transformers)

# Chaque token pondère dynamiquement l’importance des autres tokens  
# Étapes : Query / Key / Value → scores → softmax → weighted sum  
# Avantage : capture les dépendances à longue distance en une seule couche, sans avoir à empiler de nombreuses couches successives

In [None]:
## 3. Quelques modèles emblématiques

# - LeNet-5     (1998)  → premier CNN moderne – MNIST – ~60k paramètres
# - VGG-16/19   (2014)  → couches 3×3 empilées – très profond pour l’époque – ~138M paramètres (VGG-16), ~144M (VGG-19)
# - ResNet-50/101/152 (2015) → blocs résiduels → réseaux profonds sans dégradation

In [None]:
## 4. Transfer Learning & Fine-tuning

# Charger un modèle pré-entraîné (souvent ImageNet) → adapter à sa tâche  
# Stratégies :
# - Feature extraction : geler le backbone, entraîner seulement la tête
# - Fine-tuning       : dégeler certaines couches + petit learning rate

In [None]:
import torchvision.models as models
from torchvision.models import VGG16_Weights

# Instantiate VGG-16 with torchvision's default pretrained weights
# (presumably downloaded and cached on first use — confirm in your environment).
model = models.vgg16(weights=VGG16_Weights.DEFAULT)
# Alternative: model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

In [None]:
# Exercice 1 – Residual Block avec projection + mini-ResNet pour CIFAR-10

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class ResidualBlock(nn.Module):
    """Residual block whose skip path is an identity or a 1x1 projection.

    Main branch: Conv3x3(stride) -> BN -> ReLU -> Conv3x3 -> BN.
    Skip branch: identity when ``stride == 1`` and the channel counts match,
    otherwise Conv1x1(stride) -> BN so both branches have the same shape.
    Output: ReLU(main + skip).

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels of the output feature map.
        stride: stride of the first 3x3 convolution and of the projection
            (default 1).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main branch: only the first convolution receives `stride`.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip branch: plain identity unless the main branch changes the shape.
        shape_preserved = stride == 1 and in_channels == out_channels
        if shape_preserved:
            self.shortcut = nn.Identity()
        else:
            projection = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                   stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_channels))

    def forward(self, x):
        """Return ReLU(main_branch(x) + shortcut(x))."""
        skip = self.shortcut(x)

        main = F.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))

        main += skip
        return F.relu(main)

In [None]:
class MiniResNet(nn.Module):
    """Small ResNet-style image classifier.

    Pipeline: stem (Conv7x7/2 -> BN -> ReLU -> MaxPool3x3/2), three residual
    stages of two blocks each (64, 128 and 256 channels), global average
    pooling, then a linear layer producing ``num_classes`` scores.

    Args:
        num_classes: size of the output of the final linear layer (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Running channel count: read and updated by _make_layer so that each
        # stage starts from the width produced by the previous one.
        self.in_channels = 64

        # Stem: expects 3-channel input.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; stages 2 and 3 are built with stride 2.
        self.layer1 = self._make_layer(64, blocks=2, stride=1)
        self.layer2 = self._make_layer(128, blocks=2, stride=2)
        self.layer3 = self._make_layer(256, blocks=2, stride=2)

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def _make_layer(self, out_channels, blocks, stride):
        """Build one stage: a first block that may change width/stride, then
        ``blocks - 1`` shape-preserving blocks. Updates ``self.in_channels``."""
        entry_block = ResidualBlock(self.in_channels, out_channels, stride)
        self.in_channels = out_channels

        follow_ups = [
            ResidualBlock(out_channels, out_channels, stride=1)
            for _ in range(blocks - 1)
        ]
        return nn.Sequential(entry_block, *follow_ups)

    def forward(self, x):
        """Run stem, residual stages and head; returns one row of
        ``num_classes`` scores per input sample."""
        stem_out = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        features = self.layer3(self.layer2(self.layer1(stem_out)))
        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(pooled)

In [None]:
# ────────────────────────────────────────────────
#  Test rapide
# ────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke test: push a random CIFAR-10-sized batch (4 RGB images, 32x32)
    # through the network and verify the shape of the scores.
    model = MiniResNet(num_classes=10)
    dummy_input = torch.randn(4, 3, 32, 32)

    # Only the output shape is inspected, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(dummy_input)

    print("Shape de sortie :", output.shape)                     # must be torch.Size([4, 10])
    # Enforce the expectation instead of relying on a visual check of the print.
    assert output.shape == (4, 10), f"unexpected output shape: {tuple(output.shape)}"

    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Nombre de paramètres : {trainable_params:,}")