# 🚀 Google Colab Setup

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ogautier1980/sandbox-ml/blob/main/cours/07_deep_learning_cnn/07_demo_lenet_cifar.ipynb)

**Si vous exécutez ce notebook sur Google Colab**, exécutez la cellule suivante pour installer les dépendances.

In [None]:
# Dependency installation (Google Colab only)
import subprocess
import sys

IN_COLAB = 'google.colab' in sys.modules


def packages_for_notebook(name):
    """Return the pip package groups to install for the notebook *name*.

    Each group is a list of requirement strings installed by one pip call.
    The first group is always the base ML stack. Further groups are selected
    by plain substring tests on *name* (chapter prefixes such as ``'07_'``
    and keywords such as ``'rag'``) and are returned in a fixed order.
    """
    groups = [['numpy', 'pandas', 'matplotlib', 'seaborn', 'scikit-learn']]

    # Ch 06-08: Deep Learning
    if any(prefix in name for prefix in ('06_', '07_', '08_')):
        groups.append(['torch', 'torchvision', 'torchaudio'])

    # Ch 08: NLP (the RAG notebooks additionally need retrieval packages)
    if '08_' in name:
        groups.append(['transformers', 'datasets', 'tokenizers'])
        if 'rag' in name:
            groups.append(['sentence-transformers', 'faiss-cpu', 'rank-bm25'])

    # Ch 09: Reinforcement Learning
    if '09_' in name:
        groups.append(['gymnasium[classic-control]'])

    # Ch 04: Boosting
    if '04_' in name and 'boosting' in name:
        groups.append(['xgboost', 'lightgbm', 'catboost'])

    # Ch 05: Advanced clustering
    if '05_' in name:
        groups.append(['umap-learn'])

    # Ch 11: Time series
    if '11_' in name:
        groups.append(['statsmodels', 'prophet'])

    # Ch 12: Advanced vision
    if '12_' in name:
        groups.append(['ultralytics', 'timm', 'segmentation-models-pytorch'])

    # Ch 13: Recommendation
    if '13_' in name:
        groups.append(['scikit-surprise', 'implicit'])

    # Ch 14: MLOps
    if '14_' in name:
        groups.append(['mlflow', 'fastapi', 'pydantic'])

    return groups


def pip_install(packages):
    """Quietly pip-install *packages* with the interpreter running this code.

    A failing install does not raise (``check=False``); whatever pip wrote to
    stdout/stderr is printed so the problem stays visible in the cell output.
    """
    result = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-q', *packages],
        capture_output=True,
        text=True,
        check=False,
    )
    pip_output = (result.stdout + result.stderr).strip()
    if pip_output:
        print(pip_output)


if IN_COLAB:
    print('📦 Installation des packages...')

    # Chapter detection is driven by the notebook file name
    notebook_name = '07_demo_lenet_cifar.ipynb'  # Replaced automatically

    for package_group in packages_for_notebook(notebook_name):
        pip_install(package_group)

    print('✅ Installation terminée !')
else:
    print('ℹ️  Environnement local détecté, les packages sont déjà installés.')

# Chapitre 07 - Démonstration : LeNet-5 et CIFAR-10

**Objectif** : Implémenter LeNet-5 (architecture CNN historique) et l'appliquer sur CIFAR-10.

**Contenu** :
1. Architecture LeNet-5 (Conv → Pool → Conv → Pool → FC)
2. Training sur CIFAR-10 (images couleur 32x32)
3. Visualisation des filtres et feature maps
4. Data Augmentation pour améliorer la performance
5. Comparaison avec MLP

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
from tqdm import tqdm

# Seed PyTorch and NumPy with the same value so runs are repeatable.
for seed_fn in (torch.manual_seed, np.random.seed):
    seed_fn(42)

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

## 1. Chargement CIFAR-10

In [None]:
# Transforms. Both pipelines finish with ToTensor + Normalize using 0.5 for
# every channel's mean and std, which rescales pixel values to [-1, 1].
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)

# Evaluation pipeline: tensor conversion and normalisation only
transform_basic = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Training pipeline: random horizontal flip and random 32x32 crop (with
# 4 pixels of padding) applied before the shared conversion steps
transform_augmented = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# CIFAR-10 splits, downloaded into ./data when not already present
cifar_kwargs = dict(root='./data', download=True)
train_dataset = torchvision.datasets.CIFAR10(
    train=True, transform=transform_augmented, **cifar_kwargs
)
test_dataset = torchvision.datasets.CIFAR10(
    train=False, transform=transform_basic, **cifar_kwargs
)

# Only the training loader shuffles; both use batches of 128 and 2 workers
loader_kwargs = dict(batch_size=128, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# CIFAR-10 class names, indexed by integer label
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

for split_name, split_dataset in (("Train", train_dataset), ("Test", test_dataset)):
    print(f"{split_name} samples: {len(split_dataset)}")
print(f"Classes: {classes}")

In [None]:
# Sample visualisation
def imshow(img, title=None):
    """Draw one normalised image tensor on the current matplotlib axes.

    Args:
        img: Tensor whose three axes are reordered with (1, 2, 0) before
            display, i.e. a channel-first image. It is mapped back with
            ``img / 2 + 0.5``, the inverse of Normalize(0.5, 0.5).
        title: Optional title; nothing is set when it is falsy.
    """
    pixels = (img / 2 + 0.5).numpy()  # undo the normalisation
    plt.imshow(pixels.transpose(1, 2, 0))
    if title:
        plt.title(title)
    plt.axis('off')

# Show the first 25 images of one training batch on a 5x5 grid
dataiter = iter(train_loader)
images, labels = next(dataiter)

fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for position, axis in enumerate(axes.ravel()):
    sample = images[position]
    target = labels[position]
    # Channel-last layout for matplotlib, then undo Normalize(0.5, 0.5)
    axis.imshow(sample.permute(1, 2, 0) / 2 + 0.5)
    axis.set_title(classes[target])
    axis.axis('off')
plt.tight_layout()
plt.show()

## 2. Architecture LeNet-5 (adaptée pour CIFAR-10)

In [None]:
class LeNet5(nn.Module):
    """LeNet-5 adapted to CIFAR-10 (32x32 RGB images).

    Two conv + ReLU + max-pool stages followed by three fully connected
    layers. The classifier expects 16 * 5 * 5 features per sample, which is
    what a 3x32x32 input produces; any other spatial size raises a shape
    error in ``fc1`` instead of being silently reshaped.

    Args:
        num_classes: Size of the output layer (number of logits per sample).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extraction (convolutional layers)
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, padding=0)  # 3 RGB channels in
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, padding=0)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier (fully connected layers)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw class logits, shape (batch, num_classes), for *x*."""
        # Conv1 + Pool1: 32x32x3 -> 28x28x6 -> 14x14x6
        x = self.pool1(self.relu(self.conv1(x)))

        # Conv2 + Pool2: 14x14x6 -> 10x10x16 -> 5x5x16
        x = self.pool2(self.relu(self.conv2(x)))

        # Flatten everything except the batch axis (5x5x16 = 400 features).
        # Unlike view(-1, 400), this never moves values between samples when
        # the input size is wrong; fc1 then reports the mismatch.
        x = torch.flatten(x, 1)

        # Fully connected classifier; no softmax, the loss expects logits
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)

        return x

# Instantiate the network on the selected device
model = LeNet5(num_classes=10).to(device)

# Summary: layer listing and total parameter count
print(model)
print(f"\nNombre de paramètres: {sum(p.numel() for p in model.parameters()):,}")

# Sanity-check forward pass on one random 3x32x32 input. Only the output
# shape is inspected, so no autograd graph is needed.
dummy_input = torch.randn(1, 3, 32, 32).to(device)
with torch.no_grad():
    dummy_output = model(dummy_input)
print(f"\nInput shape: {dummy_input.shape}")
print(f"Output shape: {dummy_output.shape}")

## 3. Entraînement

In [None]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Train *model* for one pass over *loader*.

    Args:
        model: Network to optimise; switched to training mode.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. It is
            assumed to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer stepped once per batch.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen. The loss is weighted
        by batch size, so a shorter final batch does not count as much as a
        full one.

    Raises:
        ZeroDivisionError: If *loader* yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Batch mean * batch size = summed loss of this batch
        loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

    return loss_sum / total, correct / total

def validate_epoch(model, loader, criterion, device):
    """Evaluate *model* on every batch of *loader* without updating it.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. It is
            assumed to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen. The loss is weighted
        by batch size, so a shorter final batch does not count as much as a
        full one.

    Raises:
        ZeroDivisionError: If *loader* yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validation", leave=False):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Batch mean * batch size = summed loss of this batch
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += batch_size

    return loss_sum / total, correct / total

In [None]:
# Hyperparameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.5 when the monitored loss plateaus (patience=3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

epochs = 20
# Per-epoch metrics, reused by the plotting and comparison cells
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

print("\nDébut de l'entraînement LeNet-5 sur CIFAR-10...")
for epoch in range(epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    # NOTE(review): the test split doubles as the validation split here, so
    # the scheduler is tuned on the same data the final accuracy is reported on.
    val_loss, val_acc = validate_epoch(model, test_loader, criterion, device)

    scheduler.step(val_loss)

    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

print("\nEntraînement terminé!")

## 4. Visualisation des courbes d'apprentissage

In [None]:
# Loss (left) and accuracy (right) curves for the training and validation
# metrics recorded in `history`; the x axis is the 0-based epoch index.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

panels = (
    (axes[0], 'loss', 'Loss', "Loss pendant l'entraînement"),
    (axes[1], 'acc', 'Accuracy', "Accuracy pendant l'entraînement"),
)
for ax, metric_key, metric_label, panel_title in panels:
    ax.plot(history[f'train_{metric_key}'], label=f'Train {metric_label}', marker='o')
    ax.plot(history[f'val_{metric_key}'], label=f'Val {metric_label}', marker='s')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

print(f"\nAccuracy finale: {history['val_acc'][-1]:.4f}")

## 5. Évaluation et matrice de confusion

In [None]:
# Predictions on the test set
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)

        # Predictions may live on the GPU; labels never left the CPU
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.numpy())

# Classification report (per-class precision, recall, F1)
print("\nRapport de classification:")
print(classification_report(all_labels, all_preds, target_names=classes, digits=4))

# Confusion matrix: rows are the true classes, columns the predicted ones
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.title('Matrice de Confusion - CIFAR-10')
plt.xlabel('Prédiction')
plt.ylabel('Vérité')
plt.show()

## 6. Visualisation des filtres (Conv1)

In [None]:
# Copy the learned conv1 kernels to the CPU as a NumPy array
conv1_weights = model.conv1.weight.data.cpu().numpy()  # Shape: (6, 3, 5, 5)

fig, axes = plt.subplots(2, 3, figsize=(12, 8))
for i, ax in enumerate(axes.flat):
    # Treat the 3 input channels of kernel i as RGB: (3, 5, 5) -> (5, 5, 3)
    kernel = np.transpose(conv1_weights[i], (1, 2, 0))

    # Min-max rescale each kernel on its own so it can be shown as an image
    lowest, highest = kernel.min(), kernel.max()
    kernel = (kernel - lowest) / (highest - lowest)

    ax.imshow(kernel)
    ax.set_title(f"Filtre Conv1 #{i+1}")
    ax.axis('off')

plt.suptitle('Filtres appris par Conv1 (6 filtres 5x5 RGB)', fontsize=14)
plt.tight_layout()
plt.show()

## 7. Visualisation des feature maps

In [None]:
# Forward hooks store each monitored layer's output here, keyed by layer name
activations = {}

def get_activation(name):
    """Build a forward hook that saves the layer's detached output under *name*."""
    def hook(module, inputs, output):
        activations[name] = output.detach()
    return hook

# Register the hooks and keep their handles so they can be removed again;
# otherwise every re-run of this cell would stack another pair of hooks.
hook_handles = [
    model.conv1.register_forward_hook(get_activation('conv1')),
    model.conv2.register_forward_hook(get_activation('conv2')),
]

# Forward pass on the first image of the first test batch
dataiter = iter(test_loader)
images, labels = next(dataiter)
sample_image = images[0:1].to(device)  # slice keeps the batch dimension
sample_label = labels[0]

try:
    with torch.no_grad():
        output = model(sample_image)
        _, predicted = torch.max(output, 1)
finally:
    for handle in hook_handles:
        handle.remove()

# Visualisation on a 2x7 grid
fig = plt.figure(figsize=(18, 8))

# Original image (undo Normalize(0.5, 0.5) for display)
ax1 = plt.subplot(2, 7, 1)
img = sample_image[0].cpu().permute(1, 2, 0) / 2 + 0.5
ax1.imshow(img)
ax1.set_title(f"Original\n{classes[sample_label]}")
ax1.axis('off')

# Conv1 feature maps (all 6) fill the rest of the first row
conv1_features = activations['conv1'][0].cpu().numpy()  # (6, 28, 28)
for i in range(6):
    ax = plt.subplot(2, 7, i + 2)
    ax.imshow(conv1_features[i], cmap='viridis')
    ax.set_title(f"Conv1 FM{i+1}")
    ax.axis('off')

# Conv2 feature maps: the second row has 7 slots, so 7 of the 16 are shown
conv2_features = activations['conv2'][0].cpu().numpy()  # (16, 10, 10)
for i in range(7):
    ax = plt.subplot(2, 7, i + 8)
    ax.imshow(conv2_features[i], cmap='viridis')
    ax.set_title(f"Conv2 FM{i+1}")
    ax.axis('off')

plt.suptitle(f"Feature Maps pour: {classes[sample_label]} (Prédit: {classes[predicted[0]]})", fontsize=14)
plt.tight_layout()
plt.show()

## 8. Comparaison CNN vs MLP

In [None]:
# Simple MLP used as a baseline for the comparison
class SimpleMLP(nn.Module):
    """Three-layer fully connected baseline for 3x32x32 images.

    Each sample is flattened to 32 * 32 * 3 = 3072 features and passed
    through 3072 -> 512 -> 256 -> 10 with ReLU between the layers. Inputs
    with a different number of values per sample raise a shape error in
    ``fc1`` instead of being silently reshaped.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(32*32*3, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw class logits, shape (batch, 10), for *x*."""
        # Flatten everything except the batch axis. Unlike view(-1, 3072),
        # this never moves values between samples when the input size is wrong.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

mlp = SimpleMLP().to(device)
optimizer_mlp = optim.Adam(mlp.parameters(), lr=0.001)

print(f"\nLeNet-5 paramètres: {sum(p.numel() for p in model.parameters()):,}")
print(f"MLP paramètres: {sum(p.numel() for p in mlp.parameters()):,}")

# Train the MLP for a few epochs only. The comparison is indicative rather
# than like-for-like, since LeNet-5 was trained for more epochs.
mlp_epochs = 5
print(f"\nEntraînement MLP ({mlp_epochs} epochs)...")
mlp_history = {'train_acc': [], 'val_acc': []}

for epoch in range(mlp_epochs):
    train_loss, train_acc = train_epoch(mlp, train_loader, criterion, optimizer_mlp, device)
    val_loss, val_acc = validate_epoch(mlp, test_loader, criterion, device)

    mlp_history['train_acc'].append(train_acc)
    mlp_history['val_acc'].append(val_acc)

    print(f"Epoch {epoch+1}/{mlp_epochs} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

# Comparison; epoch counts are read from what actually ran, not hard-coded
print("\n--- Comparaison CNN vs MLP ---")
print(f"LeNet-5 ({len(history['val_acc'])} epochs) - Val Acc: {history['val_acc'][-1]:.4f}")
print(f"MLP ({mlp_epochs} epochs) - Val Acc: {mlp_history['val_acc'][-1]:.4f}")
print("\nCNN capture la structure spatiale des images -> Meilleure performance!")

## Conclusion

**Points clés** :
1. **LeNet-5** : Architecture pionnière des CNN (LeCun, 1998)
2. **Convolution** : Extrait des features locales (contours, textures)
3. **Pooling** : Réduit la dimensionnalité, invariance spatiale
4. **Feature maps** : Visualisation des activations montre ce que le réseau "voit"
5. **Data Augmentation** : Améliore la généralisation (flip, crop, rotation)

**Résultats CIFAR-10** :
- LeNet-5 : ~65-70% accuracy (20 epochs)
- MLP : ~45-50% accuracy (5 epochs)
- CNN > MLP car capture structure spatiale

**Limitations LeNet-5** :
- Architecture peu profonde (2 couches conv)
- Peu de filtres (6 et 16)
- Pas de BatchNorm, Dropout, Skip Connections

**Prochaines étapes** :
- Architectures modernes : **VGG**, **ResNet**, **EfficientNet**
- **Transfer Learning** : Pré-entraînement sur ImageNet
- **Data Augmentation avancée** : Cutout, Mixup, AutoAugment