# 🚀 Google Colab Setup

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ogautier1980/sandbox-ml/blob/main/cours/06_reseaux_neurones_fondamentaux/06_demo_mlp_pytorch.ipynb)

**Si vous exécutez ce notebook sur Google Colab**, exécutez la cellule suivante pour installer les dépendances.

In [None]:
# Dependency installation (Google Colab only).
#
# Every statement sits on its own line so the cell actually executes, and pip
# is invoked through the interpreter running this notebook so the packages are
# installed into the environment the kernel is using.
import subprocess
import sys

IN_COLAB = 'google.colab' in sys.modules


def _pip_install(*packages):
    """Quietly pip-install *packages* with the interpreter running this notebook.

    The command is passed as an argument list (no shell), so names such as
    ``gymnasium[classic-control]`` are handed to pip verbatim. A failed install
    does not raise (``check=False``); the remaining installs are still attempted.
    """
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-q', *packages],
        check=False,
    )


if IN_COLAB:
    print('📦 Installation des packages...')

    # Core ML packages
    _pip_install('numpy', 'pandas', 'matplotlib', 'seaborn', 'scikit-learn')

    # The chapter prefix in the notebook name selects the extra dependencies.
    notebook_name = '06_demo_mlp_pytorch.ipynb'  # Will be replaced automatically

    # Ch 06-08: Deep Learning
    if any(x in notebook_name for x in ['06_', '07_', '08_']):
        _pip_install('torch', 'torchvision', 'torchaudio')

    # Ch 08: NLP
    if '08_' in notebook_name:
        _pip_install('transformers', 'datasets', 'tokenizers')
        if 'rag' in notebook_name:
            _pip_install('sentence-transformers', 'faiss-cpu', 'rank-bm25')

    # Ch 09: Reinforcement Learning
    if '09_' in notebook_name:
        _pip_install('gymnasium[classic-control]')

    # Ch 04: Boosting
    if '04_' in notebook_name and 'boosting' in notebook_name:
        _pip_install('xgboost', 'lightgbm', 'catboost')

    # Ch 05: Advanced clustering
    if '05_' in notebook_name:
        _pip_install('umap-learn')

    # Ch 11: Time series
    if '11_' in notebook_name:
        _pip_install('statsmodels', 'prophet')

    # Ch 12: Advanced vision
    if '12_' in notebook_name:
        _pip_install('ultralytics', 'timm', 'segmentation-models-pytorch')

    # Ch 13: Recommendation
    if '13_' in notebook_name:
        _pip_install('scikit-surprise', 'implicit')

    # Ch 14: MLOps
    if '14_' in notebook_name:
        _pip_install('mlflow', 'fastapi', 'pydantic')

    print('✅ Installation terminée !')
else:
    print('ℹ️  Environnement local détecté, les packages sont déjà installés.')

# Chapitre 06 - Démonstration : MLP avec PyTorch

**Objectif** : Impl√©menter un MLP avec PyTorch en utilisant les outils modernes (DataLoader, Optimizer, GPU).

**Contenu** :
1. Architecture avec `nn.Module`
2. DataLoader et augmentation
3. Optimizers (Adam, SGD)
4. Dropout et Batch Normalization
5. Early stopping et checkpointing
6. TensorBoard pour visualisation

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
from tqdm import tqdm

# Compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

# Seed both the PyTorch and the NumPy global RNGs with 42 for reproducibility.
for _seed_rng in (torch.manual_seed, np.random.seed):
    _seed_rng(42)

## 1. Chargement et préparation des données (MNIST)

In [None]:
# Fetch the 'mnist_784' dataset (version 1) from OpenML.
print("Chargement MNIST...")
mnist = fetch_openml(
    'mnist_784',
    version=1,
    parser='auto',
)

# Features as float32 divided by 255 (normalisation); labels cast to integers.
X = mnist.data.astype('float32').values / 255.0
y = mnist.target.astype('int').values

# Keep only the first 14,000 samples to speed things up
# (20% of the data according to the original note).
X_small, y_small = X[:14000], y[:14000]

# Two successive 80/20 splits: the test set is held out first, then the
# validation set is carved out of what remains.
X_temp, X_test, y_temp, y_test = train_test_split(
    X_small, y_small, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.2, random_state=42
)

# Report the shape of each split.
print(f"Train: {X_train.shape}")
print(f"Val: {X_val.shape}")
print(f"Test: {X_test.shape}")

In [None]:
# Convert every NumPy split to PyTorch tensors: FloatTensor for the features,
# LongTensor for the labels.
X_train_tensor, y_train_tensor = torch.FloatTensor(X_train), torch.LongTensor(y_train)
X_val_tensor, y_val_tensor = torch.FloatTensor(X_val), torch.LongTensor(y_val)
X_test_tensor, y_test_tensor = torch.FloatTensor(X_test), torch.LongTensor(y_test)

# Pair features with labels so that each dataset item is (inputs, target).
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batches of 64 samples; only the training loader shuffles.
_loader_kwargs = {'batch_size': 64}
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

print(f"Nombre de batches train: {len(train_loader)}")

## 2. Architecture MLP avec PyTorch

In [None]:
class MLP(nn.Module):
    """Multi-layer perceptron with Batch Normalization and Dropout.

    Each hidden layer is the stack ``Linear -> BatchNorm1d -> ReLU -> Dropout``.
    A final ``Linear`` layer maps the last hidden width to ``num_classes`` raw
    scores; no softmax is applied inside the module.

    Args:
        input_size: Number of features of each input sample.
        hidden_sizes: Width of every hidden layer, in order. Any iterable of
            ints works (list, tuple, ...). The default is an immutable tuple so
            that no mutable object is shared between instantiations.
        num_classes: Number of output scores.
        dropout: Probability given to every ``nn.Dropout`` layer.
    """

    def __init__(self, input_size=784, hidden_sizes=(256, 128), num_classes=10, dropout=0.3):
        super().__init__()

        layers = []
        prev_size = input_size

        for hidden_size in hidden_sizes:
            # Linear -> BatchNorm -> ReLU -> Dropout
            layers.extend([
                nn.Linear(prev_size, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            prev_size = hidden_size

        # Output layer
        layers.append(nn.Linear(prev_size, num_classes))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the sequential stack applied to ``x``."""
        return self.network(x)

# Instantiate the network with three hidden layers (256, 128, 64) and move it
# to the selected device.
model = MLP(input_size=784, hidden_sizes=[256, 128, 64], num_classes=10, dropout=0.3).to(device)

# Model summary: layer listing followed by the total parameter count.
print(model)
print(f"\nNombre de paramètres: {sum(p.numel() for p in model.parameters()):,}")

## 3. Fonctions d'entraînement et de validation

In [None]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and report its metrics.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable of ``(inputs, targets)`` batches that supports ``len()``.
        criterion: Loss callable applied to ``(outputs, targets)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` drive the update.
        device: Device every batch is moved to before the forward pass.

    Returns:
        A ``(loss, accuracy)`` tuple: the sum of the per-batch losses divided
        by the number of batches, and the number of correct predictions
        divided by the number of samples seen.
    """
    model.train()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass on freshly cleared gradients
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)

        # Backward pass and parameter update
        batch_loss.backward()
        optimizer.step()

        # Running metrics; the predicted class is the index of the largest score
        loss_sum += batch_loss.item()
        hits = logits.max(dim=1).indices == targets
        n_correct += hits.sum().item()
        n_seen += targets.size(0)

    mean_loss = loss_sum / len(loader)
    accuracy = n_correct / n_seen
    return mean_loss, accuracy

def validate_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable of ``(inputs, targets)`` batches that supports ``len()``.
        criterion: Loss callable applied to ``(outputs, targets)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        A ``(loss, accuracy)`` tuple: the sum of the per-batch losses divided
        by the number of batches, and the number of correct predictions
        divided by the number of samples seen.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            loss_sum += criterion(logits, targets).item()

            # The predicted class is the index of the largest score
            hits = logits.max(dim=1).indices == targets
            n_correct += hits.sum().item()
            n_seen += targets.size(0)

    mean_loss = loss_sum / len(loader)
    accuracy = n_correct / n_seen
    return mean_loss, accuracy

# Confirmation message once both helpers above are defined
# (accented characters restored from mis-decoded UTF-8).
print("Fonctions d'entraînement définies.")

## 4. Entraînement avec Early Stopping

In [None]:
# Loss, optimizer and learning-rate schedule. The scheduler multiplies the
# learning rate by 0.5 ('min' mode, patience of 5) based on the metric it is
# stepped with.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
)

# TensorBoard event writer
writer = SummaryWriter('runs/mlp_mnist')

# Early-stopping state: best validation loss so far, number of epochs
# tolerated without improvement, and the current count of such epochs.
best_val_loss = float('inf')
patience, patience_counter = 10, 0
epochs = 50

# Per-epoch metric history, one list per curve.
history = {
    metric: []
    for metric in ('train_loss', 'train_acc', 'val_loss', 'val_acc')
}

# Training loop with early stopping on the validation loss. The TensorBoard
# writer is closed in a ``finally`` clause so its event file is released even
# when training raises or is interrupted.
print("\nDébut de l'entraînement...")
try:
    for epoch in range(epochs):
        # One optimisation pass, then an evaluation pass
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)

        # The learning-rate scheduler is driven by the validation loss
        scheduler.step(val_loss)

        # History
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        # Read after scheduler.step(), so it reflects any reduction just applied
        current_lr = optimizer.param_groups[0]['lr']

        # TensorBoard
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch)
        writer.add_scalar('Accuracy/train', train_acc, epoch)
        writer.add_scalar('Accuracy/val', val_acc, epoch)
        writer.add_scalar('Learning Rate', current_lr, epoch)

        # Early stopping bookkeeping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Checkpoint the best model seen so far
            torch.save(model.state_dict(), 'best_mlp_model.pth')
        else:
            patience_counter += 1

        # Progress line
        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | LR: {current_lr:.6f}")

        # Stop once the validation loss has not improved for `patience` epochs
        if patience_counter >= patience:
            print(f"\nEarly stopping déclenché à l'epoch {epoch+1}")
            break
finally:
    writer.close()

print("\nEntraînement terminé!")

## 5. Visualisation des courbes d'apprentissage

In [None]:
# Learning curves: loss on the left panel, accuracy on the right one. Both
# panels share the same layout, so they are drawn by a single loop. The
# accented characters of the titles are restored from mis-decoded UTF-8.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (display name, suffix of the matching keys in `history`)
panels = (('Loss', 'loss'), ('Accuracy', 'acc'))

for ax, (metric_name, key) in zip(axes, panels):
    ax.plot(history[f'train_{key}'], label=f'Train {metric_name}', marker='o')
    ax.plot(history[f'val_{key}'], label=f'Val {metric_name}', marker='s')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_name)
    ax.set_title(f"{metric_name} pendant l'entraînement")
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

## 6. Évaluation sur le test set

In [None]:
# Reload the best checkpoint. `map_location=device` remaps the stored tensors
# onto the current device, so a checkpoint written in a GPU session still
# loads when only the CPU is available.
model.load_state_dict(torch.load('best_mlp_model.pth', map_location=device))
model.eval()

# Collect predictions and ground-truth labels over the whole test loader
all_preds = []
all_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch)
        # Predicted class = index of the largest output score
        _, predicted = torch.max(outputs, 1)

        # Predictions are brought back to the CPU; the labels were never moved
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(y_batch.numpy())

test_acc = accuracy_score(all_labels, all_preds)
print(f"\nAccuracy test: {test_acc:.4f}")

# Classification report
print("\nRapport de classification:")
print(classification_report(all_labels, all_preds, digits=4))

In [None]:
# Confusion matrix of the test-set predictions, drawn as an annotated heatmap
# (rows: true labels, columns: predicted labels). The accented characters of
# the axis labels are restored from mis-decoded UTF-8.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=range(10), yticklabels=range(10))
plt.title('Matrice de Confusion - MNIST')
plt.xlabel('Prédiction')
plt.ylabel('Vérité')
plt.show()

## 7. Visualisation des prédictions

In [None]:
# Show the model's prediction for 20 test samples drawn without replacement.
n_samples = 20
indices = np.random.choice(len(X_test), n_samples, replace=False)

fig, axes = plt.subplots(4, 5, figsize=(15, 12))
for ax, idx in zip(axes.flat, indices):
    image = X_test[idx].reshape(28, 28)
    true_label = y_test[idx]

    # Single-sample forward pass; softmax turns the scores into probabilities
    with torch.no_grad():
        X_sample = torch.FloatTensor(X_test[idx:idx+1]).to(device)
        output = model(X_sample)
        probs = torch.softmax(output, dim=1).cpu().numpy()[0]
    pred_label = np.argmax(probs)
    confidence = probs[pred_label]

    # Green title for a correct prediction, red otherwise
    if pred_label == true_label:
        color = 'green'
    else:
        color = 'red'

    ax.imshow(image, cmap='gray')
    ax.set_title(f"True: {true_label} | Pred: {pred_label}\nConf: {confidence:.2f}", color=color)
    ax.axis('off')

plt.tight_layout()
plt.show()

## 8. Analyse des erreurs

In [None]:
# Positions in the test predictions where the label and the prediction differ
errors_idx = [
    pos
    for pos, (expected, got) in enumerate(zip(all_labels, all_preds))
    if expected != got
]
n_wrong = len(errors_idx)
n_total = len(all_labels)
print(f"Nombre d'erreurs: {n_wrong} / {n_total} ({n_wrong/n_total*100:.2f}%)")

# Draw up to 10 of the misclassified samples, without replacement
n_errors = min(10, n_wrong)
error_samples = np.random.choice(errors_idx, n_errors, replace=False)

fig, axes = plt.subplots(2, 5, figsize=(15, 6))
for i, ax in enumerate(axes.flat):
    # Cells beyond the number of available errors stay empty
    if i >= n_errors:
        ax.axis('off')
        continue

    idx = error_samples[i]
    image = X_test[idx].reshape(28, 28)
    true_label = all_labels[idx]
    pred_label = all_preds[idx]

    # Probabilities for this sample, used to show the confidence of the
    # (wrong) predicted class
    with torch.no_grad():
        X_sample = torch.FloatTensor(X_test[idx:idx+1]).to(device)
        output = model(X_sample)
        probs = torch.softmax(output, dim=1).cpu().numpy()[0]

    ax.imshow(image, cmap='gray')
    ax.set_title(f"True: {true_label} | Pred: {pred_label}\nConf: {probs[pred_label]:.2f}", color='red')
    ax.axis('off')

plt.suptitle('Erreurs de classification', fontsize=16, color='red')
plt.tight_layout()
plt.show()

## 9. Analyse des activations (feature maps)

In [None]:
# Registry filled by the forward hooks: name -> detached output tensor.
activations = {}


def get_activation(name):
    """Build a forward hook that records a module's output under ``name``.

    The returned callable takes ``(module, inputs, output)`` positionally and
    stores ``output.detach()`` in the module-level ``activations`` dict,
    overwriting any previous entry for ``name``.
    """
    def hook(module, inputs, output):
        captured = output.detach()
        activations[name] = captured

    return hook

# Register forward hooks on the first two hidden layers. With the MLP layout
# (Linear, BatchNorm1d, ReLU, Dropout per hidden layer), indices 0 and 4 are
# the Linear layers, so the captured values are their outputs *before*
# BatchNorm and ReLU.
# The handles are kept so the hooks can be removed afterwards: otherwise every
# re-run of this cell stacks another pair of hooks on the model, and they keep
# firing on all later forward passes.
hook_handles = [
    model.network[0].register_forward_hook(get_activation('layer1')),
    model.network[4].register_forward_hook(get_activation('layer2')),
]

# Forward pass on a single sample
sample_idx = 0
try:
    with torch.no_grad():
        X_sample = torch.FloatTensor(X_test[sample_idx:sample_idx+1]).to(device)
        output = model(X_sample)
finally:
    for handle in hook_handles:
        handle.remove()

# Visualisation: input image plus one bar chart per hooked layer
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Original image
axes[0].imshow(X_test[sample_idx].reshape(28, 28), cmap='gray')
axes[0].set_title(f"Image originale (Label: {y_test[sample_idx]})")
axes[0].axis('off')

# Layer 1 (256 units): only the first 50 values are shown
act1 = activations['layer1'].cpu().numpy()[0]
axes[1].bar(range(len(act1[:50])), act1[:50])
axes[1].set_title('Activations Layer 1 (50 premiers)')
axes[1].set_xlabel('Neurone')
axes[1].set_ylabel('Activation')

# Layer 2 (128 units): only the first 50 values are shown
act2 = activations['layer2'].cpu().numpy()[0]
axes[2].bar(range(len(act2[:50])), act2[:50])
axes[2].set_title('Activations Layer 2 (50 premiers)')
axes[2].set_xlabel('Neurone')
axes[2].set_ylabel('Activation')

plt.tight_layout()
plt.show()

## Conclusion

**Avantages PyTorch vs NumPy** :
1. **Automatisation** : `autograd` calcule automatiquement les gradients
2. **GPU** : Accélération matérielle transparente
3. **DataLoader** : Gestion efficace des mini-batches
4. **Optimizers** : Adam, SGD, RMSprop implémentés
5. **Régularisation** : Dropout, BatchNorm intégrés
6. **Checkpointing** : Sauvegarde/chargement facile

**Résultats MNIST** :
- Accuracy ~97-98% avec architecture (784-256-128-64-10)
- Early stopping évite l'overfitting
- Learning rate scheduler améliore la convergence
- BatchNorm + Dropout = stabilité + généralisation

**Prochaines étapes** :
- Tester d'autres architectures (plus profondes)
- Utiliser **Weight Decay** (L2 regularization)
- Implémenter **Data Augmentation**
- Passer aux **Convolutional Neural Networks (CNN)**