In [31]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Fonction pour charger les images MNIST
def load_mnist_images(filename):
    with open(filename, 'rb') as f:
        _ = int.from_bytes(f.read(4), byteorder='big')  # Magic number
        num_images = int.from_bytes(f.read(4), byteorder='big')  # Nombre d'images
        rows = int.from_bytes(f.read(4), byteorder='big')  # Nombre de lignes
        cols = int.from_bytes(f.read(4), byteorder='big')  # Nombre de colonnes
        images = np.frombuffer(f.read(), dtype=np.uint8).reshape(num_images, rows, cols)
        images = images.astype(np.float32)  # Conversion des images en float32
        images = images / 255.0  # Normalisation entre 0 et 1
        return images

# Fonction pour charger les labels MNIST
def load_mnist_labels(filename):
    with open(filename, 'rb') as f:
        _ = int.from_bytes(f.read(4), byteorder='big')  # Magic number
        num_labels = int.from_bytes(f.read(4), byteorder='big')  # Nombre de labels
        labels = np.frombuffer(f.read(), dtype=np.uint8)
        return labels

# Chemins vers les fichiers MNIST (remplacer par vos chemins locaux)
base_path = r"C:\Users\DeLL\Downloads\archive"  # Remplacez par votre propre chemin

train_images_path = os.path.join(base_path, "train-images.idx3-ubyte")
train_labels_path = os.path.join(base_path, "train-labels.idx1-ubyte")
test_images_path = os.path.join(base_path, "t10k-images.idx3-ubyte")
test_labels_path = os.path.join(base_path, "t10k-labels.idx1-ubyte")

# Charger les images et labels
train_images = load_mnist_images(train_images_path)
train_labels = load_mnist_labels(train_labels_path)
test_images = load_mnist_images(test_images_path)
test_labels = load_mnist_labels(test_labels_path)

# Convertir en tensors PyTorch
train_images_tensor = torch.tensor(train_images).unsqueeze(1)  # Ajouter une dimension pour le canal (grayscale)
train_labels_tensor = torch.tensor(train_labels)
test_images_tensor = torch.tensor(test_images).unsqueeze(1)  # Ajouter une dimension pour le canal (grayscale)
test_labels_tensor = torch.tensor(test_labels)

# Créer des datasets PyTorch
train_dataset = TensorDataset(train_images_tensor, train_labels_tensor)
test_dataset = TensorDataset(test_images_tensor, test_labels_tensor)

# Créer les DataLoader pour l'entraînement et les tests
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Vérification des formes des tensors
print(f"Train images shape: {train_images_tensor.shape}")
print(f"Train labels shape: {train_labels_tensor.shape}")
print(f"Test images shape: {test_images_tensor.shape}")
print(f"Test labels shape: {test_labels_tensor.shape}")

# Définir le modèle CNN
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        # Définir les couches convolutionnelles
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)  # Sortie: 32x28x28
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Réduit de moitié les dimensions spatiales
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)  # Sortie: 64x14x14
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)  # Sortie: 128x7x7
        
        # Définir les couches fully connected
        self.fc1 = None  # Nous initialiserons cette couche plus tard après le calcul de la taille

    def forward(self, x):
        # Passer par les couches convolutionnelles + pooling
        x = self.pool(torch.relu(self.conv1(x)))  # 32x28x28 -> 32x14x14
        print(f"After conv1 and pool: {x.shape}")  # Afficher la forme après conv1 et pooling
        x = self.pool(torch.relu(self.conv2(x)))  # 64x14x14 -> 64x7x7
        print(f"After conv2 and pool: {x.shape}")  # Afficher la forme après conv2 et pooling
        x = torch.relu(self.conv3(x))  # 128x7x7
        print(f"After conv3: {x.shape}")  # Afficher la forme après conv3
        
        # Calculer dynamiquement la taille de la couche fully connected
        x = x.view(x.size(0), -1)  # Aplatir les données
        print(f"After flattening: {x.shape}")  # Afficher la forme après aplatissement
        
        if self.fc1 is None:
            # Initialiser la couche fully connected après avoir calculé la taille
            self.fc1 = nn.Linear(x.size(1), 512)  # La taille de l'entrée dépend de la forme de x après l'aplatissement
            self.fc2 = nn.Linear(512, 10)  # 10 classes pour MNIST

        # Passer à travers les couches fully connected
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialiser le modèle
model = CNNModel()

# Définir la fonction de perte et l'optimiseur
criterion = nn.CrossEntropyLoss()  # Classification multi-classes
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Vérifier si un GPU est disponible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fonction d'entraînement
def train_model(model, train_loader, criterion, optimizer, num_epochs=5):
    model.to(device)  # Déplacer le modèle sur le GPU si disponible
    model.train()  # Mettre le modèle en mode entraînement
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Déplacer les données vers le GPU
            
            # Zéro les gradients des paramètres
            optimizer.zero_grad()
            
            # Passer les données dans le modèle
            outputs = model(inputs)
            
            # Calculer la perte
            loss = criterion(outputs, labels)
            
            # Calculer les gradients
            loss.backward()
            
            # Mettre à jour les poids
            optimizer.step()
            
            running_loss += loss.item()
            
            # Calculer les prédictions
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        # Afficher les statistiques à chaque époque
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

# Entraîner le modèle
train_model(model, train_loader, criterion, optimizer, num_epochs=5)


Train images shape: torch.Size([60000, 1, 28, 28])
Train labels shape: torch.Size([60000])
Test images shape: torch.Size([10000, 1, 28, 28])
Test labels shape: torch.Size([10000])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64,

In [32]:
import torch
import torchvision
from torchvision.transforms import Compose, ToTensor, Normalize
from torch.utils.data import DataLoader
from torch import nn
from torch.optim import Adam

# Hyperparamètres
BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.001
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Préparer le dataset MNIST
transform = Compose([
    ToTensor(),
    Normalize((0.5,), (0.5,))  # Normalisation pour le MNIST
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Définition du modèle RCNN pour classification
class RCNNClassifier(nn.Module):
    def __init__(self, num_classes=10):
        super(RCNNClassifier, self).__init__()
        
        # Backbone convolutionnel (feature extractor)
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),  # Sortie: 32x28x28
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Sortie: 32x14x14
            
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # Sortie: 64x14x14
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Sortie: 64x7x7
        )
        
        # Region Proposal Network (simplifié pour classification uniquement)
        self.rpn = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),  # Sortie: 128x7x7
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # Sortie: 128x3x3
        )
        
        # Fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 3 * 3, 256),
            nn.ReLU(),
            nn.Dropout(0.5),  # Régularisation
            nn.Linear(256, num_classes)
        )
        
    def forward(self, x):
        x = self.conv_layers(x)  # Extraction de caractéristiques
        x = self.rpn(x)          # RPN (simplifié ici pour extraire plus de caractéristiques)
        x = self.fc_layers(x)    # Classification finale
        return x

# Initialisation du modèle
model = RCNNClassifier(num_classes=10).to(DEVICE)

# Optimiseur et fonction de perte
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

# Fonction d'entraînement
def train_model(model, train_loader, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            
            # Zéro du gradient
            optimizer.zero_grad()
            
            # Passe avant
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Rétropropagation
            loss.backward()
            optimizer.step()
            
            # Suivi de la perte
            total_loss += loss.item()
        
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_loader):.4f}")

# Fonction d'évaluation
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    print(f"Accuracy: {accuracy:.2f}%")

# Entraîner et évaluer
train_model(model, train_loader, criterion, optimizer, EPOCHS)
evaluate_model(model, test_loader)


Epoch [1/10], Loss: 0.1948
Epoch [2/10], Loss: 0.0577
Epoch [3/10], Loss: 0.0401
Epoch [4/10], Loss: 0.0333
Epoch [5/10], Loss: 0.0270
Epoch [6/10], Loss: 0.0243
Epoch [7/10], Loss: 0.0199
Epoch [8/10], Loss: 0.0189
Epoch [9/10], Loss: 0.0150
Epoch [10/10], Loss: 0.0153
Accuracy: 99.29%


In [33]:
import time
import torch
from sklearn.metrics import f1_score

# Fonction pour évaluer un modèle
def evaluate_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    all_labels = []
    all_preds = []
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())
    
    accuracy = 100 * correct / total
    f1 = f1_score(all_labels, all_preds, average='weighted')  # F1 score pondéré
    return accuracy, f1

# Fonction pour entraîner un modèle et mesurer le temps d'entraînement
def train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, epochs, device):
    start_time = time.time()  # Mesurer le temps de début de l'entraînement
    
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            # Zéro du gradient
            optimizer.zero_grad()
            
            # Passe avant
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Rétropropagation
            loss.backward()
            optimizer.step()
            
            # Suivi de la perte
            total_loss += loss.item()
        
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_loader):.4f}")
    
    # Calculer le temps d'entraînement total
    training_time = time.time() - start_time
    print(f"Training Time: {training_time:.2f} seconds")
    
    # Évaluation sur le jeu de test
    accuracy, f1 = evaluate_model(model, test_loader, device)
    print(f"Test Accuracy: {accuracy:.2f}%")
    print(f"F1 Score: {f1:.4f}")
    
    return accuracy, f1, total_loss / len(train_loader), training_time

# Charger les données MNIST
transform = Compose([ToTensor(), Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Vérifier si un GPU est disponible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Comparaison des modèles

# Model 1: CNNModel
model1 = CNNModel().to(device)
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
criterion1 = nn.CrossEntropyLoss()

print("\nTraining Model 1 (CNNModel)...")
accuracy1, f1_1, loss1, training_time1 = train_and_evaluate(model1, train_loader, test_loader, criterion1, optimizer1, epochs=5, device=device)

# Model 2: RCNNClassifier
model2 = RCNNClassifier(num_classes=10).to(device)
optimizer2 = Adam(model2.parameters(), lr=0.001)
criterion2 = nn.CrossEntropyLoss()

print("\nTraining Model 2 (RCNNClassifier)...")
accuracy2, f1_2, loss2, training_time2 = train_and_evaluate(model2, train_loader, test_loader, criterion2, optimizer2, epochs=5, device=device)

# Comparer les résultats des deux modèles
print("\nComparison Results:")
print(f"Model 1 (CNNModel) - Accuracy: {accuracy1:.2f}%, F1 Score: {f1_1:.4f}, Loss: {loss1:.4f}, Training Time: {training_time1:.2f} seconds")
print(f"Model 2 (RCNNClassifier) - Accuracy: {accuracy2:.2f}%, F1 Score: {f1_2:.4f}, Loss: {loss2:.4f}, Training Time: {training_time2:.2f} seconds")

# Choisir le modèle le plus performant (en fonction de l'accuracy ou F1 score)
best_model = "Model 1 (CNNModel)" if accuracy1 > accuracy2 else "Model 2 (RCNNClassifier)"
print(f"\nBest Model: {best_model}")



Training Model 1 (CNNModel)...
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After conv2 and pool: torch.Size([64, 64, 7, 7])
After conv3: torch.Size([64, 128, 7, 7])
After flattening: torch.Size([64, 6272])
After conv1 and pool: torch.Size([64, 32, 14, 14])
After c