<a href="https://colab.research.google.com/github/meedastitou/CNN_MNIST/blob/main/faster%20R-CMN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install idx2numpy



In [2]:
import torch
# import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import idx2numpy


In [3]:

# Locations of the MNIST IDX files.
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# no mount call is visible in this notebook, so confirm before a fresh run.
DATA_DIR = "/content/drive/MyDrive/Colab Notebooks"

train_images_path = f"{DATA_DIR}/train-images.idx3-ubyte"
train_labels_path = f"{DATA_DIR}/train-labels.idx1-ubyte"
test_images_path = f"{DATA_DIR}/t10k-images.idx3-ubyte"
test_labels_path = f"{DATA_DIR}/t10k-labels.idx1-ubyte"

In [4]:
# Number of leading samples kept from each split
# (presumably a small subset to keep the demo fast; raise for a full run).
N_TRAIN_SAMPLES = 500
N_TEST_SAMPLES = 100

train_images = idx2numpy.convert_from_file(train_images_path)[:N_TRAIN_SAMPLES]
train_labels = idx2numpy.convert_from_file(train_labels_path)[:N_TRAIN_SAMPLES]
test_images = idx2numpy.convert_from_file(test_images_path)[:N_TEST_SAMPLES]
test_labels = idx2numpy.convert_from_file(test_labels_path)[:N_TEST_SAMPLES]

# Every image must have exactly one label, otherwise indexing goes out of step.
assert len(train_images) == len(train_labels), "train images/labels are misaligned"
assert len(test_images) == len(test_labels), "test images/labels are misaligned"

In [5]:
# Rescale the images before they are fed to Faster R-CNN
def normalize_images(images):
    """Divide every value in ``images`` by 255.0.

    Args:
        images: Any object supporting true division by a float, for example
            a NumPy array of 8-bit pixel values.

    Returns:
        The element-wise quotient ``images / 255.0``; the input is not
        modified.
    """
    max_pixel_value = 255.0
    scaled = images / max_pixel_value
    return scaled

In [6]:
# Scale pixel values by 1/255.
# Guarded on dtype so that re-running this cell cannot divide by 255 twice:
# dividing an integer array by 255.0 yields a floating-point array, which the
# check then skips.
# NOTE(review): assumes the raw IDX images load as an integer dtype (the
# "ubyte" file suffix suggests uint8) - confirm.
if np.issubdtype(train_images.dtype, np.integer):
    train_images = normalize_images(train_images)
if np.issubdtype(test_images.dtype, np.integer):
    test_images = normalize_images(test_images)

In [7]:
# Dataset exposing MNIST digits with a synthetic bounding box per image
class MNISTFasterRCNNDataset(Dataset):
    """Pairs each image with a detection-style target dictionary.

    Every sample is returned as ``(image, target)`` where ``image`` is a
    float32 tensor with a leading channel axis added, and ``target`` holds a
    single fixed box under ``"boxes"`` and the sample's label under
    ``"labels"``.

    Labels are passed through unchanged. torchvision's detection models treat
    label 0 as the background class, so callers are expected to supply labels
    that start at 1.

    Args:
        images: Indexable collection of 2-D images.
        labels: Indexable collection of integer labels, one per image.

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length.
    """

    # Synthetic (x_min, y_min, x_max, y_max) box shared by every sample; it is
    # a central box sized for 28x28 images, not a box fitted to the digit.
    DEFAULT_BOX = (5.0, 5.0, 23.0, 23.0)

    def __init__(self, images, labels):
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``."""
        # Add the channel axis: (H, W) -> (1, H, W).
        image = torch.tensor(self.images[idx], dtype=torch.float32).unsqueeze(0)

        # Target layout used by torchvision detection models.
        target = {
            "boxes": torch.tensor([self.DEFAULT_BOX], dtype=torch.float32),
            "labels": torch.tensor([int(self.labels[idx])], dtype=torch.int64),
        }
        return image, target


In [8]:
# Préparer les datasets et loaders
train_dataset = MNISTFasterRCNNDataset(train_images, train_labels)
test_dataset = MNISTFasterRCNNDataset(test_images, test_labels)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

In [9]:
# Charger un modèle Faster R-CNN pré-entraîné
model = fasterrcnn_resnet50_fpn(pretrained=True)



In [10]:
# Ajuster la tête du réseau pour MNIST (10 classes)
num_classes = 10  # MNIST contient 10 classes (chiffres 0-9)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [11]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# SGD over every parameter of the model
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Move the model to the selected device (as the last expression, its repr is displayed)
model.to(device)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [12]:
import tqdm

In [18]:
# Training routine
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one optimisation pass over ``data_loader`` and print the losses.

    Args:
        model: Module that, when called as ``model(images, targets)``,
            returns a dict of loss tensors.
        optimizer: Optimizer stepped once per batch.
        data_loader: Iterable yielding ``(images, targets)`` batches, where
            ``targets`` is a sequence of dicts of tensors.
        device: Device every image and target tensor is moved to.
        epoch: Epoch number, used only in the printed messages.

    Returns:
        float: Mean of the per-batch summed losses, or NaN when
        ``data_loader`` yields no batch.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0  # counted by hand so loaders without __len__ also work
    for images, targets in data_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # The model returns one tensor per loss term; optimise their sum.
        loss_dict = model(images, targets)
        total_loss = sum(loss_dict.values())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        batch_loss = total_loss.item()
        running_loss += batch_loss
        num_batches += 1
        print(f"loss : {batch_loss}, for epoch {epoch}")

    # An empty loader has no meaningful average; report NaN instead of dividing by zero.
    mean_loss = running_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch}, Loss: {mean_loss}")
    return mean_loss

In [None]:
# Training: one call to train_one_epoch per epoch
num_epochs = 2
for epoch in range(num_epochs):
    train_one_epoch(
        model=model,
        optimizer=optimizer,
        data_loader=train_loader,
        device=device,
        epoch=epoch,
    )

# Optionally save the trained weights
# torch.save(model.state_dict(), "faster_rcnn_mnist.pth")


In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
def evaluate_model(model, test_loader, device=None):
    """Score the top detection of every image against its true label.

    For each image the label of the first detection is taken as the
    prediction (torchvision returns detections sorted by decreasing score).
    Images with no detection are excluded from both metrics; how many were
    excluded is printed so the coverage is visible.

    Args:
        model: Detection model; called as ``model(images)`` in eval mode and
            expected to return one dict with a ``"labels"`` tensor per image.
        test_loader: Iterable yielding ``(images, targets)`` batches where
            each target holds exactly one label under ``"labels"``.
        device: Device the images are moved to. Defaults to the device of the
            model's first parameter, or the CPU for a parameter-less model.

    Returns:
        tuple: ``(accuracy, weighted_f1)`` over the images that produced at
        least one detection, or ``(0.0, 0.0)`` when none did.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Evaluation
    model.eval()
    predicted_labels = []
    true_labels = []
    num_images = 0
    with torch.no_grad():
        for images, targets in test_loader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for output, target in zip(outputs, targets):
                num_images += 1
                if len(output["labels"]) == 0:
                    # Nothing detected: leave this image out of the metrics.
                    continue
                predicted_labels.append(output["labels"][0].item())
                true_labels.append(target["labels"].item())

    num_missed = num_images - len(predicted_labels)
    if num_missed:
        print(f"No detection for {num_missed} of {num_images} images (excluded from the metrics)")

    if predicted_labels:
        # Compute accuracy and F1 score
        accuracy = accuracy_score(true_labels, predicted_labels)
        f1 = f1_score(true_labels, predicted_labels, average="weighted")
    else:
        # No usable prediction at all: the metrics are undefined, report zero.
        accuracy, f1 = 0.0, 0.0

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    return accuracy, f1

In [None]:
# Evaluate the trained model on the test loader
accuracy, f1 = evaluate_model(model=model, test_loader=test_loader)