## Imports + chemins WAID

In [1]:
import os
import glob
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import DetrForObjectDetection, DetrImageProcessor
from tqdm import tqdm

# --- Robust base path (valid if the notebook is launched from /notebooks) ---
# PROJECT_ROOT is the parent of the current working directory.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))

# --- WAID (YOLO format) ---
# Images and labels live in parallel "images/<split>" and "labels/<split>" folders.
WAID_ROOT = os.path.join(PROJECT_ROOT, "data", "external", "WAID", "WAID")
IMG_TRAIN = os.path.join(WAID_ROOT, "images", "train")
LBL_TRAIN = os.path.join(WAID_ROOT, "labels", "train")
IMG_VAL   = os.path.join(WAID_ROOT, "images", "valid")
LBL_VAL   = os.path.join(WAID_ROOT, "labels", "valid")

# --- Classes (in your repo) ---
CLASSES_PATH = os.path.join(PROJECT_ROOT, "data", "classes.txt")

# Echo the resolved paths so a wrong working directory is visible immediately.
print("PROJECT_ROOT =", PROJECT_ROOT)
print("WAID_ROOT    =", WAID_ROOT)
print("CLASSES_PATH =", CLASSES_PATH)
print("Exists classes.txt ?", os.path.exists(CLASSES_PATH))

# One class name per non-empty line; the line order defines the class index.
with open(CLASSES_PATH, "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

NUM_CLASSES = len(class_names)
print("Classes:", NUM_CLASSES, class_names)

# NOTE: these counts include every entry in the folder ("*"), not only image files.
print("Train images:", len(glob.glob(os.path.join(IMG_TRAIN, "*"))))
print("Val images:",   len(glob.glob(os.path.join(IMG_VAL, "*"))))


  from .autonotebook import tqdm as notebook_tqdm


PROJECT_ROOT = c:\Users\paola\OneDrive\Documents\ESILV\S9\CV and Deep Learning\detection-and-identification-of-wildlife-populations-from-drone-images
WAID_ROOT    = c:\Users\paola\OneDrive\Documents\ESILV\S9\CV and Deep Learning\detection-and-identification-of-wildlife-populations-from-drone-images\data\external\WAID\WAID
CLASSES_PATH = c:\Users\paola\OneDrive\Documents\ESILV\S9\CV and Deep Learning\detection-and-identification-of-wildlife-populations-from-drone-images\data\classes.txt
Exists classes.txt ? True
Classes: 6 ['sheep', 'cattle', 'seal', 'kiang', 'camelus', 'zebra']
Train images: 10056
Val images: 2873


## Fonction YOLO txt → boxes XYXY (pixels)

In [2]:
def yolo_to_xyxy_pixels(label_path, img_w, img_h):
    """
    Convert a YOLO .txt label file into pixel-space boxes (xyxy) plus class ids.

    Each valid line has the form ``class x_center y_center w h`` where the four
    geometry values are normalised to 0..1 relative to the image size.

    Args:
        label_path: Path to the YOLO label file. A missing file means "no objects".
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        (class_ids, boxes): ``class_ids`` is a list of ints and ``boxes`` is a
        float32 array of shape (N, 4) holding [x_min, y_min, x_max, y_max],
        clamped to [0, img_w - 1] x [0, img_h - 1].

    Lines that are malformed (wrong field count, non-numeric or non-finite
    values) or that collapse to an empty box after clamping are skipped, so a
    single corrupt annotation cannot abort a whole training epoch.
    """
    class_ids = []
    boxes = []

    if not os.path.exists(label_path):
        return class_ids, np.zeros((0, 4), dtype=np.float32)

    with open(label_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            parts = raw_line.split()
            # Blank lines split to [] and are rejected here as well.
            if len(parts) != 5:
                continue

            try:
                cid = int(parts[0])
                xc, yc, bw, bh = (float(p) for p in parts[1:])
            except ValueError:
                # Non-numeric token: same policy as a wrong field count.
                continue

            # inf would otherwise be clamped into a bogus full-image box.
            if not np.isfinite((xc, yc, bw, bh)).all():
                continue

            x_center = xc * img_w
            y_center = yc * img_h
            box_w = bw * img_w
            box_h = bh * img_h

            # Centre/size -> corners, then clamp to the last valid pixel index.
            x_min = max(0, min(img_w - 1, x_center - box_w / 2))
            y_min = max(0, min(img_h - 1, y_center - box_h / 2))
            x_max = max(0, min(img_w - 1, x_center + box_w / 2))
            y_max = max(0, min(img_h - 1, y_center + box_h / 2))

            # Drop degenerate boxes (zero/negative extent after clamping).
            if x_max <= x_min or y_max <= y_min:
                continue

            class_ids.append(cid)
            boxes.append([x_min, y_min, x_max, y_max])

    # reshape keeps the (0, 4) shape when no box survived.
    return class_ids, np.array(boxes, dtype=np.float32).reshape(-1, 4)


## Dataset PyTorch (WAID YOLO) pour DETR

In [3]:
class WAIDYoloDataset(Dataset):
    """
    WAID dataset stored in YOLO layout (image files + one .txt label file each).

    Samples are converted on the fly into the COCO-like annotation dict that is
    handed to the DETR image processor: ``(rgb_image, {"image_id", "annotations"})``.
    """
    def __init__(self, img_dir, lbl_dir, max_items=None):
        """
        Args:
            img_dir: Folder scanned (non-recursively) for .jpg/.jpeg/.png files.
            lbl_dir: Folder holding the YOLO .txt file matching each image stem.
            max_items: If not None, keep only the first ``max_items`` images
                (in sorted path order) for a quicker training run.
        """
        self.img_dir = img_dir
        self.lbl_dir = lbl_dir

        image_files = [
            entry
            for entry in glob.glob(os.path.join(img_dir, "*"))
            if entry.lower().endswith((".jpg", ".jpeg", ".png"))
        ]
        image_files.sort()

        # Optional dataset truncation (for fast training).
        self.img_paths = image_files if max_items is None else image_files[:max_items]

    def __len__(self):
        """Number of images kept after the optional truncation."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and build its COCO-like target dict."""
        image_file = self.img_paths[idx]
        stem, _ext = os.path.splitext(os.path.basename(image_file))
        label_file = os.path.join(self.lbl_dir, stem + ".txt")

        # OpenCV returns None (rather than raising) when a file cannot be decoded.
        bgr = cv2.imread(image_file)
        if bgr is None:
            raise ValueError(f"Cannot read image: {image_file}")

        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        height, width = rgb.shape[:2]

        class_ids, boxes_xyxy = yolo_to_xyxy_pixels(label_file, width, height)

        # COCO boxes are [x, y, width, height]; the helper yields corner format.
        annotations = [
            {
                "category_id": int(cid),
                "bbox": [float(left), float(top), float(right - left), float(bottom - top)],
                "area": float((right - left) * (bottom - top)),
                "iscrowd": 0,
            }
            for cid, (left, top, right, bottom) in zip(class_ids, boxes_xyxy)
        ]

        return rgb, {"image_id": idx, "annotations": annotations}


In [4]:
"""
class WAIDYoloDataset(Dataset):
    
    # Dataset WAID au format YOLO (images + labels .txt).
    # On convertit √† la vol√©e en annotations COCO-like attendues par DetrImageProcessor.
    
    def __init__(self, img_dir, lbl_dir):
        self.img_dir = img_dir
        self.lbl_dir = lbl_dir
        self.img_paths = sorted([
            p for p in glob.glob(os.path.join(img_dir, "*"))
            if p.lower().endswith((".jpg", ".jpeg", ".png"))
        ])

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        base = os.path.splitext(os.path.basename(img_path))[0]
        label_path = os.path.join(self.lbl_dir, base + ".txt")

        img_bgr = cv2.imread(img_path)
        if img_bgr is None:
            raise ValueError(f"Cannot read image: {img_path}")

        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        h, w = img_rgb.shape[:2]

        class_ids, boxes_xyxy = yolo_to_xyxy_pixels(label_path, w, h)

        # DETR/processor attend un dict COCO-like:
        # {"image_id": ..., "annotations": [{"category_id":..., "bbox":[x,y,w,h], "area":..., "iscrowd":0}, ...]}
        annotations = []
        for cid, (x1, y1, x2, y2) in zip(class_ids, boxes_xyxy):
            bbox_xywh = [float(x1), float(y1), float(x2 - x1), float(y2 - y1)]
            area = float((x2 - x1) * (y2 - y1))
            annotations.append({
                "category_id": int(cid),
                "bbox": bbox_xywh,
                "area": area,
                "iscrowd": 0
            })

        target = {"image_id": idx, "annotations": annotations}
        return img_rgb, target
"""

'\nclass WAIDYoloDataset(Dataset):\n\n    # Dataset WAID au format YOLO (images + labels .txt).\n    # On convertit √† la vol√©e en annotations COCO-like attendues par DetrImageProcessor.\n\n    def __init__(self, img_dir, lbl_dir):\n        self.img_dir = img_dir\n        self.lbl_dir = lbl_dir\n        self.img_paths = sorted([\n            p for p in glob.glob(os.path.join(img_dir, "*"))\n            if p.lower().endswith((".jpg", ".jpeg", ".png"))\n        ])\n\n    def __len__(self):\n        return len(self.img_paths)\n\n    def __getitem__(self, idx):\n        img_path = self.img_paths[idx]\n        base = os.path.splitext(os.path.basename(img_path))[0]\n        label_path = os.path.join(self.lbl_dir, base + ".txt")\n\n        img_bgr = cv2.imread(img_path)\n        if img_bgr is None:\n            raise ValueError(f"Cannot read image: {img_path}")\n\n        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)\n        h, w = img_rgb.shape[:2]\n\n        class_ids, boxes_xyxy = y

## Collate + Processor + Dataloaders

In [5]:
# Image processor shipped with the pretrained DETR checkpoint.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")


def collate_fn(batch):
    """
    Collate a list of ``(image, target)`` samples into one processor encoding.

    The images and their COCO-like targets are split into two parallel lists and
    passed to the module-level ``processor``, which returns PyTorch tensors.
    """
    images, targets = map(list, zip(*batch))
    return processor(images, annotations=targets, return_tensors="pt")

# Subset sizes and batch size are kept small so a run stays feasible on CPU.
# Set MAX_TRAIN_ITEMS / MAX_VAL_ITEMS to None to use the full splits.
MAX_TRAIN_ITEMS = 300
MAX_VAL_ITEMS = 100
BATCH_SIZE = 2
SEED = 42  # fixes the training shuffle order so runs are repeatable

train_ds = WAIDYoloDataset(IMG_TRAIN, LBL_TRAIN, max_items=MAX_TRAIN_ITEMS)
val_ds   = WAIDYoloDataset(IMG_VAL, LBL_VAL, max_items=MAX_VAL_ITEMS)

# Fail early with a readable message if the dataset paths are wrong.
assert len(train_ds) > 0, f"No training images found in {IMG_TRAIN}"
assert len(val_ds) > 0, f"No validation images found in {IMG_VAL}"

train_dl = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=0,
    # Dedicated generator: reproducible shuffling without touching the global RNG.
    generator=torch.Generator().manual_seed(SEED),
)

val_dl = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
    num_workers=0
)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)
print("Train samples:", len(train_ds))
print("Val samples:", len(val_ds))


Device: cpu
Train samples: 300
Val samples: 100


## Entraînement simple (CPU friendly)

## Création du modèle DETR

In [6]:
# Index <-> name maps stored in the model config, so a checkpoint reloaded from
# disk reports real class names instead of generic LABEL_n placeholders.
ID2LABEL = dict(enumerate(class_names))
LABEL2ID = {name: idx for idx, name in ID2LABEL.items()}

# Start from the pretrained DETR weights; the classification head is resized to
# NUM_CLASSES, hence ignore_mismatched_sizes=True (the head is re-initialised).
model = DetrForObjectDetection.from_pretrained(
    "facebook/detr-resnet-50",
    num_labels=NUM_CLASSES,
    id2label=ID2LABEL,
    label2id=LABEL2ID,
    ignore_mismatched_sizes=True
).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DetrForObjectDetection were not initialized from the model checkpoin

In [7]:
EPOCHS = 2
model.train()

try:
    for epoch in range(EPOCHS):
        pbar = tqdm(train_dl, desc=f"Epoch {epoch+1}/{EPOCHS}")
        running_loss = 0.0

        # Explicit step counter: tqdm only refreshes `pbar.n` when it redraws,
        # so it is not a reliable divisor for the running mean on fast loops.
        for step, batch in enumerate(pbar, start=1):
            pixel_values = batch["pixel_values"].to(device)
            pixel_mask = batch.get("pixel_mask")
            if pixel_mask is not None:
                pixel_mask = pixel_mask.to(device)

            # `labels` is a list of per-image dicts, so it has no `.to()` itself;
            # each tensor inside must be moved, otherwise the loss computation
            # mixes CPU targets with GPU predictions when device == "cuda".
            labels = [
                {
                    key: (value.to(device) if hasattr(value, "to") else value)
                    for key, value in target.items()
                }
                for target in batch["labels"]
            ]

            outputs = model(
                pixel_values=pixel_values,
                pixel_mask=pixel_mask,
                labels=labels
            )

            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Mean loss over the steps completed so far in this epoch.
            pbar.set_postfix(loss=running_loss / step)

finally:
    # Runs even if training is interrupted or fails, so the weights reached so
    # far are always written to disk.
    OUT_DIR = os.path.join(PROJECT_ROOT, "runs", "detr")
    os.makedirs(OUT_DIR, exist_ok=True)
    model.save_pretrained(OUT_DIR)
    processor.save_pretrained(OUT_DIR)
    print("Saved to:", OUT_DIR)


Epoch 1/2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 150/150 [14:44<00:00,  5.90s/it, loss=2.75]
Epoch 2/2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 150/150 [14:54<00:00,  5.96s/it, loss=2.21]


Saved to: c:\Users\paola\OneDrive\Documents\ESILV\S9\CV and Deep Learning\detection-and-identification-of-wildlife-populations-from-drone-images\runs\detr


## Sauvegarder le modèle (comme YOLO)

In [8]:
# Write the model weights/config and the processor config side by side in
# runs/detr under the project root (the folder is created if missing).
OUT_DIR = os.path.join(PROJECT_ROOT, "runs", "detr")
os.makedirs(OUT_DIR, exist_ok=True)

for _artifact in (model, processor):
    _artifact.save_pretrained(OUT_DIR)

print("Saved to:", OUT_DIR)


Saved to: c:\Users\paola\OneDrive\Documents\ESILV\S9\CV and Deep Learning\detection-and-identification-of-wildlife-populations-from-drone-images\runs\detr


## Inference rapide sur 5 images de validation

In [9]:
import matplotlib.pyplot as plt

model.eval()

# Same extension filter as WAIDYoloDataset, so stray non-image files in the
# folder are never fed to OpenCV.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")
sample_imgs = sorted(
    p for p in glob.glob(os.path.join(IMG_VAL, "*"))
    if p.lower().endswith(IMAGE_EXTENSIONS)
)[:5]
OUT_PRED_DIR = os.path.join(PROJECT_ROOT, "runs", "detr", "pred_vis")
os.makedirs(OUT_PRED_DIR, exist_ok=True)

with torch.no_grad():
    for img_path in sample_imgs:
        img_bgr = cv2.imread(img_path)
        if img_bgr is None:
            # cv2.imread signals an unreadable file with None instead of raising.
            print("Skipping unreadable image:", img_path)
            continue
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        inputs = processor(images=[img_rgb], return_tensors="pt")
        pixel_values = inputs["pixel_values"].to(device)
        pixel_mask = inputs.get("pixel_mask")
        if pixel_mask is not None:
            pixel_mask = pixel_mask.to(device)

        outputs = model(pixel_values=pixel_values, pixel_mask=pixel_mask)

        # Rescale predicted boxes to the original image size, given as (h, w).
        target_sizes = torch.tensor([img_rgb.shape[:2]]).to(device)
        results = processor.post_process_object_detection(
            outputs, target_sizes=target_sizes, threshold=0.5
        )[0]

        # Draw each kept detection: box plus "<class> <score>" caption.
        vis = img_rgb.copy()
        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
            x1, y1, x2, y2 = box.int().tolist()
            cv2.rectangle(vis, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"{class_names[int(label)]} {float(score):.2f}"
            # Keep the caption inside the image when the box touches the top edge.
            cv2.putText(vis, text, (x1, max(15, y1-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,0,0), 2)

        out_path = os.path.join(OUT_PRED_DIR, os.path.basename(img_path))
        # The drawing was done in RGB; OpenCV writes files from BGR arrays.
        cv2.imwrite(out_path, cv2.cvtColor(vis, cv2.COLOR_RGB2BGR))

print("Saved prediction visuals to:", OUT_PRED_DIR)


Saved prediction visuals to: c:\Users\paola\OneDrive\Documents\ESILV\S9\CV and Deep Learning\detection-and-identification-of-wildlife-populations-from-drone-images\runs\detr\pred_vis
