<a href="https://colab.research.google.com/github/madelezhia/Re-ID/blob/main/Re-ID.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧪 Re-ID *from scratch*
**Objectif** : comprendre « embedding layer » en 15 min.<br>
**Dataset** : 10 personnes de Market-1501 (17 Mo)<br>
**Modèle** : ResNet50 → Global Average Pooling → Linear(2048→256) → Triplet-Loss

In [None]:
# 1. Installs (Colab only): PyTorch, torchvision, tqdm and wget via the pip shell magic.
!pip install torch torchvision tqdm wget

Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=55df61cee2cf806913b0f09ad37a5f7b01572bc435b74c7e229502bb341af74d
  Stored in directory: /root/.cache/pip/wheels/01/46/3b/e29ffbe4ebe614ff224bad40fc6a5773a67a163251585a13a9
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [None]:
# `os` is imported here because this cell runs before the cell that imports it;
# without this line `os.chdir` raises NameError on a fresh kernel.
import os

from google.colab import drive  # Colab-only module

# Mount Google Drive, then make the project folder the working directory so
# that the relative paths used by the following cells resolve inside it.
drive.mount('/gdrive')

os.chdir('/gdrive/MyDrive/Colab Notebooks/2025-09 Re-ID/')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [None]:
# 2. Copier-coller 10 identités depuis Market-1501 complet
import shutil, os, random
source_dir = 'Market-1501-v15.09.15/Market-1501-v15.09.15/bounding_box_train'  # path after unzipping
target_dir = 'mini_market'
os.makedirs(target_dir, exist_ok=True)

# List the source directory once (sorted for a reproducible copy order).
all_images = sorted(name for name in os.listdir(source_dir) if name.endswith('.jpg'))

# First 10 identities (feel free to change); the identity is the 4-character
# prefix of each file name.
ids = sorted({name[:4] for name in all_images})[:10]
print('IDs retenues :', ids)

# Copy every image into one sub-directory per identity:
#   mini_market/<id>/<image>.jpg
# torchvision's ImageFolder (used to load this dataset later) derives the
# class label from the sub-directory name, so a flat copy would not load.
kept_ids = set(ids)
n_copied = 0
for img in all_images:
    id_ = img[:4]
    if id_ not in kept_ids:
        continue
    class_dir = os.path.join(target_dir, id_)
    os.makedirs(class_dir, exist_ok=True)
    shutil.copy(os.path.join(source_dir, img), os.path.join(class_dir, img))
    n_copied += 1

print('✅ Mini-dataset créé :', n_copied, 'images')

HTTPError: HTTP Error 404: Not Found

In [None]:
# 3. Imports
import torch, torchvision, random, PIL
from torch import nn
from torchvision import transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device :', device)

In [None]:
# 4. Dataset & DataLoader
# Preprocessing applied to every image: resize to 256x128, convert to a
# tensor, then normalise each channel with a fixed mean/std
# (presumably the ImageNet statistics expected by the pretrained backbone).
preprocessing_steps = [
    transforms.Resize((256, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# NOTE: ImageFolder expects one sub-directory per class under the root folder.
dataset = torchvision.datasets.ImageFolder(root='mini_market', transform=transform)

# Shuffled batches of 16; the last incomplete batch of each epoch is dropped.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=16,
    shuffle=True,
    drop_last=True,
)
print('Nombre d’identités :', len(dataset.classes))

In [None]:
# 5. Modèle : ResNet50 → embedding 256-D
class ReIDNet(nn.Module):
    """ResNet-50 feature extractor followed by a linear embedding layer.

    Pipeline: ResNet-50 convolutional trunk -> global average pooling ->
    ``Linear(in_features -> embed_dim)``.

    Args:
        embed_dim: Size of the output embedding vector (default 256).
    """

    def __init__(self, embed_dim=256):
        super().__init__()
        # `pretrained=True` is deprecated since torchvision 0.13 in favour of
        # `weights=`; IMAGENET1K_V1 is the checkpoint `pretrained=True` loaded.
        weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V1
        base = torchvision.models.resnet50(weights=weights)
        # Keep everything except the last two children (global pool + classifier).
        self.backbone = nn.Sequential(*list(base.children())[:-2])
        self.pool = nn.AdaptiveAvgPool2d(1)
        # Embedding layer; its input width is read from the discarded
        # classifier (2048 for ResNet-50) instead of being hard-coded.
        self.embed = nn.Linear(base.fc.in_features, embed_dim)

    def forward(self, x):
        """Map a batch of images ``[B, 3, H, W]`` to embeddings ``[B, embed_dim]``."""
        feat = self.backbone(x)            # [B, 2048, 8, 4] for a 256x128 input
        feat = self.pool(feat).flatten(1)  # [B, 2048]
        return self.embed(feat)            # [B, embed_dim]

# Build the network with its default embedding size, then move it to the
# selected device.
model = ReIDNet()
model = model.to(device)
print('Modèle créé – 256-D embedding')

In [None]:
# 6. Simplified triplet loss (batch-hard mining)
class TripletLoss(nn.Module):
    """Triplet loss with batch-hard mining.

    For every anchor in the batch, the farthest sample with the same label
    (hardest positive) and the closest sample with a different label
    (hardest negative) are selected, and the loss is
    ``relu(d(anchor, positive) - d(anchor, negative) + margin)``.
    The result is the mean over all anchors that have at least one positive
    (other than themselves) and at least one negative.

    Args:
        margin: Minimum gap required between negative and positive distances.
    """

    def __init__(self, margin=0.3):
        super().__init__()
        self.margin = margin

    def forward(self, embeds, labels):
        """Compute the loss for one batch.

        Args:
            embeds: Float tensor ``[B, D]`` of embeddings.
            labels: Tensor ``[B]`` of identity labels.

        Returns:
            A scalar tensor attached to the autograd graph of ``embeds``;
            it is zero when the batch contains no valid triplet.
        """
        pairwise_dist = torch.cdist(embeds, embeds, p=2)

        same_id = labels.unsqueeze(0) == labels.unsqueeze(1)
        not_self = ~torch.eye(len(labels), dtype=torch.bool, device=labels.device)
        # An anchor is not its own positive: without this, anchors that have
        # no real positive would still contribute a term to the loss.
        mask_pos = same_id & not_self
        mask_neg = ~same_id

        # Anchors that have at least one positive and one negative.
        valid = mask_pos.any(dim=1) & mask_neg.any(dim=1)
        if not valid.any():
            # Return a zero that is still a graph-connected tensor so that
            # the caller's `loss.backward()` / `loss.item()` keep working.
            return embeds.sum() * 0.0

        dist = pairwise_dist[valid]
        # Mask out irrelevant entries with -inf / +inf so they can never be
        # selected by the max / min below.
        hardest_pos = dist.masked_fill(~mask_pos[valid], float('-inf')).max(dim=1).values
        hardest_neg = dist.masked_fill(~mask_neg[valid], float('inf')).min(dim=1).values

        return torch.relu(hardest_pos - hardest_neg + self.margin).mean()


criterion = TripletLoss()

In [None]:
# 7. Optimizer & training loop
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
model.train()  # switch the model to training mode
for epoch in range(5):
    running_loss = 0.0
    progress = tqdm(dataloader, desc=f'Epoch {epoch+1}')
    for batch in progress:
        # Move the batch to the same device as the model.
        imgs = batch[0].to(device)
        labels = batch[1].to(device)

        # Standard step: reset gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        embeds = model(imgs)
        loss = criterion(embeds, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean loss per batch over the epoch.
    mean_loss = running_loss / len(dataloader)
    print(f'Loss : {mean_loss:.4f}')

In [None]:
# 8. Quick visualisation: embedding distance between two images
model.eval()
img1, label1 = dataset[0]   # first sample of the dataset

# Pick the first sample whose label differs from the first one, instead of a
# hard-coded index that may belong to the same identity or be out of range.
idx2 = next((i for i, t in enumerate(dataset.targets) if t != label1), None)
if idx2 is None:
    raise ValueError('The dataset contains a single identity; cannot pick a different one.')
img2, label2 = dataset[idx2]

with torch.no_grad():
    e1 = model(img1.unsqueeze(0).to(device))
    e2 = model(img2.unsqueeze(0).to(device))
    dist = torch.cdist(e1, e2).item()
print(f'Distance embedding : {dist:.3f}  (petit = même personne)')

# The dataset tensors are normalised (mean/std below), so they must be
# de-normalised and clamped to [0, 1] before conversion to an image;
# otherwise negative values wrap around and the colours are garbled.
_norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
_norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)


def _to_displayable(img):
    """Undo the dataset normalisation and return a PIL image."""
    return transforms.ToPILImage()((img * _norm_std + _norm_mean).clamp(0, 1))


plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1); plt.imshow(_to_displayable(img1)); plt.title(f'ID {label1}')
plt.subplot(1, 2, 2); plt.imshow(_to_displayable(img2)); plt.title(f'ID {label2}')
plt.suptitle(f'Distance = {dist:.3f}'); plt.show()

### ✅ Tu viens de :  
- créer un **embedding layer**  
- l’entraîner avec **Triplet-Loss**  
- visualiser la **distance** entre deux images  

**Prochaine étape** : on supprime les visages (GDPR) et on exporte le modèle (ONNX).  
Dis-moi quand tu as exécuté ce notebook !