# Analyse ROC et Visualisation des Distances
Ce notebook permet de visualiser la distribution des distances entre paires ancrage-positive et ancrage-négative, et de tracer la courbe ROC pour guider le choix d'un seuil de similarité.

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from insightface.app import FaceAnalysis
from sklearn.metrics.pairwise import cosine_distances
from sklearn.metrics import roc_curve, auc
from PIL import Image
import os

# Initialisation du modèle
app = FaceAnalysis(name="buffalo_l")

app.prepare(ctx_id=0, det_size=(640, 640))




Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\s.herivalisoa/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\s.herivalisoa/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\s.herivalisoa/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\s.herivalisoa/.insightface\models\buffalo_l\genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\s.herivalisoa/.insightface\models\buffalo_l\w600k_r50.onn

In [13]:
# Charger le fichier triplet CSV
df = pd.read_csv("../static/triplets.csv", sep=',')
df.head()


Unnamed: 0,anchor,id1,pos,id2,neg,id3
0,056279.jpg,1,108998.jpg,1,030848.jpg,496
1,024091.jpg,1,000023.jpg,1,093653.jpg,9313
2,122082.jpg,3,045833.jpg,3,188283.jpg,7200
3,110393.jpg,3,021233.jpg,3,178433.jpg,4643
4,101388.jpg,4,056784.jpg,4,105432.jpg,2988


In [19]:
df_1 = df.sample(10)

In [23]:
import cv2

In [36]:
def get_embedding(image_path):
    full_path = os.path.join("../static/triplets_images", image_path)
    if not os.path.exists(full_path):
        return None
    #img = np.array(Image.open(full_path).convert("RGB"))
    img = cv2.imread(full_path)
    print(img.shape)
    # Resize si trop petit
    if img.shape[0] < 160 or img.shape[1] < 160:
        img = cv2.resize(img, (224, 224))  # ou (224, 224)
    print(img.shape)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    faces = app.get(img)
    if not faces:
        print("Not found")
    return faces[0].embedding if faces else None


In [37]:
distances, labels = [], []

for _, row in df_1.iterrows():
    print(_)
    emb_anchor = get_embedding(row['anchor'])
    emb_pos = get_embedding(row['pos'])
    emb_neg = get_embedding(row['neg'])

    if emb_anchor is not None and emb_pos is not None:
        d_ap = cosine_distances([emb_anchor], [emb_pos])[0][0]
        distances.append(d_ap)
        labels.append(1)  # Positive

    if emb_anchor is not None and emb_neg is not None:
        d_an = cosine_distances([emb_anchor], [emb_neg])[0][0]
        distances.append(d_an)
        labels.append(0)  # Negative


9470
(116, 82, 3)
(224, 224, 3)
Not found
(117, 84, 3)
(224, 224, 3)
Not found
(118, 84, 3)
(224, 224, 3)
Not found
9257
(110, 78, 3)
(224, 224, 3)
Not found
(137, 103, 3)
(224, 224, 3)
Not found
(121, 88, 3)
(224, 224, 3)
Not found
10714
(110, 84, 3)
(224, 224, 3)
Not found
(110, 81, 3)
(224, 224, 3)
Not found
(115, 89, 3)
(224, 224, 3)
Not found
5096
(107, 80, 3)
(224, 224, 3)
Not found
(122, 81, 3)
(224, 224, 3)
Not found
(118, 90, 3)
(224, 224, 3)
Not found
8484
(128, 92, 3)
(224, 224, 3)
Not found
(110, 81, 3)
(224, 224, 3)
Not found
(111, 83, 3)
(224, 224, 3)
Not found
12265
(117, 84, 3)
(224, 224, 3)
Not found
(120, 88, 3)
(224, 224, 3)
Not found
(102, 74, 3)
(224, 224, 3)
Not found
2187
(120, 87, 3)
(224, 224, 3)
Not found
(108, 85, 3)
(224, 224, 3)
Not found
(119, 87, 3)
(224, 224, 3)
Not found
568
(114, 82, 3)
(224, 224, 3)
Not found
(110, 83, 3)
(224, 224, 3)
Not found
(114, 84, 3)
(224, 224, 3)
Not found
1816
(116, 84, 3)
(224, 224, 3)
Not found
(113, 82, 3)
(224, 224, 3)
N

In [22]:
len(distances)

0

In [8]:
# Tracer histogrammes + courbe ROC
fpr, tpr, thresholds = roc_curve(labels, distances, pos_label=0)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.hist([d for l, d in zip(labels, distances) if l == 1], bins=30, alpha=0.6, label='Positive')
plt.hist([d for l, d in zip(labels, distances) if l == 0], bins=30, alpha=0.6, label='Negative')
plt.title("Distribution des distances")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
plt.plot([0,1], [0,1], '--', color='gray')
plt.title("Courbe ROC")
plt.xlabel("Taux de faux positifs (FPR)")
plt.ylabel("Taux de vrais positifs (TPR)")
plt.legend()

plt.tight_layout()
plt.show()


IndexError: cannot do a non-empty take from an empty axes.