In [4]:
from ultralytics import YOLO
import cv2
import numpy as np
import torch
from PIL import Image
import torchreid
from sklearn.metrics.pairwise import cosine_similarity
from deep_sort_realtime.deepsort_tracker import DeepSort
import time



# Initialization
#
# Creates the long-lived objects used by the processing loop: the detector,
# the appearance-embedding extractor, the multi-object tracker, the ReID
# bookkeeping state, and the video source.

yolo_model = YOLO("yolo11s.pt")

# Run the ReID network on the GPU when one is available; otherwise fall back
# to the CPU so the script still starts on machines without CUDA.
reid_device = 'cuda' if torch.cuda.is_available() else 'cpu'

extractor = torchreid.utils.FeatureExtractor(
    model_name='osnet_ibn_x1_0',
    device=reid_device
)

# max_age / n_init are forwarded unchanged to DeepSort (track lifetime and
# confirmation settings; see the deep_sort_realtime documentation).
tracker = DeepSort(max_age=30, n_init=3)

gallery = {}          # {person_id: {'embeddings': [tensor, tensor, ...]}}
person_id_map = {}    # {track_id: person_id}
next_person_id = 0    # next unused person id; incremented on every new person
SIMILARITY_THRESHOLD = 0.8  # a track is linked to a person only above this cosine similarity
min_conf = 0.5              # detections with lower confidence are dropped

cap = cv2.VideoCapture(0)
# Fail loudly instead of letting the loop exit silently on the first read.
if not cap.isOpened():
    raise RuntimeError("Could not open video capture device 0")


def _clip_box(left, top, right, bottom, frame_w, frame_h):
    """Clamp a (left, top, right, bottom) box to the frame bounds.

    Coordinates outside the image are pulled back to the nearest edge so the
    result is always safe to use as NumPy slice bounds (negative slice bounds
    would otherwise be interpreted as offsets from the end of the array).
    """
    left = min(max(left, 0), frame_w)
    right = min(max(right, 0), frame_w)
    top = min(max(top, 0), frame_h)
    bottom = min(max(bottom, 0), frame_h)
    return left, top, right, bottom


def _best_gallery_match(feature):
    """Find the stored embedding most similar to ``feature``.

    Args:
        feature: 1-D CPU tensor holding the query embedding.

    Returns:
        ``(person_id, score)`` of the most similar embedding in ``gallery``,
        or ``(None, 0.0)`` when the gallery holds no embeddings. The score is
        the cosine similarity; no threshold is applied here.
    """
    owner_ids = []
    stored = []
    for pid, data in gallery.items():
        for emb in data['embeddings']:
            stored.append(emb)
            owner_ids.append(pid)

    if not stored:
        return None, 0.0

    scores = cosine_similarity(
        feature.unsqueeze(0).numpy(),
        torch.stack(stored).numpy()
    )[0]
    best_idx = int(np.argmax(scores))
    return owner_ids[best_idx], scores[best_idx]


# Capture -> detect -> track -> re-identify -> draw, until the stream ends or
# the user presses 'q'. The try/finally guarantees the camera and the window
# are released even if a frame raises part-way through.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_h, frame_w = frame.shape[:2]

        # YOLO Detection: keep class index 0 above the confidence threshold
        # and convert each box to the ([left, top, width, height], conf,
        # label) tuple handed to the tracker.
        detections = []
        result = yolo_model(frame, verbose=False)[0]
        for box in result.boxes:
            conf = float(box.conf[0])
            if int(box.cls[0]) != 0 or conf < min_conf:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            detections.append(([x1, y1, x2 - x1, y2 - y1], conf, 'person'))

        # DeepSORT Tracking
        tracks = tracker.update_tracks(detections, frame=frame)

        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            l, t, r, b = map(int, track.to_ltrb())

            # If new track_id, perform ReID
            if track_id not in person_id_map:
                # The track box may extend past the image borders; clamp it
                # before slicing so the crop covers the intended region.
                cl, ct, cr, cb = _clip_box(l, t, r, b, frame_w, frame_h)
                cropped = frame[ct:cb, cl:cr]
                if cropped.size == 0:
                    # Nothing visible to embed; retry on a later frame.
                    continue

                # Convert to RGB and resize
                cropped_rgb = cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB)
                resized = cv2.resize(cropped_rgb, (128, 256))

                # Extract feature and L2-normalize it. F.normalize guards
                # against division by zero for a degenerate embedding.
                feature = extractor([resized])[0].cpu()
                feature = torch.nn.functional.normalize(feature, dim=0)

                best_match_id = None
                candidate_id, best_similarity_score = _best_gallery_match(feature)
                if candidate_id is not None and best_similarity_score > SIMILARITY_THRESHOLD:
                    best_match_id = candidate_id

                if best_match_id is not None:
                    # Match found - link track_id to existing person
                    person_id_map[track_id] = best_match_id
                    gallery[best_match_id]['embeddings'].append(feature)
                    print(f"Track {track_id} matched with Person {best_match_id} (score={best_similarity_score:.2f})")
                else:
                    # No match found - create new person
                    current_person_id = next_person_id
                    next_person_id += 1

                    gallery[current_person_id] = {'embeddings': [feature]}
                    person_id_map[track_id] = current_person_id
                    print(f"Track {track_id} assigned NEW Person {current_person_id}")

            # Draw tracked person
            person_id = person_id_map[track_id]
            cv2.rectangle(frame, (l, t), (r, b), (0, 255, 0), 2)
            cv2.putText(frame, f"Person {person_id}", (l, t - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Show
        cv2.imshow("ReID + Tracking", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


  state_dict = torch.load(cached_file)
  self.model.load_state_dict(torch.load(model_wts_path))


Successfully loaded imagenet pretrained weights from "/home/ritwik/.cache/torch/checkpoints/osnet_ibn_x1_0_imagenet.pth"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']
Model: osnet_ibn_x1_0
- params: 2,194,640
- flops: 978,878,352




Track 1 assigned NEW Person 0
Track 4 assigned NEW Person 1
Track 5 matched with Person 0 (score=0.82)
Track 7 assigned NEW Person 2
Track 10 assigned NEW Person 3
Track 9 assigned NEW Person 4
Track 11 assigned NEW Person 5
Track 14 assigned NEW Person 6
Track 15 assigned NEW Person 7
Track 16 assigned NEW Person 8
Track 17 assigned NEW Person 9
Track 19 matched with Person 9 (score=0.83)


## 