In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install necessary system dependencies and packages
!apt-get update
!apt-get install -y ffmpeg
!pip install --upgrade pip
!pip uninstall -y moviepy
!pip install moviepy ultralytics torchreid deep-sort-realtime

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:2 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already th

In [3]:
!pip install ultralytics torchreid deep-sort-realtime



In [2]:
# Import required libraries
from IPython import get_ipython
from IPython.display import display
import cv2
import os
import json
import numpy as np
import gc
import torchreid
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from collections import defaultdict
from google.colab.patches import cv2_imshow
import torch
import torchvision.transforms as T

# Try to import ImageSequenceClip from moviepy
try:
    from moviepy.editor import ImageSequenceClip
except ModuleNotFoundError:
    from moviepy.video.io.ImageSequenceClip import ImageSequenceClip

# Define a GlobalTracker class
class GlobalTracker:
    def __init__(self):
        self.global_id = 0
        self.embeddings_db = defaultdict(list)
        self.threshold = 0.7  # Adjust similarity threshold as needed

    def get_global_id(self, camera_id, local_id, embedding):
        best_sim = -1
        best_gid = None

        for (stored_cam, stored_id), stored_data in self.embeddings_db.items():
            stored_emb = stored_data[0]
            similarity = np.dot(embedding, stored_emb) / (
                np.linalg.norm(embedding) * np.linalg.norm(stored_emb)
            )
            if similarity > self.threshold and similarity > best_sim:
                best_sim = similarity
                best_gid = stored_data[1]

        if best_gid is not None:
            self.embeddings_db[(camera_id, local_id)] = (embedding, best_gid)
            return best_gid
        else:
            self.global_id += 1
            self.embeddings_db[(camera_id, local_id)] = (embedding, self.global_id)
            return self.global_id

# Initialize the detection model, re-identification model, and tracker
def initialize_models():
    detection_model = YOLO("yolov8n.pt")

    reid_model = torchreid.models.build_model(
        name='osnet_x1_0',
        num_classes=1000,
        pretrained=True
    )
    reid_model.eval()

    tracker = DeepSort(
        max_age=30,
        n_init=3,
        max_cosine_distance=0.4,
        nn_budget=None
    )

    return detection_model, reid_model, tracker

# Helper function to extract embeddings from a bounding box region
def extract_embeddings(reid_model, frame, bbox):
    x, y, w, h = [int(v) for v in bbox]
    person_patch = frame[y:y+h, x:x+w]

    if person_patch.size == 0:
        return None

    person_patch = cv2.resize(person_patch, (128, 256))
    transform = T.ToTensor()
    person_patch = transform(person_patch).unsqueeze(0)
    return reid_model(person_patch).detach().cpu().numpy().flatten()

# Initialize models and global tracker
detection_model, reid_model, tracker = initialize_models()
global_tracker = GlobalTracker()



Successfully loaded imagenet pretrained weights from "/root/.cache/torch/checkpoints/osnet_x1_0_imagenet.pth"


In [4]:
import time

# Define base path and output directory
base_path = "/content/drive/MyDrive/dataset/Wildtrack/Image_subsets"
output_dir = "/content/drive/MyDrive/output"
os.makedirs(output_dir, exist_ok=True)

# Define only three camera directories
camera_dirs = {
    "Camera_1": os.path.join(base_path, "C1"),
    "Camera_2": os.path.join(base_path, "C2"),
    "Camera_3": os.path.join(base_path, "C3")
}

# Dictionary to store cropped frames for each global_id
frame_storage = defaultdict(list)

# Confidence threshold for detections
conf_thresh = 0.5

# Process each camera separately
for camera_id, camera_path in camera_dirs.items():
    if not os.path.exists(camera_path):
        print(f"Directory {camera_path} does not exist. Skipping {camera_id}.")
        continue

    print(f"\nProcessing {camera_id}")
    image_files = sorted([f for f in os.listdir(camera_path)
                         if f.lower().endswith(('.jpg', '.jpeg', '.png'))])

    num_frames = len(image_files)
    start_time = time.time()

    for frame_idx, image_name in enumerate(image_files):
        image_path = os.path.join(camera_path, image_name)
        frame = cv2.imread(image_path)
        if frame is None:
            continue

        # Run detection
        results = detection_model(frame, verbose=False)[0]
        detections = []
        for box in results.boxes.data.cpu().numpy():
            x1, y1, x2, y2, conf, cls = box
            if int(cls) == 0 and conf >= conf_thresh:
                detections.append((
                    [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
                    float(conf)
                ))

        # Prepare embeddings for valid detections
        embeddings = []
        valid_detections = []
        for det in detections:
            bbox = det[0]
            embedding = extract_embeddings(reid_model, frame, bbox)
            if embedding is not None:
                embeddings.append(embedding)
                valid_detections.append(det)

        # Update tracks using DeepSort (if detections exist)
        if valid_detections:
            tracks = tracker.update_tracks(
                valid_detections,
                embeds=embeddings,
                frame=frame
            )
        else:
            tracks = []

        # For each confirmed track, assign a global ID and store the cropped frame
        for track in tracks:
            if not track.is_confirmed():
                continue

            ltrb = track.to_ltrb()
            x1, y1, x2, y2 = map(int, ltrb)
            w, h = x2 - x1, y2 - y1

            embedding = extract_embeddings(reid_model, frame, (x1, y1, w, h))
            if embedding is None:
                continue

            global_id = global_tracker.get_global_id(
                camera_id,
                track.track_id,
                embedding
            )

            crop = frame[y1:y2, x1:x2]
            if crop.size > 0:
                frame_storage[global_id].append(crop)

        if (frame_idx + 1) % 50 == 0 or (frame_idx + 1) == num_frames:
            print(f"{camera_id}: Processed {frame_idx + 1}/{num_frames} frames")
            gc.collect()

    elapsed = time.time() - start_time
    print(f"Finished processing {camera_id} in {elapsed:.2f} seconds.\n")


Processing Camera_1
Camera_1: Processed 50/101 frames
Camera_1: Processed 100/101 frames
Camera_1: Processed 101/101 frames
Finished processing Camera_1 in 245.84 seconds.


Processing Camera_2
Camera_2: Processed 50/101 frames
Camera_2: Processed 100/101 frames
Camera_2: Processed 101/101 frames
Finished processing Camera_2 in 417.29 seconds.


Processing Camera_3
Camera_3: Processed 50/101 frames
Camera_3: Processed 100/101 frames
Camera_3: Processed 101/101 frames
Finished processing Camera_3 in 567.73 seconds.



In [5]:
print("\nGenerating video clips from stored frames...")
for global_id, frames in frame_storage.items():
    if len(frames) < 15:  # Skip if there are too few frames
        continue

    try:
        # Resize frames as needed (here to 256x512)
        resized = [cv2.resize(f, (256, 512)) for f in frames if f.size > 0]
        clip = ImageSequenceClip(resized, fps=12)
        output_path = os.path.join(output_dir, f"person_{global_id}.mp4")
        clip.write_videofile(output_path, codec='libx264', logger=None)
        print(f"Generated clip for ID {global_id} ({len(resized)} frames)")
    except Exception as e:
        print(f"Error processing ID {global_id}: {str(e)}")

print("\nProcessing complete! Check the output directory for results.")


Generating video clips from stored frames...
Generated clip for ID 2 (3081 frames)
Generated clip for ID 9 (618 frames)
Generated clip for ID 11 (453 frames)
Generated clip for ID 15 (1749 frames)
Generated clip for ID 16 (85 frames)
Generated clip for ID 17 (18 frames)
Generated clip for ID 18 (404 frames)
Generated clip for ID 39 (19 frames)
Generated clip for ID 44 (122 frames)
Generated clip for ID 46 (15 frames)
Generated clip for ID 47 (46 frames)
Generated clip for ID 58 (23 frames)
Generated clip for ID 67 (23 frames)

Processing complete! Check the output directory for results.
