# Import Libraries & Configuration

In [1]:
pip install opencv-python numpy ultralytics deep_sort_realtime torchreid



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import cv2
import os
import numpy as np
import time
from collections import defaultdict
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import torchreid
from torchvision import transforms

# Configuration
BASE_PATH = "/content/drive/MyDrive/dataset/Wildtrack"
VIDEO_PATHS = {
    "Camera_1": os.path.join(BASE_PATH, "videos/C1.mp4"),
    "Camera_2": os.path.join(BASE_PATH, "videos/C5.mp4"),
    "Camera_3": os.path.join(BASE_PATH, "videos/C7.mp4")
}
OUTPUT_DIR = "/content/drive/MyDrive/output"
CLIP_DURATION = 120  # 2 minutes in seconds
CONF_THRESH = 0.5
FRAME_SIZE = (640, 360)  # Reduced size for processing

to_tensor = transforms.ToTensor()



# Initialize Models

In [4]:
# Initialize models
detection_model = YOLO("yolov8n.pt")
reid_model = torchreid.models.build_model('osnet_x1_0', num_classes=1000, pretrained=True)
reid_model.eval()

Successfully loaded imagenet pretrained weights from "/root/.cache/torch/checkpoints/osnet_x1_0_imagenet.pth"


OSNet(
  (conv1): ConvLayer(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv2): Sequential(
    (0): OSBlock(
      (conv1): Conv1x1(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (conv2a): LightConv3x3(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (conv2b): Sequential(
        (

# GlobalTracker Class


In [5]:
class GlobalTracker:
    def __init__(self):
        self.global_id = 0
        self.embeddings_db = {}
        self.threshold = 0.7

    def update(self, embedding):
        best_id = None
        best_sim = self.threshold

        for pid, stored_emb in self.embeddings_db.items():
            similarity = np.dot(embedding, stored_emb) / (np.linalg.norm(embedding) * np.linalg.norm(stored_emb))
            if similarity > best_sim:
                best_sim = similarity
                best_id = pid

        if best_id is None:
            self.global_id += 1
            self.embeddings_db[self.global_id] = embedding
            return self.global_id
        else:
            self.embeddings_db[best_id] = embedding  # Update with latest appearance
            return best_id

# process_camera_video Function

In [13]:
def process_camera_video(video_path, global_tracker, start_time=0):
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_MSEC, start_time * 1000)

    # Determine original FPS and calculate skip factor to process at 5 FPS.
    orig_fps = cap.get(cv2.CAP_PROP_FPS)
    skip_factor = int(orig_fps / 5) if orig_fps > 5 else 1
    frame_count = 0
    # Initialize progress variables
    last_print_time = start_time
    current_time = start_time

    tracker = DeepSort(max_age=30, n_init=3)
    results = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        # Skip frames to achieve approx 5 FPS processing rate
        if frame_count % skip_factor != 0:
            continue

        # Update current_time from the video position
        current_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
        if current_time - start_time > CLIP_DURATION:
            break

        frame = cv2.resize(frame, FRAME_SIZE)

        # Print progress every 5 seconds
        if current_time - last_print_time >= 5:
            print(f"Processed {current_time - start_time:.1f} seconds of video from {os.path.basename(video_path)}")
            last_print_time = current_time

        # Detection
        detections = detection_model(frame, verbose=False)[0]
        boxes = []
        confidences = []
        embeddings = []

        for box in detections.boxes.data.cpu().numpy():
            x1, y1, x2, y2, conf, cls = box
            if int(cls) == 0 and conf >= CONF_THRESH:
                w, h = x2 - x1, y2 - y1
                bbox = [x1, y1, w, h]
                person = frame[int(y1):int(y2), int(x1):int(x2)]

                if person.size == 0:
                    continue

                # Re-ID embedding (resize to expected dimensions for the model)
                person = cv2.resize(person, (128, 256))
                person = to_tensor(person).unsqueeze(0)
                embedding = reid_model(person).detach().cpu().numpy().flatten()

                boxes.append(bbox)
                confidences.append(conf)
                embeddings.append(embedding)

        # Tracking using DeepSORT
        tracks = tracker.update_tracks(list(zip(boxes, confidences)), embeds=embeddings, frame=frame)

        # Update global IDs based on embeddings
        frame_data = {}
        for i, track in enumerate(tracks):
            if not track.is_confirmed():
                continue

            ltrb = track.to_ltrb()
            if i < len(embeddings):
                global_id = global_tracker.update(embeddings[i])
                frame_data[global_id] = {
                    'bbox': ltrb,
                    'last_seen': current_time,
                    'camera': os.path.basename(video_path)
                }

        results.append((frame, frame_data))

    cap.release()
    print(f"Finished processing {os.path.basename(video_path)}: Processed {current_time - start_time:.1f} seconds of video.")
    return results


# generate_highlight_videos Function

In [14]:
def generate_highlight_videos(global_tracker, all_results):
    for pid in global_tracker.embeddings_db.keys():
        output_frames = []
        last_seen_info = {}

        # Process each camera's results
        for cam_results in all_results.values():
            for frame_idx, (frame, data) in enumerate(cam_results):
                if pid in data:
                    # Draw bounding box
                    x1, y1, x2, y2 = map(int, data[pid]['bbox'])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    last_seen_info = {
                        'time': data[pid]['last_seen'],
                        'camera': data[pid]['camera']
                    }

                # Resize frame for output consistency
                frame = cv2.resize(frame, FRAME_SIZE)
                output_frames.append(frame)

        # Create composite video for the person if any frames were found
        if output_frames:
            output_path = os.path.join(OUTPUT_DIR, f"person_{pid}.mp4")
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, 5, FRAME_SIZE)

            for frame in output_frames:
                # Overlay person ID and last seen info
                cv2.putText(frame, f"ID: {pid}", (10, 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                if last_seen_info:
                    cv2.putText(frame,
                                f"Last seen: {last_seen_info['camera']} @ {last_seen_info['time']:.1f}s",
                                (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
                out.write(frame)

            out.release()

# Main Processing Loop

In [15]:
# Main processing
global_tracker = GlobalTracker()
all_results = {}

for cam_name, video_path in VIDEO_PATHS.items():
    print(f"Processing {cam_name}...")
    results = process_camera_video(video_path, global_tracker)
    all_results[cam_name] = results

print("Generating highlight videos...")
generate_highlight_videos(global_tracker, all_results)

print("Processing complete! Check output directory for results.")

Processing Camera_1...
Processed 5.2 seconds of video from C1.mp4
Processed 10.4 seconds of video from C1.mp4
Processed 15.4 seconds of video from C1.mp4
Processed 20.4 seconds of video from C1.mp4
Processed 25.4 seconds of video from C1.mp4
Processed 30.4 seconds of video from C1.mp4
Processed 35.4 seconds of video from C1.mp4
Processed 40.4 seconds of video from C1.mp4
Processed 45.4 seconds of video from C1.mp4
Processed 50.4 seconds of video from C1.mp4
Processed 55.4 seconds of video from C1.mp4
Processed 60.4 seconds of video from C1.mp4
Processed 65.6 seconds of video from C1.mp4
Processed 70.6 seconds of video from C1.mp4
Processed 75.6 seconds of video from C1.mp4
Processed 80.6 seconds of video from C1.mp4
Processed 85.6 seconds of video from C1.mp4
Processed 90.6 seconds of video from C1.mp4
Processed 95.6 seconds of video from C1.mp4
Processed 100.6 seconds of video from C1.mp4
Processed 105.6 seconds of video from C1.mp4
Processed 110.6 seconds of video from C1.mp4
Process