# Minimal Example using YOLOv11-Tracking
## Install libraries 
Run this cell once at the beginning, and again whenever you need the latest version of the course code.

In [4]:
!pip install opencv-python
!pip install ffmpegcv
!pip install scipy
!pip install numpy
!pip install tqdm
!pip install scikit-image
!pip install pillow
!pip install --upgrade --force-reinstall git+https://github.com/leon-etienne/gensurv.git@main  # Install from main branch
%load_ext autoreload
%autoreload 2
from gensurv import *

Collecting git+https://github.com/leon-etienne/gensurv.git@main
  Cloning https://github.com/leon-etienne/gensurv.git (to revision main) to /tmp/pip-req-build-7bcqvp7c
  Running command git clone --filter=blob:none --quiet https://github.com/leon-etienne/gensurv.git /tmp/pip-req-build-7bcqvp7c
  Resolved https://github.com/leon-etienne/gensurv.git to commit 1bd515434e29ad107055122c531122531b466343
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: gensurv
  Building wheel for gensurv (setup.py) ... [?25ldone
[?25h  Created wheel for gensurv: filename=gensurv-0.1.1-py3-none-any.whl size=7862 sha256=91faef2c56964b428610aa6d7d93efdd5b7f1e7a020bf22320b40bc499e77cce
  Stored in directory: /tmp/pip-ephem-wheel-cache-mo79cp4q/wheels/3a/75/52/0d4cd0f9e638444cb4b2ec6df244a743705eaaf10a027729c8
Successfully built gensurv
Installing collected packages: gensurv
  Attempting uninstall: gensurv
    Found existing installation: gensurv 0.1.1
    Uninstallin

## Load multiple videos
You can use `get_video_frames` to load multiple videos; just make sure you pass the same `width` and `height` arguments so that every video has the same size.
You also have to give the returned `video_frames`, `fps` and `duration` a distinct name for each video, or ignore the values you do not need by assigning them to an underscore `_`.
In this example I load the same video three times but use different parts. You can also use multiple distinct videos; just change the path. You can use `display_video=False` to hide the preview.

In [6]:
# All three clips are cut from the same file and resized to one common
# resolution so their frames can later be combined pixel by pixel.
source_video = "Town.mp4"
frame_size = dict(width=1920, height=1080)

# Clip 1 (0-5): fps and duration are kept; the preview is not disabled here.
video_frames_1, fps, duration = get_video_frames(source_video, start=0, end=5, **frame_size)

# Clips 2 (15-20) and 3 (30-45): only the frames are needed, preview hidden.
video_frames_2, _, _ = get_video_frames(source_video, start=15, end=20, display_video=False, **frame_size)
video_frames_3, _, _ = get_video_frames(source_video, start=30, end=45, display_video=False, **frame_size)

Number of frames: 125
Frames per second (fps): 25.00333288894814
Duration: 300.04 seconds


Number of frames: 125
Frames per second (fps): 25.00333288894814
Duration: 300.04 seconds
Number of frames: 375
Frames per second (fps): 25.00333288894814
Duration: 300.04 seconds


## Load the YOLO model

This loads the YOLO11 model (`yolo11n-seg.pt`) specialized in segmentation.
There are other tasks YOLO can do, you can read more about it in the official [ultralytics documentation](https://docs.ultralytics.com/tasks/).

In [7]:
# Prepare the model: load an official segmentation checkpoint by file name.
weights_file = "yolo11n-seg.pt"
model = YOLO(weights_file)

# Print the mapping from class id to class name that the model knows.
print(model.names)

{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

## Run the tracking

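This code goes through the frames of the three videos in parallel, tracks and masks the persons in each of them, and then combines the three masked frames into a single output frame. Because the videos are iterated together, processing stops at the end of the shortest video.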

In [12]:
def process_video_frames(video_frames, *additional_video_frames):
    """Mask the tracked objects of several clips and merge them frame by frame.

    For every time step one frame is taken from each clip, passed through
    ``model.track`` and turned into a mask with
    ``process_results_to_masks_normalized`` (presumably the person masks;
    confirm in gensurv). The mask is multiplied with its frame, and the masked
    frames of all clips are merged with a pixel-wise maximum.

    Args:
        video_frames: Frames of the first clip. Its first frame is also the
            template for the initial, all-zero ``previous_frame``.
        *additional_video_frames: Frames of further clips to overlay. When
            omitted, the notebook-level ``video_frames_1``, ``video_frames_2``
            and ``video_frames_3`` are combined instead (and ``video_frames``
            itself is not iterated). This keeps existing single-argument
            calls working exactly as before.

    Returns:
        list: One merged frame per time step. Iteration stops at the end of
        the shortest clip because the clips are zipped together.

    Raises:
        IndexError: If ``video_frames`` is empty.
    """
    # Prefer explicitly passed clips; fall back to the notebook globals only
    # for the legacy single-argument call.
    if additional_video_frames:
        clips = (video_frames, *additional_video_frames)
    else:
        clips = (video_frames_1, video_frames_2, video_frames_3)

    previous_frame = np.zeros_like(video_frames[0])

    processed_frames = []

    ### Reset Ids so they stay consistent ###
    # The predictor is None before the first prediction and may not carry a
    # `trackers` list yet (e.g. if no tracking call ran so far), so guard both.
    trackers = getattr(model.predictor, "trackers", None)
    if trackers:
        trackers[0].reset_id()

    ### Start Tracker ###
    # Not used by this minimal example; kept as a hook for track-based effects.
    process_results_to_tracks = start_results_to_tracks()

    # Give tqdm a total so it can draw a real progress bar; zip() has no length.
    try:
        total_frames = min(len(clip) for clip in clips)
    except TypeError:
        total_frames = None  # at least one clip is not sized (e.g. a generator)

    # With zip we can go through multiple lists of frames at the same time
    for index, current_frames in enumerate(tqdm(zip(*clips), total=total_frames)):

        ### A) Calculations including the previous frame ####
        processed_frame = np.zeros_like(current_frames[0])

        # Track each clip, mask its frame, then take the maximum over all clips.
        # NOTE(review): every clip goes through the same `model` with
        # persist=True, so they appear to share one tracker state. Confirm
        # whether mixed track ids matter for your effect.
        for current_frame in current_frames:
            results = model.track(current_frame, persist=True, verbose=False)
            person_masks = process_results_to_masks_normalized(results, current_frame)
            processed_frame = np.maximum(processed_frame, person_masks * current_frame)

        #####################################################

        previous_frame = processed_frame

        ### B) After Calculations with the previous frame ###


        ####################################################
        processed_frames.append(processed_frame)

    return processed_frames

# File name handed to save_video_frames for the combined result.
output_name = "yolo_tracking_multiple_videos.mp4"
# Directory shown in the video browser. NOTE: this is a hard-coded absolute
# path; adjust it when the notebook runs in a different environment.
browser_root = "/home/jovyan"

# Combine the clips; video_frames_2 and video_frames_3 are picked up inside
# process_video_frames when only the first clip is passed.
processed_frames = process_video_frames(video_frames_1)

# Write the result using the fps of the first clip, without an inline preview.
save_video_frames(processed_frames, output_name, fps, display_video=False)

# Last expression of the cell, so the returned browser widget is displayed.
create_video_browser(browser_root)

125it [00:21,  5.74it/s]


Video saved to: 20241127_140145_yolo_tracking.mp4


VBox(children=(Dropdown(description='Select Video:', layout=Layout(width='500px'), options=(('20241127_140145_…