# Object Tracking with YoloV7 and Pixeltable

In [None]:
import glob
import os
import sys

import numpy as np
import pandas as pd

sys.path.append('/home/marcel/pixeltable')
import PIL

In [None]:
import pixeltable as pt
from pixeltable import catalog
from pixeltable.type_system import ImageType, VideoType, StringType, IntType, ArrayType, ColumnType
from pixeltable.functions.pil import draw_boxes
%load_ext autoreload
%autoreload 2

We're loading the function in order to use it for a computed column. Note that we don't need to import the packages we needed to create the model in the first place.

In [None]:
# Create a Pixeltable client and open the `functions` database.
cl = pt.Client()
functions_db = cl.get_db('functions')
# Load the function stored under the name 'yolov7'; later cells call it on a
# PIL image (via `eval_fn`) and on the table's `frame` column.
yolov7 = functions_db.load_function('yolov7')

Sanity check

In [None]:
# `import PIL` on its own does not load the `PIL.Image` submodule; import it
# explicitly so this cell does not rely on another package having loaded it.
import PIL.Image

# Open one sample image from the Pixeltable test data.
img_file = '/home/marcel/pixeltable/pixeltable/tests/data/imagenette2-160/n03445777_2563.JPEG'
img = PIL.Image.open(img_file)

# Print the image size, then render the image in the notebook output.
print(img.size)
display(img)

In [None]:
# Call the loaded function directly on the PIL image opened above (no table involved).
yolov7.eval_fn(img)

We're now creating database `videos` for our video data.

In [None]:
# Reuse the `videos` database if it can be opened; otherwise create it.
# `except Exception` replaces the bare `except` so that KeyboardInterrupt and
# SystemExit are no longer swallowed by the fallback.
# NOTE(review): narrow this to the specific error `get_db` raises for a
# missing database once that type is confirmed.
try:
    videos = cl.get_db('videos')
except Exception:
    videos = cl.create_db('videos')

The videos and their frames will be stored in a table `data`. Initially it only contains the path to the video file, the frame, and the frame sequence number (within the video).

In [None]:
# Drop any previous `data` table (errors ignored) so the cell can be re-run.
videos.drop_table('data', ignore_errors=True)

# Schema: the source video, one extracted frame (not indexed), and the frame's
# sequence number. All three columns are non-nullable.
data_columns = [
    catalog.Column('video', VideoType(), nullable=False),
    catalog.Column('frame', ImageType(), nullable=False, indexed=False),
    catalog.Column('frame_idx', IntType(), nullable=False),
]
t = videos.create_table('data', data_columns)

We're using a few short sample videos for the demo.

In [None]:
# Recursively collect every .mp4 file below the current working directory and
# convert each match to an absolute path.
# NOTE: glob returns matches in arbitrary, filesystem-dependent order, so the
# position of a given video in this list is not guaranteed.
video_filepaths = [
    os.path.abspath(path)
    for path in glob.glob('./**/*.mp4', recursive=True)
]
video_filepaths

We "load" the data by supplying the file paths for the video files.
- The `video_column` argument instructs Pixeltable to treat that data as video files from which to extract frames.
- Each input row, corresponding to one video, is expanded into one row per frame (subject to the requested frame rate, in this case 1 fps).
- Each frame is extracted to a JPEG file that is stored in the Pixeltable home directory.
- The columns `frame` and `frame_idx` receive the frame file path and frame sequence number, respectively.

In [None]:
# The slice [1:2] keeps only the second path in `video_filepaths` (or nothing
# if fewer than two were found), so at most one video is inserted.
df = pd.DataFrame({'video': video_filepaths[1:2]})
# `video_column` names the column holding the video path; `frame_column` and
# `frame_idx_column` name the columns that receive each extracted frame and its
# sequence number. Frames are requested at fps=1.
t.insert_pandas(df, video_column='video', frame_column='frame', frame_idx_column='frame_idx', fps=1)

We loaded frames of an intersection in Bangkok.

In [None]:
# Show 2 rows: video, frame, frame index, and the frame's width and height.
t[t.video, t.frame, t.frame_idx, t.frame.width, t.frame.height].show(2)

The same with detections:

In [None]:
# Show 2 rows with `yolov7` applied to the frame as part of the query (nothing is stored yet).
t[t.frame, yolov7(t.frame)].show(2)

We're happy with the result and decide to add detections to the table. Running `yolov7()` is an expensive operation and adding it as a computed column makes the detections part of the stored table data.

In [None]:
# Add `detections` as a computed column defined by `yolov7(t.frame)`.
t.add_column(catalog.Column('detections', computed_with=yolov7(t.frame)))

In [None]:
# Show 2 rows, this time reading detections from the `detections` column.
t[t.frame, t.detections].show(2)

We can call the built-in function `draw_boxes()` to draw the bounding boxes onto the frames.

The `detections` column is an array column, which supports the standard slicing operations. In this case, each detection contains the bounding box plus the confidence and class number. For `draw_boxes()`, we need to create a slice containing only the bounding boxes.

In [None]:
# `t.detections[:, :4]` keeps the first four values of every detection, which
# are passed to `draw_boxes` together with the frame; show 10 rows.
t[t.frame, draw_boxes(t.frame, t.detections[:, :4])].show(10)