# IPyTracker
Jupyter Notebook to perform circle detection, tracking, and analysis.

In [1]:
from ipywidgets import Video, Image, FloatSlider, IntSlider, Button, Output, VBox
from IPython.display import display
from dataclasses import dataclass, fields
%matplotlib inline 
from matplotlib import pyplot as plt
import numpy as np
import time
import os
import cv2
import ffmpeg
import io
import subprocess

## Single Image Capture
The below cell captures, saves, and displays a single image. You might find this useful for positioning your camera correctly. Alternatively, use a program of your own choice to display a preview window.

In [2]:
# path to write image to
path = 'data/still.png'

# capture image
os.system(f'libcamera-still --width 768 --height 432 --immediate -n -e png -o {path} 2>/dev/null')

# display image
img = Image.from_file(path)
img

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03\x00\x00\x00\x01\xb0\x08\x02\x00\x00\x00\x91\xb6\…

## Video Capture
Below cell captures video, converts it to the mp4 format, and displays it in a loop. Additionally, timestamps are also saved per frame.

In [3]:
# video capture settings
path = 'data/recording'  # no file extension in path
length = 10000           # video capture duration
focus = 0.75             # lens distance to focus point in meters
width = 768              # frame width
height = 432             # frame height
shutter = 750            # exposure time in us
gain = 1.5               # gain to apply to image
fps = 100                # capture framerate

# specify options for video recording
flags = f'--width {width} --height {height} --shutter {shutter} -t {length} -n --gain {gain} --denoise cdn_off --level 4.2 --framerate {fps} --autofocus-mode manual --lens-position {1 / focus}'

# capture video
os.system('echo "starting video capture"')
os.system(f'libcamera-vid {flags} --save-pts {path}.pts -o {path}.h264 -n 2>/dev/null')

# convert .h264 to .mp4
os.system('echo "starting video conversion"')
os.system(f'ffmpeg -y -i {path}.h264 {path}.mp4 2>/dev/null')

# display video
vid = Video.from_file(f'{path}.mp4')
vid

starting video capture
starting video conversion


Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free...')

## Single Frame Selection
This cell allows you to select a single frame from your video. This can be very useful in order to finetune your configuration for the circle detection algorithm. Again, use any other tool if this is not your preferred method.
Hint: single-click the slider and use the arrow keys for precise scrolling.

In [4]:
# path to source video
video_path = 'data/recording.mp4'

# path to write selected frame to
frame_path = 'data/frame.png'

# our image byte array
img = np.ndarray((1,1,1))

# calculate number of frames in video
cap = cv2.VideoCapture(video_path)
nr_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
print(nr_frames)

# ui elements
slider = FloatSlider(min=0, max=1.0, step=0.005)
button = Button(description='Save Frame')
out = Output()
display(slider, button)

# gets called each time slider value changes
def slider_callback(change):
    global img
    with out:
        out.clear_output()
        frame_nr = int(change['new'] * nr_frames)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_nr)
        ret, img = cap.read()
        if ret:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            plt.axis('off')
            plt.imshow(img)
            plt.tight_layout()
            plt.show()

# gets called each time button gets pressed
def button_callback(_):
    global img
    with out:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(frame_path, img)
        print('image saved')
    

# register callback functions
slider.observe(slider_callback, names='value')
button.on_click(button_callback)

# display callback output
out

937.0


FloatSlider(value=0.0, max=1.0, step=0.005)

Button(description='Save Frame', style=ButtonStyle())

Output()

## Circle Detection Algorithm
The actual circle detection algorithm. For more info on the meaning of the config fields, please refer to the openCV documentation.
Right now, this only function only returns a marked image. CSV ouput will be added in the future.

In [5]:
# sharpening kernel
kernel = np.array([ \
    [0, -1, 0],     \
    [-1, 5, -1],    \
    [0, -1, 0]      \
])

# hough detection configuration
@dataclass
class HoughConfig:
    min_dist: int = 10
    hi_thresh: int = 50
    acc_thresh: int = 30
    min_radius: int = 10
    max_radius: int = 0
    preprocessing_thresh: int = 80

# circle detector returns found circle positions
# or an image with markings on circles if in debug mode
def track_objects(img, conf, debug=False):
    # preprocessing
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 5)
    sharp = cv2.filter2D(blur, -1, kernel)
    _, thresh = cv2.threshold(sharp, conf.preprocessing_thresh, 255, cv2.THRESH_BINARY)

    # detect circles
    circles = cv2.HoughCircles(                               \
        thresh, cv2.HOUGH_GRADIENT, 1, conf.min_dist,         \
        param1=conf.hi_thresh, param2=conf.acc_thresh,        \
        minRadius=conf.min_radius, maxRadius=conf.max_radius  \
    )
    if circles is None:
        return thresh if debug else None
    if not debug:
        return circles

    # draw circles
    circles = np.uint16(np.around(circles))
    res = cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR)
    for i in circles[0,:]:
        cv2.circle(res, (i[0],i[1]), i[2], (0,255,0), 2)
        cv2.circle(res, (i[0],i[1]), 2, (0,0,255), 3)
    return res

## Detection Configuration
Using the sliders from the cell below, you can create a configuration for the detection algorithm to suit your specific needs. Of course, your configuration is restricted to a certain domain using only these 6 parameters. Luckily, openCV provides a lot of documentation and tutorials, so don't be afraid to get your hands dirty!

In [6]:
# path to read selected frame from
frame_path = 'data/frame.png'

# create a default configuration
config = HoughConfig()

# gets called each time a slider value changes
def slider_callback(change):
    global img
    with out:
        out.clear_output()
        # update config
        setattr(config, change.owner.description, change.new)
        
        # apply tracking
        unmarked = cv2.imread(frame_path)
        marked = track_objects(unmarked, config, True)
        marked = cv2.cvtColor(marked, cv2.COLOR_BGR2RGB)
        
        # show image
        plt.axis('off')
        plt.imshow(marked)
        plt.tight_layout()
        plt.show()

# gets called each time button gets pressed
def button_callback(_):
    for s, f in zip(sliders, fields(HoughConfig)):
        setattr(config, field.name, s.value)
    with out:
        out.clear_output()
        print('Config Updated')
        
# ui elements
sliders = []
button = Button(description='Save Config')

# TODO: fix proper range per slider
# one slider for each field in our config
for field in fields(HoughConfig):
    # create slider
    slider = IntSlider(
        value=getattr(config, field.name),
        min=0,
        max=100,  # Adjust max value as needed
        step=1,
        description=field.name
    )
    # Register the callback with each slider
    slider.observe(slider_callback, 'value')
    sliders.append(slider)

# register button callback
button.on_click(button_callback)

out = Output()
display(VBox(sliders), button)

# display callback output
out

VBox(children=(IntSlider(value=10, description='min_dist'), IntSlider(value=50, description='hi_thresh'), IntS…

Button(description='Save Config', style=ButtonStyle())

Output()

## Cache frames
Get array of frames for processing convenience.
NB: Don't rerun this without restarting the kernel; it caches the entire video

In [7]:
# let ffmpeg write raw video to pipe
out, _ = (
    ffmpeg
    .input('data/recording.h264')
    .output('pipe:', format='rawvideo', pix_fmt='rgb24')
    .run(capture_stdout=True)
)

# capture from pipe
video = (
    np
    .frombuffer(out, dtype=np.uint8)
    .reshape([-1, height, width, 3])
)
print('done')

ffmpeg version 4.3.6-0+deb11u1+rpt5 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 10 (Raspbian 10.2.1-6+rpi1)
  configuration: --prefix=/usr --extra-version=0+deb11u1+rpt5 --toolchain=hardened --incdir=/usr/include/arm-linux-gnueabihf --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-

done


frame=  937 fps=132 q=-0.0 Lsize=  910764kB time=00:00:09.37 bitrate=796262.4kbits/s dup=12 drop=0 speed=1.32x    
video:910764kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.000000%


## Frame Santization
In this cell it is possible to gather tracking information for each frame of the video that gets cached above. By default, a subset of all frames that contain tracking points will be collected. This subset will then be made available for further manual cleanup. 

In [16]:
# metadata class for tracked objects in frames
@dataclass
class TrackingPoint:
    delta_millis: int = 0
    index: int = 0
    pos: (int, int) = (None, None)

# get metadata for each frame where objects were detected
global config
tracking_points = []
for i, frame in enumerate(video):
    circles = track_objects(frame, config)
    if circles is not None:
        for c in circles:
            p = TrackingPoint()
            p.delta_millis = 0 # TODO: fetch from timestamps file
            p.index = i
            p.pos = (c[0][0], c[0][1])
            tracking_points.append(p)

# get a subset of frames which has only non-empty frames
frame_subset = []
for p in tracking_points:
    frame_subset.append(video[p.index])
nr_frames = len(frame_subset)

# ui elements
slider = IntSlider(min=0, max=nr_frames, step=1, readout=True)
button = Button(description='Remove Frame')
out = Output()
display(slider, button)

# gets called each time slider value changes
def slider_callback(change):
    with out:
        out.clear_output()
        plt.axis('off')
        plt.imshow(frame_subset[change['new']])
        plt.tight_layout()
        plt.show()

# gets called each time button gets pressed
def button_callback(_):
    for p in tracking_points:
        if p.index == slider.value:
            frame_subset.pop(p.index)
            tracking_points.remove(p)
            break
    slider.max -= 1
    slider_callback({ 'new': slider.value })
    with out:
        print('enters')

# register callback functions
slider.observe(slider_callback, names='value')
button.on_click(button_callback)

# display callback output
out

IntSlider(value=0, max=31)

Button(description='Remove Frame', style=ButtonStyle())

Output()

## TODO
better naming;
make contour tracker;
csv writer;
rewrite single frame select to use python-ffmpeg solution;

# Bespreken met Joshua
## Is het niet te primitief?
Heel veel hoepeltjes om doorheen te springen, alleen al voor de meest basale tracking
## Welke problemen willen we de studenten wel/niet zelf laten oplossen?
Voornamelijk