# Fast Moving Object (FMO) detection

Based on [Rozumnyi Kotera Sroubek Novotny Matas, CVPR 2017, "The World of Fast Moving Objects"](https://arxiv.org/pdf/1611.07889.pdf).

Initial implementation of the `detector` algorithm.

In [2]:
import cv2
import math
import numpy as np
import os


# Directory holding the input video and the extracted frames.
# expanduser('~') is used instead of os.getenv('HOME') because the latter
# returns None when HOME is unset, which makes os.path.join raise TypeError.
BASE = os.path.join(os.path.expanduser('~'), 'Videos')
# Source video from which frames are extracted.
VIDEO = os.path.join(BASE, 'Tennis-Sep2016.mov')
# Path template for saved frames; `{}` is filled with the frame index
# (or a label such as 'final' for the detector output).
IMG_TEMPLATE = os.path.join(BASE, 'tennis-frame-{}.png')

## Extract sequence of frames from video

While the video plays, hit any key to move forward frame by frame; use `s` to save the current frame and `q` to stop.
The first three frames saved will be used in the subsequent detection phase.

In [11]:
def extract_img(video, img_name):
    """Step through a video frame by frame, saving selected frames.

    Each frame is shown in a window named 'tennis' and the function blocks
    until a key is pressed:

    - `s` saves the displayed frame and moves on to the next one;
    - `q` stops the playback;
    - any other key moves on to the next frame without saving.

    Saved frames are numbered consecutively starting from 1.

    :param video: path of the video file to open
    :param img_name: format string with one `{}` placeholder that receives
        the index of the saved frame
    """
    cap = cv2.VideoCapture(video)

    idx = 1
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed (end of the video or a read error);
                # without this check `frame` is None and imshow would fail.
                break
            cv2.imshow('tennis', frame)
            key = cv2.waitKey(0) & 0xFF
            if key == ord('q'):
                break
            if key == ord('s'):
                cv2.imwrite(img_name.format(idx), frame)
                idx += 1
    finally:
        # Always free the capture and close the window, even on error.
        cap.release()
        cv2.destroyAllWindows()

In [12]:
# Uncomment to extract fresh images to process.
#extract_img(VIDEO, IMG_TEMPLATE)

In [13]:
# Paths of the three consecutive frames saved by the extraction step.
images = [IMG_TEMPLATE.format(idx) for idx in range(1, 4)]

# Frames at times t-1, t and t+1.
im_tm1 = cv2.imread(images[0])
im_t = cv2.imread(images[1])
im_tp1 = cv2.imread(images[2])

# cv2.imread signals a missing/unreadable file by returning None instead of
# raising; fail here with the offending paths rather than inside absdiff.
missing = [
    path
    for path, frame in zip(images, (im_tm1, im_t, im_tp1))
    if frame is None
]
if missing:
    raise IOError("Could not read frame image(s): {}".format(", ".join(missing)))

# Absolute per-pixel differences between the frames:
#   delta_plus  = |I(t)   - I(t-1)|
#   delta_0     = |I(t+1) - I(t-1)|
#   delta_minus = |I(t)   - I(t+1)|
delta_plus = cv2.absdiff(im_t, im_tm1)
delta_0 = cv2.absdiff(im_tp1, im_tm1)
delta_minus = cv2.absdiff(im_t, im_tp1)

cv2.imshow('delta_plus', delta_plus)
cv2.imshow('delta_minus', delta_minus)
cv2.imshow('delta_0', delta_0)

# Keep the windows open until a key is pressed, or for at most 15 seconds.
cv2.waitKey(15000)
cv2.destroyAllWindows()

In [7]:
def norm(img, row, col, channels=3):
    """Return the channel-scaled magnitude of one pixel.

    The value is the square root of the sum of the squared channel values
    of the pixel at (`row`, `col`), divided by `channels`. Note that,
    because of the final division, this is the Euclidean (L2) norm of the
    pixel scaled down by the number of channels, not the plain L2 norm.

    :param img: image indexed as (row, column, channel)
    :param row: row index of the pixel
    :param col: column index of the pixel
    :param channels: number of leading channels to include
    """
    # `item` yields plain Python scalars, so squaring cannot overflow the
    # image's (typically 8-bit) element type.
    values = (img.item(row, col, ch) for ch in range(channels))
    total = sum(value * value for value in values)
    return math.sqrt(total) / channels

In [8]:
def threshold(img, threshold, invert=False):
    """Compute a binary image, given a threshold.

    For every pixel, the channel-scaled magnitude is computed: the square
    root of the sum of the squared channel values, divided by the number of
    channels. Pixels whose magnitude is strictly greater than `threshold`
    are emitted as `255` in the same position; all others as `0`.

    If `invert` is `True`, the opposite holds (`0` where the magnitude
    exceeds the threshold, `255` elsewhere).

    The whole image is processed with array operations instead of a
    per-pixel Python loop. A 2-D (single-channel) image is accepted and
    treated as having one channel.

    :type img: Mat
    :type threshold: float
    :type invert: bool
    :return: array of shape (rows, columns, 1) and dtype `uint8`
    """
    # Work in float64 so that squaring 8-bit values cannot wrap around.
    pixels = np.asarray(img, dtype=np.float64)
    if pixels.ndim == 2:
        pixels = pixels[:, :, np.newaxis]
    channels = pixels.shape[2]

    magnitude = np.sqrt(np.sum(pixels * pixels, axis=2, keepdims=True)) / channels
    above = magnitude > threshold
    if invert:
        above = ~above
    return np.where(above, 255, 0).astype(np.uint8)

In [14]:
def combine(dbp, dbm, db0r):
    """Combine the three binary images.

    If the corresponding pixel in all three images is non-zero, a "black"
    value (`0`) is emitted in the result image, otherwise a "white" value
    (`255`).

    The resultant image should be a "negative" image of moving objects in
    the original frames.

    Only the first channel of each input is inspected, and only over the
    rows and columns covered by `dbp`.

    :param dbp: binary image, indexed as (row, column, channel)
    :param dbm: binary image, indexed as (row, column, channel)
    :param db0r: binary image, indexed as (row, column, channel)
    :return: array of shape (rows, columns, 1) and dtype `uint8`
    """
    rows, columns = dbp.shape[:2]

    # True where every one of the three masks is set.
    all_set = (
        (dbp[:, :, 0] > 0)
        & (dbm[:rows, :columns, 0] > 0)
        & (db0r[:rows, :columns, 0] > 0)
    )
    return np.where(all_set, 0, 255).astype(np.uint8)[:, :, np.newaxis]

## Implementation of the Detector algorithm

See section `4.1` of the reference; we essentially compute:

    Delta = Delta+ & Delta- & ~Delta0
    
 to isolate "candidate FMOs" (connected components of non-zero pixels).
 
 Press any key to close the image and terminate the notebook.

In [16]:
# Per-channel (mean, standard deviation) pair for each difference image.
sp = cv2.meanStdDev(delta_plus)
sm = cv2.meanStdDev(delta_minus)
s0 = cv2.meanStdDev(delta_0)
print("E(d+):", sp, "\nE(d-):", sm, "\nE(d0):", s0)

# One threshold per difference image, built from the statistics of the
# first channel only: mean + 3 * sqrt(second value of meanStdDev).
# NOTE(review): meanStdDev already returns a standard deviation, so the
# extra sqrt looks unintended (mean + 3 * sigma may have been meant) --
# confirm before changing, since it alters the detection result.
th = [
    stats[0][0][0] + 3 * math.sqrt(stats[1][0][0])
    for stats in (sp, sm, s0)
]
print("Thresholds:", th)

# Binary masks: large change in Delta+ and Delta-, small change in Delta0
# (hence the inverted threshold for the latter).
dbp = threshold(delta_plus, th[0])
dbm = threshold(delta_minus, th[1])
db0 = threshold(delta_0, th[2], invert=True)

detect = combine(dbp, dbm, db0)

# Show the result until any key is pressed, then store it next to the frames.
cv2.imshow('Detector', detect)
cv2.waitKey(0)

cv2.imwrite(IMG_TEMPLATE.format('final'), detect)
cv2.destroyAllWindows()

E(d+): (array([[ 2.7639464 ],
       [ 2.75421224],
       [ 2.96147352]]), array([[ 7.67804637],
       [ 7.92485327],
       [ 8.36811816]])) 
E(d-): (array([[ 3.0971365 ],
       [ 3.13435872],
       [ 3.34222222]]), array([[ 8.75848307],
       [ 9.10515107],
       [ 9.51610271]])) 
E(d0): (array([[ 4.56722656],
       [ 4.54775716],
       [ 4.85450087]]), array([[ 12.39641696],
       [ 12.42174856],
       [ 13.0075392 ]]))
Thresholds: [11.076732774848583, 11.975556834941777, 15.129790294789636]
