In [2]:
from collections import namedtuple, deque
import os

import numpy as np
import matplotlib.pyplot as plt
import cv2
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as trans

import vmdata
import more_trans
import more_sampler
import exprlib
import utils

from ezfirstae.loaddata import SlidingWindowBatchSampler

%matplotlib inline

In [3]:
# Corner-detection settings. Not referenced in the visible cells; the keys
# look like cv2.goodFeaturesToTrack keyword arguments -- TODO confirm or remove.
feature_params = dict(
    blockSize=7,
    maxCorners=100,
    minDistance=7,
    qualityLevel=0.3,
    mask=None,
)
# Termination criteria: stop on either the epsilon or the iteration-count
# condition (count 10, epsilon 0.03). Not referenced in the visible cells;
# presumably meant for a Lucas-Kanade tracker -- TODO confirm or remove.
lk_params = dict(
    criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03),
)
# Keyword arguments forwarded to cv2.buildOpticalFlowPyramid.
prmy_params = dict(
    winSize=(15, 15),
    maxLevel=8,
)
# Positional arguments forwarded to cv2.calcOpticalFlowFarneback after
# (prev, next, flow); the order follows the OpenCV signature.
fb_params = (
    0.5,  # pyr_scale
    1,    # levels
    15,   # winsize
    3,    # iterations
    5,    # poly_n
    1.2,  # poly_sigma
    0,    # flags
)

In [14]:
# Dataset root consumed by the flow experiment below. The meaning of the
# arguments (9, (8, 0, 0)) is defined by vmdata.prepare_dataset_root --
# TODO(review): document what they select.
root = vmdata.prepare_dataset_root(9, (8, 0, 0))
# Normalization is currently disabled:
#normalize = trans.Normalize(*vmdata.get_normalization_stats(root, bw=True))

# Base directory handed to MultiscaleFlowLauncher; created up front if it
# does not exist yet.
basedir = 'data.experiments-cv2flow/multiscale'
os.makedirs(basedir, exist_ok=True)


class MultiscaleFlowLauncher(exprlib.ExperimentLauncher):
    """
    Experiment launcher that computes Farneback dense optical flow
    separately at every level of an image pyramid.

    The result is a list with one array per pyramid level. ``store_result``
    writes it to an ``.npz`` archive keyed by the level index (``'0'``,
    ``'1'``, ...) and ``load_result`` reads it back in the same order.
    """

    def __init__(self):
        super().__init__(basedir, prefix='flow_', suffix='.npz',
                         translate_kwargs={
                             'root': str,
                             'indices': list,
                             'prmy_params': dict,
                             'fb_params': list,
                         })

    def load_result(self, filename):
        """
        Load the per-level flow arrays written by ``store_result``.

        :param filename: path of the ``.npz`` archive
        :return: 1-tuple holding the list of flow arrays, ordered by level
        """
        # np.load returns an NpzFile that keeps the archive open; the
        # context manager closes it once every array has been read.
        with np.load(filename) as data:
            flows = [data[str(j)] for j in range(len(data.files))]
        return (flows,)

    def store_result(self, filename, *args):
        """
        Save the per-level flow arrays to an ``.npz`` archive.

        :param filename: path of the archive to write
        :param args: 1-tuple holding the list of flow arrays, one per level
        """
        scales_flows, = args
        # Keyword names must be strings, hence the str(level index) keys.
        scales_flows = {str(j): f for j, f in enumerate(scales_flows)}
        np.savez(filename, **scales_flows)

    def run(self, **kwargs):
        """
        Compute the optical flow between consecutive frames at each pyramid
        level.

        :param kwargs: must contain ``root`` (dataset root passed to
               ``vmdata.VideoDataset``), ``indices`` (frame indices, visited
               in order), ``prmy_params`` (keyword arguments for
               ``cv2.buildOpticalFlowPyramid``) and ``fb_params`` (positional
               arguments for ``cv2.calcOpticalFlowFarneback`` following
               ``prev, next, flow``)
        :return: 1-tuple holding a list with one array per pyramid level;
                 each array stacks the flows of all consecutive frame pairs
                 along axis 0
        """
        root = kwargs['root']
        indices = kwargs['indices']
        prmy_params = kwargs['prmy_params']
        fb_params = kwargs['fb_params']

        flows = []
        # Holds the pyramids of the previous and the current frame.
        window = deque(maxlen=2)
        with vmdata.VideoDataset(root) as vdset:
            for j in indices:
                im = vdset[j]
                im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
                # [1] is the pyramid itself; [::2] keeps every other entry,
                # presumably skipping the derivative images that OpenCV
                # interleaves by default -- TODO confirm.
                pr = cv2.buildOpticalFlowPyramid(im, **prmy_params)[1][::2]
                # Materialize the levels as a list: each pyramid is zipped
                # twice (first as the newer frame, then as the older one).
                # A one-shot generator would already be exhausted the second
                # time, silently skipping every other frame pair.
                pr = [level[..., np.newaxis] for level in pr]
                window.append(pr)

                if len(window) < 2:
                    continue

                for k, (i1, i2) in enumerate(zip(*window)):
                    f = cv2.calcOpticalFlowFarneback(i1, i2, None, *fb_params)
                    # Grow the per-level buckets on first use.
                    while k >= len(flows):
                        flows.append([])
                    flows[k].append(f)
        flows = list(map(np.stack, flows))
        return (flows,)


def visualize_flows(flows, filename, title='', fps=3):
    """
    Visualize flows in HSV and write them to a video file.

    Hue encodes the flow direction, value encodes the flow magnitude
    (min-max normalized per frame, so brightness is not comparable across
    frames) and saturation is fixed at 255. Nothing is written when
    ``flows`` contains no frames.

    :param flows: flow data of shape (N, H, W, 2); the last axis holds the
           two flow components, read as ``f[..., 0]`` and ``f[..., 1]``
    :param filename: video file to write to
    :param title: currently unused; kept for interface compatibility
    :param fps: frame rate of the written video
    """
    if not flows.shape[0]:
        return
    hw = tuple(flows[0].shape[:2])
    # The video writer takes the frame size as (width, height).
    wh = hw[::-1]
    fourcc = cv2.VideoWriter_fourcc(*'XVID')

    # One HSV buffer reused for every frame; saturation stays at maximum.
    hsv = np.zeros(hw + (3,), dtype=np.uint8)
    hsv[..., 1] = 255
    with utils.videowritercontext(filename, fourcc, fps, wh) as outfile:
        for f in flows:
            mag, ang = cv2.cartToPolar(f[..., 0], f[..., 1])
            # Angle in radians -> half-degrees, the hue range of 8-bit HSV.
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            # NOTE(review): cv2.VideoWriter expects BGR frames. If
            # utils.videowritercontext is a thin wrapper around it, this
            # should be COLOR_HSV2BGR (red/blue are swapped otherwise) --
            # confirm against utils before changing.
            rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
            outfile.write(rgb)

def visualize_flows_all_scales(flows_scales, basefilename, fps=3):
    """
    Write one flow video per scale.

    The video for scale ``s`` is written to ``basefilename`` with
    ``.sc<s>.avi`` appended, using ``visualize_flows``.

    :param flows_scales: iterable of flow arrays, one per scale
    :param basefilename: common filename prefix of the written videos
    :param fps: frame rate forwarded to ``visualize_flows``
    """
    suffix_template = '.sc{}.avi'
    title_template = 'flow_scale{}'
    for scale, scale_flows in enumerate(flows_scales):
        visualize_flows(
            scale_flows,
            basefilename + suffix_template.format(scale),
            title=title_template.format(scale),
            fps=fps,
        )

In [5]:
# Run the multiscale flow experiment on dataset indices 0..999.
# NOTE(review): presumably the launcher reuses a stored result when one
# exists (see load_result/store_result) -- confirm in exprlib.
launcher = MultiscaleFlowLauncher()
(flows,) = launcher(
    root=root,
    indices=range(1000),
    prmy_params=prmy_params,
    fb_params=fb_params,
)

In [15]:
# Write one HSV flow video per pyramid scale, named after the result file.
visualize_flows_all_scales(flows, basefilename=launcher.result_filename)

Conclusion:
Gunnar Farneback's dense optical flow produces lower-quality results when computed at a single, individual scale than when computed over a pyramid of scales.
When the scale is small, the dolphin's motion is polluted by water movement.
When the scale is large, the dolphin is not captured when it is far from the camera.