In [2]:
%load_ext autoreload
%autoreload 2

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import torch
import torchvision.io

### Load videos and compute NNFDIV

In [3]:
from common_utils.video import read_original_video
from nnf_evaluation import resize_by_target_size, get_nnfdiv

# Frame range and resize target shared by all three clips, so the generated
# videos are compared against the original at the same length and resolution.
START_FRAME, END_FRAME = 0, 74
TARGET_SHAPE = 144


def load_video(frames_dir):
    """
        @param frames_dir: directory passed to read_original_video
        Read the clip with the shared START_FRAME / END_FRAME range and resize it
        with resize_by_target_size(target_shape=TARGET_SHAPE).
        Returns whatever resize_by_target_size returns.
    """
    vid = read_original_video(frames_dir, start_frame=START_FRAME, end_frame=END_FRAME)
    return resize_by_target_size(vid, target_shape=TARGET_SHAPE)


vid_orig = load_video('./data/airbaloons/original')
vid_vgpnn = load_video('./data/airbaloons/vgpnn/1')
vid_sinfusion = load_video('./data/airbaloons/sinfusion/1')

# Each generated video is scored against the original. The scores get their own
# names so that the VGPNN value is not overwritten by the SinFusion one.
nnf_vgpnn, nnfdiv_vgpnn = get_nnfdiv(vid_vgpnn, vid_orig)
print('NNFDIV VGPNN:', nnfdiv_vgpnn)
nnf_sinfusion, nnfdiv_sinfusion = get_nnfdiv(vid_sinfusion, vid_orig)
print('NNFDIV SinFusion:', nnfdiv_sinfusion)

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


NNFDIV VGPNN: 0.17407441448750027
NNFDIV SinFusion: 0.5798958891955985


### Visualizing the NNF

In [14]:
from IPython.display import HTML
from torchvision.transforms.functional import equalize
from common_utils.video import html_vid

def nnf2rgb(nnf):
    """
        @param nnf of shape: 1CTHW
        Histogram-equalize the NNF (after casting it to uint8) so that it can be
        viewed as a video. This is for visualization purposes only.
        @return uint8 torch tensor on the CPU with shape THWC, taken from the
                first batch element.
    """
    batch, channels, frames, height, width = nnf.shape

    # equalize is applied to a 4D tensor, so the time axis is folded into the
    # height axis first and unfolded again afterwards
    stacked = nnf.contiguous().to(torch.uint8).view(batch, channels, frames * height, width)
    equalized = equalize(stacked).view(batch, channels, frames, height, width)

    # first batch element, CTHW -> THWC (the usual layout for video arrays)
    first_clip = equalized[0]
    return first_clip.permute(1, 2, 3, 0).contiguous().to('cpu', torch.uint8)

# Equalized NNF of the VGPNN sample as a THWC uint8 video tensor.
nnf_rgb = nnf2rgb(nnf_vgpnn)

# Display it inline as an HTML5 video.
nnf_player = html_vid(nnf_rgb)
HTML(nnf_player.to_html5_video())

# To save the video to disk, uncomment the line below (requires the "av" package):
# torchvision.io.write_video('./nnf_vid.mp4', nnf_rgb, fps=15)

#### Code for reproducing the NNF maps in [SinFusion Project Page](https://yaniv.nikankin.com/sinfusion/static/experiments.html)
As opposed to the NNF map visualization above, the NNF maps in the SinFusion project page:
1. Only show the NNF in the "TH plane".
2. Show the ["flow wheel"](https://people.csail.mit.edu/celiu/OpticalFlow/) colors with flow_vis (need to [pip install flow_vis](https://pypi.org/project/flow-vis/))

In [4]:
from IPython.display import HTML
from tqdm.auto import tqdm
from common_utils.video import html_vid
import flow_vis

def nnf_th2rgb(nnf):
    """
        @param nnf: 5D tensor indexed as [batch, channel, frame, row, col]
        For every frame, take channels 1 and 2 of the first batch element as a
        2-component flow field and color it with flow_vis.flow_to_color.
        (presumably these two channels are the "TH" components -- TODO confirm)
        @return the per-frame color images stacked along a new leading frame axis.
    """
    def frame_to_color(t):
        # (2, H, W) -> (H, W, 2) numpy array, which is what gets passed to flow_to_color
        field = nnf[0, [1,2], t, :, :].permute(1,2,0).cpu().numpy()
        colored = flow_vis.flow_to_color(field, convert_to_bgr=False)
        return torch.from_numpy(colored)

    num_frames = nnf.shape[2]
    return torch.stack([frame_to_color(t) for t in tqdm(range(num_frames))])

# Color the VGPNN NNF frame by frame and save the result as an mp4 at 15 fps.
nnf_th_rgb = nnf_th2rgb(nnf_vgpnn)
torchvision.io.write_video('./NNF_TH.mp4', nnf_th_rgb, fps=15)

  0%|          | 0/73 [00:00<?, ?it/s]

### Computing the NNFDIV from an NNF

This is pretty straightforward - it is simply the compression ratio of the NNF (which gives an upper bound on the minimal description length of the flow field).

The idea is that this penalizes "simple" generated samples, that are just some "shuffling" of the original image or video (which is the case in [VGPNN](https://nivha.github.io/vgpnn), [GPNN](https://www.wisdom.weizmann.ac.il/~vision/gpnn) and [SinGAN](https://arxiv.org/abs/1905.01164) or other methods for generation from a single image).

In [16]:
import zlib
def zlib_score(nnf):
    """
        @param nnf: torch tensor (cast to uint8 before compression)
        Compression ratio of the NNF under zlib:
        (number of compressed bytes) / (number of elements in the NNF).
        Lower values mean the NNF is easier to compress.
    """
    # move to CPU as uint8 and make the memory contiguous so zlib can read the buffer
    raw = nnf.to('cpu', torch.uint8).contiguous().numpy()
    compressed_len = len(zlib.compress(raw))
    return compressed_len / raw.size

# (VGPNN, SinFusion) compression ratios, for comparison with the NNFDIV values printed earlier
tuple(zlib_score(nnf) for nnf in (nnf_vgpnn, nnf_sinfusion))

(0.17411593929882824, 0.5797582749580574)