* Our team built a scene-change detector based on optical flow; in this competition we use it to reset the tracker whenever the scene changes.
* This code is based on [this great notebook](https://www.kaggle.com/daigohirooka/optical-flow-estimation-using-raft).

# Scene Change Detection using Optical Flow

# What's Optical flow?

> Optical flow or optic flow is the pattern of apparent motion of objects, surfaces, and edges in a visual scene caused by the relative motion between an observer and a scene. - https://en.wikipedia.org/wiki/Optical_flow

In [None]:
import os
import sys
sys.path.append('/kaggle/input/raft-pytorch')
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch

from glob import glob
from PIL import Image
from tqdm import tqdm

In [None]:
# When True, only the first 1000 rows of the dataframe are processed (quick test run).
DEBUG = False


# Scale factor handed to cv2.resize (fx / fy) before frames are fed to the model.
# Author's notes on the alternatives (resulting frame shape, estimated total elapsed time):
#   small_rate = 0.5  -> (360, 640, 3), 0.386 [h]
#   small_rate = 0.2  -> (144, 256, 3), 0.753 [h]
small_rate = 0.25 # image size to (180, 320, 3) estimated elapsed time total = 0.244 [h]


# RAFT introduction

This notebook uses **RAFT: Recurrent All-Pairs Field Transforms for Optical Flow**, a model introduced at ECCV 2020 by Teed et al. (Princeton University), where it won the Best Paper Award.
* https://arxiv.org/abs/2003.12039
* https://github.com/princeton-vl/RAFT (licensed under the BSD 3-Clause License)

Briefly, RAFT has the following features:
* Recurrent optical flow estimation
* Compute pixel-wise correlation between pair-wise input images and reuse it in the following recurrent step
* Lightweight, rapid inference, and high accuracy

![RAFT architecture image from https://github.com/princeton-vl/RAFT](https://github.com/princeton-vl/RAFT/raw/master/RAFT.png)

This is [my explanation slide](https://speakerdeck.com/daigo0927/raft-recurrent-all-pairs-field-transforms-for-optical-flow) in Japanese.

# Run RAFT on sample images

In [None]:
from raft.core.raft import RAFT
from raft.core.utils import flow_viz
from raft.core.utils.utils import InputPadder
from raft.config import RAFTConfig

In [None]:
# Keyword options handed to RAFTConfig; collected in one dict so they are easy to scan.
raft_options = {
    'dropout': 0,
    'alternate_corr': False,
    'small': False,
    'mixed_precision': False,
}
config = RAFTConfig(**raft_options)

# Build the network from the config; the bare name below displays it in the notebook.
model = RAFT(config)
model

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'device: {device}')

weights_path = '/kaggle/input/raft-pytorch/raft-sintel.pth'
# weights_path = '/kaggle/input/raft-pytorch/raft-things.pth'

# Move the model first, then read the checkpoint straight onto the same device
# and restore the weights.
model.to(device)
ckpt = torch.load(weights_path, map_location=device)
model.load_state_dict(ckpt)

In [None]:
# Gather the demo frames, ordered by path.
image_files = sorted(glob('/kaggle/input/raft-pytorch/raft/demo-frames/*.png'))

print(f'Found {len(image_files)} images')
print(image_files)  # the list is already sorted at this point

In [None]:
def load_image(imfile, device):
    """Load an image file as a float tensor of shape (1, 3, H, W) on ``device``.

    The image is converted to 8-bit RGB before it becomes a tensor, so
    grayscale, palette and RGBA files also produce exactly three channels
    (previously they either failed in ``permute`` or yielded four channels).

    Args:
        imfile: Path or file object accepted by ``PIL.Image.open``.
        device: Target device for the returned tensor.

    Returns:
        A float tensor with a leading batch dimension of 1 and channels first;
        pixel values are left in the 0-255 range.
    """
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(imfile) as pil_img:
        img = np.array(pil_img.convert('RGB'), dtype=np.uint8)
    img = torch.from_numpy(img).permute(2, 0, 1).float()
    return img[None].to(device)


def viz(img1, img2, flo):
    """Plot two input frames and the colour-coded optical flow side by side.

    Args:
        img1: Batched channels-first image tensor; only batch element 0 is shown.
        img2: Second image tensor, same layout as ``img1``.
        flo: Batched channels-first flow tensor; the mean and standard deviation
            shown in the title are taken over the whole tensor, the picture
            shows batch element 0.
    """
    # Channels-last numpy copies for matplotlib.
    frame_a = img1[0].permute(1, 2, 0).cpu().numpy()
    frame_b = img2[0].permute(1, 2, 0).cpu().numpy()

    # Bring the flow to host memory once and reuse it for the statistics and the picture.
    flow_np = flo.cpu().numpy()
    flo_mean = np.mean(flow_np)
    flo_std = np.std(flow_np)

    # map flow to rgb image
    flow_rgb = flow_viz.flow_to_image(flow_np[0].transpose(1, 2, 0))

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 4))
    for axis, title, frame in ((ax1, 'input image1', frame_a),
                               (ax2, 'input image2', frame_b)):
        axis.set_title(title)
        axis.imshow(frame.astype(int))

    ax3.set_title(f'optical flow: mean{flo_mean:.1f}, std{flo_std:.1f}')
    cm = ax3.imshow(flow_rgb)
    fig.colorbar(cm)
    plt.show()

In [None]:
# Switch the model to evaluation mode before running inference on the demo frames.
model.eval()
n_vis = 3

# Pair each of the first n_vis frames with the frame that follows it.
frame_pairs = zip(image_files[:n_vis], image_files[1:1+n_vis])

for file1, file2 in tqdm(frame_pairs):
    image1, image2 = (load_image(path, device) for path in (file1, file2))

    # Pad both frames with the padder built from the first frame's shape.
    padder = InputPadder(image1.shape)
    image1, image2 = padder.pad(image1, image2)

    with torch.no_grad():
        flow_low, flow_up = model(image1, image2, iters=20, test_mode=True)

    viz(image1, image2, flow_up)

The first and second columns show the input image pair, and the right column shows the predicted optical flow.

In [None]:
import pandas as pd
import cv2 

# Root directory of the competition data (note the trailing slash).
TRAIN_PATH_IMG = '/kaggle/input/tensorflow-great-barrier-reef/'


def get_path(row):
    """Store the path of the row's frame image under ``row['image_path']``.

    The path is ``<TRAIN_PATH_IMG>/train_images/video_<video_id>/<video_frame>.jpg``.
    It is assembled with ``os.path.join`` so the trailing slash of
    ``TRAIN_PATH_IMG`` no longer produces a doubled ``//`` separator.

    Args:
        row: Row-like object exposing ``video_id`` and ``video_frame`` attributes
            and supporting item assignment (e.g. a pandas Series).

    Returns:
        The same ``row`` object, with ``image_path`` set.
    """
    row['image_path'] = os.path.join(
        TRAIN_PATH_IMG,
        'train_images',
        f'video_{row.video_id}',
        f'{row.video_frame}.jpg',
    )
    return row

# Read the training metadata and attach an image path to every row.
train_csv = '/kaggle/input/tensorflow-great-barrier-reef/train.csv'
df = pd.read_csv(train_csv).apply(get_path, axis=1)

# Quick look at the first rows.
df.head(5)


In [None]:
# Number of rows per video id.
df['video_id'].value_counts()

In [None]:
# Number of rows per sequence id.
df['sequence'].value_counts()

In [None]:
# Rows 0-19 of df (the author's "same sequence" sample): channel order reversed
# after cv2.imread, then scaled by small_rate.
frames = np.stack(
    [
        cv2.resize(cv2.imread(df.image_path[row])[:, :, ::-1],
                   dsize=None, fx=small_rate, fy=small_rate)
        for row in range(20)
    ],
    axis=0,
)
img = frames[-1]  # last frame, kept under the name the original loop left behind
print (img.shape)

In [None]:
# Rows 475-494 of df (the author's "include sequence change" sample), prepared
# the same way: channel order reversed, then scaled by small_rate.
frames2 = np.stack(
    [
        cv2.resize(cv2.imread(df.image_path[row])[:, :, ::-1],
                   dsize=None, fx=small_rate, fy=small_rate)
        for row in range(475, 475 + 20)
    ],
    axis=0,
)

In [None]:
# Rows 6705-6724 of df (the author's "include video change" sample), prepared
# the same way: channel order reversed, then scaled by small_rate.
frames3 = np.stack(
    [
        cv2.resize(cv2.imread(df.image_path[row])[:, :, ::-1],
                   dsize=None, fx=small_rate, fy=small_rate)
        for row in range(6705, 6705 + 20)
    ],
    axis=0,
)

# Same sequence

In [None]:
#%%time
import time

n_vis = 10
start = time.time()

# Estimate and plot the flow for consecutive pairs (i, i+1) taken from `frames`.
# The measured time includes the plotting done by viz().
# NOTE(review): this cell calls the model with iters=20, the later cells use iters=10.
for i in range(n_vis):
    # HWC uint8 array -> 1xCxHxW float tensor on the target device
    image1, image2 = (
        torch.from_numpy(frames[k]).permute(2, 0, 1).float()[None].to(device)
        for k in (i, i + 1)
    )

    padder = InputPadder(image1.shape)
    image1, image2 = padder.pad(image1, image2)

    with torch.no_grad():
        flow_low, flow_up = model(image1, image2, iters=20, test_mode=True)

    viz(image1, image2, flow_up)

end = time.time()
print (f'elapsed time= {(end - start) / n_vis:.3f} [sec/frame]')

# sequence change

In [None]:
#%%time

start = time.time()

# Same procedure on `frames2`; each pair's flow is plotted, so the measured time
# includes the plotting done by viz().
for i in range(n_vis):
    # HWC uint8 array -> 1xCxHxW float tensor on the target device
    image1 = torch.from_numpy(frames2[i]).permute(2, 0, 1).float().unsqueeze(0).to(device)
    image2 = torch.from_numpy(frames2[i+1]).permute(2, 0, 1).float().unsqueeze(0).to(device)

    padder = InputPadder(image1.shape)
    image1, image2 = padder.pad(image1, image2)

    with torch.no_grad():
        flow_low, flow_up = model(image1, image2, iters=10, test_mode=True)

    viz(image1, image2, flow_up)

end = time.time()

print (f'elapsed time= {(end - start) / n_vis:.3f} [sec/frame]')

# video change

In [None]:
start = time.time()

# Same procedure on `frames3`; each pair's flow is plotted, so the measured time
# includes the plotting done by viz().
for i in range(n_vis):
    first_np, second_np = frames3[i], frames3[i+1]

    # HWC uint8 array -> 1xCxHxW float tensor on the target device
    image1 = torch.from_numpy(first_np).permute(2, 0, 1).float()[None].to(device)
    image2 = torch.from_numpy(second_np).permute(2, 0, 1).float()[None].to(device)

    padder = InputPadder(image1.shape)
    image1, image2 = padder.pad(image1, image2)

    with torch.no_grad():
        flow_low, flow_up = model(image1, image2, iters=10, test_mode=True)

    viz(image1, image2, flow_up)

end = time.time()

print (f'elapsed time= {(end - start) / n_vis:.3f} [sec/frame]')

# elapsed time without display

In [None]:
#%%time

# Measure pure inference time per frame pair on `frames2` (no plotting).
# CUDA work is queued asynchronously, so wait for the device before each clock
# reading; otherwise pending kernels may not be counted in the measurement.
if device.type == 'cuda':
    torch.cuda.synchronize()
start = time.time()

for i in range(n_vis):
    # HWC uint8 array -> 1xCxHxW float tensor on the target device
    image1 = torch.from_numpy(frames2[i]).permute(2, 0, 1).float()[None].to(device)
    image2 = torch.from_numpy(frames2[i+1]).permute(2, 0, 1).float()[None].to(device)

    padder = InputPadder(image1.shape)
    image1, image2 = padder.pad(image1, image2)

    with torch.no_grad():
        flow_low, flow_up = model(image1, image2, iters=10, test_mode=True)

if device.type == 'cuda':
    torch.cuda.synchronize()
end = time.time()

print (f'elapsed time= {(end - start) / n_vis:.3f} [sec/frame]')

In [None]:
# Extrapolate the per-frame time from the last timing run to 13499 frames, in hours.
sec_per_frame = (end - start) / n_vis
print (f'estimated elapsed time total = {sec_per_frame * 13499 / 60 / 60 :.3f} [h]')

# predict all images

In [None]:
# In debug mode keep only the first 1000 rows for a quick run.
if DEBUG:
    # Take an explicit copy: new columns are written into df later, and writing
    # into a plain slice triggers pandas' chained-assignment warning.
    df = df[:1000].copy()

In [None]:
%%time

def _load_frame_tensor(path):
    """Read one image and return it as a (1, C, H, W) float tensor on ``device``.

    The channel axis returned by ``cv2.imread`` is reversed (OpenCV loads BGR),
    then the image is scaled by ``small_rate``.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``path``.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read image: {path}')
    rgb = cv2.resize(bgr[:, :, ::-1], dsize=None, fx=small_rate, fy=small_rate)
    return torch.from_numpy(rgb).permute(2, 0, 1).float()[None].to(device)


# Flow statistics between every pair of consecutive rows of df.
# Assumes df has the default 0..n-1 index (as read from the CSV), which the
# original label-based df.image_path[i] / df.loc[i, ...] accesses relied on too.
image_paths = df.image_path.to_numpy()
n_frames = len(image_paths)

# Row i holds (mean, std, median) of the flow from frame i to frame i+1.
# The last frame has no successor, so its row stays NaN.
flow_stats = np.full((n_frames, 3), np.nan)

# Each frame is decoded and resized once, then reused as the first image of the next pair.
prev_frame = _load_frame_tensor(image_paths[0]) if n_frames > 1 else None
for i in range(n_frames - 1):
    next_frame = _load_frame_tensor(image_paths[i + 1])

    padder = InputPadder(prev_frame.shape)
    image1, image2 = padder.pad(prev_frame, next_frame)

    with torch.no_grad():
        _, flow_up = model(image1, image2, iters=10, test_mode=True)

    # NOTE(review): the statistics are taken over the flow of the padded inputs,
    # as in the original code; confirm whether the padded border should be cropped first.
    flow_up = flow_up.cpu().numpy()
    flow_stats[i] = (np.mean(flow_up), np.std(flow_up), np.median(flow_up))

    prev_frame = next_frame

# Write the three columns in one go instead of one scalar df.loc assignment per row.
df['flow_mean'] = flow_stats[:, 0]
df['flow_std'] = flow_stats[:, 1]
df['flow_med'] = flow_stats[:, 2]
    

# scene change detection from stats of optical flow

In [None]:
# Sequence id (left axis, green line) against the mean optical flow per row (right axis, dots).
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()
seq_handles = ax1.plot(df.sequence, label='seq_id', c='g', linewidth=0.5)
flow_handles = ax2.plot(df.flow_mean, 'o', label='mean')

ax1.set_ylabel(r'sequence id')
ax2.set_ylabel(r'flow value')

# plt.legend() only collects artists from the current (twin) axes, which drops
# the 'seq_id' entry; pass the artists from both axes explicitly.
ax2.legend(handles=seq_handles + flow_handles)

In [None]:
# Sequence id (left axis, green line) against the median optical flow per row (right axis, dots).
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()

seq_handles = ax1.plot(df.sequence, label='seq_id', c='g', linewidth=0.5)
flow_handles = ax2.plot(df.flow_med, 'o', label='median')

ax1.set_ylabel(r'sequence id')
ax2.set_ylabel(r'flow value')

# plt.legend() only collects artists from the current (twin) axes, which drops
# the 'seq_id' entry; pass the artists from both axes explicitly.
ax2.legend(handles=seq_handles + flow_handles)

In [None]:
# Sequence id (left axis, green line) against the optical-flow standard deviation per row (right axis, dots).
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()
seq_handles = ax1.plot(df.sequence, label='seq_id', c='g', linewidth=0.5)
flow_handles = ax2.plot(df.flow_std, 'o', label='std')

# plt.legend() only collects artists from the current (twin) axes, which drops
# the 'seq_id' entry; pass the artists from both axes explicitly.
ax2.legend(handles=seq_handles + flow_handles)

ax1.set_ylabel(r'sequence id')
ax2.set_ylabel(r'flow value')

In [None]:
import seaborn as sns
# Distribution of the per-row flow standard deviation.
# Rows without a computed value (the last row has no successor frame) hold NaN;
# drop them explicitly rather than depend on how plt.hist treats NaN.
plt.hist(df.flow_std.dropna(), bins=100);

In [None]:
# Save the dataframe, including the flow_* columns, without the index column.
output_csv = 'opt_flow.csv'
df.to_csv(output_csv, index=False)