In [1]:
# google: torchvision optical flow

from pathlib import Path
import numpy as np
from skimage import io
from skimage.transform import rescale, resize
from scipy.ndimage import gaussian_filter
from scipy import ndimage

import torch
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
import torchvision.transforms as T

imginfo = lambda img: print(type(img), img.dtype, img.shape, img.min(), img.max())

  warn(


In [2]:
from torchvision.models.optical_flow import raft_large

device = "cpu"
model = raft_large(pretrained=True, progress=False).to(device)
model = model.eval()



In [8]:
# === load first touch coors
import json
with open("data/single_log.json") as f:
    episode_log = json.load(f)
first_x, first_y, first_z = map(int, episode_log["first_touch"])
cropw = 200

def cut_crop(image):
    """
    :param image: [h, w, c]
    """
    return image[first_y-cropw:first_y+cropw, first_x-cropw:first_x+cropw]

In [9]:
# === Load images
file_list = sorted(Path("data/frames").glob("center*jpg"))
print("image list:", len(file_list))

i1 = io.imread(file_list[2]).astype(np.float32) / 255 # [h, w, 3] float32 [0, 1]
i2 = io.imread("data/frames/grip_image.jpg").astype(np.float32) / 255

i1 = io.imread(file_list[2]).astype(np.float32) / 255 # [h, w, 3] float32 [0, 1]
#i2 = io.imread("data/frames/grip_image.jpg").astype(np.float32) / 255
i2 = io.imread(file_list[3]).astype(np.float32) / 255

io.imsave("data/image_1.jpg", (i1 * 255).astype(np.uint8))
io.imsave("data/image_2.jpg", (i2 * 255).astype(np.uint8))

image list: 13


In [13]:
# === prepare batch for RAFT
def to_batch(image):
    batch = torch.from_numpy(image)[None, :, :, :].to(torch.float32) / 255  # [h, w, c] -> [1, h, w, c]
    batch = batch.permute(0, 3, 1, 2)  # [1, h, w, c] -> [1, c, h, w]
    
    transforms = T.Compose(
        [
            T.ConvertImageDtype(torch.float32),
            T.Normalize(mean=0.5, std=0.5),  # map [0, 1] into [-1, 1]
            #T.Resize(size=(520, 960)),
        ]
    )
    batch = transforms(batch)
    return batch

img1_batch = to_batch(i2)
img2_batch = to_batch(i1)
imginfo(img1_batch)

<class 'torch.Tensor'> torch.float32 torch.Size([1, 3, 720, 1280]) tensor(-1.) tensor(-0.9922)


In [14]:
# === compute flow with RAFT
from torchvision.utils import flow_to_image

def compute_flow_picture(img1_batch, img2_batch):
    with torch.no_grad():
        list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
    print(f"length = {len(list_of_flows)} = number of iterations of the model")
    predicted_flows = list_of_flows[-1].detach()

    flow_imgs = flow_to_image(predicted_flows)
    
    # The images have been mapped into [-1, 1] but for plotting we want them in [0, 1]
    img1_batch = [(img1 + 1) / 2 for img1 in img1_batch]
    
    grid = [[img1, flow_img] for (img1, flow_img) in zip(img1_batch, flow_imgs)]
    
    return grid[0][1]
    

In [15]:
# === save image from RAFT
flow = compute_flow_picture(img1_batch, img2_batch)

def save_torch(path, tensor):
    """
    tensor: [c, h, w] cuda float32 [0, 1]
    """
    img = (tensor.to("cpu").permute([1, 2, 0]).numpy() * 255).astype(np.uint8)
    io.imsave(path, img)

save_torch("data/raft_flow.jpg", flow)

length = 12 = number of iterations of the model


## Try segment the flow (example #0)

In [None]:
# === try #2
i1 = io.imread(file_list[2]).astype(np.float32) / 255 # [h, w, 3] float32 [0, 1]
#i2 = io.imread("data/frames/grip_image.jpg").astype(np.float32) / 255
i2 = io.imread(file_list[3]).astype(np.float32) / 255
img1_batch = to_batch(i2)
img2_batch = to_batch(i1)
flow = compute_flow_picture(img1_batch, img2_batch)
save_torch("data/raft_flow_2.jpg", flow)

In [11]:
# idea is to take the moving region closest to the center.
# For that I first threshold the moving part,
# then take histogram of distances from the center.
def compute_flow(img1_batch, img2_batch):
    with torch.no_grad():
        list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
    print(f"length = {len(list_of_flows)} = number of iterations of the model")
    predicted_flows = list_of_flows[-1].detach()

    return predicted_flows

In [12]:
flow = compute_flow(img1_batch, img2_batch)

length = 12 = number of iterations of the model


In [13]:
imginfo(flow) # (u, v) - horizontal, vertical

<class 'torch.Tensor'> torch.float32 torch.Size([1, 2, 720, 1280]) tensor(-12.4233) tensor(3.5777)


In [14]:
# magnitude
mag = (flow[:, 0, :, :] ** 2 + flow[:, 1, :, :] ** 2) ** 0.5
imginfo(mag)

<class 'torch.Tensor'> torch.float32 torch.Size([1, 720, 1280]) tensor(2.5050e-05) tensor(12.4610)


In [15]:
# hint: quantile
q = torch.tensor([0.5, 0.9, 0.95, 0.99])
print(torch.quantile(mag, q)) # distribution from whole image
print(torch.quantile(mag[mag > 1], q)) # distribtuion from moving region
# conclusion: I dont' understand the unit of flow

tensor([ 0.0323,  0.1536,  2.1189, 11.4040])
tensor([ 9.9604, 11.6004, 11.8116, 12.0742])


In [16]:
# area of the moving region?
_b, _c, h, w = flow.shape
print(h, w)
pixels = torch.sum(mag > 0.1).numpy()
print(pixels / (h * w))

720 1280
0.14130750868055555


In [17]:
def save_mask(path, tensor):
    """
    tensor: [h, w] cuda float32 [0, 1]
    """
    img = (tensor.to("cpu").numpy() * 255).astype(np.uint8)
    io.imsave(path, img)

THR = 1
mask = (mag > THR)
save_mask("data/mask.jpg", mask[0])