In [3]:
import cv2
import h5py
import numpy as np
from PIL import Image
from tqdm import tqdm

In [4]:
# Crop box handed to PIL's Image.crop in format_frame: (left, upper, right, lower) in pixels.
CROP_DIM = (0, 40, 640, 360)
# Target size handed to PIL's Image.resize in format_frame: (width, height) in pixels.
NEW_SIZE = (200, 100)

def op_flow_raw(img0, img1):
    """Compute dense Farneback optical flow from ``img0`` to ``img1``.

    Both inputs are converted from BGR to grayscale before the flow is
    estimated.

    Args:
        img0: Earlier frame, a BGR image array.
        img1: Later frame, a BGR image array of the same size.

    Returns:
        The flow field returned by ``cv2.calcOpticalFlowFarneback``; the last
        axis holds the two displacement components.
    """
    prev_gray, next_gray = (
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (img0, img1)
    )
    return cv2.calcOpticalFlowFarneback(
        prev_gray,
        next_gray,
        None,
        pyr_scale=0.5,
        levels=3,
        winsize=5,
        iterations=3,
        poly_n=5,
        poly_sigma=1.1,
        flags=0,
    )

def op_flow_bgr(flow, shape):
    """Render a dense flow field as a BGR image via an HSV encoding.

    Hue encodes the flow direction, saturation is fixed at maximum and value
    encodes the flow magnitude min-max normalised to 0..255.

    Args:
        flow: Flow field whose last axis holds the two displacement components.
        shape: Shape of the 3-channel uint8 image to allocate for the result.

    Returns:
        The colour-coded flow as a BGR uint8 image.
    """
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    hsv = np.zeros(shape, dtype=np.uint8)
    # Saturation is constant, so set it first; the channels are independent.
    hsv[..., 1] = 255
    # Radians -> degrees, halved so the hue fits OpenCV's 8-bit hue range.
    hsv[..., 0] = angle * 180 / np.pi / 2
    # Brightness follows the per-frame normalised magnitude.
    hsv[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

def format_frame(frame):
    """Crop a frame to ``CROP_DIM`` and resize it to ``NEW_SIZE``.

    Args:
        frame: Image array accepted by ``PIL.Image.fromarray``.

    Returns:
        The cropped and resized frame as a NumPy array.
    """
    cropped = Image.fromarray(frame).crop(CROP_DIM)
    resized = cropped.resize(NEW_SIZE)
    return np.array(resized)

def split_video(video):
    """Decode a video into formatted frames and optical flow between them.

    The first frame is only used as the reference for the first flow, so the
    returned arrays start at the second frame: entry ``i`` of every array
    belongs to the frame pair ``(i, i + 1)`` of the video.

    Args:
        video: Path of the video file to read.

    Returns:
        A tuple ``(frames, op_flows, op_bgrs)`` of NumPy arrays holding, per
        frame pair, the formatted later frame, the raw Farneback flow and the
        BGR visualisation of that flow.

    Raises:
        IOError: If the video cannot be opened or contains no readable frame.
    """
    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        cap.release()
        raise IOError('Could not open video: ' + str(video))

    frames, op_flows, op_bgrs = [], [], []

    # CAP_PROP_FRAME_COUNT is reported as a float. The first frame never
    # produces an update, so the bar counts frame pairs (count - 1). Fall back
    # to an open-ended bar when the container does not report a usable count.
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(
        total=frame_count - 1 if frame_count > 1 else None,
        position=0,
        leave=True,
    )
    try:
        ret, frame1 = cap.read()
        if not ret:
            raise IOError('Could not read any frame from: ' + str(video))
        frame1 = format_frame(frame1)

        while True:
            ret, frame2 = cap.read()
            if not ret:
                break
            frame2 = format_frame(frame2)
            frames.append(frame2)
            op_flow = op_flow_raw(frame1, frame2)
            op_flows.append(op_flow)
            op_bgrs.append(op_flow_bgr(op_flow, frame2.shape))
            frame1 = frame2
            pbar.update()
    finally:
        # Always give back the decoder and finalise the progress bar, even if
        # a frame fails to process.
        pbar.close()
        cap.release()

    print('Finished saving '+video)
    return np.array(frames), np.array(op_flows), np.array(op_bgrs)

In [5]:
# Run the slow extraction before touching the output file: opening with 'w'
# truncates it, so a failure in split_video must not leave an empty train.h5.
frm, opf, opv = split_video('./data/train.mp4')

# The context manager closes the file; shape and dtype are taken from `data`.
with h5py.File('./data/train.h5', 'w') as f:
    f.create_dataset('FRM', data=frm)
    f.create_dataset('OPF', data=opf)
    f.create_dataset('OPV', data=opv)

100%|█████████▉| 20399/20400.0 [02:57<00:00, 108.61it/s]

Finished saving ./data/train.mp4


In [6]:
# Play back the stored frames next to their flow visualisation; press 'q' to
# stop early. The datasets are iterated directly instead of being bound to
# `frm` / `opv`, which would leave those names pointing at closed handles once
# the file is closed.
with h5py.File('./data/train.h5', 'r') as f:
    try:
        for a, b in zip(f['FRM'], f['OPV']):
            cv2.imshow('FRM', a)
            cv2.imshow('OPV', b)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Tear the windows down even if reading or displaying a frame fails.
        cv2.destroyAllWindows()

In [1]:
import cv2
import h5py
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class SpeedDataset(Dataset):
    """Dataset of (frame, flow image) pairs read from an HDF5 file.

    Two on-disk layouts are understood:

    * stacked: datasets ``'FRM'`` and ``'OPV'`` whose first axis indexes the
      sample, which is what the HDF5 writer cell of this notebook produces;
    * per-sample: one dataset per sample named ``'IMG<idx>'`` and
      ``'FLW<idx>'``.

    NOTE(review): in the stacked layout the flow is taken from ``'OPV'`` (the
    BGR visualisation) because the notebook displays the flows as images;
    switch to ``'OPF'`` if the raw flow vectors are wanted instead.

    Args:
        filename: Path of the HDF5 file; opened read-only.
        transform: Optional callable applied to both arrays of a sample. Its
            result is permuted with ``(1, 2, 0)``, so it must return a 3-D
            tensor (``transforms.ToTensor`` in this notebook).
    """

    def __init__(self, filename, transform=None):
        super(SpeedDataset, self).__init__()

        self.file = h5py.File(filename, 'r')
        self.transform = transform

    def _is_stacked(self):
        """Return True when the file uses the stacked 'FRM'/'OPV' layout."""
        return 'FRM' in self.file and 'OPV' in self.file

    def __len__(self):
        """Return the number of samples, not the number of HDF5 keys."""
        if self._is_stacked():
            return len(self.file['FRM'])
        # Every sample contributes one 'IMG<idx>' and one 'FLW<idx>' key, so
        # count only one of the two families.
        return sum(1 for key in self.file.keys() if key.startswith('IMG'))

    def __getitem__(self, idx):
        """Return the ``(img, flw)`` pair stored for sample ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # retrieve from h5py
        if self._is_stacked():
            img = np.array(self.file['FRM'][idx])
            flw = np.array(self.file['OPV'][idx])
        else:
            img = np.array(self.file['IMG'+str(idx)])
            flw = np.array(self.file['FLW'+str(idx)])

        # apply transform
        if self.transform is not None:
            img = self.transform(img).permute(1, 2, 0)
            flw = self.transform(flw).permute(1, 2, 0)

        return img, flw

    def close(self):
        """Close the underlying HDF5 file."""
        self.file.close()

# Samples come from the HDF5 file; ToTensor is the only preprocessing step.
dataset = SpeedDataset(
    filename='./data/train.h5',
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Sequential, single-process loading in batches of 1200 samples.
dataloader = DataLoader(dataset, batch_size=1200, shuffle=False, num_workers=0)

# Preview the flow images of the first batch only; press 'q' to stop early.
count = 0  # number of batches shown
try:
    for images, flows in dataloader:
        for img in flows:
            # SpeedDataset permutes transformed samples with (1, 2, 0) before
            # returning them, so each item goes to imshow as-is.
            cv2.imshow('train', img.numpy())
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        count += 1
        break  # a single batch is enough for a visual sanity check
finally:
    # Close the preview window even if loading or displaying a sample fails.
    cv2.destroyAllWindows()