In [None]:
import h5py
import numpy as np
import cv2 as cv
from PIL import Image, ImageEnhance
from tqdm import tqdm

In [None]:
# Input/output locations, relative to the notebook's working directory.
# Forward slashes are used because they are accepted on Windows as well as
# POSIX systems, whereas a backslash is only a path separator on Windows.
TRAIN_PATH = './data/train.mp4'  # source video
LABEL_PATH = './data/train.txt'  # labels loaded with np.loadtxt
H5PY_PATH  = './data/train.h5'   # HDF5 file written by this notebook

In [None]:
def flow_from_frames(img0, img1):
    """
    Dense Farneback optical flow from img0 to img1.

    Both frames are converted from BGR to grayscale first; the array
    returned by cv.calcOpticalFlowFarneback is handed back unchanged.
    """
    prev_gray, next_gray = (
        cv.cvtColor(frame, cv.COLOR_BGR2GRAY) for frame in (img0, img1)
    )
    # No initial flow estimate is supplied (flow=None); the remaining
    # arguments are the fixed Farneback settings used by this notebook.
    return cv.calcOpticalFlowFarneback(
        prev_gray,
        next_gray,
        None,
        pyr_scale=0.5,
        levels=1,
        winsize=15,
        iterations=2,
        poly_n=5,
        poly_sigma=1.3,
        flags=0,
    )

def bgr_from_flow(flow):
    """
    Renders an optical flow field as a BGR image.

    flow[..., 0] and flow[..., 1] are converted to polar form. The angle
    (radians) is mapped to hue as degrees / 2, saturation is fixed at 255
    and the magnitude, min-max normalised to 0..255, becomes the value
    channel. The HSV image is then converted to BGR.
    """
    dx, dy = flow[..., 0], flow[..., 1]
    magnitude, angle = cv.cartToPolar(dx, dy)

    rows, cols = magnitude.shape[0], magnitude.shape[1]
    hsv = np.zeros((rows, cols, 3), dtype=np.uint8)
    # Float values are truncated to uint8 on assignment.
    hsv[..., 0] = angle * 180 / np.pi / 2
    hsv[..., 1] = 255
    hsv[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)

    return cv.cvtColor(hsv, cv.COLOR_HSV2BGR)

def bgr_from_frames(img0, img1):
    """
    Computes the optical flow between two frames and renders it as BGR

    Convenience wrapper: flow_from_frames followed by bgr_from_flow.
    """
    flow = flow_from_frames(img0, img1)
    return bgr_from_flow(flow)

def crop_frame(img, crop_dim, size_dim=None):
    """
    Crops an image array to the given box
    crop_dim = (left, top, right, bottom)

    The array is round-tripped through a PIL image to perform the crop.
    size_dim is accepted but not used by this function.
    """
    cropped = Image.fromarray(img).crop(crop_dim)
    return np.array(cropped)

def resize_frame(img, dim):
    """
    Resizes an image array to the given size
    dim = (width, height)

    The array is round-tripped through a PIL image, using PIL's default
    resampling for Image.resize.
    """
    resized = Image.fromarray(img).resize(dim)
    return np.array(resized)

def crop_resize(img, crop_dim, size_dim):
    """
    Crops a frame to crop_dim, then resizes the result to size_dim
    crop_dim = (left, top, right, bottom)
    size_dim = (width, height)
    """
    cropped = crop_frame(img, crop_dim)
    return resize_frame(cropped, size_dim)

def augment_frame(img, low=0.5, high=1.5):
    """
    Randomly perturbs brightness and colour of an image to avoid overfitting

    A single factor is drawn from np.random.uniform(low, high) and applied
    first to PIL's Brightness enhancer and then to its Color enhancer.
    Returns the result as a numpy array.
    """
    factor = np.random.uniform(low, high)
    enhanced = Image.fromarray(img)
    # Brightness first, then colour, both with the same factor.
    for enhancer in (ImageEnhance.Brightness, ImageEnhance.Color):
        enhanced = enhancer(enhanced).enhance(factor)
    return np.array(enhanced)

In [None]:
# data dimensions
TRAIN_COUNT = 20400               # frames expected in the training video
CROP_DIM = (0, 40, 640, 360)      # (left, top, right, bottom), see crop_frame
RESIZE_DIM = (300, 150)           # (width, height), see resize_frame
FRAME_CHANNELS = 3
FLOW_CHANNELS = 2
PAIR_COUNT = TRAIN_COUNT - 1      # one sample per pair of consecutive frames
FRAME_SHAPE = (RESIZE_DIM[1], RESIZE_DIM[0])  # (height, width) array layout

# raw train data
lbl = np.loadtxt(LABEL_PATH)
cap = cv.VideoCapture(TRAIN_PATH)

try:
    # initial frame -- read before the HDF5 file is opened in 'w' mode so that
    # an unreadable video does not truncate an existing dataset
    ret, img0 = cap.read()
    if not ret:
        raise IOError(
            'Could not read the first frame from {!r}'.format(TRAIN_PATH)
        )
    img0 = crop_resize(img0, CROP_DIM, RESIZE_DIM)

    # train datasets; the context manager closes the file even if the loop
    # below raises, so the cell can be re-run without a stale open handle
    with h5py.File(H5PY_PATH, 'w') as file:
        # FRM[i]: frames i and i+1 stacked along the channel axis, stored as
        # raw uint8 (0..255). Scaling to [0, 1] before writing would be
        # truncated to 0/1 by the uint8 dataset, so any normalisation has to
        # be done by the reader after loading.
        frames = file.create_dataset(
            'FRM',
            shape=(PAIR_COUNT,) + FRAME_SHAPE + (FRAME_CHANNELS*2,),
            maxshape=(None,) + FRAME_SHAPE + (FRAME_CHANNELS*2,),
            chunks=(1,) + FRAME_SHAPE + (FRAME_CHANNELS*2,),
            dtype=np.uint8
        )
        # OPF[i]: Farneback flow between frames i and i+1
        op_flows = file.create_dataset(
            'OPF',
            shape=(PAIR_COUNT,) + FRAME_SHAPE + (FLOW_CHANNELS,),
            maxshape=(None,) + FRAME_SHAPE + (FLOW_CHANNELS,),
            chunks=(1,) + FRAME_SHAPE + (FLOW_CHANNELS,),
            dtype='f4'
        )
        # LBL[i]: label of the second frame of pair i (first label is dropped)
        speeds = file.create_dataset(
            'LBL', data=lbl[1:], dtype='f4'
        )

        with tqdm(total=PAIR_COUNT, position=0, leave=True) as pbar:
            # An explicit counter bounds the writes to the dataset length and
            # does not depend on the capture backend's frame-position property.
            for frame_id in range(PAIR_COUNT):
                ret, img1 = cap.read()
                if not ret:
                    # Video ended early: rows from frame_id onwards stay at
                    # their zero fill value.
                    break
                img1 = crop_resize(img1, CROP_DIM, RESIZE_DIM)
                frames[frame_id] = np.concatenate((img0, img1), axis=2)
                op_flows[frame_id] = flow_from_frames(img0, img1)
                img0 = img1
                pbar.update()
finally:
    # always give the video handle back, even on failure
    cap.release()
