In [29]:
import os

import h5py
import keras
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import *
from sklearn.model_selection import KFold

In [30]:
from tensorflow.python.client import device_lib

# Sanity check before training: print every compute device TensorFlow reports.
local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14631663233457597930
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 5060693856
locality {
  bus_id: 1
  links {
  }
}
incarnation: 14971784184129370023
physical_device_desc: "device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


In [31]:
# Input/output locations, relative to the notebook's working directory.
# Forward slashes are used because they resolve on Windows as well as on
# POSIX systems, whereas backslash-separated paths only work on Windows.
DATA_DIR = './data'

TRAIN_PATH = f'{DATA_DIR}/train.mp4'  # source video
LABEL_PATH = f'{DATA_DIR}/train.txt'  # per-frame labels, read with np.loadtxt
H5PY_PATH  = f'{DATA_DIR}/train.h5'   # HDF5 file holding the preprocessed datasets

In [32]:
def flow_from_frames(img0, img1):
    """
    Dense optical flow (Farneback) from ``img0`` to ``img1``.

    Both frames are converted from BGR to grayscale first; the raw result of
    ``cv.calcOpticalFlowFarneback`` is returned unchanged.
    """
    prev_gray, next_gray = (
        cv.cvtColor(frame, cv.COLOR_BGR2GRAY) for frame in (img0, img1)
    )
    return cv.calcOpticalFlowFarneback(
        prev_gray,
        next_gray,
        None,  # no initial flow estimate
        0.5,   # pyr_scale
        1,     # levels
        15,    # winsize
        2,     # iterations
        5,     # poly_n
        1.3,   # poly_sigma
        0,     # flags
    )

def bgr_from_flow(flow):
    """
    Renders an optical-flow field as a BGR image.

    The first two channels of ``flow`` are converted to polar form. Direction
    becomes hue, saturation is fixed at 255, and magnitude (min-max normalized
    to 0..255) becomes value; the HSV image is then converted to BGR.
    """
    magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])

    hsv = np.zeros(magnitude.shape[:2] + (3,), dtype=np.uint8)
    # Angle converted to degrees and halved so it fits the uint8 hue channel.
    hsv[..., 0] = angle * 180 / np.pi / 2
    hsv[..., 1] = 255
    hsv[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)

    return cv.cvtColor(hsv, cv.COLOR_HSV2BGR)

def bgr_from_frames(img0, img1):
    """
    BGR visualization of the optical flow between two frames.

    Convenience wrapper: computes the flow with ``flow_from_frames`` and
    renders it with ``bgr_from_flow``.
    """
    flow = flow_from_frames(img0, img1)
    return bgr_from_flow(flow)

def crop_frame(img, crop_dim, size_dim=None):
    """
    Crops an image array to a box and returns the result as an array.

    crop_dim = (left, top, right, bottom), passed straight to PIL's ``crop``.
    size_dim is accepted but not used by this function.
    """
    cropped = Image.fromarray(img).crop(crop_dim)
    return np.array(cropped)

def resize_frame(img, dim):
    """
    Resizes an image array and returns the result as an array.

    dim = (width, height), passed straight to PIL's ``resize``.
    """
    resized = Image.fromarray(img).resize(dim)
    return np.array(resized)

def crop_resize(img, crop_dim, size_dim):
    """
    Crops a frame to ``crop_dim`` and then resizes the crop to ``size_dim``.

    See ``crop_frame`` and ``resize_frame`` for the tuple layouts.
    """
    return resize_frame(crop_frame(img, crop_dim), size_dim)

def augment_frame(img, low=0.5, high=1.5):
    """
    Randomly perturbs brightness and color of an image (data augmentation).

    A single factor is drawn uniformly from [low, high) and applied to both
    the brightness and the color enhancer, in that order.
    """
    factor = np.random.uniform(low, high)
    picture = Image.fromarray(img)
    for enhancer in (ImageEnhance.Brightness, ImageEnhance.Color):
        picture = enhancer(picture).enhance(factor)
    return np.array(picture)

In [5]:
# data dimensions
TRAIN_COUNT = 20_400              # number of frames expected in the training video
CROP_DIM = (0, 40, 640, 360)      # (left, top, right, bottom) crop box
RESIZE_DIM = (300, 150)           # (width, height) after resizing
FRAME_CHANNELS, FLOW_CHANNELS = 3, 2  # channels per image frame / per flow field

# raw train data
lbl = np.loadtxt(LABEL_PATH)
if np.size(lbl) != TRAIN_COUNT:
    # 'LBL' is written from lbl[1:] and must line up row-for-row with the
    # TRAIN_COUNT-1 frame pairs stored in 'FRM' / 'OPF'.
    raise ValueError(
        f'Expected {TRAIN_COUNT} labels in {LABEL_PATH}, found {np.size(lbl)}'
    )

cap = cv.VideoCapture(TRAIN_PATH)
if not cap.isOpened():
    raise IOError(f'Could not open video {TRAIN_PATH}')

# One sample per pair of consecutive frames.
PAIR_COUNT = TRAIN_COUNT - 1
height, width = RESIZE_DIM[1], RESIZE_DIM[0]

pbar = tqdm(total=PAIR_COUNT, position=0, leave=True)
try:
    # train datasets (the context manager closes the file even on errors)
    with h5py.File(H5PY_PATH, 'w') as file:
        # Stacked (previous, current) frames, kept as raw 0..255 uint8 pixels.
        # Writing values scaled to [0, 1] into a uint8 dataset would truncate
        # every pixel to 0 or 1, so any normalization must happen at load time.
        frames = file.create_dataset(
            'FRM',
            shape=(PAIR_COUNT, height, width, FRAME_CHANNELS*2),
            maxshape=(None, height, width, FRAME_CHANNELS*2),
            chunks=(1, height, width, FRAME_CHANNELS*2),
            dtype=np.uint8
        )
        # Dense optical flow between the two frames of each pair.
        op_flows = file.create_dataset(
            'OPF',
            shape=(PAIR_COUNT, height, width, FLOW_CHANNELS),
            maxshape=(None, height, width, FLOW_CHANNELS),
            chunks=(1, height, width, FLOW_CHANNELS),
            dtype='f4'
        )
        # Label of the second frame of each pair (the first label has no pair).
        speeds = file.create_dataset(
            'LBL', data=lbl[1:], dtype='f4'
        )

        # initial frame
        ret, img0 = cap.read()
        if not ret:
            raise IOError(f'Could not read the first frame of {TRAIN_PATH}')
        img0 = crop_resize(img0, CROP_DIM, RESIZE_DIM)

        # An explicit counter keeps writes inside the dataset bounds even if
        # the video holds more frames than TRAIN_COUNT.
        for frame_id in range(PAIR_COUNT):
            ret, img1 = cap.read()
            if not ret:
                # Video ended early; the remaining rows keep their fill value.
                break
            img1 = crop_resize(img1, CROP_DIM, RESIZE_DIM)
            flow = flow_from_frames(img0, img1)
            frames[frame_id] = np.concatenate((img0, img1), axis=2)
            op_flows[frame_id] = flow
            img0 = img1
            pbar.update()
finally:
    # Release the progress bar and the video handle whatever happened above.
    pbar.close()
    cap.release()

100%|██████████| 20399/20399 [07:40<00:00, 47.45it/s]

In [37]:
class DataGenerator(keras.utils.Sequence):
    """
    Keras ``Sequence`` that serves batches from the preprocessed HDF5 file.

    Each batch is ``(X, y)`` where ``X`` is read from the ``'OPF'`` dataset
    and ``y`` from the ``'LBL'`` dataset at the same row indexes.

    Parameters
    ----------
    data_path : path of the HDF5 file; opened read-only for the lifetime of
        the generator and closed when the generator is garbage-collected.
    indexes : row indexes this generator may serve. ``None`` means every row
        of ``'OPF'``. The values are copied, so the caller's array is never
        reordered by the per-epoch shuffle.
    n_channels : stored on the instance; not used by this class.
    batch_size : number of rows per batch.
    shuffle : when true, the index order is reshuffled at the end of every
        epoch. The first epoch is served in the order given.
    """

    def __init__(
        self, data_path, indexes=None, n_channels=3, batch_size=16, shuffle=True
    ):
        super().__init__()
        self.batch_size = batch_size
        self.data_path = data_path
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.file = h5py.File(data_path, 'r')
        if indexes is None:
            self.indexes = np.arange(len(self.file['OPF']))
        else:
            # Private copy: on_epoch_end shuffles in place and must not
            # reorder an array the caller still owns.
            self.indexes = np.array(indexes)

    def __len__(self):
        # Number of full batches; a trailing partial batch is dropped.
        return int(np.floor(len(self.indexes) / self.batch_size))

    def __getitem__(self, index):
        start = index * self.batch_size
        stop = start + self.batch_size
        return self.__data_generation(self.indexes[start:stop])

    def __del__(self):
        # `file` is missing if __init__ failed before the HDF5 file was opened.
        file = getattr(self, 'file', None)
        if file is not None:
            file.close()

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, index):
        # Rows are read in ascending index order, for X and y alike, so the
        # samples and their labels stay aligned.
        index = sorted(index)
        X = np.array(self.file['OPF'][index])
        y = np.array(self.file['LBL'][index])
        return X, y

In [38]:
class SpeedNet(keras.Model):
    """
    Convolutional regressor wrapping a single ``Sequential`` stack.

    Seven ReLU convolution blocks (8 to 512 filters), each followed by max
    pooling, then global average pooling, dropout, and three dense layers
    ending in one output unit. The first two pooling layers use a (1, 2)
    window; the last convolution uses ``padding='same'``.

    NOTE(review): the output unit uses ReLU, so predictions cannot be
    negative - confirm this is intended for the regression target.
    """

    def __init__(self, input_dim=(150, 300, 2), kernel_size=3):
        super().__init__()
        kernel = (kernel_size, kernel_size)

        def conv(filters, **extra):
            # Every convolution shares kernel, activation and data format.
            return Conv2D(
                filters, kernel,
                activation='relu', data_format='channels_last', **extra
            )

        stack = [Input(shape=input_dim)]
        # Early blocks pool with a (1, 2) window.
        for filters in (8, 16):
            stack += [conv(filters), MaxPooling2D(pool_size=(1, 2))]
        # Later blocks use the default pooling window.
        for filters in (32, 64, 128, 256):
            stack += [conv(filters), MaxPooling2D()]
        stack += [
            conv(512, padding='same'),
            MaxPooling2D(),
            GlobalAveragePooling2D(),
            Dropout(0.2),
            Dense(500, activation='relu'),
            Dense(250, activation='relu'),
            Dense(1, activation='relu'),
        ]
        self.model = Sequential(stack)

    def call(self, x):
        return self.model(x)

In [None]:
# 5-fold cross-validation of SpeedNet on the preprocessed optical-flow data.
EPOCHS = 10
# NOTE(review): patience exceeds EPOCHS, so early stopping can never trigger
# with these settings; lower PATIENCE or raise EPOCHS to make it effective.
PATIENCE = 50
MODEL_DIR = './models'

# The checkpoint callback writes into this directory; make sure it exists.
os.makedirs(MODEL_DIR, exist_ok=True)

# Folds are contiguous index ranges (KFold does not shuffle by default).
kf = KFold(n_splits=5)

valid_loss = []  # best-checkpoint validation MSE, one entry per fold
valid_mae = []   # best-checkpoint validation MAE, one entry per fold
# Backward-compatible alias: this is a regression task, so there is no
# accuracy to record; MAE is tracked as the second metric instead.
valid_accr = valid_mae

for fold_var, (train_index, val_index) in enumerate(
    kf.split(np.arange(TRAIN_COUNT-1))
):
    checkpoint_path = f'./models/best_model_{fold_var}.h5'

    train_data_generator = DataGenerator(
        H5PY_PATH, indexes=train_index, batch_size=32
    )
    valid_data_generator = DataGenerator(
        H5PY_PATH, indexes=val_index, batch_size=32
    )

    model = SpeedNet()
    # A metric is required so that evaluate() returns [loss, mae] rather
    # than a bare scalar loss.
    model.compile(loss='mse', optimizer='adam', metrics=['mae'])

    es = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        verbose=1,
        patience=PATIENCE
    )
    mc = keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor='val_loss',
        mode='min',
        save_best_only=True,
        verbose=1
    )

    history = model.fit(
        train_data_generator,
        epochs=EPOCHS,
        callbacks=[es, mc],
        validation_data=valid_data_generator
    )

    # Score the fold with the best checkpoint, not the last epoch's weights.
    model.load_weights(checkpoint_path)

    loss, mae = model.evaluate(valid_data_generator)
    results = {'loss': loss, 'mae': mae}

    valid_loss.append(results['loss'])
    valid_mae.append(results['mae'])

    # Drop the finished fold's graph/state before building the next model.
    keras.backend.clear_session()

Epoch 1/10

Epoch 00001: val_loss improved from inf to 70.91537, saving model to ./models\best_model_0.h5
Epoch 2/10

Epoch 00002: val_loss improved from 70.91537 to 10.66410, saving model to ./models\best_model_0.h5
Epoch 3/10

Epoch 00003: val_loss improved from 10.66410 to 8.40978, saving model to ./models\best_model_0.h5
Epoch 4/10

Epoch 00004: val_loss improved from 8.40978 to 4.73090, saving model to ./models\best_model_0.h5
Epoch 5/10

Epoch 00005: val_loss improved from 4.73090 to 3.27750, saving model to ./models\best_model_0.h5
Epoch 6/10

Epoch 00006: val_loss did not improve from 3.27750
Epoch 7/10

Epoch 00007: val_loss did not improve from 3.27750
Epoch 8/10

Epoch 00008: val_loss did not improve from 3.27750
Epoch 9/10