In [1]:
# Fetch the Keras I3D implementation and make the checkout the working
# directory. A later cell imports `i3d_inception` (presumably provided by this
# repository — verify) and the data paths used below are relative to this folder.
!git clone https://github.com/dlpbc/keras-kinetics-i3d.git
%cd keras-kinetics-i3d

Cloning into 'keras-kinetics-i3d'...
remote: Enumerating objects: 49, done.
remote: Total 49 (delta 0), reused 0 (delta 0), pack-reused 49
Unpacking objects: 100% (49/49), done.
/content/keras-kinetics-i3d


In [0]:
from google.colab import files

uploaded = files.upload()
for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'
          .format(name=fn, length=len(uploaded[fn])))

!unzip data.zip

In [0]:
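# Optional: download the sample CricketShot .npy inputs from DeepMind's kinetics-i3d repo.
# The training cells below read from data/train instead, so these commands are left disabled.
# !wget -P data https://github.com/deepmind/kinetics-i3d/raw/master/data/v_CricketShot_g04_c01_flow.npy
# !wget -P data https://github.com/deepmind/kinetics-i3d/raw/master/data/v_CricketShot_g04_c01_rgb.npy
# !ls data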

In [8]:
import cv2
import glob
import numpy as np
import os
import random
import tensorflow as tf

from i3d_inception import Inception_Inflated3d
from keras.layers import Dense, Dropout, Flatten
from keras.models import Model
from keras.utils import to_categorical

Using TensorFlow backend.


In [0]:
# Class names. Each one is also the name of the sub-directory that holds the
# videos of that class (the data generator derives a label from the parent folder).
LABELS = [
    "360_kickflip",
    "heelflip",
    "kickflip",
    "nollie_fakie_360_kickflip",
    "nollie_fakie_heelflip",
    "nollie_fakie_kickflip",
]

# Label name -> integer class index, following the order of LABELS.
LABEL_IDS = dict(zip(LABELS, range(len(LABELS))))
N_LABELS = len(LABELS)

# Shape of one clip fed to the network: (frames, height, width, channels).
N_FRAMES = 32
FRAME_HEIGHT = 224
FRAME_WIDTH = 224
N_RGB_CHANNELS = 3

# Number of videos per training batch.
TRAIN_BATCH_SIZE = 4

class DataGenerator:
    """Serve batches of fixed-length RGB clips read from a directory of videos.

    Videos are expected at ``<train_dir>/<label>/<video file>``; the name of
    the parent directory is used as the label and must be a key of the
    module-level ``LABEL_IDS``.
    """

    def __init__(self, train_dir=None, test_dir=None, shuffle_train=True, validation_split=0.0):
        """Collect the video paths under ``train_dir`` and split them.

        Args:
            train_dir: Root directory with one sub-directory per label. When
                falsy, both file lists stay empty.
            test_dir: Accepted for interface compatibility; not used yet.
            shuffle_train: Shuffle the collected paths (with the ``random``
                module) before the train/validation split.
            validation_split: Fraction in [0, 1] of the paths that goes to
                ``val_filepaths``; the rest goes to ``train_filepaths``.

        Raises:
            ValueError: If ``validation_split`` is outside [0, 1].
        """
        if not 0.0 <= validation_split <= 1.0:
            raise ValueError("validation_split must be between 0 and 1, got {}".format(validation_split))

        # Spatial size every frame is resized to. Keep in sync with the
        # FRAME_HEIGHT / FRAME_WIDTH constants used for the model input shape.
        self.frame_h = 224
        self.frame_w = 224

        # Always defined, so that a generator built without train_dir fails
        # with a clear error instead of an AttributeError.
        self.train_filepaths = []
        self.val_filepaths = []

        if train_dir:
            filepaths = []
            for sub_dir in glob.glob(os.path.join(train_dir, "*")):
                filepaths += glob.glob(os.path.join(sub_dir, "*"))
            # glob returns entries in arbitrary order; sorting makes the
            # unshuffled order (and therefore the split) reproducible.
            filepaths.sort()

            if shuffle_train:
                random.shuffle(filepaths)

            n_train = int((1.0 - validation_split) * len(filepaths))
            self.train_filepaths = filepaths[:n_train]
            self.val_filepaths = filepaths[n_train:]

    def get_train_generator(self):
        """Yield ``(X, y)`` training batches forever, cycling over the files.

        Each pass over the data yields ceil(n_files / TRAIN_BATCH_SIZE)
        batches; the last batch of a pass may be smaller.

        Raises:
            ValueError: On first iteration, if there are no training files.
        """
        n_filepaths = len(self.train_filepaths)
        if n_filepaths == 0:
            # Without this guard the loop below would spin forever without
            # ever yielding a batch.
            raise ValueError("No training files available; pass a train_dir that contains videos.")

        while True:
            for ix_start in range(0, n_filepaths, TRAIN_BATCH_SIZE):
                # Slicing clamps at the end of the list, so no explicit min().
                filepaths_for_batch = self.train_filepaths[ix_start:ix_start + TRAIN_BATCH_SIZE]
                yield self._get_batch(filepaths_for_batch)

    def _get_batch(self, filepaths):
        """Load the given videos and return ``(frames, one_hot_labels)``.

        Returns:
            A tuple of a numpy array stacking the clips of all videos and an
            integer one-hot label array with ``N_LABELS`` columns.

        Raises:
            KeyError: If a video's parent directory is not in ``LABEL_IDS``.
        """
        batch_frames = []
        batch_labels = []
        for filepath in filepaths:
            batch_frames.append(self._get_frames(filepath))
            batch_labels.append(LABEL_IDS[self._get_label(filepath)])
        batch_labels = to_categorical(batch_labels, num_classes=N_LABELS).astype(int)

        return np.array(batch_frames), batch_labels

    def _get_frames(self, filepath):
        """Decode a video into exactly ``N_FRAMES`` resized, scaled RGB frames.

        Videos with fewer than ``N_FRAMES`` frames are padded at the end with
        black frames; longer videos are sampled at evenly spaced positions.

        Returns:
            A list of ``N_FRAMES`` float arrays with values in [0, 1].

        Raises:
            FileNotFoundError: If OpenCV cannot open ``filepath``.
        """
        video = cv2.VideoCapture(filepath)
        try:
            if not video.isOpened():
                raise FileNotFoundError("The input video path you provided is invalid.")

            frames = []
            while video.isOpened():
                grabbed, frame_bgr = video.read()
                if not grabbed:
                    break
                # OpenCV decodes to BGR; convert to RGB channel order.
                frames.append(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
        finally:
            # Release the capture handle even if decoding raised.
            video.release()

        n_pad_frames = N_FRAMES - len(frames)
        if n_pad_frames > 0:
            blank_frame = np.zeros((self.frame_h, self.frame_w, 3), dtype=np.uint8)
            frames.extend([blank_frame] * n_pad_frames)

        # Evenly spaced sampling. The index is computed from the loop counter
        # rather than accumulated, so floating-point error cannot build up.
        step_size = len(frames) / float(N_FRAMES)
        frames_processed = []
        for ix in range(N_FRAMES):
            # cv2.resize takes the target size as (width, height).
            frame = cv2.resize(frames[int(ix * step_size)], (self.frame_w, self.frame_h))
            # NOTE(review): this scales pixels to [0, 1]; the pretrained I3D RGB
            # weights may expect inputs in [-1, 1] — confirm against the
            # preprocessing used when those weights were trained.
            frames_processed.append(frame / 255.0)

        return frames_processed

    def _get_label(self, filepath):
        """Return the name of the directory that directly contains ``filepath``."""
        # os.path handles the platform's path separator, unlike split("/").
        return os.path.basename(os.path.dirname(filepath))

In [0]:
# Collect the training videos under data/train and create the endless batch
# generator that is passed to model training further down.
dg = DataGenerator(train_dir="data/train")
train_gen = dg.get_train_generator()

# Disabled smoke test: prints the shape of the first clip and the first label
# of each of the first five batches.
# ix = 0
# for frames, labels in train_gen:
#     print(frames[0].shape, labels[0])
    
#     ix += 1
#     if ix == 5:
#         break

In [0]:
# I3D backbone initialised from the 'rgb_kinetics_only' weights, built without
# its top (include_top=False) so a new classifier can be attached to it.
# NOTE(review): `classes` is presumably ignored when include_top=False —
# confirm in i3d_inception.
rgb_model = Inception_Inflated3d(
    include_top=False,
    weights='rgb_kinetics_only',
    input_shape=(N_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, N_RGB_CHANNELS),
    classes=N_LABELS,
)

In [0]:
# Freeze every layer of the pretrained backbone so that only the layers added
# on top of it are updated during training.
for backbone_layer in rgb_model.layers:
    backbone_layer.trainable = False

In [0]:
# Classification head stacked on the backbone output:
# flatten -> 1024-unit ReLU layer -> dropout -> softmax over the labels.
head = Flatten()(rgb_model.output)
head = Dense(1024, activation='relu')(head)
head = Dropout(0.5)(head)
output = Dense(N_LABELS, activation='softmax')(head)

# End-to-end model: backbone input -> label probabilities.
model = Model(inputs=rgb_model.input, outputs=output)
model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
# One epoch should be exactly one pass over the training files. The generator
# yields ceil(n_files / TRAIN_BATCH_SIZE) batches per pass, so derive the step
# count from the data instead of hard-coding it.
steps_per_epoch = -(-len(dg.train_filepaths) // TRAIN_BATCH_SIZE)  # ceiling division
model.fit_generator(train_gen, steps_per_epoch=steps_per_epoch, epochs=15)