<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Investigating-Architectures-of-3D-CNNs" data-toc-modified-id="Investigating-Architectures-of-3D-CNNs-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Investigating Architectures of 3D CNNs</a></span><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Getting-the-Data" data-toc-modified-id="Getting-the-Data-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Getting the Data</a></span><ul class="toc-item"><li><span><a href="#Sanity-Check" data-toc-modified-id="Sanity-Check-1.2.1"><span class="toc-item-num">1.2.1&nbsp;&nbsp;</span>Sanity Check</a></span></li><li><span><a href="#Processing-Videos-to-Images" data-toc-modified-id="Processing-Videos-to-Images-1.2.2"><span class="toc-item-num">1.2.2&nbsp;&nbsp;</span>Processing Videos to Images</a></span></li><li><span><a href="#Train-Test-Split" data-toc-modified-id="Train-Test-Split-1.2.3"><span class="toc-item-num">1.2.3&nbsp;&nbsp;</span>Train Test Split</a></span></li><li><span><a href="#Sanity-Check" data-toc-modified-id="Sanity-Check-1.2.4"><span class="toc-item-num">1.2.4&nbsp;&nbsp;</span>Sanity Check</a></span></li><li><span><a href="#Investigating-Time-Complexity" data-toc-modified-id="Investigating-Time-Complexity-1.2.5"><span class="toc-item-num">1.2.5&nbsp;&nbsp;</span>Investigating Time Complexity</a></span></li></ul></li><li><span><a href="#Preprocessing" data-toc-modified-id="Preprocessing-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Preprocessing</a></span></li><li><span><a href="#Data-Cleanse" data-toc-modified-id="Data-Cleanse-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>Data Cleanse</a></span></li><li><span><a href="#Training-From-Scratch" data-toc-modified-id="Training-From-Scratch-1.5"><span class="toc-item-num">1.5&nbsp;&nbsp;</span>Training From Scratch</a></span><ul class="toc-item"><li><span><a href="#Model-Definition" data-toc-modified-id="Model-Definition-1.5.1"><span class="toc-item-num">1.5.1&nbsp;&nbsp;</span>Model Definition</a></span></li></ul></li><li><span><a href="#Transfer-Learning" data-toc-modified-id="Transfer-Learning-1.6"><span class="toc-item-num">1.6&nbsp;&nbsp;</span>Transfer Learning</a></span><ul class="toc-item"><li><span><a href="#Loading-the-Weights" data-toc-modified-id="Loading-the-Weights-1.6.1"><span class="toc-item-num">1.6.1&nbsp;&nbsp;</span>Loading the Weights</a></span></li><li><span><a href="#Sanity-Check" data-toc-modified-id="Sanity-Check-1.6.2"><span class="toc-item-num">1.6.2&nbsp;&nbsp;</span>Sanity Check</a></span></li><li><span><a href="#Fine-Tuning" data-toc-modified-id="Fine-Tuning-1.6.3"><span class="toc-item-num">1.6.3&nbsp;&nbsp;</span>Fine Tuning</a></span></li></ul></li></ul></li></ul></div>

# Investigating Architectures of 3D CNNs

## Imports

In [221]:
import cv2
import os
from tqdm import tqdm_notebook
import shutil
import pandas as pd
import numpy as np
import time

from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D, ZeroPadding3D
from keras.layers.pooling import MaxPooling2D, MaxPooling3D
from keras.layers.core import Activation, Dropout, Flatten, Dense, Lambda
from keras.layers import ELU, TimeDistributed, LSTM, Input, Reshape, Concatenate
from keras.optimizers import Adam, SGD
import keras.backend.tensorflow_backend as KTF
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications import ResNet50
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.utils import to_categorical

LEGACY_DIR_PATH = r'/Volumes/Expansion/3dcnn'
DIR_PATH = r'/Users/shivam/dev/3dcnn'

DATA_PATH = os.path.join(DIR_PATH, 'ucfdata')
METADATA_PATH = os.path.join(DIR_PATH, 'ucfmetadata')

## Getting the Data

In [2]:
from google.colab import drive
drive.mount('/content/drive')
DIR_PATH = '/content/drive/My Drive'
DATA_PATH = os.path.join(DIR_PATH, 'ucfdata')

TIMEOUT: ignored

In [7]:
!mkdir {DATA_PATH}
!cd {DATA_PATH} && wget http://crcv.ucf.edu/data/UCF101/UCF101.rar
!cd {DATA_PATH} && unrar e UCF101.rar -idq
!rm {DATA_PATH + '/UCF101.rar'}

--2019-07-20 04:23:43--  http://crcv.ucf.edu/data/UCF101/UCF101.rar
Resolving crcv.ucf.edu (crcv.ucf.edu)... 132.170.214.127
Connecting to crcv.ucf.edu (crcv.ucf.edu)|132.170.214.127|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://www.crcv.ucf.edu/data/UCF101/UCF101.rar [following]
--2019-07-20 04:23:44--  https://www.crcv.ucf.edu/data/UCF101/UCF101.rar
Resolving www.crcv.ucf.edu (www.crcv.ucf.edu)... 132.170.214.127
Connecting to www.crcv.ucf.edu (www.crcv.ucf.edu)|132.170.214.127|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6932971618 (6.5G) [application/rar]
Saving to: ‘UCF101.rar’


2019-07-20 04:39:11 (7.13 MB/s) - ‘UCF101.rar’ saved [6932971618/6932971618]



In [33]:
!mkdir {METADATA_PATH}
!cd {METADATA_PATH} && wget http://crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip
!cd {METADATA_PATH} && unzip UCF101TrainTestSplits-RecognitionTask.zip

--2019-07-21 00:16:46--  http://crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip
Resolving crcv.ucf.edu (crcv.ucf.edu)... 132.170.214.127
Connecting to crcv.ucf.edu (crcv.ucf.edu)|132.170.214.127|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip [following]
--2019-07-21 00:16:46--  https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip
Resolving www.crcv.ucf.edu (www.crcv.ucf.edu)... 132.170.214.127
Connecting to www.crcv.ucf.edu (www.crcv.ucf.edu)|132.170.214.127|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 113943 (111K) [application/zip]
Saving to: ‘UCF101TrainTestSplits-RecognitionTask.zip’


2019-07-21 00:16:47 (3.04 MB/s) - ‘UCF101TrainTestSplits-RecognitionTask.zip’ saved [113943/113943]

Archive:  UCF101TrainTestSplits-RecognitionTask.zip
   creating: ucfTrainTestlist/
  inflating: ucfTrainTe

### Sanity Check

UCF101 dataset should contain 13K+ videos.

In [2]:
!ls -l {DATA_PATH} | wc -l

   13285


### Processing Videos to Images

In [228]:
VIDEOS_PATH = DATA_PATH
IMAGES_PATH = os.path.join(DIR_PATH, 'imgs')

def create_empty_folder(target_folder):
    if os.path.exists(target_folder):
        shutil.rmtree(target_folder)

    os.makedirs(target_folder)
    
def get_image_path(folder, num_frame):
    return os.path.join(folder, str(num_frame) + '.jpg')

def get_image_folder(video_title, images_dir=IMAGES_PATH):
    [video_name, extension] = video_title.split(".")
    if extension != 'avi':
        raise Exception("File is not a video")
    [_, action, group, vid_num] = video_name.split("_")
    image_folder = os.path.join(images_dir,"_".join([action, group, vid_num]))
    return image_folder

def load_image(image_path):
    if not os.path.isfile(image_path):
        print(image_path)
    bgr_img = cv2.imread(image_path)
    #rbg_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
    return bgr_img

def process_video_to_images(video_fp, target_folder):
    
    create_empty_folder(target_folder)
  
    cap = cv2.VideoCapture(video_fp)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    for frame_num in range(frame_count):
        _, frame = cap.read()
        img_path = get_image_path(target_folder, frame_num)
        cv2.imwrite(img_path, frame)
        
    cap.release()
    return frame_count

def process_videos_to_images():
    video_list = os.listdir(VIDEOS_PATH)
  
    video_data = []

    for video_title in tqdm_notebook(video_list):
        video_fp = os.path.join(VIDEOS_PATH, video_title)
    
        try:
            image_folder = get_image_folder(video_title)
        except:
            print("Not a video: " + str(video_title))
            continue
      
        frame_count = process_video_to_images(video_fp, image_folder)
    
        video_data.append([video_fp, frame_count])
    
    return video_data

In [28]:
video_metadata = process_videos_to_images()

HBox(children=(IntProgress(value=0, max=13320), HTML(value='')))

In [51]:
video_md_df = pd.DataFrame(video_metadata)
video_md_df.columns = ['fp', 'frame_count']
#video_md_df['fp'] = video_md_df['fp'].str.replace('\.\/ucfdata', DATA_PATH) # delete this after
video_md_df.head()

Unnamed: 0,fp,frame_count
0,/Volumes/Expansion/3dcnn/ucfdata/v_CricketBowl...,170
1,/Volumes/Expansion/3dcnn/ucfdata/v_BreastStrok...,116
2,/Volumes/Expansion/3dcnn/ucfdata/v_TennisSwing...,228
3,/Volumes/Expansion/3dcnn/ucfdata/v_BrushingTee...,716
4,/Volumes/Expansion/3dcnn/ucfdata/v_Typing_g08_...,250


In [210]:
video_md_df['fp'] = video_md_df['fp'].str.replace(LEGACY_DIR_PATH, DIR_PATH)

In [185]:
video_md_df = pd.concat([train_df, test_df])[['fp', 'frame_count']]
video_md_df.drop_duplicates(inplace=True)
video_md_df = video_md_df.reset_index()[['fp', 'frame_count']]
video_md_df.head()

Unnamed: 0,fp,frame_count
0,/Volumes/Expansion/3dcnn/ucfdata/v_ApplyEyeMak...,120
1,/Volumes/Expansion/3dcnn/ucfdata/v_ApplyEyeMak...,117
2,/Volumes/Expansion/3dcnn/ucfdata/v_ApplyEyeMak...,146
3,/Volumes/Expansion/3dcnn/ucfdata/v_ApplyEyeMak...,224
4,/Volumes/Expansion/3dcnn/ucfdata/v_ApplyEyeMak...,276


In [211]:
video_md_df.head()

Unnamed: 0,fp,frame_count
0,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,120
1,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,117
2,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,146
3,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,224
4,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,276


### Train Test Split

In [214]:
TRAIN_TEST_METADATA = os.path.join(METADATA_PATH, 'ucfTrainTestlist')
TEST_SPLITS = ['testlist01.txt', 'testlist02.txt', 'testlist03.txt']
TRAIN_SPLITS = ['trainlist01.txt', 'trainlist02.txt', 'trainlist03.txt']
USE_SPLIT = 0
CLASS_MAPPING = 'classInd.txt'

def process_split_to_df(split_file):
    class_map_fp = os.path.join(TRAIN_TEST_METADATA, CLASS_MAPPING)
    class_map_df = pd.read_csv(class_map_fp, sep=" ", header=None)
    class_map_df.columns = ['class', 'class_name']
    
    # Make class labels from 0 to num_classes, as required for keras.utils.to_categorical
    class_map_df['class'] = class_map_df['class'] - 1  
  
    fp = os.path.join(TRAIN_TEST_METADATA, split_file)
    data = pd.read_csv(fp, sep=" ", header=None)
    data = data.iloc[:,:1]
    data.columns = ['title']
    title_split = data["title"].str.split("\/", n = 1, expand = True) 
    data["fp"] = DATA_PATH + '/' + title_split[1]
    data["class_name"] = title_split[0]
    data = pd.merge(data,  class_map_df,left_on='class_name', right_on='class_name')

    return data

def get_train_test_split(video_metadata):
    test_split = TEST_SPLITS[USE_SPLIT]
    train_split = TRAIN_SPLITS[USE_SPLIT]
    test_df = process_split_to_df(test_split)
    train_df = process_split_to_df(train_split)
    video_metadata['fp'].str.split
    test_df = pd.merge(test_df, video_metadata, left_on='fp', right_on='fp')
    train_df = pd.merge(train_df, video_metadata, left_on='fp', right_on='fp')

    return train_df, test_df

train_df, test_df = get_train_test_split(video_md_df)

In [215]:
print(train_df.shape)
print(test_df.shape)

(9537, 5)
(3783, 5)


In [216]:
train_df.head()

Unnamed: 0,title,fp,class_name,class,frame_count
0,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,120
1,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c02.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,117
2,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c03.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,146
3,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c04.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,224
4,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c05.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,276


In [217]:
# Metadata filepaths
train_df_fp = os.path.join(METADATA_PATH, 'train_df.csv')
test_df_fp = os.path.join(METADATA_PATH, 'test_df.csv')

In [218]:
# Persist metadata
train_df.to_csv(train_df_fp, index=False)
test_df.to_csv(test_df_fp, index=False)

In [9]:
# Load metadata
train_df = pd.read_csv(train_df_fp)
test_df = pd.read_csv(test_df_fp)

In [276]:
BATCH_SIZE = 30
IMG_HEIGHT = 112
IMG_WIDTH = 112
IMG_CHANNELS = 3
FRAMES = 16
NUM_CLASSES = 101

class DataHandler(): 
    def __init__(self, test_df, train_df, img_height=IMG_HEIGHT,
                 img_width=IMG_WIDTH, img_channels=IMG_CHANNELS, 
                 frames=FRAMES, num_classes=NUM_CLASSES, batch_size=BATCH_SIZE):
        
        self.test_df = test_df
        self.train_df = train_df
        self.img_height = img_height
        self.img_width = img_width
        self.img_channels = img_channels
        self.frames = frames
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.use_preprocess_dir = False

    def resize_image(self, image):
        # input is (240,320,3) for UCF101 dataset
        # Crop
        image = image[:, 40:280]
        # Resize
        image = cv2.resize(image, (self.img_height, self.img_width))
        return image
    
    def set_preprocess(self, preprocess_dir):
        self.use_preprocess_dir = True
        self.preprocess_dir = preprocess_dir

    def preprocess(self, preprocess_dir):
        create_empty_folder(preprocess_dir)
        self.set_preprocess(preprocess_dir)      
        for df in [self.train_df, self.test_df]:
            with tqdm_notebook(total=len(df)) as pbar:
                for row_idx, row in df.iterrows():
                    pbar.update(1)
                    video_title = row['title']
                    image_folder = get_image_folder(video_title)

                    preprocess_image_folder = get_image_folder(video_title, preprocess_dir)
                    create_empty_folder(preprocess_image_folder)
                    
                    frame_count = row['frame_count']
                    for frame_num in range(frame_count):
                        image_path = get_image_path(image_folder, frame_num)
                        image = load_image(image_path)
                        try:
                            preprocess_image = self.resize_image(image)
                            preprocess_image_path = get_image_path(preprocess_image_folder, frame_num)
                            cv2.imwrite(preprocess_image_path, preprocess_image)
                        except:
                            print(image_path)
    
    def cleanse_train_test_df(self):
        # Make sure frame counts do not include trailing black frames
        frames_cleansed = 0
        for df in [self.train_df, self.test_df]:
            new_frames = []
            with tqdm_notebook(total=len(df)) as pbar:
                for row_idx, row in df.iterrows():
                    pbar.update(1)
                    video_title = row['title']
                    frame_count = row['frame_count']
                    prev_frame_count = frame_count
                    image_folder = get_image_folder(video_title)
                    if self.use_preprocess_dir:
                        image_folder = get_image_folder(video_title, self.preprocess_dir)
                    
                    frames = [".".join(f.split(".")[:-1]) for f in os.listdir(image_folder)]
                    frames = [int(x) for x in frames]
                    
                    max_frame = max(frames)
                    
                    if frame_count > max_frame:
                        frame_count = max_frame
                    
                    while frame_count > 0:
                        image_path = get_image_path(image_folder, frame_count)
                        frame = load_image(image_path)
                        if np.sum(frame) != 0:
                            break
                        frame_count -= 1
                    new_frames.append(frame_count)
                    frames_cleansed += prev_frame_count - frame_count
                    
            df['frame_count'] = new_frames
        
        print("# of Frames Cleansed from Data: %d " % frames_cleansed)
        return [self.train_df, self.test_df]
    
    def create_generator(self, df):
        
        input_batch = np.zeros((self.batch_size, self.frames, self.img_height,
                            self.img_width, self.img_channels))
    
        output_batch = np.zeros((self.batch_size, 1))
        
        while True:
            for batch_num in range(self.batch_size):
                # pick a random video from our df
                video_idx = np.random.randint(0, len(df) - 1)

                # get the image folder
                row = df.iloc[video_idx]
                video_title = row['title']
                image_folder = get_image_folder(video_title)
                if self.use_preprocess_dir:
                    image_folder = get_image_folder(video_title, self.preprocess_dir)

                # get the label for the video
                output_batch[batch_num] = row['class']

                # pick a random spot in the video
                frame_count = row['frame_count']
                base_frame_idx = np.random.randint(0, frame_count - 1 - self.frames)

                # take the next 16 frames from this spot
                for frame_num in range(self.frames):
                    curr_frame_num = base_frame_idx + frame_num
                    
                    # get image path
                    image_path = get_image_path(image_folder, curr_frame_num)

                    # load image
                    image = load_image(image_path)

                    # preprocess
                    if not self.use_preprocess_dir:
                        image = self.resize_image(image)
    
                    # add to batch
                    input_batch[batch_num][frame_num] = image

            yield (input_batch, to_categorical(output_batch, num_classes=self.num_classes))

      
    def create_test_generator(self):
        df = self.test_df
        return self.create_generator(df)
    
    def create_train_generator(self):
        df = self.train_df
        return self.create_generator(df)
    
DH = DataHandler(test_df, train_df)

### Sanity Check

The input batch should be of size 30, with 16 frames, each 128x128 with 3 channels.

The output batch should be of size 30, each with a categorical vector of size 101.

In [274]:
def verify_data_generators(DH):
    gen = DH.create_train_generator()

    t1 = time.time()
    print(next(gen)[0].shape)
    t2 = time.time()
    print(next(gen)[1].shape)
    print(t2-t1)

verify_data_generators(DH)
# 3.83 seconds for a single batch on HDD
# for 1000 steps, 25 epochs, we can expect 20 hours for just loading the data
# This is way too slow... Let's stop using an external drive or let's preprocess the data beforehand.
# After moving all the data to my SSD, the time is now 1.2s for a single batch. This will be about 8 hours of processing. 

(30, 16, 112, 112, 3)
(30, 101)
1.220026969909668


### Investigating Time Complexity

In [283]:
def compare_loading_times():
    row_idx = np.random.randint(0, len(train_df))
    row = train_df.iloc[row_idx]
    video_title = row['title']

    image_folder = get_image_folder(video_title)
    image_path = get_image_path(image_folder, 0)

    t1 = time.time()
    img1 = load_image(image_path)
    a = time.time() - t1
    print("Time for loading raw image: %.5f (s)" % a)
    resized_img = DH.resize_image(img1)
    cv2.imwrite('test.jpg', resized_img)
    t1 = time.time()
    img2 = load_image('test.jpg')
    b = time.time() - t1
    print("Time for loading preprocessed image: %.5f (s)" % b)
    c = a/b
    print("Loading the preprocessed image is %.2f x faster" % c)
    
    
compare_loading_times()

Time for loading raw image: 0.00776 (s)
Time for loading preprocessed image: 0.00035 (s)
Loading the preprocessed image is 22.24 x faster


## Preprocessing

In [284]:
PREPROCESS_DIR = os.path.join(DIR_PATH, 'preprocess')
DH.set_preprocess(PREPROCESS_DIR)

In [285]:
if not os.path.isdir(PREPROCESS_DIR):
    DH.preprocess(PREPROCESS_DIR)

## Data Cleanse

In [251]:
[train_df, test_df] = DH.cleanse_train_test_df()

HBox(children=(IntProgress(value=0, max=9537), HTML(value='')))

HBox(children=(IntProgress(value=0, max=3783), HTML(value='')))

# of Frames Cleansed from Data: 13320 


In [252]:
train_df.head()

Unnamed: 0,title,fp,class_name,class,frame_count
0,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,119
1,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c02.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,116
2,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c03.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,145
3,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c04.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,223
4,ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c05.avi,/Users/shivam/dev/3dcnn/ucfdata/v_ApplyEyeMake...,ApplyEyeMakeup,0,275


In [287]:
verify_data_generators(DH)

(30, 16, 112, 112, 3)
(30, 101)
0.5413329601287842


Now it takes approximately 0.5 seconds to load a batch. This is a much needed improvement.
The total time for loading the data for all epochs is now ~3.4 hours from an original ~20 hours.


## Training From Scratch

### Model Definition

In [300]:
def cnn_3d():
    input_shape = (FRAMES, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
     
    # Normalize 
    #model.add(Lambda(lambda x: x / 127.5 - 1, input_shape=input_shape, name="normalize"))   
    
    model = Sequential()

    model.add(Convolution3D(64, (3, 3, 3), activation='relu',
                     padding='same', name='conv1',
                     input_shape=input_shape))
    model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2),
                           padding='valid', name='pool1'))
    # 2nd layer group
    model.add(Convolution3D(128, (3, 3, 3), activation='relu',
                     padding='same', name='conv2'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           padding='valid', name='pool2'))
    # 3rd layer group
    model.add(Convolution3D(256, (3, 3, 3), activation='relu',
                     padding='same', name='conv3a'))
    model.add(Convolution3D(256, (3, 3, 3), activation='relu',
                     padding='same', name='conv3b'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           padding='valid', name='pool3'))
    # 4th layer group
    model.add(Convolution3D(512, (3, 3, 3), activation='relu',
                     padding='same', name='conv4a'))
    model.add(Convolution3D(512, (3, 3, 3), activation='relu',
                     padding='same', name='conv4b'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           padding='valid', name='pool4'))
    # 5th layer group
    model.add(Convolution3D(512, (3, 3, 3), activation='relu',
                     padding='same', name='conv5a'))
    model.add(Convolution3D(512, (3, 3, 3), activation='relu',
                     padding='same', name='conv5b'))
    
    model.add(ZeroPadding3D(padding=((0, 0), (0, 1), (0, 1)), name='zeropad5'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           padding='valid', name='pool5'))
    model.add(Flatten())
    
    # FC layers group
    model.add(Dense(4096, activation='relu', name='fc6'))
    model.add(Dropout(.5))
    model.add(Dense(4096, activation='relu', name='fc7'))
    model.add(Dropout(.5))
#     model.add(Dense(487, activation='softmax', name='fc8'))
    model.add(Dense(NUM_CLASSES, activation='softmax', name='classify'))
    
    return model


model = cnn_3d()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv3D)               (None, 16, 112, 112, 64)  5248      
_________________________________________________________________
pool1 (MaxPooling3D)         (None, 16, 56, 56, 64)    0         
_________________________________________________________________
conv2 (Conv3D)               (None, 16, 56, 56, 128)   221312    
_________________________________________________________________
pool2 (MaxPooling3D)         (None, 8, 28, 28, 128)    0         
_________________________________________________________________
conv3a (Conv3D)              (None, 8, 28, 28, 256)    884992    
_________________________________________________________________
conv3b (Conv3D)              (None, 8, 28, 28, 256)    1769728   
_________________________________________________________________
pool3 (MaxPooling3D)         (None, 4, 14, 14, 256)    0         
__________

In [73]:
train_generator = DH.create_train_generator()
test_generator = DH.create_test_generator()

adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

#sgd = SGD(lr=1e-5, decay=0.0005, momentum=0.9)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_df) // BATCH_SIZE,
    epochs=25,
    #callbacks=callbacks_list,
    verbose=1, 
    validation_data=test_generator,
    validation_steps=len(test_df) // BATCH_SIZE)

W0721 00:34:45.286726 4503860672 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/25
 80/958 [=>............................] - ETA: 38:16:44 - loss: 4.6917 - acc: 0.0104

KeyboardInterrupt: 

## Transfer Learning 

Let's take model weights learned on the sports-1M dataset.

### Loading the Weights

In [301]:
MODEL_DIR = os.path.join(DIR_PATH, 'models')
SPORTS1M_WEIGHTS_PATH = os.path.join(MODEL_DIR, 'sports1m.h5')

transfer_model = cnn_3d()
transfer_model.load_weights(SPORTS1M_WEIGHTS_PATH, by_name=True)

In [302]:
class ModelManager:
    def __init__(self):
        class_map_fp = os.path.join(TRAIN_TEST_METADATA, CLASS_MAPPING)
        class_map_df = pd.read_csv(class_map_fp, sep=" ", header=None)
        class_map_df.columns = ['class', 'class_name']
        # Make class labels from 0 to num_classes
        class_map_df['class'] = class_map_df['class'] - 1
        class_map = dict(zip(class_map_df['class'], class_map_df['class_name']))
        self.class_map = class_map
    
    def predict_class(self, model, example):
        prediction_softmax = model.predict(example)
        predicted_class = np.argmax(prediction_softmax)
        return predicted_class
    
    def predict_class_name(self, model, example):
        predicted_class = self.predict_class(model, example)
        predicted_class_name = self.class_map[predicted_class]
        return predicted_class_name
    
MM = ModelManager()

### Sanity Check

In [303]:
dummy_input = np.zeros((1, 16, 112, 112, 3))
MM.predict_class_name(transfer_model, dummy_input)

'Diving'

### Fine Tuning

In [305]:
train_generator = DH.create_train_generator()
test_generator = DH.create_test_generator()

adam = Adam()

for layer in transfer_model.layers[:-1]:
    layer.trainable = False
    
transfer_model.summary()

model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_df) // BATCH_SIZE,
    epochs=25,
    #callbacks=callbacks_list,
    verbose=1, 
    validation_data=test_generator,
    validation_steps=len(test_df) // BATCH_SIZE)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv3D)               (None, 16, 112, 112, 64)  5248      
_________________________________________________________________
pool1 (MaxPooling3D)         (None, 16, 56, 56, 64)    0         
_________________________________________________________________
conv2 (Conv3D)               (None, 16, 56, 56, 128)   221312    
_________________________________________________________________
pool2 (MaxPooling3D)         (None, 8, 28, 28, 128)    0         
_________________________________________________________________
conv3a (Conv3D)              (None, 8, 28, 28, 256)    884992    
_________________________________________________________________
conv3b (Conv3D)              (None, 8, 28, 28, 256)    1769728   
_________________________________________________________________
pool3 (MaxPooling3D)         (None, 4, 14, 14, 256)    0         
__________

KeyboardInterrupt: 