In [1]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
from keras import backend as K
from keras.layers import Conv2D, Dropout, LSTM, BatchNormalization, Input,Activation, MaxPool2D, Flatten, Dense,TimeDistributed
from keras.models import Model, load_model
from keras.layers.convolutional import ZeroPadding2D
from keras import metrics
import h5py
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
VIDEOS_DIR = './Videos/'
IMAGES_DIR = './Images/'
classes = []
class_to_index = {}
videos = []

classes = ['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']


for i in range(len(classes)):
    class_to_index[classes[i]] = i
class_to_index

for x in classes:
    videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
videos

[['014',
  '009',
  '005',
  '011',
  '010',
  '003',
  '012',
  '006',
  '013',
  '004',
  '016',
  '001',
  '007',
  '002',
  '017',
  '015'],
 ['009', '005', '010', '003', '006', '004', '001', '007', '008', '002'],
 ['009', '005', '010', '006', '004', '001', '007', '008', '002'],
 ['009', '005', '010', '003', '006', '004', '001', '007', '008', '002'],
 ['014',
  '009',
  '005',
  '011',
  '010',
  '003',
  '012',
  '006',
  '013',
  '004',
  '016',
  '001',
  '007',
  '008',
  '002',
  '017',
  '015'],
 ['005', '003', '004', '001', '002'],
 ['009', '005', '011', '010', '003', '006', '004', '001', '007', '008', '002'],
 ['014',
  '009',
  '005',
  '011',
  '010',
  '018',
  '003',
  '012',
  '006',
  '013',
  '004',
  '016',
  '001',
  '019',
  '007',
  '008',
  '002',
  '017',
  '015'],
 ['014',
  '009',
  '005',
  '011',
  '010',
  '003',
  '012',
  '006',
  '013',
  '004',
  '001',
  '007',
  '008',
  '002']]

In [3]:
def permute(X,Y):
    train_size = X.shape[0]
    permutation_train = np.random.permutation(train_size)
    X = X[permutation_train]
    Y = Y[permutation_train]
    return X,Y

def load_image(path,image_size):
    image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, image_size)
    return image

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)]
    return Y

def pad(X_train_images_class,max_len):
    length = len(X_train_images_class)
    pad_arr = np.zeros((X_train_images_class.shape[1:4]),dtype=np.uint8)
    X_train_images_class = list(X_train_images_class)
    for i in range(max_len-length):
        X_train_images_class.append(pad_arr)
    return np.array(X_train_images_class,dtype=np.uint8)

In [4]:
def build_dataset_end_to_end(image_size, max_len = 40, stride = 10):
    
    X_train_images = []
    Y_train_images = []
    
    X_test_images = []
    Y_test_images = []
    
    test_videos = [['002', '009'], ['005', '010'], ['007'], \
                   ['003'], ['006', '012'], ['004'], ['008'], ['004', '012'], ['001', '013']]
    
    for i in range(len(classes)):
        cls = classes[i]
        test = test_videos[i] 
        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_train_images_class = np.array(X_train_images_class)        
            X_test_frames = []                                
            for k in range(0,len(X_train_images_class),stride):
                lower = k
                upper = min(len(X_train_images_class),k+max_len)
                if upper == len(X_train_images_class):
                    if vid not in test:                
                        X_train_images.append(pad(X_train_images_class[lower:upper],max_len))
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(pad(X_train_images_class[lower:upper],max_len))
                        X_test_images.append(np.array(X_test_frames))        
                        Y_test_images.append(i)
                    print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    if vid not in test:                
                        X_train_images.append(X_train_images_class[lower:upper])
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(X_train_images_class[lower:upper])
                    print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])
    return np.array(X_train_images,dtype=np.uint8),np.array(Y_train_images,dtype=np.uint8), np.array(X_test_images), np.array(Y_test_images)

In [5]:
X_train, Y_train, X_test, Y_test = build_dataset_end_to_end((172, 172))

Padded frames 0 to 23
Processed 014 of class Kicking
Padded frames 0 to 22
Processed 009 of class Kicking
Padded frames 0 to 23
Processed 005 of class Kicking
Padded frames 0 to 23
Processed 011 of class Kicking
Padded frames 0 to 23
Processed 010 of class Kicking
Padded frames 0 to 23
Processed 003 of class Kicking
Padded frames 0 to 23
Processed 012 of class Kicking
Padded frames 0 to 23
Processed 006 of class Kicking
Padded frames 0 to 23
Processed 013 of class Kicking
Padded frames 0 to 23
Processed 004 of class Kicking
Padded frames 0 to 23
Processed 016 of class Kicking
Padded frames 0 to 23
Processed 001 of class Kicking
Padded frames 0 to 23
Processed 007 of class Kicking
Padded frames 0 to 23
Processed 002 of class Kicking
Padded frames 0 to 23
Processed 017 of class Kicking
Padded frames 0 to 23
Processed 015 of class Kicking
Added frames 0 to 40
Added frames 10 to 50
Padded frames 20 to 58
Processed 009 of class Riding-Horse
Added frames 0 to 40
Added frames 10 to 50
Padded 

Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 011 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 010 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 109
Processed 018 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 003 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Padded frames 20 to 60
Processed 012 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Ad

In [6]:
print(X_train.shape, X_test.shape)
print(Y_train.shape, Y_test.shape)

(375, 40, 172, 172, 3) (14,)
(375,) (14,)


In [7]:
def end_to_end(input_shape):
    X_input = Input(input_shape)
    X = TimeDistributed(BatchNormalization(name = 'BatchNorm_1'))(X_input)
    #X = TimeDistributed(ZeroPadding2D((3, 3)))(X)
    X = TimeDistributed(Conv2D(32, (7, 7), strides = (4, 4), activation='relu', name="Conv_1a", padding="same"))(X)
    X = TimeDistributed(Conv2D(32, (3, 3), activation='relu', name="Conv_1b", padding="same"))(X)
    X = TimeDistributed(MaxPool2D((2, 2), name = "Pool_1"))(X)
    X = TimeDistributed(Dropout(0.2))(X)
    
    X = TimeDistributed(Conv2D(32, (3, 3), name ="Conv_2a", activation='relu', padding = "same"))(X)
    #X = TimeDistributed(Conv2D(32, (3, 3), name ="Conv_2b", activation='relu', padding = "same"))(X)
    X = TimeDistributed(MaxPool2D((2, 2), name = "Pool_2"))(X)
    X = TimeDistributed(Dropout(0.2))(X)
    X = TimeDistributed(Conv2D(32,(3,3), name='Conv_3a'))(X)
    X = TimeDistributed(MaxPool2D((2, 2), name = "Pool_3"))(X)
    
    X = TimeDistributed(Conv2D(8,(1,1), name='Conv_1x1'))(X)
    X = TimeDistributed(Flatten())(X)
    X = TimeDistributed(Dropout(0.3))(X)
    Y = TimeDistributed(Dense(9,activation='softmax',name='final'))(X)
    
    #X = LSTM(32, return_sequences=True)(X)
    X = LSTM(32, return_sequences=False)(X)
    X = Dense(9, activation='softmax')(X)

    return Model(X_input, outputs=[X, Y])

In [8]:
e2e = end_to_end((40, 172, 172, 3))
print(e2e.summary())

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 40, 172, 172,  0                                            
____________________________________________________________________________________________________
time_distributed_1 (TimeDistribu (None, 40, 172, 172,  12          input_1[0][0]                    
____________________________________________________________________________________________________
time_distributed_2 (TimeDistribu (None, 40, 43, 43, 32 4736        time_distributed_1[0][0]         
____________________________________________________________________________________________________
time_distributed_3 (TimeDistribu (None, 40, 43, 43, 32 9248        time_distributed_2[0][0]         
___________________________________________________________________________________________

In [9]:
e2e.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], loss_weights=[1, 0.5],
        metrics=['accuracy'],
        optimizer='adam')

In [10]:
X_train, Y_train = permute(X_train, Y_train)

In [11]:
X_train.shape

(375, 40, 172, 172, 3)

In [12]:
Y_train = convert_to_one_hot(Y_train, 9)


In [13]:
Y_train.shape

(375, 9)

In [14]:
Y_train2 = np.tile(Y_train, (40, 1, 1))

In [15]:
Y_train2 = Y_train2.transpose(1, 0, 2)


In [16]:
Y_train2.shape

(375, 40, 9)

In [17]:
e2e.fit(X_train, [Y_train, Y_train2], batch_size=16, epochs=1, validation_split = 0.1)

Train on 337 samples, validate on 38 samples
Epoch 1/1


<keras.callbacks.History at 0x7f0336da8630>

In [19]:
np.save('./Numpy/End2End/X_train', X_train)
np.save('./Numpy/End2End/Y_train', Y_train)
np.save('./Numpy/End2End/X_test', X_test)
np.save('./Numpy/End2End/Y_test', Y_test)