In [1]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
from keras import backend as K
from keras.layers import Conv2D, Dropout, LSTM, BatchNormalization, Input,Activation, MaxPool2D, Flatten, Dense,TimeDistributed
from keras.models import Model, load_model
from keras.layers.convolutional import ZeroPadding2D
from keras import metrics
import h5py

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
VIDEOS_DIR = './Videos/'
IMAGES_DIR = './Images/'
classes = []
class_to_index = {}
videos = []

classes = ['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']


for i in range(len(classes)):
    class_to_index[classes[i]] = i
class_to_index

for x in classes:
    videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
videos


[['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '004',
  '011',
  '002',
  '001'],
 ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '004',
  '011',
  '002',
  '008',
  '001'],
 ['003', '005', '004', '002', '001'],
 ['006', '003', '009', '005', '010', '007', '004', '011', '002', '008', '001'],
 ['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '019',
  '004',
  '011',
  '018',
  '002',
  '008',
  '001'],
 ['006',
  '003',
  '009',
  '013',
  '005',
  '012',
  '010',
  '014',
  '007',
  '004',
  '011',
  '002',
  '008',
  '001']]

In [3]:
def permute(X,Y):
    train_size = X.shape[0]
    permutation_train = np.random.permutation(train_size)
    X = X[permutation_train]
    Y = Y[permutation_train]
    return X,Y

def load_image(path,image_size):
    image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, image_size)
    return image

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)]
    return Y

def pad(X_train_images_class,max_len):
    length = len(X_train_images_class)
    pad_arr = np.zeros((X_train_images_class.shape[1:4]),dtype=np.uint8)
    X_train_images_class = list(X_train_images_class)
    for i in range(max_len-length):
        X_train_images_class.append(pad_arr)
    return np.array(X_train_images_class,dtype=np.uint8)

In [4]:
def build_dataset_end_to_end(image_size, max_len = 40, stride = 10):
    global classes
    global videos

    X_train_images = []
    Y_train_images = []
    
    X_test_images = []
    Y_test_images = []
    
    test_videos = [['002', '009'], ['005', '010'], ['007'], \
                   ['003'], ['006', '012'], ['004'], ['008'], ['004', '012'], ['001', '013']]
    
    for i in range(len(classes)):
        cls = classes[i]
        test = test_videos[i] 
        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_train_images_class = np.array(X_train_images_class)        
            X_test_frames = []                                
            for k in range(0,len(X_train_images_class),stride):
                lower = k
                upper = min(len(X_train_images_class),k+max_len)
                if upper == len(X_train_images_class):
                    if vid not in test:                
                        X_train_images.append(pad(X_train_images_class[lower:upper],max_len))
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(pad(X_train_images_class[lower:upper],max_len))
                        X_test_images.append(np.array(X_test_frames))        
                        Y_test_images.append(i)
                    print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    if vid not in test:                
                        X_train_images.append(X_train_images_class[lower:upper])
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(X_train_images_class[lower:upper])
                    print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])
    return np.array(X_train_images,dtype=np.uint8),np.array(Y_train_images,dtype=np.uint8), np.array(X_test_images), np.array(Y_test_images)

In [5]:
X_train, Y_train, X_test, Y_test = build_dataset_end_to_end((172, 172))

Padded frames 0 to 23
Processed 006 of class Kicking
Padded frames 0 to 23
Processed 017 of class Kicking
Padded frames 0 to 23
Processed 003 of class Kicking
Padded frames 0 to 23
Processed 016 of class Kicking
Padded frames 0 to 22
Processed 009 of class Kicking
Padded frames 0 to 23
Processed 013 of class Kicking
Padded frames 0 to 23
Processed 005 of class Kicking
Padded frames 0 to 23
Processed 012 of class Kicking
Padded frames 0 to 23
Processed 010 of class Kicking
Padded frames 0 to 23
Processed 015 of class Kicking
Padded frames 0 to 23
Processed 014 of class Kicking
Padded frames 0 to 23
Processed 007 of class Kicking
Padded frames 0 to 23
Processed 004 of class Kicking
Padded frames 0 to 23
Processed 011 of class Kicking
Padded frames 0 to 23
Processed 002 of class Kicking
Padded frames 0 to 23
Processed 001 of class Kicking
Padded frames 0 to 39
Processed 006 of class Riding-Horse
Added frames 0 to 40
Added frames 10 to 50
Padded frames 20 to 60
Processed 003 of class Ridin

Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 003 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Added frames 70 to 110
Added frames 80 to 120
Added frames 90 to 130
Added frames 100 to 140
Padded frames 110 to 144
Processed 016 of class Walking
Padded frames 0 to 37
Processed 009 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 013 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 005 of class Walking
Added frames 0 to

In [13]:
print(X_train.shape, X_test.shape)
print(Y_train.shape, Y_test.shape)

(375, 40, 172, 172, 3) (14,)
(375,) (14,)


In [14]:
Y_train = convert_to_one_hot(Y_train,9)

In [15]:
def end_to_end(input_shape):
    X_input = Input(input_shape)
    X = TimeDistributed(BatchNormalization(name = 'BatchNorm_1'))(X_input)
    #X = TimeDistributed(ZeroPadding2D((3, 3)))(X)
    X = TimeDistributed(Conv2D(32, (7, 7), strides = (3, 3), activation='relu', name="Conv_1a", padding="same"))(X)
    X = TimeDistributed(Conv2D(32, (3, 3), activation='relu', name="Conv_1b", padding="same"))(X)
    X = TimeDistributed(MaxPool2D((2, 2), name = "Pool_1"))(X)
    X = TimeDistributed(Dropout(0.2))(X)
    
    X = TimeDistributed(Conv2D(32, (3, 3), name ="Conv_2a", activation='relu', padding = "same"))(X)
    #X = TimeDistributed(Conv2D(32, (3, 3), name ="Conv_2b", activation='relu', padding = "same"))(X)
    X = TimeDistributed(MaxPool2D((2, 2), name = "Pool_2"))(X)
    X = TimeDistributed(Dropout(0.2))(X)
    X = TimeDistributed(Conv2D(32,(3,3), name='Conv_3a'))(X)
    X = TimeDistributed(MaxPool2D((2, 2), name = "Pool_3"))(X)
    
    X = TimeDistributed(Conv2D(6,(1,1), name='Conv_1x1'))(X)
    X = TimeDistributed(Flatten())(X)

    X = LSTM(32, return_sequences=True)(X)
    X = LSTM(32, return_sequences=False)(X)
    X = Dense(9, activation='softmax')(X)

    return Model(X_input,X)

In [16]:
e2e = end_to_end((40, 172, 172, 3))
print(e2e.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 40, 172, 172, 3)   0         
_________________________________________________________________
time_distributed_13 (TimeDis (None, 40, 172, 172, 3)   12        
_________________________________________________________________
time_distributed_14 (TimeDis (None, 40, 58, 58, 32)    4736      
_________________________________________________________________
time_distributed_15 (TimeDis (None, 40, 58, 58, 32)    9248      
_________________________________________________________________
time_distributed_16 (TimeDis (None, 40, 29, 29, 32)    0         
_________________________________________________________________
time_distributed_17 (TimeDis (None, 40, 29, 29, 32)    0         
_________________________________________________________________
time_distributed_18 (TimeDis (None, 40, 29, 29, 32)    9248      
__________

In [17]:
e2e.compile(loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer='adam')

In [18]:
X_train,Y_train = permute(X_train,Y_train)

In [20]:
epochs = 10
for i in range(epochs):
    e2e.fit(X_train, Y_train, epochs=1, batch_size = 8, validation_split=0.1)
    e2e.save('models/End_End/epoch_' + str(i+10) + '.h5')

Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1
Train on 337 samples, validate on 38 samples
Epoch 1/1


In [21]:
e2e.evaluate(x=X_test,y=Y_test)

ValueError: Error when checking input: expected input_2 to have 5 dimensions, but got array with shape (14, 1)