In [66]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
from keras import backend as K
from keras.layers import Conv2D, Conv1D, Dropout, LSTM, BatchNormalization, Input,Activation, MaxPool2D, Flatten 
from keras.layers import Dense,TimeDistributed, Lambda
from keras.models import Model, load_model
from keras.layers.convolutional import ZeroPadding2D
from keras import metrics
import h5py
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
VIDEOS_DIR = '../Videos/'
IMAGES_DIR = '../Images/'
classes = []
class_to_index = {}
videos = []

classes = ['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']


for i in range(len(classes)):
    class_to_index[classes[i]] = i
class_to_index

for x in classes:
    videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
videos

[['014',
  '009',
  '005',
  '011',
  '010',
  '003',
  '012',
  '006',
  '013',
  '004',
  '016',
  '001',
  '007',
  '002',
  '017',
  '015'],
 ['009', '005', '010', '003', '006', '004', '001', '007', '008', '002'],
 ['009', '005', '010', '006', '004', '001', '007', '008', '002'],
 ['009', '005', '010', '003', '006', '004', '001', '007', '008', '002'],
 ['014',
  '009',
  '005',
  '011',
  '010',
  '003',
  '012',
  '006',
  '013',
  '004',
  '016',
  '001',
  '007',
  '008',
  '002',
  '017',
  '015'],
 ['005', '003', '004', '001', '002'],
 ['009', '005', '011', '010', '003', '006', '004', '001', '007', '008', '002'],
 ['014',
  '009',
  '005',
  '011',
  '010',
  '018',
  '003',
  '012',
  '006',
  '013',
  '004',
  '016',
  '001',
  '019',
  '007',
  '008',
  '002',
  '017',
  '015'],
 ['014',
  '009',
  '005',
  '011',
  '010',
  '003',
  '012',
  '006',
  '013',
  '004',
  '001',
  '007',
  '008',
  '002']]

In [3]:
def permute(X,Y):
    train_size = X.shape[0]
    permutation_train = np.random.permutation(train_size)
    X = X[permutation_train]
    Y = Y[permutation_train]
    return X,Y

def load_image(path,image_size):
    image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, image_size)
    return image

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)]
    return Y

def pad(X_train_images_class,max_len):
    length = len(X_train_images_class)
    pad_arr = np.zeros((X_train_images_class.shape[1:4]),dtype=np.uint8)
    X_train_images_class = list(X_train_images_class)
    for i in range(max_len-length):
        X_train_images_class.append(pad_arr)
    return np.array(X_train_images_class,dtype=np.uint8)

def evaluate(model, X_test,Y_test,verbose = True):
    count = 0
    for i in range(len(X_test)):
        pred = model.predict(X_test[i])[0]
        #print(pred[0].shape, pred[1].shape)
        #break
        max_pred = [np.argmax(i) for i in pred]
        counts = np.bincount(max_pred)
        class_pred = np.argmax(counts)
        #class_pred = max_pred
        #actual = np.argmax(Y_test[i])
        actual = Y_test[i]
        if verbose:
            print("Max Preds time", max_pred)
            print("Pred",classes[class_pred],"Actual",classes[actual])
            print()
        if class_pred == actual:
            count += 1
    return float(count)/float(len(Y_test)) * 100.0

In [75]:
def dilated_conv(input_shape):
    X_input = Input(input_shape)
    X = TimeDistributed(BatchNormalization(), name = 'BatchNorm_1')(X_input)
    #X = TimeDistributed(ZeroPadding2D((3, 3)))(X)
    X = TimeDistributed(Conv2D(32, (7, 7), strides = (4, 4), activation='relu', padding="same"), name="Conv_1a")(X)
    X = TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding="same"), name="Conv_1b")(X)
    X = TimeDistributed(MaxPool2D((2, 2)), name = "Pool_1")(X)
    X = TimeDistributed(Dropout(0.2), name='Dropout_a')(X)
    
    X = TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding = "same"), name ="Conv_2a")(X)
    #X = TimeDistributed(Conv2D(32, (3, 3), name ="Conv_2b", activation='relu', padding = "same"))(X)
    X = TimeDistributed(MaxPool2D((2, 2)), name = "Pool_2")(X)
    X = TimeDistributed(Dropout(0.2), name='Dropout_b')(X)
    X = TimeDistributed(Conv2D(32,(3,3)), name='Conv_3a')(X)
    X = TimeDistributed(MaxPool2D((2, 2)), name = "Pool_3")(X)
    
    X = TimeDistributed(Conv2D(16,(1,1)), name='Conv_1x1')(X)
    X = TimeDistributed(Flatten(), name='Flatten')(X)
    X = TimeDistributed(Dropout(0.3), name='Dropout_c')(X)
    
    X = Conv1D(96, 1, dilation_rate=2, name = 'Conv1Da')(X)
    X = Conv1D(64, 1, dilation_rate=4, name = 'Conv1Db')(X)
    X = Conv1D(32, 1, dilation_rate=4, name = 'Conv1Dc')(X)
    X = Lambda(lambda x : x[:, -1, :], name = "Extractoutput")(X)
    X = Dense(9, activation='softmax', name = 'Output')(X)
    return Model(X_input, X)

In [76]:
dcnn = dilated_conv((40, 172, 172, 3))

In [77]:
dcnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_26 (InputLayer)        (None, 40, 172, 172, 3)   0         
_________________________________________________________________
BatchNorm_1 (TimeDistributed (None, 40, 172, 172, 3)   12        
_________________________________________________________________
Conv_1a (TimeDistributed)    (None, 40, 43, 43, 32)    4736      
_________________________________________________________________
Conv_1b (TimeDistributed)    (None, 40, 43, 43, 32)    9248      
_________________________________________________________________
Pool_1 (TimeDistributed)     (None, 40, 21, 21, 32)    0         
_________________________________________________________________
Dropout_a (TimeDistributed)  (None, 40, 21, 21, 32)    0         
_________________________________________________________________
Conv_2a (TimeDistributed)    (None, 40, 21, 21, 32)    9248      
__________

In [81]:
dcnn.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

## DATASET

In [78]:
def build_dataset_end_to_end(image_size, max_len = 40, stride = 10):
    
    X_train_images = []
    Y_train_images = []
    
    X_test_images = []
    Y_test_images = []
    
    test_videos = [['004', '011', '007'], ['006', '011'], ['007', '003'], \
                   ['003','001'], ['006', '012', '009'], ['004', '005'], ['008','002'], ['004', '012', '002'], ['001', '013', '006']]
    
    for i in range(len(classes)):
        cls = classes[i]
        test = test_videos[i] 
        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_train_images_class = np.array(X_train_images_class)        
            X_test_frames = []                                
            for k in range(0,len(X_train_images_class),stride):
                lower = k
                upper = min(len(X_train_images_class),k+max_len)
                if upper == len(X_train_images_class):
                    if vid not in test:                
                        X_train_images.append(pad(X_train_images_class[lower:upper],max_len))
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(pad(X_train_images_class[lower:upper],max_len))
                        X_test_images.append(np.array(X_test_frames))        
                        Y_test_images.append(i)
                    print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    if vid not in test:                
                        X_train_images.append(X_train_images_class[lower:upper])
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(X_train_images_class[lower:upper])
                    print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])
    return np.array(X_train_images,dtype=np.uint8),np.array(Y_train_images,dtype=np.uint8), np.array(X_test_images), np.array(Y_test_images)

In [79]:
X_train = None
X_test = None
Y_train = None
Y_test = None
try:
    X_train = np.load('../Numpy/End2End/X_train.npy')
    Y_train = np.load('../Numpy/End2End/Y_train.npy')
    X_test = np.load('../Numpy/End2End/X_test.npy')
    Y_test = np.load('../Numpy/End2End/Y_test.npy')
except FileNotFoundError:
    X_train, Y_train, X_test, Y_test = build_dataset_end_to_end((172, 172))
    np.save('../Numpy/End2End/X_train.npy', X_train)
    np.save('../Numpy/End2End/Y_train.npy', Y_train)
    np.save('../Numpy/End2End/X_test.npy', X_test)
    np.save('../Numpy/End2End/Y_test.npy', Y_test)

Padded frames 0 to 23
Processed 014 of class Kicking
Padded frames 0 to 22
Processed 009 of class Kicking
Padded frames 0 to 23
Processed 005 of class Kicking
Padded frames 0 to 23
Processed 011 of class Kicking
Padded frames 0 to 23
Processed 010 of class Kicking
Padded frames 0 to 23
Processed 003 of class Kicking
Padded frames 0 to 23
Processed 012 of class Kicking
Padded frames 0 to 23
Processed 006 of class Kicking
Padded frames 0 to 23
Processed 013 of class Kicking
Padded frames 0 to 23
Processed 004 of class Kicking
Padded frames 0 to 23
Processed 016 of class Kicking
Padded frames 0 to 23
Processed 001 of class Kicking
Padded frames 0 to 23
Processed 007 of class Kicking
Padded frames 0 to 23
Processed 002 of class Kicking
Padded frames 0 to 23
Processed 017 of class Kicking
Padded frames 0 to 23
Processed 015 of class Kicking
Added frames 0 to 40
Added frames 10 to 50
Padded frames 20 to 58
Processed 009 of class Riding-Horse
Added frames 0 to 40
Added frames 10 to 50
Padded 

Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 011 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 010 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 109
Processed 018 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 101
Processed 003 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Padded frames 20 to 60
Processed 012 of class Walking
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Ad

In [80]:
X_train.shape, X_test.shape,Y_train.shape, Y_test.shape

((345, 40, 172, 172, 3), (20,), (345,), (20,))

## TRAINING

In [83]:
dcnn.fit(X_train, convert_to_one_hot(Y_train, 9), batch_size=16, epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x7f0456a566a0>

## TESTING

In [84]:
evaluate(dcnn, X_test, Y_test, verbose = False)

15.0

In [None]:
def build_test_dataset(image_size, stride = 10, max_len = 40):
    
    X_test_images = []
    Y_test_images = []
    
    VIDEOS_DIR = '../Videos/'
    IMAGES_DIR = '../Images/'
    classes = ['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']
    videos = []
    for x in classes:
        videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
    
    for i in range(len(classes)):
        cls = classes[i]
        
        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_train_images_class = np.array(X_train_images_class)        
            X_test_frames = []                                
            for k in range(0,len(X_train_images_class),stride):
                lower = k
                upper = min(len(X_train_images_class),k+max_len)
                if upper == len(X_train_images_class):             
                    X_test_frames.append(pad(X_train_images_class[lower:upper],max_len))
                    X_test_images.append(np.array(X_test_frames))        
                    Y_test_images.append(i)
                    print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    X_test_frames.append(X_train_images_class[lower:upper])
                    print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])
    return np.array(X_test_images), np.array(Y_test_images)

In [None]:
X_test_full = None
Y_test_full = None
try:
    X_test_full = np.load('../Numpy/End2End/X_test_full_training.npy')
    Y_test_full = np.load('../Numpy/End2End/Y_test_full_training.npy')
except FileNotFoundError:
    X_test_full, Y_test_full = build_test_dataset((172,172))
    np.save('../Numpy/End2End/X_test_full_training.npy', X_test_full)
    np.save('../Numpy/End2End/Y_test_full_training.npy', Y_test_full)

In [None]:
X_test_unseen = None
Y_test_unseen = None
try:
    X_test_unseen = np.load('../Numpy/End2End/X_test_full_training.npy')
    Y_test_unseen = np.load('../Numpy/End2End/Y_test_full_training.npy')
except FileNotFoundError:
    X_test_unseen, Y_test_unseen = build_test_dataset((172,172))
    np.save('../Numpy/End2End/X_test_full_training.npy', X_test_unseen)
    np.save('../Numpy/End2End/Y_test_full_training.npy', Y_test_unseen)