In [24]:
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import numpy as np
import os
from keras import backend as K
from keras.layers import Conv2D, Dropout, LSTM, BatchNormalization, Input,Activation, MaxPool2D, Flatten, Dense,TimeDistributed
from keras.models import Model, load_model
from keras import metrics 
import random
import pickle

In [3]:
VIDEOS_DIR = './Videos/'
IMAGES_DIR = './Images/'

In [4]:
classes = ['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']
classes

['Kicking',
 'Riding-Horse',
 'Running',
 'SkateBoarding',
 'Swing-Bench',
 'Lifting',
 'Swing-Side',
 'Walking',
 'Golf-Swing']

In [5]:
class_to_index = {}
for i in range(len(classes)):
    class_to_index[classes[i]] = i
class_to_index

{'Golf-Swing': 8,
 'Kicking': 0,
 'Lifting': 5,
 'Riding-Horse': 1,
 'Running': 2,
 'SkateBoarding': 3,
 'Swing-Bench': 4,
 'Swing-Side': 6,
 'Walking': 7}

In [6]:
videos = []
for x in classes:
    videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
videos

[['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '004',
  '011',
  '002',
  '001'],
 ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '004',
  '011',
  '002',
  '008',
  '001'],
 ['003', '005', '004', '002', '001'],
 ['006', '003', '009', '005', '010', '007', '004', '011', '002', '008', '001'],
 ['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '019',
  '004',
  '011',
  '018',
  '002',
  '008',
  '001'],
 ['006',
  '003',
  '009',
  '013',
  '005',
  '012',
  '010',
  '014',
  '007',
  '004',
  '011',
  '002',
  '008',
  '001']]

### Function Block

In [7]:
def permute(X,Y):
    train_size = X.shape[0]
    permutation_train = np.random.permutation(train_size)
    X = X[permutation_train]
    Y = Y[permutation_train]
    return X,Y

def load_image(path,image_size):
    image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, image_size)
    return image

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)]
    return Y

def model_predict(model,images):
    output = K.function([model.layers[0].input,K.learning_phase()],
                        [model.layers[13].output])
    return output([images,0])[0]

def pad(X_cnn,max_len):
    features_len = X_cnn.shape[1]
    length = X_cnn.shape[0]
    X_cnn = list(X_cnn)
    pad_arr = [0 for i in range(features_len)]
    for i in range(max_len-length):
        X_cnn.append(pad_arr)
    return np.array(X_cnn)

def evaluate(X_test,Y_test,model):
    count = 0
    for i in range(len(X_test)):
        pred = model.predict(X_test[i])
        max_pred = [np.argmax(i) for i in pred]
        counts = np.bincount(max_pred)
        class_pred = np.argmax(counts)
        #class_pred = max_pred
        actual = np.argmax(Y_test[i])
        #print("Max Preds time", max_pred)
        #print("Pred",classes[class_pred],"Actual",classes[actual])
        #print()
        if class_pred == actual:
            count += 1
    return float(count)/float(len(Y_test)) * 100.0 

### Build Dataset

In [8]:
def build_dataset_for_lstm_strided(image_size, stride = 10, max_len = 40):
    
    model = load_model('models/Conv/17epochs_valacc_94.h5')
    
    X_train_images = []
    Y_train_images = []
    X_test_images = []
    Y_test_images = []
    
    test_videos = [['002', '009'], ['005', '010'], ['007'], \
                   ['003'], ['006', '012'], ['004'], ['008'], ['004', '012'], ['001', '013']]
    
    for i in range(len(classes)):
        cls = classes[i]
        #test_video = random.randint(0,len(videos[i])-1)
        test = test_videos[i] 
        #print("Selected Video for test is",[videos[i][test_video] for test_video in test])

        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_cnn = model_predict(model,np.array(X_train_images_class))
            #print(X_cnn.shape)
            
            del X_train_images_class
            X_test_frames = []                                
            for k in range(0,X_cnn.shape[0],stride):
                lower = k
                upper = min(X_cnn.shape[0],k+max_len)
                if upper == X_cnn.shape[0]:
                    if vid not in test:                
                        X_train_images.append(pad(X_cnn[lower:upper],max_len))
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(pad(X_cnn[lower:upper],max_len))
                        X_test_images.append(np.array(X_test_frames))        
                        Y_test_images.append(i)
                    #print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    if vid not in test:                
                        X_train_images.append(X_cnn[lower:upper])
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(X_cnn[lower:upper])
                    #print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])

        #X_test_images.append(np.array(X_test_frames))        
        #Y_test_images.append(i)
    return X_train_images,Y_train_images,X_test_images,Y_test_images

### Conv Model

In [9]:
def build_model(input_shape):
    X_input = Input(input_shape, name = "Input")
    
    X = BatchNormalization(name = 'BatchNorm_1')(X_input)
    X = Conv2D(32, (7, 7), strides = (5, 5), name="Conv_1a", padding="same")(X)
    X = Activation('relu')(X)
    
    X = Conv2D(32, (3, 3), name = "Conv_1b", padding="same")(X)
    X = Activation('relu')(X)
    X = MaxPool2D((2, 2), name = "Pool_1")(X)
    X = Dropout(0.2)(X)
    
    X = Conv2D(32, (3, 3), name ="Conv_2", padding = "same")(X)
    X = Activation('relu')(X)
    X = MaxPool2D((4, 4), name = "Pool_2")(X)
    X = Dropout(0.2)(X)
    
    X = Conv2D(8,(1,1), name='Conv_1x1')(X)
    
    X = Flatten()(X)
    X = Dropout(0.2)(X)
    
    return Model(X_input, X)

In [10]:
def CNN_model(prev_model):
    X = Dense(9,activation='softmax',name='final')(prev_model.output)
    return Model(prev_model.input, X)

In [11]:
model = build_model((172, 172, 3))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 172, 172, 3)       0         
_________________________________________________________________
BatchNorm_1 (BatchNormalizat (None, 172, 172, 3)       12        
_________________________________________________________________
Conv_1a (Conv2D)             (None, 35, 35, 32)        4736      
_________________________________________________________________
activation_1 (Activation)    (None, 35, 35, 32)        0         
_________________________________________________________________
Conv_1b (Conv2D)             (None, 35, 35, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 35, 35, 32)        0         
_________________________________________________________________
Pool_1 (MaxPooling2D)        (None, 17, 17, 32)        0         
__________

In [12]:
cnn = CNN_model(model)
cnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 172, 172, 3)       0         
_________________________________________________________________
BatchNorm_1 (BatchNormalizat (None, 172, 172, 3)       12        
_________________________________________________________________
Conv_1a (Conv2D)             (None, 35, 35, 32)        4736      
_________________________________________________________________
activation_1 (Activation)    (None, 35, 35, 32)        0         
_________________________________________________________________
Conv_1b (Conv2D)             (None, 35, 35, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 35, 35, 32)        0         
_________________________________________________________________
Pool_1 (MaxPooling2D)        (None, 17, 17, 32)        0         
__________

In [13]:
cnn.compile(loss='categorical_crossentropy', 
            metrics=['accuracy'], 
            optimizer='adam')

### RNN Model

In [14]:
def rnn_model(input_shape):
    X_input = Input(input_shape)
    X = LSTM(32, return_sequences=True)(X_input)
    X = Dropout(0.3)(X)
    X = LSTM(32, return_sequences=False)(X)
    X = Dropout(0.3)(X)
    X = Dense(9,activation='softmax')(X)
    return Model(X_input, X)

In [15]:
rnn = rnn_model((30,128))
rnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 30, 128)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 32)            20608     
_________________________________________________________________
dropout_4 (Dropout)          (None, 30, 32)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dropout_5 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 9)                 297       
Total params: 29,225
Trainable params: 29,225
Non-trainable params: 0
_________________________________________________________________


In [16]:
rnn.compile(loss='categorical_crossentropy', 
            metrics=['accuracy'], 
            optimizer='adam')

### Data Loader

In [17]:
def data_loader(stride=10, max_len=40):
    X_train = None
    Y_train = None
    X_test = None
    Y_test = None
    try:
        X_train = np.load('Numpy/LSTM_Strided/train_X_'+str(stride)+'_'+str(max_len)+'.npy')
        Y_train = np.load('Numpy/LSTM_Strided/train_Y_'+str(stride)+'_'+str(max_len)+'.npy')
        X_test = np.load('Numpy/LSTM_Strided/test_X_'+str(stride)+'_'+str(max_len)+'.npy')
        Y_test = np.load('Numpy/LSTM_Strided/test_Y_'+str(stride)+'_'+str(max_len)+'.npy')
    except FileNotFoundError:
        X_train,Y_train,X_test,Y_test = build_dataset_for_lstm_strided((172,172), stride, max_len)
        X_train = np.array(X_train)
        X_test = np.array(X_test)
        Y_train = convert_to_one_hot(np.array(Y_train),9)
        Y_test = convert_to_one_hot(np.array(Y_test),9)

        np.save('Numpy/LSTM_Strided/train_X_'+str(stride)+'_'+str(max_len)+'.npy',X_train)
        np.save('Numpy/LSTM_Strided/train_Y_'+str(stride)+'_'+str(max_len)+'.npy',Y_train)
        np.save('Numpy/LSTM_Strided/test_X_'+str(stride)+'_'+str(max_len)+'.npy',X_test)
        np.save('Numpy/LSTM_Strided/test_Y_'+str(stride)+'_'+str(max_len)+'.npy',Y_test)
    
    print("Training")    
    print("Shape X",X_train.shape)
    print("Shape Y",Y_train.shape)
    print()
    print("Test")
    print("Shape X",X_test.shape)
    print("Shape Y",Y_test.shape)
    return X_train, Y_train, X_test, Y_test

In [19]:
X_train, Y_train, X_test, Y_test = data_loader(stride=5, max_len=40)

Processed 006 of class Kicking
Processed 017 of class Kicking
Processed 003 of class Kicking
Processed 016 of class Kicking
Processed 009 of class Kicking
Processed 013 of class Kicking
Processed 005 of class Kicking
Processed 012 of class Kicking
Processed 010 of class Kicking
Processed 015 of class Kicking
Processed 014 of class Kicking
Processed 007 of class Kicking
Processed 004 of class Kicking
Processed 011 of class Kicking
Processed 002 of class Kicking
Processed 001 of class Kicking
Processed 006 of class Riding-Horse
Processed 003 of class Riding-Horse
Processed 009 of class Riding-Horse
Processed 005 of class Riding-Horse
Processed 010 of class Riding-Horse
Processed 007 of class Riding-Horse
Processed 004 of class Riding-Horse
Processed 002 of class Riding-Horse
Processed 008 of class Riding-Horse
Processed 001 of class Riding-Horse
Processed 006 of class Running
Processed 009 of class Running
Processed 005 of class Running
Processed 010 of class Running
Processed 007 of cla

In [20]:
X_train_rnn,Y_train_rnn = permute(X_train,Y_train)

In [21]:
[i.shape for i in X_test]

[(1, 40, 128),
 (1, 40, 128),
 (5, 40, 128),
 (1, 40, 128),
 (6, 40, 128),
 (7, 40, 128),
 (3, 40, 128),
 (3, 40, 128),
 (18, 40, 128),
 (1, 40, 128),
 (5, 40, 128),
 (14, 40, 128),
 (1, 40, 128),
 (5, 40, 128)]

## Training

In [50]:
def search():
    results = {}
    histories = {}
    strides = [5,7,11,13]
    lengths = [20,30,40,50]
    for stride in strides:
        for length in lengths:
            X_train, Y_train, X_test, Y_test = data_loader(stride, length)
            X_train_rnn,Y_train_rnn = permute(X_train,Y_train)
            
            rnn = None
            try:
                rnn = load_model('models/LSTM_Strided/100ep_double_LSTM_dropout_'+ str(stride) + '_' + str(length)+'.h5')
                histories[str(stride) + '_' + str(length)] = 'Trained'
            except FileNotFoundError:
                rnn = rnn_model((length,128))
                rnn.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
                histories[str(stride) + '_' + str(length)] = rnn.fit(X_train_rnn, Y_train_rnn, epochs=100, batch_size = X_train_rnn.shape[0], validation_split=0.2)
                rnn.save('models/LSTM_Strided/100ep_double_LSTM_dropout_'+ str(stride) + '_' + str(length)+'.h5')
            results[str(stride) + '_' + str(length)] = evaluate(X_test, Y_test, rnn)
    return results, histories

In [51]:
histories = None
results = None
try:
    os.path.exists('results.pkl')
    with open("results.pkl", "rb") as f:
        results = pickle.load(f)
    with open("histories.pkl", "rb") as f:
        histories  = pickle.load(f)    
except:    
    results, histories = search()
    with open("results.pkl", "wb") as f:
        pickle.dump(results, f)
    with open("histories.pkl", "wb") as f:
        hist_pickle = {}
        for key in histories:
            hist_pickle[key] = histories[key].history
        pickle.dump(hist_pickle,f)      

Training
Shape X (957, 20, 128)
Shape Y (957, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (781, 30, 128)
Shape Y (781, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (620, 40, 128)
Shape Y (620, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (464, 50, 128)
Shape Y (464, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (745, 20, 128)
Shape Y (745, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (611, 30, 128)
Shape Y (611, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (489, 40, 128)
Shape Y (489, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (382, 50, 128)
Shape Y (382, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (525, 20, 128)
Shape Y (525, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (437, 30, 128)
Shape Y (437, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (356, 40, 128)
Shape Y (356, 9)

Test
Shape X (14,)
Shape Y (14, 9)
Training
Shape X (279, 50, 128)
Shape Y (279, 9)

Test
Shape X (1

AttributeError: 'str' object has no attribute 'history'

In [53]:
results

{'11_20': 78.57142857142857,
 '11_30': 64.28571428571429,
 '11_40': 71.42857142857143,
 '11_50': 71.42857142857143,
 '13_20': 78.57142857142857,
 '13_30': 64.28571428571429,
 '13_40': 64.28571428571429,
 '13_50': 64.28571428571429,
 '5_20': 71.42857142857143,
 '5_30': 57.14285714285714,
 '5_40': 71.42857142857143,
 '5_50': 64.28571428571429,
 '7_20': 78.57142857142857,
 '7_30': 57.14285714285714,
 '7_40': 57.14285714285714,
 '7_50': 71.42857142857143}

## Testing

In [30]:
def build_test_dataset(image_size, stride = 10, max_len = 40):
    
    model = load_model('models/Conv/17epochs_valacc_94.h5')
    
    X_test_images = []
    Y_test_images = []
    
    VIDEOS_DIR = './UCF_Unseen/'
    IMAGES_DIR = './UCF_Images/'
    
    videos = []
    for x in classes:
        videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
    
    for i in range(len(classes)):
        cls = classes[i]

        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_cnn = model_predict(model,np.array(X_train_images_class))
            print(X_cnn.shape)
            
            del X_train_images_class
            X_test_frames = []                                
            for k in range(0,X_cnn.shape[0],stride):
                lower = k
                upper = min(X_cnn.shape[0],k+max_len)
                if upper == X_cnn.shape[0]:
                    X_test_frames.append(pad(X_cnn[lower:upper],max_len))
                    X_test_images.append(np.array(X_test_frames))        
                    Y_test_images.append(i)
                    print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    X_test_frames.append(X_cnn[lower:upper])
                    print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])

    return X_test_images,Y_test_images

In [32]:
X_test,Y_test = build_test_dataset((172,172))

(22, 128)
Padded frames 0 to 22
Processed 010 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 006 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 004 of class Kicking
(20, 128)
Padded frames 0 to 20
Processed 008 of class Kicking
(58, 128)
Added frames 0 to 40
Added frames 10 to 50
Padded frames 20 to 58
Processed 011 of class Riding-Horse
(36, 128)
Padded frames 0 to 36
Processed 012 of class Riding-Horse
(65, 128)
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Padded frames 30 to 65
Processed 012 of class Running
(70, 128)
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Padded frames 30 to 70
Processed 011 of class SkateBoarding
(70, 128)
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Padded frames 30 to 70
Processed 012 of class SkateBoarding
(50, 128)
Added frames 0 to 40
Padded frames 10 to 50
Processed 018 of class Swing-Bench
(50, 128)
Added frames 0 to 40
Padded frames 10 to 50
Processed 020 of class Swi

In [34]:
X_test = np.array(X_test)
[i.shape for i in X_test]

[(1, 40, 128),
 (1, 40, 128),
 (1, 40, 128),
 (1, 40, 128),
 (3, 40, 128),
 (1, 40, 128),
 (4, 40, 128),
 (4, 40, 128),
 (4, 40, 128),
 (2, 40, 128),
 (2, 40, 128),
 (2, 40, 128),
 (10, 40, 128),
 (5, 40, 128),
 (1, 40, 128),
 (8, 40, 128),
 (5, 40, 128),
 (8, 40, 128),
 (3, 40, 128),
 (3, 40, 128),
 (3, 40, 128),
 (3, 40, 128)]

In [37]:
Y_test = convert_to_one_hot(np.array(Y_test), 9)
Y_test.shape

(22, 9)

In [49]:
evaluate(X_test,Y_test,rnn)

Max Preds time [0]
Pred Kicking Actual Kicking
Max Preds time [0]
Pred Kicking Actual Kicking
Max Preds time [2]
Pred Running Actual Kicking
Max Preds time [7]
Pred Walking Actual Kicking
Max Preds time [1, 1, 1]
Pred Riding-Horse Actual Riding-Horse
Max Preds time [1]
Pred Riding-Horse Actual Riding-Horse
Max Preds time [8, 8, 8, 8]
Pred Golf-Swing Actual Running
Max Preds time [0, 3, 3, 3]
Pred SkateBoarding Actual SkateBoarding
Max Preds time [2, 2, 2, 2]
Pred Running Actual SkateBoarding
Max Preds time [4, 4]
Pred Swing-Bench Actual Swing-Bench
Max Preds time [4, 4]
Pred Swing-Bench Actual Swing-Bench
Max Preds time [4, 4]
Pred Swing-Bench Actual Swing-Bench
Max Preds time [5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
Pred Lifting Actual Lifting
Max Preds time [6, 6, 6, 6, 6]
Pred Swing-Side Actual Swing-Side
Max Preds time [6]
Pred Swing-Side Actual Swing-Side
Max Preds time [7, 7, 7, 7, 7, 7, 7, 7]
Pred Walking Actual Walking
Max Preds time [7, 7, 7, 7, 7]
Pred Walking Actual Walking
Max Preds 

81.81818181818183