In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import numpy as np
import os
from keras import backend as K
from keras.layers import Conv2D, Dropout, LSTM, BatchNormalization, Input,Activation, MaxPool2D, Flatten, Dense,TimeDistributed
from keras.models import Model, load_model
from keras import metrics 
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Root folders of the dataset. Sub-folder names are appended to these with
# plain string concatenation further down, so the trailing slash is required.
VIDEOS_DIR = './Videos/'
IMAGES_DIR = './Images/'

In [3]:
# One entry per item found directly under VIDEOS_DIR. The position of a name
# in this list is used as its integer class label by the cells below.
# NOTE(review): os.listdir makes no ordering guarantee, so the label assigned
# to each class may differ between machines -- confirm before sharing models.
classes = os.listdir(VIDEOS_DIR)
print(classes)

['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']


In [4]:
# Reverse lookup: class folder name -> integer label (its position in `classes`).
class_to_index = {name: index for index, name in enumerate(classes)}
class_to_index

{'Golf-Swing': 8,
 'Kicking': 0,
 'Lifting': 5,
 'Riding-Horse': 1,
 'Running': 2,
 'SkateBoarding': 3,
 'Swing-Bench': 4,
 'Swing-Side': 6,
 'Walking': 7}

In [5]:
# videos[i] holds the entry names found under VIDEOS_DIR/<classes[i]>/,
# in whatever order os.listdir returns them.
videos = [os.listdir(VIDEOS_DIR+class_name+'/') for class_name in classes]
print(videos)

[['006', '017', '003', '016', '009', '013', '005', '012', '010', '015', '014', '007', '004', '011', '002', '001'], ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'], ['006', '009', '005', '010', '007', '004', '002', '008', '001'], ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'], ['006', '017', '003', '016', '009', '013', '005', '012', '010', '015', '014', '007', '004', '011', '002', '008', '001'], ['003', '005', '004', '002', '001'], ['006', '003', '009', '005', '010', '007', '004', '011', '002', '008', '001'], ['006', '017', '003', '016', '009', '013', '005', '012', '010', '015', '014', '007', '019', '004', '011', '018', '002', '008', '001'], ['006', '003', '009', '013', '005', '012', '010', '014', '007', '004', '011', '002', '008', '001']]


### Function Block

In [76]:
def permute(X,Y):
    """Shuffle X and Y along their first axis using one shared random order.

    A single permutation of ``range(X.shape[0])`` is drawn from NumPy's global
    random state and used to index both arrays, so row ``k`` of the returned X
    still belongs to row ``k`` of the returned Y.

    Returns:
        Tuple ``(X_shuffled, Y_shuffled)``; the inputs themselves are not
        modified.
    """
    order = np.random.permutation(X.shape[0])
    return X[order], Y[order]

def load_image(path,image_size):
    """Read an image from disk and return it as a resized RGB array.

    Args:
        path: Path of the image file to read.
        image_size: Target size, passed unchanged to ``cv2.resize`` as its
            ``dsize`` argument.

    Returns:
        The image converted from OpenCV's BGR channel order to RGB and resized
        to ``image_size``.

    Raises:
        OSError: If ``cv2.imread`` could not read the file. It signals a
            missing or undecodable file by returning ``None`` rather than
            raising, which would otherwise surface as an opaque error inside
            ``cv2.cvtColor``.
    """
    image = cv2.imread(path)
    if image is None:
        raise OSError("Could not read image file: " + str(path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return cv2.resize(image, image_size)

def convert_to_one_hot(Y, C):
    """Turn an array of integer labels into one-hot rows.

    Args:
        Y: NumPy array of integer class indices; it is flattened first, so any
            shape is accepted.
        C: Number of classes, i.e. the width of each one-hot row.

    Returns:
        Array of shape ``(Y.size, C)`` where row ``k`` is row ``Y.flat[k]`` of
        the ``C x C`` identity matrix.
    """
    identity = np.eye(C)
    labels = Y.reshape(-1)
    return identity[labels]

def model_predict(model,images,layer_index=13):
    """Feed `images` through `model` and return one intermediate layer's output.

    Args:
        model: Keras model; its first layer's input is used as the entry point.
        images: Batch of inputs accepted by that first layer.
        layer_index: Position in ``model.layers`` of the layer whose output is
            returned. Defaults to 13, the value that used to be hard-coded.
            In the architecture defined by ``build_model`` in this notebook
            that position is the Flatten layer -- assuming the model passed in
            shares that layout (TODO confirm for loaded checkpoints).

    Returns:
        The selected layer's activations for `images`.
    """
    extractor = K.function([model.layers[0].input,K.learning_phase()],
                           [model.layers[layer_index].output])
    # The second input is the learning-phase flag; 0 selects inference
    # behaviour (e.g. dropout disabled).
    return extractor([images,0])[0]

def pad(X_cnn,max_len):
    """Append all-zero rows to a 2-D feature array until it has `max_len` rows.

    The padding is allocated as a single zero block with the same dtype as the
    input, so the result keeps that dtype (the previous implementation padded
    with rows of Python ints, one list per missing row).

    Args:
        X_cnn: 2-D array of shape ``(length, features)``.
        max_len: Desired number of rows.

    Returns:
        A new array of shape ``(max(length, max_len), features)``. Input that
        already has `max_len` rows or more is returned as an unpadded copy; it
        is never truncated.
    """
    X_cnn = np.asarray(X_cnn)
    missing = max_len - X_cnn.shape[0]
    if missing <= 0:
        return np.array(X_cnn)
    filler = np.zeros((missing, X_cnn.shape[1]), dtype=X_cnn.dtype)
    return np.concatenate([X_cnn, filler], axis=0)

def evaluate(X_test,Y_test,model):
    """Print a majority-vote prediction next to the true label for each test item.

    For every entry of `X_test`, the model predicts on that entry, the argmax
    of each prediction row is taken, and the most frequent argmax (ties go to
    the lowest index) is printed together with the argmax of the matching row
    of `Y_test`.

    Args:
        X_test: Sequence of batches, each passed as-is to ``model.predict``.
        Y_test: Sequence of label vectors, indexed in step with `X_test`.
        model: Object exposing ``predict(batch)``.

    Returns:
        None; the results are only printed.
    """
    for idx, windows in enumerate(X_test):
        scores = model.predict(windows)
        votes = [np.argmax(row) for row in scores]
        tally = np.bincount(votes)
        print("Pred",np.argmax(tally),"Actual",np.argmax(Y_test[idx]))

### Build Dataset

In [51]:
def build_dataset_for_lstm_strided(image_size, stride = 10, max_len = 40):
    """Build fixed-length windows of CNN features for LSTM training and testing.

    For every class one video is picked at random (``random.randint``, unseeded
    here) and held out for testing. Each video's ``.png`` frames are loaded in
    sorted file-name order, passed through the saved convolutional model via
    ``model_predict``, and the resulting per-frame feature rows are cut into
    windows of `max_len` rows that start every `stride` rows. The first window
    that reaches the end of the video is zero-padded to `max_len` rows with
    ``pad`` and ends the windowing for that video.

    Args:
        image_size: Size handed to ``load_image`` for every frame.
        stride: Step between the start rows of consecutive windows.
        max_len: Number of rows in every window.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)`` of Python lists:
        ``X_train`` holds one window per entry and ``Y_train`` the matching
        class index; ``X_test`` holds, per class, an array stacking all windows
        of the held-out video and ``Y_test`` the matching class index.
    """
    model = load_model('models/Conv/17epochs_valacc_94.h5')

    X_train_images = []
    Y_train_images = []
    X_test_images = []
    Y_test_images = []

    for i, cls in enumerate(classes):
        test_video = random.randint(0,len(videos[i])-1)
        print("Selected Video for test is",videos[i][test_video])

        X_test_frames = []

        for j, vid in enumerate(videos[i]):
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            # Compare by value: `is not` on ints checks object identity and
            # only works by accident of CPython's small-integer cache.
            is_test = (j == test_video)

            frames = [load_image(image_r+file,image_size)
                      for file in sorted(os.listdir(image_r))
                      if file.endswith(".png")]
            X_cnn = model_predict(model,np.array(frames))
            print(X_cnn.shape)

            # Release the raw frames before windowing; only features are needed.
            del frames

            for lower in range(0,X_cnn.shape[0],stride):
                upper = min(X_cnn.shape[0],lower+max_len)
                reached_end = (upper == X_cnn.shape[0])

                window = X_cnn[lower:upper]
                if reached_end:
                    window = pad(window,max_len)

                if is_test:
                    X_test_frames.append(window)
                else:
                    X_train_images.append(window)
                    Y_train_images.append(i)

                if reached_end:
                    print("Padded frames" , lower , "to" , upper)
                    # Later windows would only cover a shorter tail of the
                    # same frames, so stop here.
                    break
                print("Added frames" , lower , "to" , upper)

            print("Processed",videos[i][j],"of","class",classes[i])

        X_test_images.append(np.array(X_test_frames))
        Y_test_images.append(i)
    return X_train_images,Y_train_images,X_test_images,Y_test_images

### Conv Model

In [8]:
def build_model(input_shape):
    """Assemble the convolutional feature extractor as a Keras functional model.

    The network is a plain chain: batch normalisation, a strided 7x7
    convolution, a 3x3 convolution with 2x2 max-pooling, a second 3x3
    convolution with 4x4 max-pooling, a 1x1 convolution down to 8 channels,
    then flatten. Dropout (rate 0.2) follows each pooling stage and the
    flatten step.

    Args:
        input_shape: Shape of one input sample, forwarded to ``Input``.

    Returns:
        ``Model`` mapping the input tensor to the flattened (and dropout-
        regularised) feature vector.
    """
    X_input = Input(input_shape, name = "Input")

    # Layers are listed in the exact order they are created and applied, so
    # auto-generated layer names and positions in `model.layers` stay stable.
    pipeline = [
        BatchNormalization(name = 'BatchNorm_1'),
        Conv2D(32, (7, 7), strides = (5, 5), name="Conv_1a", padding="same"),
        Activation('relu'),

        Conv2D(32, (3, 3), name = "Conv_1b", padding="same"),
        Activation('relu'),
        MaxPool2D((2, 2), name = "Pool_1"),
        Dropout(0.2),

        Conv2D(32, (3, 3), name ="Conv_2", padding = "same"),
        Activation('relu'),
        MaxPool2D((4, 4), name = "Pool_2"),
        Dropout(0.2),

        Conv2D(8,(1,1), name='Conv_1x1'),

        Flatten(),
        Dropout(0.2),
    ]

    X = X_input
    for layer in pipeline:
        X = layer(X)

    return Model(X_input, X)

In [9]:
def CNN_model(prev_model):
    """Put a 9-unit softmax layer named 'final' on top of `prev_model`.

    Args:
        prev_model: Keras model whose output feeds the new dense layer.

    Returns:
        ``Model`` sharing `prev_model`'s input and ending in the softmax layer.
    """
    class_scores = Dense(9,activation='softmax',name='final')(prev_model.output)
    return Model(prev_model.input, class_scores)

In [10]:
# Instantiate the feature extractor for 172x172 three-channel inputs and
# print its layer table.
model = build_model((172, 172, 3))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 172, 172, 3)       0         
_________________________________________________________________
BatchNorm_1 (BatchNormalizat (None, 172, 172, 3)       12        
_________________________________________________________________
Conv_1a (Conv2D)             (None, 35, 35, 32)        4736      
_________________________________________________________________
activation_1 (Activation)    (None, 35, 35, 32)        0         
_________________________________________________________________
Conv_1b (Conv2D)             (None, 35, 35, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 35, 35, 32)        0         
_________________________________________________________________
Pool_1 (MaxPooling2D)        (None, 17, 17, 32)        0         
__________

In [11]:
# Wrap the feature extractor with the softmax classification head and print
# the combined layer table.
cnn = CNN_model(model)
cnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 172, 172, 3)       0         
_________________________________________________________________
BatchNorm_1 (BatchNormalizat (None, 172, 172, 3)       12        
_________________________________________________________________
Conv_1a (Conv2D)             (None, 35, 35, 32)        4736      
_________________________________________________________________
activation_1 (Activation)    (None, 35, 35, 32)        0         
_________________________________________________________________
Conv_1b (Conv2D)             (None, 35, 35, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 35, 35, 32)        0         
_________________________________________________________________
Pool_1 (MaxPooling2D)        (None, 17, 17, 32)        0         
__________

In [12]:
# Configure the classifier for training: categorical cross-entropy loss,
# the Adam optimizer, and accuracy as the reported metric.
cnn.compile(loss='categorical_crossentropy', 
            metrics=['accuracy'], 
            optimizer='adam')

### RNN Model

In [124]:
def rnn_model(input_shape):
    """Build the sequence classifier: two stacked 32-unit LSTMs and a softmax.

    The first LSTM returns its full output sequence so the second can consume
    it; the second returns only its final output. Dropout (rate 0.3) follows
    each LSTM, and a 9-unit softmax layer produces the class scores.

    Args:
        input_shape: Shape of one input sequence, forwarded to ``Input``.

    Returns:
        ``Model`` mapping an input sequence to 9 softmax outputs.
    """
    sequence_in = Input(input_shape)

    hidden = LSTM(32, return_sequences=True)(sequence_in)
    hidden = Dropout(0.3)(hidden)

    hidden = LSTM(32, return_sequences=False)(hidden)
    hidden = Dropout(0.3)(hidden)

    class_probs = Dense(9,activation='softmax')(hidden)
    return Model(sequence_in, class_probs)

In [125]:
# Instantiate the LSTM classifier for sequences of 40 steps with 128 values
# per step, and print its layer table.
rnn = rnn_model((40,128))
rnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 40, 128)           0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 40, 32)            20608     
_________________________________________________________________
dropout_8 (Dropout)          (None, 40, 32)            0         
_________________________________________________________________
lstm_9 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dropout_9 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 9)                 297       
Total params: 29,225
Trainable params: 29,225
Non-trainable params: 0
_________________________________________________________________


In [126]:
# Configure the LSTM classifier for training: categorical cross-entropy loss,
# the Adam optimizer, and accuracy as the reported metric.
rnn.compile(loss='categorical_crossentropy', 
            metrics=['accuracy'], 
            optimizer='adam')

### Data Loader

In [52]:
X_train = None
Y_train = None
X_test = None
Y_test = None

# Cached dataset files, in the order (train_X, train_Y, test_X, test_Y).
cache_dir = 'Numpy/LSTM_Strided/'
cache_paths = [cache_dir + name
               for name in ('train_X.npy', 'train_Y.npy', 'test_X.npy', 'test_Y.npy')]

if all(os.path.exists(path) for path in cache_paths):
    # test_X.npy holds an object array (one stack of windows per class, with a
    # different number of windows each), which np.load refuses to read unless
    # allow_pickle=True. Unpickling can execute arbitrary code, so only load
    # cache files that were written by this notebook.
    X_train, Y_train, X_test, Y_test = [np.load(path, allow_pickle=True)
                                         for path in cache_paths]
else:
    # Cache is missing or incomplete: rebuild everything from the image folders.
    X_train,Y_train,test_windows,Y_test = build_dataset_for_lstm_strided((172,172))
    X_train = np.array(X_train)

    # The held-out videos yield different numbers of windows, so the per-class
    # stacks cannot form one rectangular array. Store them in a 1-D object
    # array, filled element by element so NumPy never tries to merge them.
    X_test = np.empty(len(test_windows), dtype=object)
    for index, windows in enumerate(test_windows):
        X_test[index] = windows

    Y_train = convert_to_one_hot(np.array(Y_train),9)
    Y_test = convert_to_one_hot(np.array(Y_test),9)

    # np.save does not create missing directories.
    os.makedirs(cache_dir, exist_ok=True)
    np.save('Numpy/LSTM_Strided/train_X.npy',X_train)
    np.save('Numpy/LSTM_Strided/train_Y.npy',Y_train)
    np.save('Numpy/LSTM_Strided/test_X.npy',X_test)
    np.save('Numpy/LSTM_Strided/test_Y.npy',Y_test)

Selected Video for test is 011
(23, 128)
Padded frames 0 to 23
Processed 006 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 017 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 003 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 016 of class Kicking
(22, 128)
Padded frames 0 to 22
Processed 009 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 013 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 005 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 012 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 010 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 015 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 014 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 007 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 004 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 011 of class Kicking
(23, 128)
Padded frames 0 to 23
Processed 002 of class Kicking
(23, 128)
Padded frames 

(75, 128)
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Padded frames 40 to 75
Processed 004 of class Swing-Side
(75, 128)
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Padded frames 40 to 75
Processed 011 of class Swing-Side
(75, 128)
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Padded frames 40 to 75
Processed 002 of class Swing-Side
(14, 128)
Padded frames 0 to 14
Processed 008 of class Swing-Side
(35, 128)
Padded frames 0 to 35
Processed 001 of class Swing-Side
Selected Video for test is 005
(102, 128)
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Added frames 40 to 80
Added frames 50 to 90
Added frames 60 to 100
Padded frames 70 to 102
Processed 006 of class Walking
(71, 128)
Added frames 0 to 40
Added frames 10 to 50
Added frames 20 to 60
Added frames 30 to 70
Padded frames 40 to 71
Processed 017 of class Walking
(101

In [53]:
# Report the array shapes of both splits as a sanity check.
print("Training")    
print("Shape X",X_train.shape)
print("Shape Y",Y_train.shape)
print()
print("Test")
print("Shape X",X_test.shape)
print("Shape Y",Y_test.shape)
# Shuffle the training windows and their labels together. No random seed is
# set, so the order differs from run to run.
# NOTE(review): windows cut from the same video overlap, so a random
# train/validation split over shuffled windows can place near-duplicate
# windows on both sides -- confirm the validation accuracy is meaningful.
X_train_rnn,Y_train_rnn = permute(X_train,Y_train)

Training
Shape X (381, 40, 128)
Shape Y (381, 9)

Test
Shape X (9,)
Shape Y (9, 9)


In [56]:
# Shape of each test entry; the leading dimension is the number of windows
# produced from that class's held-out video.
[i.shape for i in X_test]

[(1, 40, 128),
 (1, 40, 128),
 (4, 40, 128),
 (4, 40, 128),
 (2, 40, 128),
 (10, 40, 128),
 (5, 40, 128),
 (8, 40, 128),
 (3, 40, 128)]

In [128]:
# Train for 50 epochs. batch_size equals the total number of shuffled windows,
# so every epoch is a single full-batch update; 20% of the data is held out
# for validation by Keras.
history = rnn.fit(X_train_rnn, Y_train_rnn, epochs=50, batch_size = X_train_rnn.shape[0], validation_split=0.2)

Train on 304 samples, validate on 77 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [130]:
# Persist the trained LSTM classifier.
# NOTE(review): the file name says "100ep" while the fit call in this notebook
# runs 50 epochs -- presumably the training cell was executed twice; confirm.
rnn.save('models/LSTM_Strided/100ep_valacc_94_double_LSTM_dropout.h5')

In [131]:
# Replace the `rnn` trained above with a model loaded from disk.
# NOTE(review): the file name refers to a single-LSTM model, i.e. not the
# network trained and saved in the previous cells -- confirm this is intended.
rnn = load_model('models/LSTM_Strided/500ep_valacc_97_single_LSTM.h5')

print(X_test.shape)
print(Y_test.shape)

# Prints, per class, the majority-vote prediction over the held-out video's
# windows next to the true class index.
evaluate(X_test,Y_test,rnn)

(9,)
(9, 9)
Pred 0 Actual 0
Pred 1 Actual 1
Pred 2 Actual 2
Pred 3 Actual 3
Pred 4 Actual 4
Pred 5 Actual 5
Pred 6 Actual 6
Pred 7 Actual 7
Pred 8 Actual 8
