In [2]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
from keras import backend as K
from keras.layers import Conv2D, Conv1D, Dropout, LSTM, BatchNormalization, Input,Activation, MaxPool2D, Flatten, Dense,TimeDistributed
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from keras.layers.convolutional import ZeroPadding2D
from keras import metrics
import h5py
from sklearn.metrics import confusion_matrix

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
VIDEOS_DIR = '../Videos/'
IMAGES_DIR = '../Images/'
classes = []
class_to_index = {}
videos = []

classes = ['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']


for i in range(len(classes)):
    class_to_index[classes[i]] = i
class_to_index

for x in classes:
    videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
videos

[['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '004',
  '011',
  '002',
  '001'],
 ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '004',
  '011',
  '002',
  '008',
  '001'],
 ['003', '005', '004', '002', '001'],
 ['006', '003', '009', '005', '010', '007', '004', '011', '002', '008', '001'],
 ['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '019',
  '004',
  '011',
  '018',
  '002',
  '008',
  '001'],
 ['006',
  '003',
  '009',
  '013',
  '005',
  '012',
  '010',
  '014',
  '007',
  '004',
  '011',
  '002',
  '008',
  '001']]

In [4]:
def permute(X,Y):
    train_size = X.shape[0]
    permutation_train = np.random.permutation(train_size)
    X = X[permutation_train]
    Y = Y[permutation_train]
    return X,Y

def load_image(path,image_size):
    image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, image_size)
    return image

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)]
    return Y

def pad(X_train_images_class,max_len):
    length = len(X_train_images_class)
    pad_arr = np.zeros((X_train_images_class.shape[1:4]),dtype=np.uint8)
    X_train_images_class = list(X_train_images_class)
    for i in range(max_len-length):
        X_train_images_class.append(pad_arr)
    return np.array(X_train_images_class,dtype=np.uint8)

def predict(model,X,verbose=True):
    pred = model.predict(X)[0]
    max_pred = np.argmax(pred)
    if verbose:
        print("Max Preds time", max_pred)
    return max_pred

def evaluate(model, X_test,Y_test,verbose = True):
    count = 0
    preds = []
    for i in range(len(X_test)):
        class_pred = predict(model,X_test[i],verbose=verbose)
        preds.append(class_pred)
        actual = Y_test[i]
        if verbose:
            print("Pred",classes[class_pred],"Actual",classes[actual])
            print()
        if class_pred == actual:
            count += 1
    if verbose:
        print("Confusion Matrix")
        print(confusion_matrix(Y_test,preds))
    return float(count)/float(len(Y_test)) * 100.0

def model_predict(model,images):
    output = K.function([model.layers[0].input,K.learning_phase()],
                        [model.layers[13].output])
    return output([images,0])[0]

In [5]:
def build_dataset(image_size, stride = 10, max_len = 40):
    
    model = load_model('../models/Conv/17epochs_valacc_94.h5')
    
    X_train_images = []
    Y_train_images = []
    X_test_images = []
    Y_test_images = []
    
    test_videos = [['002', '009'], ['005', '010'], ['007'], \
                   ['003'], ['006', '012'], ['004'], ['008'], ['004', '012'], ['001', '013']]
    
    for i in range(len(classes)):
        cls = classes[i]
        #test_video = random.randint(0,len(videos[i])-1)
        test = test_videos[i] 
        #print("Selected Video for test is",[videos[i][test_video] for test_video in test])

        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_cnn = model_predict(model,np.array(X_train_images_class))
            #print(X_cnn.shape)
            
            del X_train_images_class
            X_test_frames = []                                
            for k in range(0,X_cnn.shape[0],stride):
                lower = k
                upper = min(X_cnn.shape[0],k+max_len)
                if upper == X_cnn.shape[0]:
                    if vid not in test:                
                        X_train_images.append(pad(X_cnn[lower:upper],max_len))
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(pad(X_cnn[lower:upper],max_len))
                        X_test_images.append(np.array(X_test_frames))        
                        Y_test_images.append(i)
                    #print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    if vid not in test:                
                        X_train_images.append(X_cnn[lower:upper])
                        Y_train_images.append(i)
                    else:
                        X_test_frames.append(X_cnn[lower:upper])
                    #print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])

        #X_test_images.append(np.array(X_test_frames))        
        #Y_test_images.append(i)
    return np.array(X_train_images),np.array(Y_train_images),np.array(X_test_images),np.array(Y_test_images)

In [6]:
X_train = None
X_test = None
Y_train = None
Y_test = None
try:
    X_train = np.load('../Numpy/ConvMLP/X_train.npy')
    Y_train = np.load('../Numpy/ConvMLP/Y_train.npy')
    X_test = np.load('../Numpy/ConvMLP/X_test.npy')
    Y_test = np.load('../Numpy/ConvMLP/Y_test.npy')
except FileNotFoundError:
    X_train, Y_train, X_test, Y_test = build_dataset((172, 172))
    np.save('../Numpy/ConvMLP/X_train.npy', X_train)
    np.save('../Numpy/ConvMLP/Y_train.npy', Y_train)
    np.save('../Numpy/ConvMLP/X_test.npy', X_test)
    np.save('../Numpy/ConvMLP/Y_test.npy', Y_test)

In [7]:
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)

In [8]:
Y_train.shape

(375,)

In [9]:
X_train.shape

(375, 40, 128)

In [22]:
def mlp_model(input_shape):
    X_input = Input(input_shape)
    
    X = Conv1D(16, 3, dilation_rate=1, name = 'Conv1a', activation='relu')(X_input)
    X = Dropout(0.2)(X)
    X = Conv1D(32, 5, dilation_rate=2, name = 'Conv1b', activation='relu')(X)
    X = Dropout(0.2)(X)
    X = Conv1D(64, 7, dilation_rate=4, name = 'Conv1c', activation='relu')(X)
    X = Dropout(0.2)(X)
    
    X = Flatten()(X)
    
    X = Dense(32,activation='relu')(X)
    X = Dropout(0.3)(X)
    X = Dense(16,activation='relu')(X)
    X = Dropout(0.3)(X)
    
    X = Dense(9,activation='softmax')(X)
    return Model(X_input, X)

In [23]:
mlp = mlp_model((40,128))

In [24]:
mlp.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 40, 128)           0         
_________________________________________________________________
Conv1a (Conv1D)              (None, 38, 16)            6160      
_________________________________________________________________
dropout_10 (Dropout)         (None, 38, 16)            0         
_________________________________________________________________
Conv1b (Conv1D)              (None, 30, 32)            2592      
_________________________________________________________________
dropout_11 (Dropout)         (None, 30, 32)            0         
_________________________________________________________________
Conv1c (Conv1D)              (None, 6, 64)             14400     
_________________________________________________________________
dropout_12 (Dropout)         (None, 6, 64)             0         
__________

In [25]:
mlp.compile(loss='categorical_crossentropy', 
            metrics=['accuracy'], 
            optimizer='adam')

In [26]:
X_train,Y_train = permute(X_train,Y_train)

In [None]:
es = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=2, mode='auto')
mlp.fit(X_train, convert_to_one_hot(Y_train,9), 
              epochs = 500, 
              batch_size = X_train.shape[0], 
              validation_split = 0.2,
              verbose = 2,
              callbacks = [])

Train on 300 samples, validate on 75 samples
Epoch 1/500
 - 1s - loss: 4.9484 - acc: 0.1067 - val_loss: 5.2359 - val_acc: 0.1200
Epoch 2/500
 - 0s - loss: 4.7894 - acc: 0.1533 - val_loss: 5.0238 - val_acc: 0.1733
Epoch 3/500
 - 0s - loss: 4.7911 - acc: 0.1600 - val_loss: 5.2724 - val_acc: 0.2667
Epoch 4/500
 - 0s - loss: 4.7449 - acc: 0.1967 - val_loss: 5.4634 - val_acc: 0.3067
Epoch 5/500
 - 0s - loss: 4.7272 - acc: 0.2000 - val_loss: 5.4612 - val_acc: 0.3067
Epoch 6/500
 - 0s - loss: 4.6900 - acc: 0.2033 - val_loss: 5.3918 - val_acc: 0.3067
Epoch 7/500
 - 0s - loss: 4.6026 - acc: 0.1933 - val_loss: 5.3525 - val_acc: 0.3067
Epoch 8/500
 - 0s - loss: 4.6195 - acc: 0.2167 - val_loss: 5.3160 - val_acc: 0.3067
Epoch 9/500
 - 0s - loss: 4.8183 - acc: 0.2233 - val_loss: 5.2099 - val_acc: 0.3067
Epoch 10/500
 - 0s - loss: 4.5604 - acc: 0.2367 - val_loss: 5.0562 - val_acc: 0.3067
Epoch 11/500
 - 0s - loss: 4.5308 - acc: 0.2733 - val_loss: 4.8839 - val_acc: 0.3333
Epoch 12/500
 - 0s - loss: 4.

Epoch 97/500
 - 0s - loss: 1.7349 - acc: 0.3833 - val_loss: 1.9380 - val_acc: 0.4400
Epoch 98/500
 - 0s - loss: 1.7254 - acc: 0.4233 - val_loss: 1.9565 - val_acc: 0.4400
Epoch 99/500
 - 0s - loss: 1.8330 - acc: 0.4000 - val_loss: 1.9646 - val_acc: 0.4400
Epoch 100/500
 - 0s - loss: 1.7487 - acc: 0.4400 - val_loss: 1.9767 - val_acc: 0.4400
Epoch 101/500
 - 0s - loss: 1.7855 - acc: 0.3700 - val_loss: 1.9917 - val_acc: 0.4267
Epoch 102/500
 - 0s - loss: 1.7135 - acc: 0.4267 - val_loss: 2.0035 - val_acc: 0.4267
Epoch 103/500
 - 0s - loss: 1.6566 - acc: 0.4300 - val_loss: 2.0071 - val_acc: 0.4400
Epoch 104/500
 - 0s - loss: 1.6760 - acc: 0.4133 - val_loss: 2.0093 - val_acc: 0.4533
Epoch 105/500
 - 0s - loss: 1.6530 - acc: 0.3833 - val_loss: 2.0113 - val_acc: 0.4667
Epoch 106/500
 - 0s - loss: 1.6609 - acc: 0.3833 - val_loss: 1.9878 - val_acc: 0.4667
Epoch 107/500
 - 0s - loss: 1.6874 - acc: 0.4000 - val_loss: 1.9665 - val_acc: 0.4667
Epoch 108/500
 - 0s - loss: 1.6111 - acc: 0.4300 - val_lo

Epoch 193/500
 - 0s - loss: 1.2444 - acc: 0.5333 - val_loss: 1.8147 - val_acc: 0.4800
Epoch 194/500
 - 0s - loss: 1.2221 - acc: 0.5533 - val_loss: 1.8150 - val_acc: 0.4800
Epoch 195/500
 - 0s - loss: 1.1941 - acc: 0.5567 - val_loss: 1.8165 - val_acc: 0.4800
Epoch 196/500
 - 0s - loss: 1.2423 - acc: 0.5533 - val_loss: 1.8191 - val_acc: 0.4800
Epoch 197/500
 - 0s - loss: 1.2444 - acc: 0.5367 - val_loss: 1.8232 - val_acc: 0.4800
Epoch 198/500
 - 0s - loss: 1.2521 - acc: 0.5300 - val_loss: 1.8253 - val_acc: 0.4800
Epoch 199/500
 - 0s - loss: 1.2114 - acc: 0.5433 - val_loss: 1.8318 - val_acc: 0.4800
Epoch 200/500
 - 0s - loss: 1.2321 - acc: 0.5567 - val_loss: 1.8378 - val_acc: 0.4800
Epoch 201/500
 - 0s - loss: 1.2349 - acc: 0.5433 - val_loss: 1.8472 - val_acc: 0.4800
Epoch 202/500
 - 0s - loss: 1.2393 - acc: 0.5533 - val_loss: 1.8562 - val_acc: 0.4800
Epoch 203/500
 - 0s - loss: 1.2419 - acc: 0.5500 - val_loss: 1.8652 - val_acc: 0.4800
Epoch 204/500
 - 0s - loss: 1.2377 - acc: 0.5333 - val

Epoch 289/500
 - 0s - loss: 1.0804 - acc: 0.5900 - val_loss: 2.1020 - val_acc: 0.5067
Epoch 290/500
 - 0s - loss: 1.0628 - acc: 0.6133 - val_loss: 2.1034 - val_acc: 0.4933
Epoch 291/500
 - 0s - loss: 1.0795 - acc: 0.6033 - val_loss: 2.1000 - val_acc: 0.4933
Epoch 292/500
 - 0s - loss: 1.0396 - acc: 0.5900 - val_loss: 2.0983 - val_acc: 0.5067
Epoch 293/500
 - 0s - loss: 1.0649 - acc: 0.6267 - val_loss: 2.0969 - val_acc: 0.5067
Epoch 294/500
 - 0s - loss: 1.0639 - acc: 0.5733 - val_loss: 2.0953 - val_acc: 0.5067
Epoch 295/500
 - 0s - loss: 1.0641 - acc: 0.6133 - val_loss: 2.0944 - val_acc: 0.5067
Epoch 296/500
 - 0s - loss: 1.0840 - acc: 0.5867 - val_loss: 2.0912 - val_acc: 0.5067
Epoch 297/500
 - 0s - loss: 1.0618 - acc: 0.5967 - val_loss: 2.0871 - val_acc: 0.5200
Epoch 298/500
 - 0s - loss: 1.1194 - acc: 0.5467 - val_loss: 2.0835 - val_acc: 0.5067
Epoch 299/500
 - 0s - loss: 1.0549 - acc: 0.6067 - val_loss: 2.0830 - val_acc: 0.5067
Epoch 300/500
 - 0s - loss: 1.0512 - acc: 0.5900 - val

Epoch 385/500
 - 0s - loss: 0.8518 - acc: 0.6700 - val_loss: 2.5052 - val_acc: 0.4933
Epoch 386/500
 - 0s - loss: 0.8635 - acc: 0.6633 - val_loss: 2.5183 - val_acc: 0.4933
Epoch 387/500
 - 0s - loss: 0.8617 - acc: 0.6900 - val_loss: 2.5294 - val_acc: 0.5067
Epoch 388/500
 - 0s - loss: 0.8643 - acc: 0.6800 - val_loss: 2.5343 - val_acc: 0.5067
Epoch 389/500
 - 0s - loss: 0.8785 - acc: 0.6500 - val_loss: 2.5358 - val_acc: 0.5067
Epoch 390/500
 - 0s - loss: 0.8921 - acc: 0.6667 - val_loss: 2.5318 - val_acc: 0.4933
Epoch 391/500
 - 0s - loss: 0.8540 - acc: 0.7000 - val_loss: 2.5295 - val_acc: 0.4933
Epoch 392/500
 - 0s - loss: 0.8232 - acc: 0.6933 - val_loss: 2.5272 - val_acc: 0.4933
Epoch 393/500
 - 0s - loss: 0.8154 - acc: 0.7133 - val_loss: 2.5269 - val_acc: 0.4933
Epoch 394/500
 - 0s - loss: 0.8399 - acc: 0.6833 - val_loss: 2.5321 - val_acc: 0.4933
Epoch 395/500
 - 0s - loss: 0.8359 - acc: 0.6900 - val_loss: 2.5381 - val_acc: 0.4933
Epoch 396/500
 - 0s - loss: 0.8815 - acc: 0.6800 - val

In [273]:
X_test.shape
evaluate(mlp,X_test,Y_test)

Max Preds time 0
Pred Kicking Actual Kicking

Max Preds time 0
Pred Kicking Actual Kicking

Max Preds time 1
Pred Riding-Horse Actual Riding-Horse

Max Preds time 7
Pred Walking Actual Riding-Horse

Max Preds time 8
Pred Golf-Swing Actual Running

Max Preds time 3
Pred SkateBoarding Actual SkateBoarding

Max Preds time 4
Pred Swing-Bench Actual Swing-Bench

Max Preds time 4
Pred Swing-Bench Actual Swing-Bench

Max Preds time 5
Pred Lifting Actual Lifting

Max Preds time 0
Pred Kicking Actual Swing-Side

Max Preds time 7
Pred Walking Actual Walking

Max Preds time 7
Pred Walking Actual Walking

Max Preds time 0
Pred Kicking Actual Golf-Swing

Max Preds time 8
Pred Golf-Swing Actual Golf-Swing

Confusion Matrix
[[2 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 1 0 0 0]
 [1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 2 0]
 [1 0 0 0 0 0 0 0 1]]


71.42857142857143

In [77]:
def build_dataset_unseen(image_size, stride = 10, max_len = 40):
    
    model = load_model('../models/Conv/17epochs_valacc_94.h5')
    
    X = []
    Y = []
    
    VIDEOS_DIR = '../UCF_Unseen/'
    IMAGES_DIR = '../UCF_Images/'
    
    for i in range(len(classes)):
        cls = classes[i]
        
        videos = list(os.listdir(VIDEOS_DIR+cls+'/'))

        for j in range(len(videos)):
            vid = videos[j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_cnn = model_predict(model,np.array(X_train_images_class))
            
            del X_train_images_class
            X_test_frames = []                                
            for k in range(0,X_cnn.shape[0],stride):
                lower = k
                upper = min(X_cnn.shape[0],k+max_len)
                if upper == X_cnn.shape[0]:
                    X.append(pad(X_cnn[lower:upper],max_len))
                    Y.append(i)        
                    break
                else:
                    X.append(X_cnn[lower:upper])
                    Y.append(i)

            print("Processed",vid,"of","class",classes[i])

        #X_test_images.append(np.array(X_test_frames))        
        #Y_test_images.append(i)
    return np.array(X),np.array(Y)

In [249]:
X,Y = build_dataset_unseen((172,172))

Processed 006 of class Kicking
Processed 010 of class Kicking
Processed 004 of class Kicking
Processed 008 of class Kicking
Processed 012 of class Riding-Horse
Processed 011 of class Riding-Horse
Processed 012 of class Running
Processed 012 of class SkateBoarding
Processed 011 of class SkateBoarding
Processed 020 of class Swing-Bench
Processed 019 of class Swing-Bench
Processed 018 of class Swing-Bench
Processed 006 of class Lifting
Processed 013 of class Swing-Side
Processed 012 of class Swing-Side
Processed 022 of class Walking
Processed 020 of class Walking
Processed 021 of class Walking
Processed 009 of class Golf-Swing
Processed 005 of class Golf-Swing
Processed 007 of class Golf-Swing
Processed 008 of class Golf-Swing


In [145]:
X.shape

(75, 40, 128)

In [266]:
res = mlp.evaluate(x=X,y=convert_to_one_hot(Y,9))
print(res[1])

0.8
