In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
from keras import backend as K
from keras.layers import Conv2D, Conv1D, Dropout, LSTM, BatchNormalization, Input,Activation, MaxPool2D, Flatten 
from keras.layers import Dense,TimeDistributed, Lambda
from keras.models import Model, load_model
from keras.layers.convolutional import ZeroPadding2D
from keras import metrics
from keras.callbacks import ModelCheckpoint
import h5py
from sklearn.metrics import confusion_matrix

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
VIDEOS_DIR = '../Videos/'
IMAGES_DIR = '../Images/'
classes = []
class_to_index = {}
videos = []

classes = ['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']


for i in range(len(classes)):
    class_to_index[classes[i]] = i
class_to_index

for x in classes:
    videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
videos

[['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '004',
  '011',
  '002',
  '001'],
 ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006', '003', '009', '005', '010', '007', '004', '002', '008', '001'],
 ['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '004',
  '011',
  '002',
  '008',
  '001'],
 ['003', '005', '004', '002', '001'],
 ['006', '003', '009', '005', '010', '007', '004', '011', '002', '008', '001'],
 ['006',
  '017',
  '003',
  '016',
  '009',
  '013',
  '005',
  '012',
  '010',
  '015',
  '014',
  '007',
  '019',
  '004',
  '011',
  '018',
  '002',
  '008',
  '001'],
 ['006',
  '003',
  '009',
  '013',
  '005',
  '012',
  '010',
  '014',
  '007',
  '004',
  '011',
  '002',
  '008',
  '001']]

In [3]:
def permute(X,Y):
    train_size = X.shape[0]
    permutation_train = np.random.permutation(train_size)
    X = X[permutation_train]
    Y = Y[permutation_train]
    return X,Y

def load_image(path,image_size):
    image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, image_size)
    return image

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)]
    return Y

def pad(X_train_images_class,max_len):
    length = len(X_train_images_class)
    pad_arr = np.zeros((X_train_images_class.shape[1:4]),dtype=np.uint8)
    X_train_images_class = list(X_train_images_class)
    for i in range(max_len-length):
        X_train_images_class.append(pad_arr)
    return np.array(X_train_images_class,dtype=np.uint8)

def predict(model,X,verbose=True):
    pred = model.predict(X)[0]
    max_pred = [np.argmax(i) for i in pred]
    if verbose:
        print("Max Preds time", max_pred)
    counts = np.bincount(max_pred)
    class_pred = np.argmax(counts)
    return class_pred

def evaluate(model, X_test,Y_test,verbose = True):
    count = 0
    preds = []
    for i in range(len(X_test)):
        class_pred = predict(model,X_test[i],verbose=verbose)
        preds.append(class_pred)
        actual = Y_test[i]
        if verbose:
            print("Pred",classes[class_pred],"Actual",classes[actual])
            print()
        if class_pred == actual:
            count += 1
    
    print("Confusion Matrix")
    print(confusion_matrix(Y_test,preds))
    return float(count)/float(len(Y_test)) * 100.0

In [4]:
def dilated_conv(input_shape):
    X_input = Input(input_shape)
    X = TimeDistributed(BatchNormalization(), name = 'BatchNorm_1')(X_input)
    #X = TimeDistributed(ZeroPadding2D((3, 3)))(X)
    X = TimeDistributed(Conv2D(32, (7, 7), strides = (4, 4), activation='relu', padding="same"), name="Conv_1a")(X)
    X = TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding="same"), name="Conv_1b")(X)
    X = TimeDistributed(MaxPool2D((2, 2)), name = "Pool_1")(X)
    X = TimeDistributed(Dropout(0.2), name='Dropout_a')(X)
    
    X = TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding = "same"), name ="Conv_2a")(X)
    #X = TimeDistributed(Conv2D(32, (3, 3), name ="Conv_2b", activation='relu', padding = "same"))(X)
    X = TimeDistributed(MaxPool2D((2, 2)), name = "Pool_2")(X)
    X = TimeDistributed(Dropout(0.2), name='Dropout_b')(X)
    X = TimeDistributed(Conv2D(32,(3,3),activation='relu'), name='Conv_3a')(X)
    X = TimeDistributed(MaxPool2D((2, 2)), name = "Pool_3")(X)
    
    X = TimeDistributed(Conv2D(8,(1,1),activation='relu'), name='Conv_1x1')(X)
    X = TimeDistributed(Flatten(), name='Flatten')(X)
    X = TimeDistributed(Dropout(0.3), name='Dropout_c')(X)
    Y = TimeDistributed(Dense(9,activation='softmax',name='final'))(X)

    X = Conv1D(64, 4, dilation_rate=2, name = 'Conv1Da', activation='relu')(X)
    X = Conv1D(48, 3, dilation_rate=4, name = 'Conv1Db', activation='relu')(X)
    X = Conv1D(32, 3, dilation_rate=4, name = 'Conv1Dc', activation='relu')(X)
    X = Lambda(lambda x : x[:, -1, :], name = "Extractoutput")(X)
    X = Dense(9, activation='softmax', name = 'Output')(X)
    return Model(X_input, [X,Y])

In [5]:
dcnn = dilated_conv((40, 172, 172, 3))

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead


In [6]:
dcnn.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 40, 172, 172, 0                                            
__________________________________________________________________________________________________
BatchNorm_1 (TimeDistributed)   (None, 40, 172, 172, 12          input_1[0][0]                    
__________________________________________________________________________________________________
Conv_1a (TimeDistributed)       (None, 40, 43, 43, 3 4736        BatchNorm_1[0][0]                
__________________________________________________________________________________________________
Conv_1b (TimeDistributed)       (None, 40, 43, 43, 3 9248        Conv_1a[0][0]                    
__________________________________________________________________________________________________
Pool_1 (Ti

In [7]:
dcnn.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], loss_weights=[1, 0.6],
        metrics=['accuracy'],
        optimizer='adam')

## DATASET

In [8]:
def build_dataset_end_to_end(image_size, max_len = 40, stride = 10):
    
    X_train_images = []
    Y_train_images = []
    
    X_test_images = []
    Y_test_images = []
    
    test_videos = [['004', '011', '007'], ['006', '010'], ['007', '002'], \
                   ['003','001'], ['006', '012', '009'], ['004', '005'], ['008','002'], ['004', '012', '002'], ['001', '013', '006']]
    
    for i in range(len(classes)):
        cls = classes[i]
        test = test_videos[i] 
        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_train_images_class = np.array(X_train_images_class)        
            X_test_frames = []                                
            for k in range(0,len(X_train_images_class),stride):
                lower = k
                upper = min(len(X_train_images_class),k+max_len)
                if upper == len(X_train_images_class):
                    if vid not in test:                
                        X_train_images.append(pad(X_train_images_class[lower:upper],max_len))
                        Y_train_images.append(i)
                    else:
                        print("Test Vid" ,vid)
                        X_test_frames.append(pad(X_train_images_class[lower:upper],max_len))
                        X_test_images.append(np.array(X_test_frames))        
                        Y_test_images.append(i)
                    #print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    if vid not in test:                
                        X_train_images.append(X_train_images_class[lower:upper])
                        Y_train_images.append(i)
                    else:
                        print("Test Vid" ,vid)
                        X_test_frames.append(X_train_images_class[lower:upper])
                    #print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])
    return np.array(X_train_images,dtype=np.uint8),np.array(Y_train_images,dtype=np.uint8), np.array(X_test_images), np.array(Y_test_images)

In [9]:
X_train = None
X_test = None
Y_train = None
Y_test = None
try:
    X_train = np.load('../Numpy/End2End/X_train.npy')
    Y_train = np.load('../Numpy/End2End/Y_train.npy')
    X_test = np.load('../Numpy/End2End/X_test.npy')
    Y_test = np.load('../Numpy/End2End/Y_test.npy')
except FileNotFoundError:
    X_train, Y_train, X_test, Y_test = build_dataset_end_to_end((172, 172))
    np.save('../Numpy/End2End/X_train.npy', X_train)
    np.save('../Numpy/End2End/Y_train.npy', Y_train)
    np.save('../Numpy/End2End/X_test.npy', X_test)
    np.save('../Numpy/End2End/Y_test.npy', Y_test)

In [10]:
X_train.shape, X_test.shape,Y_train.shape, Y_test.shape

((340, 40, 172, 172, 3), (22,), (340,), (22,))

In [11]:
X_train, Y_train = permute(X_train, Y_train)
Y_train = convert_to_one_hot(Y_train,9)

In [12]:
Y_train2 = np.tile(Y_train, (40, 1, 1))
Y_train2 = Y_train2.transpose(1, 0, 2)

In [13]:
Y_test

array([0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8])

## TRAINING

In [16]:
#filepath="weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
#checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_weights_only = False, save_best_only = False, mode='auto',period=1)
dcnn = load_model('temp_11.h5')
for i in range(1):
    dcnn.fit(X_train, [Y_train,Y_train2] , batch_size=64, epochs=2 , validation_split=0.1)
    dcnn.save('temp_'+ str(i+12) + '.h5')

Train on 306 samples, validate on 34 samples
Epoch 1/2
Epoch 2/2


## TESTING

In [19]:
dcnn = load_model('temp_12.h5')

In [20]:
evaluate(dcnn, X_test, Y_test, verbose = False)

Confusion Matrix
[[3 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0]
 [0 1 1 0 0 0 0 0 0]
 [0 0 1 1 0 0 0 0 0]
 [0 0 0 0 3 0 0 0 0]
 [0 0 0 0 0 2 0 0 0]
 [0 0 0 0 0 1 1 0 0]
 [0 2 0 0 0 0 0 1 0]
 [0 0 0 0 0 1 0 1 1]]


59.09090909090909

In [22]:
del X_train
del Y_train

NameError: name 'X_train' is not defined

In [26]:
def build_test_dataset(image_size, stride = 10, max_len = 40, 
                       video_path = '../UCF_Unseen/',
                       image_path = '../UCF_Images/'):
    
    X_test_images = []
    Y_test_images = []
    
    VIDEOS_DIR = video_path
    IMAGES_DIR = image_path
    classes = ['Kicking', 'Riding-Horse', 'Running', 'SkateBoarding', 'Swing-Bench', 'Lifting', 'Swing-Side', 'Walking', 'Golf-Swing']
    videos = []
    for x in classes:
        videos.append(list(os.listdir(VIDEOS_DIR+x+'/')))
    
    for i in range(len(classes)):
        cls = classes[i]
        
        for j in range(len(videos[i])):
            vid = videos[i][j]
            video_r = VIDEOS_DIR+cls+'/'+ vid +'/'
            image_r = IMAGES_DIR+cls+'/'+ vid +'/'
            filelist = sorted(list(os.listdir(image_r)))
            X_train_images_class = []
            for file in filelist:
                if file.endswith(".png"):
                    image = load_image(image_r+file,image_size)
                    X_train_images_class.append(image)
            X_train_images_class = np.array(X_train_images_class)        
            X_test_frames = []                                
            for k in range(0,len(X_train_images_class),stride):
                lower = k
                upper = min(len(X_train_images_class),k+max_len)
                if upper == len(X_train_images_class):             
                    X_test_frames.append(pad(X_train_images_class[lower:upper],max_len))
                    X_test_images.append(np.array(X_test_frames))        
                    Y_test_images.append(i)
                    print("Padded frames" , lower , "to" , upper)
                    break
                else:
                    X_test_frames.append(X_train_images_class[lower:upper])
                    print("Added frames" , lower , "to" , upper)
                    
            print("Processed",videos[i][j],"of","class",classes[i])
    return np.array(X_test_images), np.array(Y_test_images)

In [21]:
X_test_full = None
Y_test_full = None
try:
    X_test_full = np.load('../Numpy/End2End/X_test_full_training.npy')
    Y_test_full = np.load('../Numpy/End2End/Y_test_full_training.npy')
except FileNotFoundError:
    X_test_full, Y_test_full = build_test_dataset((172,172),
                                                  video_path='../Videos/',
                                                  image_path='../Images/')
    np.save('../Numpy/End2End/X_test_full_training.npy', X_test_full)
    np.save('../Numpy/End2End/Y_test_full_training.npy', Y_test_full)

In [22]:
evaluate(dcnn, X_test_full, Y_test_full, verbose = False)

Confusion Matrix
[[16  0  0  0  0  0  0  0  0]
 [ 0  8  2  0  0  0  0  0  0]
 [ 0  1  8  0  0  0  0  0  0]
 [ 0  0  1  9  0  0  0  0  0]
 [ 0  0  0  0 14  3  0  0  0]
 [ 0  0  0  0  0  5  0  0  0]
 [ 0  0  0  0  0  3  8  0  0]
 [ 0  2  0  0  0  0  0 17  0]
 [ 0  0  0  0  0  1  0  1 12]]


87.38738738738738

In [23]:
X_test_unseen = None
Y_test_unseen = None
try:
    X_test_unseen = np.load('../Numpy/End2End/X_test_unseen.npy')
    Y_test_unseen = np.load('../Numpy/End2End/Y_test_unseen.npy')
except FileNotFoundError:
    X_test_unseen, Y_test_unseen = build_test_dataset((172,172))
    np.save('../Numpy/End2End/X_test_unseen.npy', X_test_unseen)
    np.save('../Numpy/End2End/Y_test_unseen.npy', Y_test_unseen)

In [25]:
evaluate(dcnn, X_test_unseen, Y_test_unseen, verbose = False)

Confusion Matrix
[[3 0 0 0 0 1 0 0 0]
 [0 1 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0]
 [0 0 1 1 0 0 0 0 0]
 [0 0 0 0 1 2 0 0 0]
 [0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 1 1 0 0]
 [0 0 0 0 0 0 0 3 0]
 [0 0 0 0 0 0 0 0 4]]


68.18181818181817