# This is an example of performing Video Activity Recognition using an LSTM
Modified from "Hands-on Computer Vision with TensorFlow 2" by B. Planche and E. Andres

### Step 1: import modules

In [1]:
import tensorflow as tf
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tqdm
from sklearn.preprocessing import LabelBinarizer


  from ._conv import register_converters as _register_converters


### Step 2: setup variables

In [2]:
# Number of frames sampled from each video, i.e. the sequence length the LSTM processes.
SEQUENCE_LENGTH = 40
# Number of elements grouped into one batch by the tf.data pipelines.
BATCH_SIZE = 16

# Root directory of the data set.
BASE_PATH = '/Users/subhi/Downloads/demo/Data1'
# Glob pattern for the .mp4 files located in sub-directories of BASE_PATH.
VIDEOS_PATH = os.path.join(BASE_PATH, '**', '*.mp4')
print(VIDEOS_PATH)

/Users/subhi/Downloads/demo/Data1/**/*.mp4


### STEP 3:  sample the video --do not process every frame
PART 1: define the function frame_generator(), which yields up to SEQUENCE_LENGTH frames per video by taking every Kth frame, where K = num_frames_in_video // SEQUENCE_LENGTH (and at least 1).

PART 2: load the Dataset from the generator, specifying that each element is a frame of size 224x224x3 (RGB), and group the frames into batches of 16.

In [3]:
#PART1
def frame_generator():
    """Yield sampled, preprocessed frames from every video matching VIDEOS_PATH.

    The videos are visited in random order. For each video every Kth frame is
    kept, where K = max(1, frame_count // SEQUENCE_LENGTH), and at most
    SEQUENCE_LENGTH frames are produced per video.

    Yields:
        (img, video_path): ``img`` is the frame with its channel axis reversed,
        resized to 224x224 and passed through MobileNetV2's
        ``preprocess_input``; ``video_path`` is the path of the source video.
    """
    video_paths = tf.io.gfile.glob(VIDEOS_PATH)
    np.random.shuffle(video_paths)
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        try:
            num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Guard against a step of 0 for videos shorter than SEQUENCE_LENGTH.
            sample_every_frame = max(1, num_frames // SEQUENCE_LENGTH)
            current_frame = 0

            # Remaining number of frames this video is still allowed to yield.
            max_images = SEQUENCE_LENGTH
            while max_images > 0:
                success, frame = cap.read()
                if not success:
                    break

                if current_frame % sample_every_frame == 0:
                    # Reverse the channel axis (OpenCV decodes as BGR -> RGB).
                    frame = frame[:, :, ::-1]
                    img = tf.image.resize(frame, (224, 224))
                    img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
                    max_images -= 1
                    yield img, video_path

                current_frame += 1
        finally:
            # Free the decoder/file handle even if the consumer stops early.
            cap.release()
            

In [4]:
#PART2
# Wrap the frame generator in a tf.data pipeline: every element is one
# (224, 224, 3) float32 frame paired with the scalar string path of its video.
dataset = tf.data.Dataset.from_generator(
    frame_generator,
    output_types=(tf.float32, tf.string),
    output_shapes=((224, 224, 3), ()),
)

# Group frames into batches and let tf.data pick the prefetch buffer size.
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

In [5]:
# Show the shapes and dtypes of the batched frame dataset.
print(dataset)

<DatasetV1Adapter shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


### STEP 5: 
For feature extraction we are going to use an existing CNN model called MobileNetV2, which is built into TensorFlow.

In [6]:
# ImageNet-pretrained MobileNetV2 without its classification head.
mobilenet_v2 = tf.keras.applications.mobilenet_v2.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)
x = mobilenet_v2.output

# Global average pooling collapses the spatial feature map
# (7 x 7 x 1280 for a 224 x 224 input) into a single 1280-vector,
# since spatial information is not needed downstream.
pooling_output = tf.keras.layers.GlobalAveragePooling2D()(x)
feature_extraction_model = tf.keras.Model(inputs=mobilenet_v2.input,
                                          outputs=pooling_output)


### STEP 6: 
Extract Features using our MobileNet Model

In [None]:
# Skip this cell if the .npy feature files have already been generated.
current_path = None
all_features = []

# Walk the batched frame dataset (tqdm draws a progress bar that advances once
# per batch) and run every batch through feature_extraction_model
# (MobileNetV2 followed by global average pooling).
for img, batch_paths in tqdm.tqdm(dataset):
    batch_features = feature_extraction_model(img)
    # Flatten to one feature vector per frame.
    batch_features = tf.reshape(batch_features,
                              (batch_features.shape[0], -1))

    for features, path in zip(batch_features.numpy(), batch_paths.numpy()):
        # A batch can span several videos; a change of path means the previous
        # video is complete, so its accumulated features are written next to it.
        if path != current_path and current_path is not None:
            output_path = current_path.decode().replace('.mp4', '.npy')
            np.save(output_path, all_features)
            all_features = []

        current_path = path
        all_features.append(features)

# The loop above only saves a video once the next one starts, so the features
# of the final video have to be flushed explicitly.
if current_path is not None and all_features:
    output_path = current_path.decode().replace('.mp4', '.npy')
    np.save(output_path, all_features)

In [7]:
class MyLabelBinarizer(LabelBinarizer):
    """LabelBinarizer variant that produces two columns for two-class targets.

    When the fitted target type is 'binary', ``transform`` appends the
    complement (1 - Y) of the base class's output as a second column, and
    ``inverse_transform`` decodes from the first column only. For every other
    target type both methods defer to the base class unchanged.
    """

    def transform(self, y):
        """Encode ``y``; in the binary case return the columns [Y, 1 - Y]."""
        encoded = super().transform(y)
        if self.y_type_ != 'binary':
            return encoded
        return np.hstack((encoded, 1 - encoded))

    def inverse_transform(self, Y, threshold=None):
        """Decode ``Y``; in the binary case only column 0 is passed to the base class."""
        source = Y[:, 0] if self.y_type_ == 'binary' else Y
        return super().inverse_transform(source, threshold)

In [9]:
# Class names of this two-class problem; the encoder is fitted on them so label
# strings can later be converted into two-column label vectors.
LABELS = ['Stairs','Doors']
encoder = MyLabelBinarizer()
# NOTE(review): the fitted class order (encoder.classes_) may differ from the
# order written in LABELS (presumably sorted by scikit-learn) - confirm before
# interpreting the output columns.
encoder.fit(LABELS)
#print(encoder.classes_)
#print(encoder.transform(['Doors', 'Stairs']))

#t= encoder.transform(['Doors', 'Stairs', 'Stairs'])
#print(t)
#print(encoder.inverse_transform(t))
#print("length of labrels " + str(len(LABELS)))

MyLabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)

In [None]:
# LABELS = ['Doors','Stairs','Other']
# encoder = LabelBinarizer()
# encoder.fit(LABELS)
# print(encoder.classes_)
# #print(encoder.transform(['Doors','stairs']))
# print("length of labels is " + str(len(LABELS)))
# print(str(encoder))

### STEP 7: 
Create the LSTM model: 1) a Masking layer, 2) an LSTM layer with 512 cells, dropout 0.5 and recurrent_dropout 0.5,
3) a fully connected ReLU activation layer with 256 outputs, 4) a dropout layer of 0.5, 5) a final fully connected decision layer whose output length is the number of labels (i.e. the number of classes), with softmax activation.

In [10]:
# Keras Sequential classifier applied to the per-frame feature sequences:
#   1) Masking layer that ignores timesteps whose features are all 0
#      (the zero padding added to short sequences),
#   2) LSTM layer with 512 cells, dropout 0.5 and recurrent_dropout 0.5,
#   3) fully connected ReLU layer with 256 outputs,
#   4) dropout layer (0.5),
#   5) final fully connected softmax layer with one output per class.
model = tf.keras.Sequential([
    tf.keras.layers.Masking(mask_value=0.),
    tf.keras.layers.LSTM(512, dropout=0.5, recurrent_dropout=0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # Sized from LABELS so the output width always matches the label vectors
    # declared for the training/validation datasets.
    tf.keras.layers.Dense(len(LABELS), activation='softmax'),
])

### STEP 8: 
Set up the model's loss function, optimizer, and any metrics you want to compute during training.

In [11]:
# Configure training: categorical cross-entropy loss, the RMSprop optimizer,
# and accuracy plus top-k categorical accuracy as reported metrics.
# NOTE(review): the final layer has only 2 outputs; if the top-k metric uses
# k >= 2 (Keras' default is presumably 5 - confirm) it will always report 1.0
# and carries no information for this model.
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy', 'top_k_categorical_accuracy'])

### STEP 9: 
Set up the training and test lists, which are lists of the training and testing filenames. Note you will need to change the location of these files to point to your location. Define a function make_generator that returns a generator which randomly shuffles a file list (either the training or the testing list, passed in later) and changes the file extension of each listed .mp4 video to .npy, the file holding the features for that video which were calculated in Step 6.

In [12]:
# The split files live in the data root, so derive their paths from BASE_PATH
# instead of repeating the absolute directory.
train_file = os.path.join(BASE_PATH, 'trainlist.txt')
test_file = os.path.join(BASE_PATH, 'testlist.txt')

# One relative video path per row; blank rows are skipped so that they do not
# turn into empty entries that fail later when the feature file is loaded.
with open(test_file) as f:
    test_list = [row.strip() for row in f if row.strip()]

# Rows of the training list may carry extra space-separated fields after the
# path; only the first field (the path) is kept.
with open(train_file) as f:
    train_list = [row.strip().split(' ')[0] for row in f if row.strip()]
#print(train_list)
def make_generator(file_list):
    """Return a generator function yielding (padded features, encoded label) pairs.

    Args:
        file_list: relative video paths of the form '<label>/<name>.mp4'. The
            list itself is not modified.

    Returns:
        A zero-argument generator function. On every call it visits the files
        in a fresh random order and, for each one, loads the matching '.npy'
        feature file under BASE_PATH and yields:
          * a (SEQUENCE_LENGTH, 1280) array holding the features, zero-padded
            at the end when the video has fewer than SEQUENCE_LENGTH rows;
          * the label vector produced by ``encoder`` for the parent directory
            name of the path.
    """
    def generator():
        # Shuffle a copy so the caller's list keeps its original order.
        shuffled = list(file_list)
        np.random.shuffle(shuffled)
        for path in shuffled:
            full_path = os.path.join(BASE_PATH, path).replace('.mp4', '.npy')

            label = os.path.basename(os.path.dirname(path))
            # Clip overly long feature files so the assignment below cannot
            # fail with a shape mismatch.
            features = np.load(full_path)[:SEQUENCE_LENGTH]

            padded_sequence = np.zeros((SEQUENCE_LENGTH, 1280))
            padded_sequence[0:len(features)] = features

            transformed_label = encoder.transform([label])

            yield padded_sequence, transformed_label[0]
    return generator

In [13]:
# Inspect the relative video paths that make up the training split.
print(train_list)

['Doors/1.mp4', 'Doors/10.mp4', 'Doors/100.mp4', 'Doors/101.mp4', 'Doors/102.mp4', 'Doors/103.mp4', 'Doors/104.mp4', 'Doors/105.mp4', 'Doors/106.mp4', 'Doors/109.mp4', 'Doors/11.mp4', 'Doors/110.mp4', 'Doors/111.mp4', 'Doors/112.mp4', 'Doors/113.mp4', 'Doors/114.mp4', 'Doors/116.mp4', 'Doors/117.mp4', 'Doors/118.mp4', 'Doors/119.mp4', 'Doors/12.mp4', 'Doors/120.mp4', 'Doors/121.mp4', 'Doors/122.mp4', 'Doors/124.mp4', 'Doors/125.mp4', 'Doors/126.mp4', 'Doors/128.mp4', 'Doors/129.mp4', 'Doors/130.mp4', 'Doors/131.mp4', 'Doors/132.mp4', 'Doors/133.mp4', 'Doors/135.mp4', 'Doors/136.mp4', 'Doors/137.mp4', 'Doors/14.mp4', 'Doors/140.mp4', 'Doors/141.mp4', 'Doors/142.mp4', 'Doors/143.mp4', 'Doors/144.mp4', 'Doors/145.mp4', 'Doors/146.mp4', 'Doors/147.mp4', 'Doors/148.mp4', 'Doors/15.mp4', 'Doors/153.mp4', 'Doors/154.mp4', 'Doors/155.mp4', 'Doors/156.mp4', 'Doors/157.mp4', 'Doors/159.mp4', 'Doors/16.mp4', 'Doors/160.mp4', 'Doors/161.mp4', 'Doors/162.mp4', 'Doors/163.mp4', 'Doors/165.mp4', 'Doo

In [14]:
# Inspect the relative video paths that make up the test split.
print(test_list)

['Doors/107.mp4', 'Doors/108.mp4', 'Doors/115.mp4', 'Doors/123.mp4', 'Doors/127.mp4', 'Doors/13.mp4', 'Doors/134.mp4', 'Doors/138.mp4', 'Doors/139.mp4', 'Doors/149.mp4', 'Doors/150.mp4', 'Doors/151.mp4', 'Doors/152.mp4', 'Doors/158.mp4', 'Doors/164.mp4', 'Doors/171.mp4', 'Doors/175.mp4', 'Doors/18.mp4', 'Doors/180.mp4', 'Doors/182.mp4', 'Doors/184.mp4', 'Doors/189.mp4', 'Doors/191.mp4', 'Doors/193.mp4', 'Doors/197.mp4', 'Doors/198.mp4', 'Doors/20.mp4', 'Doors/203.mp4', 'Doors/210.mp4', 'Doors/212.mp4', 'Doors/215.mp4', 'Doors/221.mp4', 'Doors/224.mp4', 'Doors/231.mp4', 'Doors/245.mp4', 'Doors/254.mp4', 'Doors/256.mp4', 'Doors/261.mp4', 'Doors/264.mp4', 'Doors/267.mp4', 'Doors/268.mp4', 'Doors/271.mp4', 'Doors/272.mp4', 'Doors/274.mp4', 'Doors/278.mp4', 'Doors/281.mp4', 'Doors/284.mp4', 'Doors/285.mp4', 'Doors/286.mp4', 'Doors/29.mp4', 'Doors/292.mp4', 'Doors/302.mp4', 'Doors/304.mp4', 'Doors/305.mp4', 'Doors/313.mp4', 'Doors/314.mp4', 'Doors/326.mp4', 'Doors/330.mp4', 'Doors/333.mp4', 

### STEP 10: 
Set up the train_dataset and valid_dataset (validation/testing). Here we set up batches of 16 for both.

In [15]:
#for tensorflow 2.*
def _make_feature_dataset(file_list):
    """Build the batched, prefetched dataset of (features, label) pairs for file_list.

    Each element is a (SEQUENCE_LENGTH, 1280) float32 feature sequence paired
    with an int16 label vector of length len(LABELS). Batches have exactly
    BATCH_SIZE elements; a trailing partial batch is dropped.
    """
    features_ds = tf.data.Dataset.from_generator(
        make_generator(file_list),
        output_types=(tf.float32, tf.int16),
        # The label shape is written as an explicit one-element tuple.
        output_shapes=((SEQUENCE_LENGTH, 1280), (len(LABELS),)))
    return features_ds.batch(BATCH_SIZE, drop_remainder=True).prefetch(
        tf.data.experimental.AUTOTUNE)


train_dataset = _make_feature_dataset(train_list)
valid_dataset = _make_feature_dataset(test_list)

In [16]:
# Show the batched shapes and dtypes of the training dataset.
print(train_dataset)

<DatasetV1Adapter shapes: ((16, 40, 1280), (16, 2)), types: (tf.float32, tf.int16)>


In [17]:
# Show the batched shapes and dtypes of the validation dataset.
print(valid_dataset)

<DatasetV1Adapter shapes: ((16, 40, 1280), (16, 2)), types: (tf.float32, tf.int16)>


In [18]:
# Data root under which the training-log directory is placed.
BASE_DATA_PATH = '/Users/subhi/Downloads/demo/Data1'
# Directory intended for training logs.
mylog_dir = os.path.join(BASE_DATA_PATH, "train_log")
print(f"Mylog directory = {mylog_dir}")

Mylog directory = /Users/subhi/Downloads/demo/Data1/train_log


In [19]:
# Train for 50 epochs, validating on valid_dataset after each epoch and logging
# to TensorBoard.
# NOTE(review): the logs go to the relative directory 'tmp' (os.path.join with a
# single argument returns it unchanged), while mylog_dir defined in an earlier
# cell is never used - confirm which location is intended.
tensorboard_callback = tf.keras.callbacks.TensorBoard(os.path.join('tmp'), update_freq=1000)
model.fit(train_dataset, epochs=50, callbacks=[tensorboard_callback], validation_data=valid_dataset)
#model.fit(train_dataset, epochs=50,validation_data=valid_dataset)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50


Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1a36d5a9e8>

### STEP 11:  save the tensorflow model to an h5 file

In [None]:
# Keep the destination path in a plain variable instead of attaching an ad-hoc
# attribute to the Keras model object; the '.h5' suffix selects the HDF5 format.
model_file = os.path.join(BASE_PATH, 'my_model.h5')
model.save(model_file)

### STEP 13: try to convert the model to TFLite --- Support to come 2019 (when?) -- Currently LSTM conversion to TFLite is NOT supported

### STEP 14: run evaluation on the test data feature extraction

### STEP 15: Run predictions on the test data feature extracted