Train model

In [None]:
# Provides similar functionality to ImageDataGenerators for videos
!pip install keras-video-generators



In [None]:
# Please email me at nini16@tamu.edu if you do not ave access to the google drive.
# Permissions should have been granted but if not please email me!

from google.colab import drive
drive.mount('/content/drive/')
# drive.flush_and_unmount()

Mounted at /content/drive/


In [None]:
# !rm -rf "/content/new_train_2_classes"

In [None]:
# training data.
# Please ensure the file is present before running.
# !unzip -q "/content/drive/MyDrive/CSCE636/v3/train_v3_1.zip"

In [1]:
import keras
from keras.regularizers import l2
from keras.preprocessing.image import load_img
import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Conv2D, BatchNormalization, \
    MaxPool2D, GlobalMaxPool2D, Dense, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras_video import VideoFrameGenerator

from keras.layers import TimeDistributed, GRU, Dense, Dropout, LSTM

from keras.models import load_model, Model

from keras.applications import ResNet50V2, DenseNet121

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

import os
import numpy as np

import math

In [2]:
# All frames should be resized
# Please select a batch-size that divides the number of samples!
# THE DATASET WILL PROBABLY BE INCREASED FOR THE NEXT SUBMISSION SO
# BE SURE TO ADJUST THE BATCH SIZE!!

img_shape = (224, 224)
BS = 19

In [3]:
# Apply image augmentation to each frame
# Please confirm that this directory is present before running

vid_gen = VideoFrameGenerator(
    glob_pattern=r"C:\Users\cotua\Desktop\python scripts\train_v3_1\{classname}\*",
    nb_frames=20,
    split_val=.101, 
    shuffle=True,
    batch_size=BS,
    target_shape=img_shape,
    nb_channel=3,
    transformation=ImageDataGenerator(rescale=1./255,
                                      samplewise_center=True,
                                      # rotation_range=30,
                                      # width_shift_range=0.1,
                                      # height_shift_range=0.1,
                                      # shear_range=0.1,
                                      # zoom_range=0.1,
                                      # horizontal_flip=True,
                                      fill_mode="nearest"),
    use_frame_cache=False)

class brushing_hair, validation count: 54, train count: 484
class miscellaneous, validation count: 41, train count: 371
Total data: 2 classes for 855 files for train


In [4]:
validation_gen = vid_gen.get_validation_generator()

Total data: 2 classes for 95 files for validation


In [None]:
# Can use this for visualization
# from keras_video import utils
# utils.show_sample(vid_gen, random=True)

In [4]:
# model structure for Feature Extractor
def build_convnet_3(shape=(224, 224, 3)):
    resnet = DenseNet121(include_top=False, weights='imagenet', input_shape=shape)

    train = False
    for layer in resnet.layers:
        layer.trainable = train
        if layer.name == "conv5_block3_2_relu":
            train = True
    
    globMaxpool = GlobalMaxPool2D()(resnet.output)
    model = Model(inputs=resnet.input, outputs=globMaxpool)
    return model

In [4]:
test = build_convnet_3()
test.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
zero_padding2d (ZeroPadding2D)  (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d[0][0]             
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
______________________________________________________________________________________________

In [5]:
def action_model(shape=(20,) + img_shape + (3,)):
    # Create our feature extractor convnet with img_shape input shape
    convnet = build_convnet_3()
    
    # then create our final model
    model = keras.Sequential()
    # add the convnet with img_shape shape
    model.add(TimeDistributed(convnet, input_shape=shape))
    # add GRU
    model.add(LSTM(64))
    # and finally, we make a decision network
    model.add(Dense(1024, activation='relu', kernel_regularizer=keras.regularizers.l2(l2=0.01)))
    model.add(Dropout(.5))
    model.add(Dense(2, activation='softmax'))

    model.summary()
    return model

In [56]:
# instantiate and compile model
model = action_model()
optimizer = keras.optimizers.Adam(0.0005)
model.compile(
    optimizer,
    'categorical_crossentropy',
    metrics=['acc']
)

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_16 (TimeDis (None, 20, 1024)          7037504   
_________________________________________________________________
lstm_16 (LSTM)               (None, 64)                278784    
_________________________________________________________________
dense_32 (Dense)             (None, 1024)              66560     
_________________________________________________________________
dropout_16 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_33 (Dense)             (None, 2)                 2050      
Total params: 7,384,898
Trainable params: 347,394
Non-trainable params: 7,037,504
_________________________________________________________________


In [37]:
vid_gen.files_count

855

In [57]:
# Adjust epochs and other parameters as needed
# Callbacks have been commented out to avoid overwriting existin data.
# Whoever is running this can uncomment them as needed

callbacks = [
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=3, min_lr=0.0001),
    keras.callbacks.EarlyStopping(
        monitor='val_acc',
        patience=5,
        ),
    keras.callbacks.ModelCheckpoint(
        r'C:\Users\cotua\Desktop\python scripts\Weights\weights.{epoch:02d}-{val_loss:.2f}.hdf5',
        monitor='val_acc',
        save_best_only=True,
        verbose=1),
]

history = model.fit_generator(
    vid_gen,
    steps_per_epoch=math.ceil(vid_gen.files_count/BS),
    validation_data=validation_gen,
    verbose=1,
    epochs=30, # last used 80,
    shuffle=True,
    callbacks=callbacks
)

Epoch 1/30

Epoch 00001: val_acc improved from -inf to 0.85263, saving model to C:\Users\cotua\Desktop\python scripts\Weights\weights.01-1.11.hdf5
Epoch 2/30

Epoch 00002: val_acc improved from 0.85263 to 0.87368, saving model to C:\Users\cotua\Desktop\python scripts\Weights\weights.02-0.78.hdf5
Epoch 3/30

Epoch 00003: val_acc improved from 0.87368 to 0.91579, saving model to C:\Users\cotua\Desktop\python scripts\Weights\weights.03-0.57.hdf5
Epoch 4/30

Epoch 00004: val_acc did not improve from 0.91579
Epoch 5/30

Epoch 00005: val_acc did not improve from 0.91579
Epoch 6/30

Epoch 00006: val_acc improved from 0.91579 to 0.92632, saving model to C:\Users\cotua\Desktop\python scripts\Weights\weights.06-0.40.hdf5
Epoch 7/30

Epoch 00007: val_acc improved from 0.92632 to 0.93684, saving model to C:\Users\cotua\Desktop\python scripts\Weights\weights.07-0.38.hdf5
Epoch 8/30

Epoch 00008: val_acc did not improve from 0.93684
Epoch 9/30

Epoch 00009: val_acc did not improve from 0.93684
Epoch

In [58]:
model.evaluate_generator(validation_gen, steps=validation_gen.files_count//BS)

[0.32464393973350525, 0.9157894849777222]

In [59]:
# uncomment only if you need to
model.save(r'C:\Users\cotua\Desktop\python scripts\main_model_V3_lr_0.0005.h5')

In [60]:
# uncomment only if you need to
np.save(r'C:\Users\cotua\Desktop\python scripts\train_history_main_model_V3_lr_0.0005.npy',history.history)



Testing model

In [61]:
# just in case
!pip install tqdm

Collecting tqdm
  Downloading tqdm-4.59.0-py2.py3-none-any.whl (74 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.59.0


In [6]:
pos = [0,0.05,0.1,0.15,.02,.25,.30,.35,.40,.45,.50,.55,.60,.65,.70,.75,.80,.85,.9,.95]

In [7]:
def ExtractFrames(file_path, pos=pos):
    # Extracts frames from file_path at the positions (relative between 0 and 1) in pos
    
    import os
    
    if not len(pos):
        print("[ExtractFrames]: Invalid positions")
        return None
    
    if not os.path.isfile(file_path) :
        print("[ExtractFrames]: Invalid file path")
        return None
    
    import cv2
    
    # container for frames
    arr = np.empty((len(pos),224,224,3))
    
    cap = cv2.VideoCapture(file_path)
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    
    for k,i in enumerate(pos):
        # get frame number
        position = int(i * total_frames)
        
        # set frame pointer at i and extract frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        
        # preprocessing
        frame = cv2.resize(frame, (224,224))
        frame = frame * 1/255.
        frame = np.float32(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # insert in container
        arr[k] = frame
        
    # cleanup
    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    cap.release()
    
    return arr

In [13]:
def test_flow_from_directory(dir_path, limit=False, max_sample=0):
    import numpy as np
    import os
    import math
    from tqdm import tqdm
    
    # 2 classes: "brushing_teeth", "miscellaneous"
    posDIR = os.path.join(dir_path, "brushing_hair")
    negDIR = os.path.join(dir_path, "miscellaneous")
    
    pos_samples = os.listdir(posDIR)
    neg_samples = os.listdir(negDIR)
    
    # Get all the videos in both classes
    pos_samples = [os.path.join(posDIR, fname) for fname in pos_samples]
    neg_samples = [os.path.join(negDIR, fname) for fname in neg_samples]
    
    # split
    p_ratio = len(pos_samples)/(len(pos_samples) + len(neg_samples))
    
    sample_size = len(pos_samples) + len(neg_samples)
    
    # Array for data, labels and files
    if limit:
        test_data   = np.empty( (max_sample, 20, 224, 224, 3) )
        test_label  = np.empty( (max_sample, 1) )
    else:
        test_data   = np.empty( (sample_size, 20, 224, 224, 3) )
        test_label  = np.empty( (sample_size, 1) )
    
    test_files = []#pos_samples+neg_samples
    
    # Extract frames from all videos using default positions
    index = 0
    print("Now extracting brushing_teeth videos")
    if limit:
        num = int(round(p_ratio*max_sample))
    else:
        num = len(pos_samples)
    
    for vid in tqdm(pos_samples):
        test_data[index] = ExtractFrames(str(vid))
        test_label[index] = 1.
        index += 1
        test_files.append(vid)
        if index >= num:
            break
    
    print("Now extracting miscellaneous videos")
    
    for vid in tqdm(neg_samples):
        test_data[index] = ExtractFrames(vid)
        test_label[index] = 0.
        index += 1
        test_files.append(vid)
        if index >= max_sample:
            break
    
    return test_data, test_label, test_files

In [None]:
!unzip -q "/content/drive/MyDrive/CSCE636/YoutubeTest_v2.zip"

In [14]:
model = load_model(r"C:\Users\cotua\Desktop\python scripts\main_model_V3_lr_0.0005.h5")

In [15]:
# Loads data, label and filenames
test_data, test_label, test_files = test_flow_from_directory(r"C:\Users\cotua\Desktop\python scripts\train_v3_1", limit=True, max_sample=100)
test_label = np.reshape(test_label, test_label.shape[0])

  0%|▏                                                                                 | 1/538 [00:00<01:35,  5.63it/s]

Now extracting brushing_teeth videos


 10%|████████▍                                                                        | 56/538 [00:07<01:03,  7.60it/s]
  0%|▍                                                                                 | 2/412 [00:00<00:24, 17.06it/s]

Now extracting miscellaneous videos


 10%|████████▎                                                                        | 42/412 [00:03<00:28, 12.98it/s]


In [16]:
BS = 19
num_steps = math.ceil(test_data.shape[0]/BS)
num_steps

6

In [18]:
pred = model.predict(test_data,verbose=1,batch_size=BS, steps=num_steps)



In [19]:
index_max = np.argmax(pred, axis=1)
# "brushing_teeth" - 1, "not brushing_teeth" - 0
# if argmax is index 0, then it predicted brushing teeth, hence
# assign a 1 or else assign a 0
lookup = {1:0, 0:1}
predicted_labels = np.array([lookup[i] for i in index_max])

In [20]:
accuracy_score(test_label, predicted_labels)

0.87

In [24]:
conf_mat = confusion_matrix(test_label, predicted_labels)
conf_mat

array([[33, 10],
       [ 3, 54]], dtype=int64)

In [22]:
print("False Negative Rate: {}".format(75/(319+75)))

False Negative Rate: 0.19035532994923857


In [23]:
print("False Positive Rate: {}".format(145/(214+145)))

False Positive Rate: 0.403899721448468


In [None]:
# import csv
# fields = ['file', 'Label']
# expData = []
# for i in range(394):
#     expData.append([test_files[i], predicted_labels[i]])

In [None]:
# with open('/content/filecheck.csv', 'w') as f: 
      
#     # using csv.writer method from CSV package 
#     write = csv.writer(f) 
      
#     write.writerow(fields) 
#     write.writerows(expData)