In [None]:
# just in case
!pip install tqdm



In [1]:
# mount google drive
from google.colab import drive
drive.mount('/content/drive/')
# drive.flush_and_unmount()

Mounted at /content/drive/


In [2]:
import keras
from keras.models import load_model
import numpy as np
from sklearn.metrics import accuracy_score

In [3]:
def ExtractFrames(file_path, pos=[0.1,0.3,0.5,0.7,0.9]):
    # Extracts frames from file_path at the positions (relative between 0 and 1) in pos
    
    import os
    
    if not len(pos):
        print("[ExtractFrames]: Invalid positions")
        return None
    
    if not os.path.isfile(file_path) :
        print("[ExtractFrames]: Invalid file path")
        return None
    
    import cv2
    
    # container for frames
    arr = np.empty((len(pos),224,224,3))
    
    cap = cv2.VideoCapture(file_path)
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    
    for k,i in enumerate(pos):
        # get frame number
        position = int(i * total_frames)
        
        # set frame pointer at i and extract frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        
        # preprocessing
        frame = cv2.resize(frame, (224,224))
        frame = frame * 1/255.
        frame = np.float32(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # insert in container
        arr[k] = frame
        
    # cleanup
    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    cap.release()
    
    return arr

In [5]:
# Load our already trained model
model = load_model("/content/drive/MyDrive/CSCE636/main_model.h5")

In [7]:
# Get frames from our desired test video
TestVideo = ExtractFrames(r"/content/drive/MyDrive/CSCE636/DemoApplyTestVideo.mp4")

In [9]:
# need to reshape input to include sample size.
# in this case sample size is 1
TestVideo = np.expand_dims(TestVideo, axis=0)
TestVideo.shape

(1, 5, 224, 224, 3)

In [10]:
# perform prediction. Note this command works for only a single video
pred = model(TestVideo)

In [15]:
# convert tensor to array
pred = pred.numpy()

In [17]:
# The first index represents brusing teeth, the second is not brushing teeth
pred

array([[0.93660915, 0.06339087]], dtype=float32)

In [19]:
index_max = np.argmax(pred, axis=1)
# "brushing_teeth" - 1, "not brushing_teeth" - 0
# if argmax is index 0, then it predicted brushing teeth, hence
# assign a 1 or else assign a 0
lookup = {1:0, 0:1}
predicted_labels = np.array([lookup[i] for i in index_max])

In [21]:
# a value of one means we detected the action!
predicted_labels[0]

1