In [1]:

import tensorflow as tf
from tensorflow.keras.models import load_model
import cv2
import numpy as np

## Real-time testing

In [2]:
### load model
# Relative path to the saved Keras model (.h5 file). The raw string keeps the
# backslash literal.
# NOTE(review): backslash separator -- presumably only resolves on Windows; confirm.
model_path = r"model\ep_range(0, 50)_lr0001.h5"
# Load the trained model from disk and print its layer-by-layer summary.
model= load_model(model_path)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet101 (Functional)      (None, 8, 8, 2048)        42695808  
                                                                 
 flatten_1 (Flatten)         (None, 131072)            0         
                                                                 
 dense_3 (Dense)             (None, 256)               33554688  
                                                                 
 dense_4 (Dense)             (None, 256)               65792     
                                                                 
 dense_5 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 76,317,316
Trainable params: 76,211,972
Non-trainable params: 105,344
_________________________________________________________________


In [3]:
# Inference configuration.
# img_interval: buffer length (in frames) at which a prediction is triggered.
# IMG_WIDTH / IMG_HEIGHT: size every frame is resized to before stacking.
# CHANNEL: number of colour channels contributed by each frame.
img_interval = 5
IMG_WIDTH = IMG_HEIGHT = 256
CHANNEL = 3

In [27]:
def prepare_frame(image):
    """Turn one captured video frame into a normalised float tensor.

    The frame is converted from BGR to RGB channel order, resized to
    (IMG_WIDTH, IMG_HEIGHT), cast to float32 and rescaled with
    ``x / 127.5 - 1``.

    Args:
        image: Frame accepted by ``cv2.cvtColor`` (presumably a BGR uint8
            array as returned by ``cv2.VideoCapture.read`` -- confirm
            against callers).

    Returns:
        A ``tf.float32`` tensor; inputs in 0..255 map onto -1..1.
    """
    resized_rgb = cv2.resize(
        cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
        (IMG_WIDTH, IMG_HEIGHT),
    )
    # Cast first so the division happens in float32, then shift to -1..1.
    return tf.cast(resized_rgb, tf.float32) / 127.5 - 1

## Sample prediction
def image_stack(image_list):
    """Stack frames along the channel axis into a single array.

    Each frame of shape (H, W, C) is placed next to the previous one on the
    last axis, so ``n`` frames give an array of shape (H, W, n * C) with the
    channels in list order.

    The result keeps the (promoted) dtype of the inputs. Allocating a fixed
    ``uint8`` buffer, as this function did before, truncates frames that
    were already normalised to floats in -1..1 to mostly zeros; uint8 inputs
    still produce a uint8 result.

    Args:
        image_list: Non-empty sequence of equally shaped, array-like frames
            (NumPy arrays or anything ``np.asarray`` can convert).

    Returns:
        numpy.ndarray of shape (H, W, n * C).

    Raises:
        ValueError: If ``image_list`` is empty, or the frames do not share
            the same height and width.
    """
    if len(image_list) == 0:
        raise ValueError("image_list must contain at least one frame")

    # Convert up front so tensor-like frames are handled the same way as
    # plain NumPy arrays.
    frames = [np.asarray(frame) for frame in image_list]
    return np.concatenate(frames, axis=-1)

In [50]:
import tensorflow as tf
import numpy as np
import cv2 

# Clip to run inference on: keep exactly one assignment uncommented.
# (The "all.mp4" line used to be live but was overwritten by the next
# assignment, so it never took effect.)
#video_path = r"test_videos\all.mp4"
video_path = r"test_videos\v_ApplyEyeMakeup_g01_c03.avi"
#video_path = r"test_videos\v_Archery_g01_c03.avi"
#video_path = r"test_videos\v_Basketball_g01_c03.avi"
#video_path = r"test_videos\v_BaseballPitch_g03_c02.avi"

In [54]:
# Lookup from the model's predicted class index (argmax of the output) to a
# human-readable action label.
# NOTE(review): the order must match the label encoding used during training;
# this list is not alphabetical ("ApplyEyeMakeup" sorts before "Archery") -- confirm.
classMap = [
    "Archery",
    "ApplyEyeMakeup",
    "BaseballPitch",
    "Basketball",
]

In [55]:
# Read the video frame by frame, classify every `img_interval` frames and show
# the latest predicted label on top of the live preview. Press "q" to stop.
cap = cv2.VideoCapture(video_path)
count = 0
buffer_frame = []
# Empty until the first prediction so cv2.putText always receives a string.
predicted_action = ""

# Fail early with a clear message instead of reporting a bogus "stream end".
if not cap.isOpened():
    raise IOError("Could not open video source: {}".format(video_path))

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        count = count + 1

        # Buffer every frame exactly once. Appending it a second time in an
        # `else` branch filled the stack with duplicated frames and triggered
        # a prediction every 3 frames instead of every `img_interval`.
        tf_image = prepare_frame(frame)
        buffer_frame.append(tf_image)

        if len(buffer_frame) >= img_interval:
            stack_image = image_stack(buffer_frame)
            buffer_frame = []
            # Add the leading batch dimension expected by model.predict.
            stack_image = np.array([stack_image])
            predicted_result = model.predict(stack_image, batch_size=1)

            index = np.argmax(predicted_result)
            predicted_action = classMap[index]

        # Draw a filled banner on the frame and blend it with the untouched
        # copy so the banner appears semi-transparent.
        overlay = frame.copy()

        x, y, w, h = 0, 200, 256, 56

        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), cv2.FILLED)
        alpha = 0.4
        frame = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0)
        cv2.putText(frame, predicted_action, (0,220), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
        cv2.imshow('Real time video based action recognition ', frame)

        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the capture handle and close the preview window, even when
    # prediction or drawing raises part-way through the video.
    cap.release()
    cv2.destroyAllWindows()




Can't receive frame (stream end?). Exiting ...


In [52]:
# Free the capture handle and close any OpenCV windows, then report how many
# frames were read by the loop above.
cap.release()
cv2.destroyAllWindows()
print("Total Frame :", count)

Total Frame : 826
