In [1]:
import cv2 # openCV 4.5.1
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import time 

from skimage.io import imread
from skimage.transform import resize 
from PIL import Image, ImageFont, ImageDraw # add caption by using custom font
from collections import deque

In [2]:
base_model=keras.applications.EfficientNetB0(input_shape=(160, 160, 3),
                                                  include_top=False,
                                                  weights='imagenet')

In [3]:
model=keras.models.load_model(r"EfficientNet_RNN_Model.h5")

In [4]:
### ADD CAPTION TO STREAMING SCREEN & SAVE OUTPUT VIDEO FILE

input_path = 1
output_path = 'cam_output/output.mp4'

In [5]:
vid=cv2.VideoCapture(input_path)
fps=vid.get(cv2.CAP_PROP_FPS) # recognize frames per secone(fps) of input_path video file.
# fps = 30
print(f'fps : {fps}') # print fps.

writer=None
(W, H)=(None, None)
i=0 # number of seconds in video = The number of times that how many operated while loop .
Q=deque(maxlen=128) 

video_frm_ar=np.zeros((1, int(fps), 160, 160, 3), dtype=np.float) #frames
frame_counter=0 # frame number in 1 second. 1~30
frame_list=[] 
preds=None
maxprob=None

#. While loop : Until the end of input video, it read frame, extract features, predict violence True or False.
# ----- Reshape & Save frame img as (30, 160, 160, 3) Numpy array  -----
while True: 
    frame_counter+=1
    grabbed, frm=vid.read()  # read each frame img. grabbed=True, frm=frm img. ex: (240, 320, 3)
    
    if not grabbed:
        print('There is no frame. Streaming ends.')
        break
            
    if fps!=30: 
        print('Please set fps=30')
        break
        
    if W is None or H is None: # W: width, H: height of frame img
        (H, W)=frm.shape[:2]
            
    output=frm.copy() # It is necessary for streaming captioned output video, and to save that.
    
    frame=resize(frm, (160, 160, 3)) #> Resize frame img array to (160, 160, 3)
    frame_list.append(frame) # Append each frame img Numpy array : element is (160, 160, 3) Numpy array.
    
    if frame_counter>=fps: # fps=30 et al
        #. ----- we'll predict violence True or False every 30 frame -----
        #. ----- Insert (1, 30, 160, 160, 3) Numpy array to LSTM model ---
        #. ----- We'll renew predict result caption on output video every 1 second. -----
        # 30-element-appended list -> Transform to Numpy array -> Predict -> Initialize list (repeat)
        frame_ar=np.array(frame_list, dtype=np.float16) #> (30, 160, 160, 3)
        frame_list=[] # Initialize frame list when frame_counter is same or exceed 30, after transforming to Numpy array.
            
        if(np.max(frame_ar)>1): # Scaling RGB value in Numpy array
            frame_ar=frame_ar/255.0
            
        pred_imgarr=base_model.predict(frame_ar) #> Extract features from each frame img by using MobileNet. (30, 5, 5, 1024)
        pred_imgarr_dim=pred_imgarr.reshape(1, pred_imgarr.shape[0], 5*5*1024)#> (1, 30, 25600)
        
        preds=model.predict(pred_imgarr_dim) #> (True, 0.99) : (Violence True or False, Probability of Violence)
        print(f'preds:{preds}')
        Q.append(preds) #> Deque Q
    
        # Predict Result : Average of Violence probability in last 5 second
        if i<5:
            results=np.array(Q)[:i].mean(axis=0)
        else:
            results=np.array(Q)[(i-5):i].mean(axis=0)
        
        print(f'Results = {results}') #> ex : (0.6, 0.650)
            
        maxprob=np.max(results) #> Select Maximum Probability
        print(f'Maximum Probability : {maxprob}')
        print('')
            
        rest=1-maxprob # Probability of Non-Violence
        diff=maxprob-rest # Difference between Probability of Violence and Non-Violence's
        th=100
            
        if diff>0.50:
            th=diff # ?? What is supporting basis?
        
        frame_counter=0 #> Initialize frame_counter to 0
        i+=1 #> 1 second elapsed
        
        # When frame_counter>=30, Initialize frame_counter to 0, and repeat above while loop.
                
    # ----- Setting caption option of output video -----
    # Renewed caption is added every 30 frames(if fps=30, it means 1 second.)
    font1=ImageFont.truetype('fonts/Raleway-ExtraBold.ttf', 24) # font option
    font2=ImageFont.truetype('fonts/Raleway-ExtraBold.ttf', 48) # font option
    
    if preds is not None and maxprob is not None:
        if (preds[0][1])<th : #> if violence probability < th, Violence=False (Normal, Green Caption)
            text1_1='Normal'
            text1_2='{:.2f}%'.format(100-(maxprob*100))
            img_pil=Image.fromarray(output)
            draw=ImageDraw.Draw(img_pil)
            draw.text((int(0.025*W), int(0.025*H)), text1_1, font=font1, fill=(0,255,0,0))
            draw.text((int(0.025*W), int(0.095*H)), text1_2, font=font2, fill=(0,255,0,0))
            output=np.array(img_pil)
                
        else : #> if violence probability > th, Violence=True (Violence Alert!, Red Caption)
            text2_1='Violence Alert!'
            text2_2='{:.2f}%'.format(maxprob*100)
            img_pil=Image.fromarray(output)
            draw=ImageDraw.Draw(img_pil)
            draw.text((int(0.025*W), int(0.025*H)), text2_1, font=font1, fill=(0,0,255,0))
            draw.text((int(0.025*W), int(0.095*H)), text2_2, font=font2, fill=(0,0,255,0))
            output=np.array(img_pil) 
        
    # Save captioned video file by using 'writer'
    if writer is None:
        fourcc=cv2.VideoWriter_fourcc(*'DIVX')
        writer=cv2.VideoWriter(output_path, fourcc, 30, (W, H), True)
            
    cv2.imshow('This is output', output) # View output in new Window.
    writer.write(output) # Save output in output_path
        
    key=cv2.waitKey(round(1000/fps)) # time gap of frame and next frame
    if key==27: # If you press ESC key, While loop will be breaked and output file will be saved.
        print('ESC is pressed. Video recording ends.')
        break
    
print('Video recording ends. Release Memory.')  #Output file will be saved.
writer.release()
vid.release()
cv2.destroyAllWindows()

fps : 30.0


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  video_frm_ar=np.zeros((1, int(fps), 160, 160, 3), dtype=np.float) #frames


preds:[[0.6020629  0.39793712]]
Results = [[nan nan]]
Maximum Probability : nan



  results=np.array(Q)[:i].mean(axis=0)
  ret = um.true_divide(


preds:[[0.8116602  0.18833977]]
Results = [[0.6020629  0.39793712]]
Maximum Probability : 0.6020628809928894

preds:[[0.81166756 0.18833241]]
Results = [[0.7068615  0.29313844]]
Maximum Probability : 0.7068614959716797

preds:[[0.81166875 0.18833125]]
Results = [[0.7417968  0.25820312]]
Maximum Probability : 0.7417967915534973

preds:[[0.81028897 0.18971105]]
Results = [[0.7592648  0.24073514]]
Maximum Probability : 0.7592648267745972

preds:[[0.8110485  0.18895146]]
Results = [[0.7694696  0.23053034]]
Maximum Probability : 0.7694696187973022

preds:[[0.81168044 0.1883195 ]]
Results = [[0.8112668  0.18873318]]
Maximum Probability : 0.8112667798995972

preds:[[0.8119116  0.18808842]]
Results = [[0.81127083 0.18872914]]
Maximum Probability : 0.8112708330154419

preds:[[0.81186104 0.1881389 ]]
Results = [[0.81131965 0.18868032]]
Maximum Probability : 0.8113196492195129

preds:[[0.8121978  0.18780223]]
Results = [[0.8113581  0.18864188]]
Maximum Probability : 0.8113580942153931

preds:[[0.

KeyboardInterrupt: 