# 06. Actual Use : Violence detection for laptop webcam streaming
* By using MobileNet base model & trained LSTM model, we can detect violent behavior of streaming video(laptop webcam)
* **`Before run this file, Please check this`**:
    * 01_video-to-numpy-save.ipynb
    * 02_create-numpy-datasets_training-test.ipynb
    * 03_MobileNet.ipynb
    * 04_MobileNet_LSTM_model.ipynb
* **`Are those files exist on there?`** Those files were made by 01~04_MobileNet.ipynb files.
    * Trained LSTM model : 210512_MobileNet_model_epoch100.h5

# Imports

In [None]:
import cv2 # openCV 4.5.1
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import time 

from skimage.io import imread
from skimage.transform import resize 
from PIL import Image, ImageFont, ImageDraw # add caption by using custom font

from collections import deque

# 06-A. Load Model Files
* **`base_model`** : MobileNet
* **`model`** : trained LSTM model file. `210512_MobileNet_model_epoch100.h5`

## 1. base_model : MobileNet

In [None]:
base_model=keras.applications.mobilenet.MobileNet(input_shape=(160, 160, 3),
                                                  include_top=False,
                                                  weights='imagenet', classes=2)

## 2. model : trained LSTM model(.h5)

In [None]:
model=keras.models.load_model('MobileNet_model_retrained2.h5')

# 06-B. Add caption to streaming screen & Save output video file

## 1. Setting : Input path & Output path
* **`input_path`** : input laptop webcam
* **`output_path`** : You'll save output video file in output_path.

In [None]:
input_path=0

In [None]:
output_path='output04.mp4'

## 2. Distinguish Violence True or False & Add caption on Video file

In [10]:
vid=cv2.VideoCapture(input_path)
fps=vid.get(cv2.CAP_PROP_FPS) # recognize frames per secone(fps) of input_path video file.
print(f'fps : {fps}') # print fps.

writer=None
(W, H)=(None, None)
i=0 # number of seconds in video = The number of times that how many operated while loop .
Q=deque(maxlen=128) 

video_frm_ar=np.zeros((1, int(fps), 160, 160, 3), dtype=np.float64) #frames
frame_counter=0 # frame number in 1 second. 1~30
frame_list=[] 
preds=None
maxprob=None

#. While loop : Until the end of input video, it read frame, extract features, predict violence True or False.
# ----- Reshape & Save frame img as (30, 160, 160, 3) Numpy array  -----
while True: 
    frame_counter+=1
    grabbed, frm=vid.read()  # read each frame img. grabbed=True, frm=frm img. ex: (240, 320, 3)
    
    if not grabbed:
        print('There is no frame. Streaming ends.')
        break
            
    if fps!=30: 
        print('Please set fps=30')
        break
        
    if W is None or H is None: # W: width, H: height of frame img
        (H, W)=frm.shape[:2]
            
    output=frm.copy() # It is necessary for streaming captioned output video, and to save that.
    
    frame=resize(frm, (160, 160, 3)) #> Resize frame img array to (160, 160, 3)
    frame_list.append(frame) # Append each frame img Numpy array : element is (160, 160, 3) Numpy array.
    
    if frame_counter>=fps: # fps=30 et al
        #. ----- we'll predict violence True or False every 30 frame -----
        #. ----- Insert (1, 30, 160, 160, 3) Numpy array to LSTM model ---
        #. ----- We'll renew predict result caption on output video every 1 second. -----
        # 30-element-appended list -> Transform to Numpy array -> Predict -> Initialize list (repeat)
        frame_ar=np.array(frame_list, dtype=np.float16) #> (30, 160, 160, 3)
        frame_list=[] # Initialize frame list when frame_counter is same or exceed 30, after transforming to Numpy array.
            
        if(np.max(frame_ar)>1): # Scaling RGB value in Numpy array
            frame_ar=frame_ar/255.0
            
        pred_imgarr=base_model.predict(frame_ar) #> Extract features from each frame img by using MobileNet. (30, 5, 5, 1024)
        pred_imgarr_dim=pred_imgarr.reshape(1, pred_imgarr.shape[0], 5*5*1024)#> (1, 30, 25600)
        
        preds=model.predict(pred_imgarr_dim) #> (True, 0.99) : (Violence True or False, Probability of Violence)
        print(f'preds:{preds}')
        Q.append(preds) #> Deque Q
    
        # Predict Result : Average of Violence probability in last 5 second
        if i<5:
            results=np.array(Q)[:i].mean(axis=0)
        else:
            results=np.array(Q)[(i-5):i].mean(axis=0)
        
        print(f'Results = {results}') #> ex : (0.6, 0.650)
            
        maxprob=np.max(results) #> Select Maximum Probability
        print(f'Maximum Probability : {maxprob}')
        print('')
            
        rest=1-maxprob # Probability of Non-Violence
        diff=maxprob-rest # Difference between Probability of Violence and Non-Violence's
        th=100
            
        if diff>0.50:
            th=diff # ?? What is supporting basis?
        
        frame_counter=0 #> Initialize frame_counter to 0
        i+=1 #> 1 second elapsed
        
        # When frame_counter>=30, Initialize frame_counter to 0, and repeat above while loop.
                
    # ----- Setting caption option of output video -----
    # Renewed caption is added every 30 frames(if fps=30, it means 1 second.)
    font1=ImageFont.truetype('D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\font\\ARLRDBD.TTF', 24) # font option
    font2=ImageFont.truetype('D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\font\\ARLRDBD.TTF', 48) # font option
    
    if preds is not None and maxprob is not None:
        if (preds[0][1])<th : #> if violence probability < th, Violence=False (Normal, Green Caption)
            text1_1='Normal'
            text1_2='{:.2f}%'.format(100-(maxprob*100))
            img_pil=Image.fromarray(output)
            draw=ImageDraw.Draw(img_pil)
            draw.text((int(0.025*W), int(0.025*H)), text1_1, font=font1, fill=(0,255,0,0))
            draw.text((int(0.025*W), int(0.095*H)), text1_2, font=font2, fill=(0,255,0,0))
            output=np.array(img_pil)
                
        else : #> if violence probability > th, Violence=True (Violence Alert!, Red Caption)
            text2_1='Violence Alert!'
            text2_2='{:.2f}%'.format(maxprob*100)
            img_pil=Image.fromarray(output)
            draw=ImageDraw.Draw(img_pil)
            draw.text((int(0.025*W), int(0.025*H)), text2_1, font=font1, fill=(0,0,255,0))
            draw.text((int(0.025*W), int(0.095*H)), text2_2, font=font2, fill=(0,0,255,0))
            output=np.array(img_pil) 
        
    # Save captioned video file by using 'writer'
    if writer is None:
        fourcc=cv2.VideoWriter_fourcc(*'DIVX')
        writer=cv2.VideoWriter(output_path, fourcc, 30, (W, H), True)
            
    cv2.imshow('This is output', output) # View output in new Window.
    writer.write(output) # Save output in output_path
        
    key=cv2.waitKey(1) # time gap of frame and next frame
    if key==27: # If you press ESC key, While loop will be breaked and output file will be saved.
        print('ESC is pressed. Video recording ends.')
        break
    
print('Video recording ends. Release Memory.')  #Output file will be saved.
writer.release()
vid.release()
cv2.destroyAllWindows()

fps : 30.0
preds:[[0.0499832  0.95001674]]
Results = [[nan nan]]
Maximum Probability : nan



  results=np.array(Q)[:i].mean(axis=0)


preds:[[0.00486751 0.9951325 ]]
Results = [[0.0499832  0.95001674]]
Maximum Probability : 0.9500167369842529

preds:[[0.00567545 0.9943245 ]]
Results = [[0.02742536 0.9725746 ]]
Maximum Probability : 0.9725745916366577

preds:[[0.01049869 0.9895013 ]]
Results = [[0.02017539 0.97982454]]
Maximum Probability : 0.9798245429992676

preds:[[0.01098603 0.98901397]]
Results = [[0.01775621 0.9822437 ]]
Maximum Probability : 0.9822437167167664

preds:[[9.995252e-01 4.748577e-04]]
Results = [[0.01640218 0.98359776]]
Maximum Probability : 0.9835977554321289

preds:[[0.99883884 0.0011612 ]]
Results = [[0.20631059 0.7936894 ]]
Maximum Probability : 0.7936894297599792

preds:[[0.0076413  0.99235874]]
Results = [[0.40510482 0.5948952 ]]
Maximum Probability : 0.5948951840400696

preds:[[0.00337266 0.9966273 ]]
Results = [[0.40549803 0.59450203]]
Maximum Probability : 0.5945020318031311

preds:[[0.0065195  0.99348056]]
Results = [[0.40407282 0.59592724]]
Maximum Probability : 0.5959272384643555

preds: