## Effective Version - Batch-wise processing (DeQueue method)

In [1]:
# Import modules
import numpy as np
import imutils
import sys
import cv2


# load the contents of the class labels file, then define the sample
# duration (i.e., # of frames for classification) and sample size
# (i.e., the spatial dimensions of the frame)
CLASSES = open('action_recognition_kinetics.txt').read().strip().split("\n")
SAMPLE_DURATION = 16
SAMPLE_SIZE = 112

# load the human activity recognition model
print("[INFO] loading human activity recognition model...")
net = cv2.dnn.readNet('resnet-34_kinetics.onnx')

# grab a pointer to the input video stream
print("[INFO] accessing video stream...")
cap = cv2.VideoCapture('bike-2.mp4')

# loop until we explicitly break from it
while True:
    frames = []
    # Loop for every 'SAMPLE_DURATION' frames
    for i in range(0, SAMPLE_DURATION):
        # read each frame
        grabbed, frame = cap.read()

        # If no frame exists or end of the video.
        if not grabbed:
            print("[Info] No frame read from stream - exiting")
            sys.exit(0)
        # Not end of the video
        frame = imutils.resize(frame, width=400)
        frames.append(frame)

    # now that our frames array is filled we can construct our blob
    blob = cv2.dnn.blobFromImages(frames, 1.0, (SAMPLE_SIZE, SAMPLE_SIZE), (114.7748, 107.7354, 99.4750),
                                  swapRB=True, crop=True)
    blob = np.transpose(blob, (1, 0, 2, 3))
    blob = np.expand_dims(blob, axis=0)

    # pass the blob through the model to obtain our human activity recognition predictions!!
    net.setInput(blob)
    outputs = net.forward()
    label = CLASSES[np.argmax(outputs)]

    # loop over our frames
    for frame in frames:
        # draw the predicted activity on the frame
        cv2.rectangle(frame, (0, 0), (300, 40), (0, 0, 0), -1)
        cv2.putText(frame, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX,
            0.8, (255, 255, 255), 2)

        # display the frame to our screen
        cv2.imshow("Activity Recognition", frame)
        if cv2.waitKey(1)==ord('q'):
                        break
cap.release()
cv2.destroyAllWindows()


[INFO] loading human activity recognition model...
[INFO] accessing video stream...
[Info] No frame read from stream - exiting


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


## Less Effective Version- Frame by Frame processing

In [1]:
# Import necessary packages
from collections import deque
import numpy as np
import argparse
import imutils
import cv2
import matplotlib.pyplot as plt

* Warning images to Overlay
* Helper Function

In [2]:
sm = cv2.imread('./disclaimers-new/capture4.png')
ca = cv2.imread('./disclaimers-new/capture.png')
bi = cv2.imread('./disclaimers-new/capture5.png')
beer = cv2.imread('./disclaimers-new/capture3.png')
def image_disclaimer(frame, label):
    smoking = ['smoking hookah','smoking']
    car = ['driving car']
    bike = ['riding a bike']
    drinking = ['tasting beer']
    if(label in smoking):
        img1 = frame
        wm = sm
        wm =cv2.resize(wm,(img1.shape[1]//3+100,img1.shape[0]//3))

        x_offset=img1.shape[1]-wm.shape[1]
        y_offset=img1.shape[0]-wm.shape[0]
        roi = img1[y_offset:img1.shape[0],x_offset:img1.shape[1]]
        
        img2gray = cv2.cvtColor(wm,cv2.COLOR_BGR2GRAY)
        mask_inv = cv2.bitwise_not(img2gray)
        white_background = np.full(wm.shape, 255, dtype=np.uint8)
        bk = cv2.bitwise_or(white_background, white_background, mask=mask_inv)
        final_roi = cv2.bitwise_or(roi,bk)

        large_img = img1
        small_img = final_roi
        large_img[y_offset:y_offset+small_img.shape[0], x_offset:x_offset+small_img.shape[1]] = small_img
    elif(label in car):
        img1 = frame
        wm = ca
        wm =cv2.resize(wm,(img1.shape[1]//3+100,img1.shape[0]//3))

        x_offset=img1.shape[1]-wm.shape[1]
        y_offset=img1.shape[0]-wm.shape[0]
        roi = img1[y_offset:img1.shape[0],x_offset:img1.shape[1]]
        
        img2gray = cv2.cvtColor(wm,cv2.COLOR_BGR2GRAY)
        mask_inv = cv2.bitwise_not(img2gray)
        white_background = np.full(wm.shape, 255, dtype=np.uint8)
        bk = cv2.bitwise_or(white_background, white_background, mask=mask_inv)
        final_roi = cv2.bitwise_or(roi,bk)

        large_img = img1
        small_img = final_roi
        large_img[y_offset:y_offset+small_img.shape[0], x_offset:x_offset+small_img.shape[1]] = small_img
    elif(label in bike):
        img1 = frame
        wm = bi
        wm =cv2.resize(wm,(img1.shape[1]//3+100,img1.shape[0]//3))

        x_offset=img1.shape[1]-wm.shape[1]
        y_offset=img1.shape[0]-wm.shape[0]
        roi = img1[y_offset:img1.shape[0],x_offset:img1.shape[1]]
        
        img2gray = cv2.cvtColor(wm,cv2.COLOR_BGR2GRAY)
        mask_inv = cv2.bitwise_not(img2gray)
        white_background = np.full(wm.shape, 255, dtype=np.uint8)
        bk = cv2.bitwise_or(white_background, white_background, mask=mask_inv)
        final_roi = cv2.bitwise_or(roi,bk)

        large_img = img1
        small_img = final_roi
        large_img[y_offset:y_offset+small_img.shape[0], x_offset:x_offset+small_img.shape[1]] = small_img
    elif(label in drinking):
        img1 = frame
        wm = beer
        wm =cv2.resize(wm,(img1.shape[1]//3+100,img1.shape[0]//3))

        x_offset=img1.shape[1]-wm.shape[1]
        y_offset=img1.shape[0]-wm.shape[0]
        roi = img1[y_offset:img1.shape[0],x_offset:img1.shape[1]]
        
        img2gray = cv2.cvtColor(wm,cv2.COLOR_BGR2GRAY)
        mask_inv = cv2.bitwise_not(img2gray)
        white_background = np.full(wm.shape, 255, dtype=np.uint8)
        bk = cv2.bitwise_or(white_background, white_background, mask=mask_inv)
        final_roi = cv2.bitwise_or(roi,bk)

        large_img = img1
        small_img = final_roi
        large_img[y_offset:y_offset+small_img.shape[0], x_offset:x_offset+small_img.shape[1]] = small_img
    
    return large_img

* Reading input from webcam/Video file
* Display Output

In [None]:
CLASSES = open('action_recognition_kinetics.txt').read().strip().split("\n")
SAMPLE_DURATION = 16
SAMPLE_SIZE = 112

frames = deque(maxlen=SAMPLE_DURATION)

print("[INFO] loading human activity recognition model...")
net = cv2.dnn.readNet('resnet-34_kinetics.onnx')

print("[INFO] accessing video stream...")
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('car.mp4')

smoking = ['smoking hookah','smoking']
car = ['driving car']
bike = ['riding a bike']
drinking = ['tasting beer']
count = 0
flag = False
while True:
    # read a frame from the video stream
    (grabbed, frame) = cap.read()

    # if end of the video, exit
    if not grabbed:
        print("[INFO] no frame read from stream - exiting")
        break
    frame = imutils.resize(frame, width=400)
    frames.append(frame)

    if len(frames) < SAMPLE_DURATION:
        continue

    # now that our frames array is filled we can construct our blob
    blob = cv2.dnn.blobFromImages(frames, 1.0,
        (SAMPLE_SIZE, SAMPLE_SIZE), (114.7748, 107.7354, 99.4750),
        swapRB=True, crop=True)
    blob = np.transpose(blob, (1, 0, 2, 3))
    blob = np.expand_dims(blob, axis=0)

    net.setInput(blob)
    outputs = net.forward()
    label = CLASSES[np.argmax(outputs)]
    
    # draw the predicted activity on the frame
    if(label in smoking):
        print(label)
        frame = image_disclaimer(frame, label)
    elif(label in car):
        print(label)
        frame = image_disclaimer(frame, label)
    elif(label in bike):
        print(label)
        frame = image_disclaimer(frame, label)
    elif(label in drinking):
        print(label)
        frame = image_disclaimer(frame, label)
        
    cv2.imshow("Activity Recognition", frame)
    if cv2.waitKey(1)==ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

In [7]:
import jovian
jovian.commit(environment=None)

<IPython.core.display.Javascript object>

[jovian] Attempting to save notebook..
[jovian] Uploading notebook..
[jovian] Committed successfully! https://jovian.ml/v-snehith999/movie-cautioning-system


'https://jovian.ml/v-snehith999/movie-cautioning-system'