In [1]:
import cv2
import numpy as np

In [2]:
from ultralytics import YOLO

In [3]:
model_pose=YOLO('yolov8\yolov8x-pose-p6.pt')

In [4]:
class violence:
  def pose(self,frame):
    b=np.zeros_like(frame)
    res=model_pose(frame)
    plt=res[0].plot(img=b,labels=False, boxes=False, masks=False, probs=False)
    return plt
  def change(self,prev,curr):
    prev_gray = cv2.cvtColor(prev,cv2.COLOR_BGR2GRAY)
    mask = np.zeros_like(prev)
    mask[..., 1] = 255
    gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray,None,0.5, 3, 15, 3, 5, 1.2, 0)
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    mask[..., 0] = angle * 180 / np.pi / 2
    mask[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
    rgb = cv2.cvtColor(mask, cv2.COLOR_HSV2BGR)
    return rgb
  def fusion(self,rgb,cd):
    img = cv2.addWeighted(rgb, 0.4, cd, 0.6, 0)
    return img

In [5]:
import tensorflow as tf

In [6]:
new_model = tf.keras.models.load_model('result/violence.h5')

In [7]:
vgg_model = tf.keras.applications.VGG16(include_top=True, weights='imagenet')


In [8]:
transfer_layer = vgg_model.get_layer('fc2')

In [9]:
vgg_model_final = tf.keras.models.Model(inputs=vgg_model.input,
                             outputs=transfer_layer.output)

In [11]:
import pyfirmata
import time
port = 'COM3' 
board = pyfirmata.Arduino(port)
board.digital[11].mode=pyfirmata.PWM
alarm_pin = board.get_pin('d:11:o')
def trigger_alarm():
    alarm_pin.write(1)  
    time.sleep(2)
    alarm_pin.write(0) 
    time.sleep(1) 

In [18]:
cap=cv2.VideoCapture('fi87_xvid.avi')
v=violence()
count=1
res1=[]
while True:
    print(count)
    ret,frame=cap.read()
    prev=np.zeros_like(frame)
    if ret:
        pose=v.pose(frame)
        opt=v.change(prev,frame)
        fus=v.fusion(pose,opt)
        fus=fus[32:-32,68:-68]
        fus=fus.reshape(1,224,224,3)
        fus=vgg_model_final(fus)
        res1.append(fus[0])
        count+=1
        cv2.imshow('video',frame)
        if count>=21:
            res1=np.array(res1)
            res1=np.array(res1).reshape(1,20,4096)
            res=new_model.predict(res1)
            res1=res1[0,1:,:]
            res1 = res1.tolist()
            if res[0][0]>0.9906:
                print('violence')
                #for i in range(5):
                #    trigger_alarm()
            else:
                print("Non-violence")
    else:
        break
    if cv2.waitKey(1) & 0xFF == ord('q'): 
        break
    prev=frame
cap.release() 
cv2.destroyAllWindows() 




1


0: 1024x1280 3 persons, 15259.3ms
Speed: 30.2ms preprocess, 15259.3ms inference, 3.0ms postprocess per image at shape (1, 3, 1024, 1280)



2


0: 1024x1280 2 persons, 15169.1ms
Speed: 35.7ms preprocess, 15169.1ms inference, 2.2ms postprocess per image at shape (1, 3, 1024, 1280)



3


0: 1024x1280 2 persons, 17175.6ms
Speed: 28.2ms preprocess, 17175.6ms inference, 0.0ms postprocess per image at shape (1, 3, 1024, 1280)



4


0: 1024x1280 2 persons, 17703.6ms
Speed: 24.3ms preprocess, 17703.6ms inference, 2.5ms postprocess per image at shape (1, 3, 1024, 1280)



5


0: 1024x1280 2 persons, 18599.7ms
Speed: 38.1ms preprocess, 18599.7ms inference, 4.4ms postprocess per image at shape (1, 3, 1024, 1280)



6


0: 1024x1280 2 persons, 18374.6ms
Speed: 22.4ms preprocess, 18374.6ms inference, 3.0ms postprocess per image at shape (1, 3, 1024, 1280)



7


0: 1024x1280 2 persons, 18295.4ms
Speed: 31.2ms preprocess, 18295.4ms inference, 4.0ms postprocess per image at shape (1, 3, 1024, 1280)



8


0: 1024x1280 2 persons, 17822.2ms
Speed: 22.2ms preprocess, 17822.2ms inference, 3.0ms postprocess per image at shape (1, 3, 1024, 1280)



9


0: 1024x1280 1 person, 8462.2ms
Speed: 21.0ms preprocess, 8462.2ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



10


0: 1024x1280 2 persons, 7115.1ms
Speed: 14.5ms preprocess, 7115.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



11


0: 1024x1280 2 persons, 6923.9ms
Speed: 15.2ms preprocess, 6923.9ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)



12


0: 1024x1280 1 person, 7385.9ms
Speed: 13.0ms preprocess, 7385.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



13


0: 1024x1280 2 persons, 6923.0ms
Speed: 12.6ms preprocess, 6923.0ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)



14


0: 1024x1280 1 person, 6970.7ms
Speed: 13.6ms preprocess, 6970.7ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)



15


0: 1024x1280 1 person, 7146.3ms
Speed: 17.4ms preprocess, 7146.3ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



16


0: 1024x1280 2 persons, 7261.1ms
Speed: 13.7ms preprocess, 7261.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



17


0: 1024x1280 2 persons, 7571.3ms
Speed: 15.3ms preprocess, 7571.3ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)



18


0: 1024x1280 2 persons, 7651.8ms
Speed: 18.0ms preprocess, 7651.8ms inference, 3.0ms postprocess per image at shape (1, 3, 1024, 1280)



19


0: 1024x1280 2 persons, 7720.0ms
Speed: 14.1ms preprocess, 7720.0ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



20


0: 1024x1280 3 persons, 7584.3ms
Speed: 15.9ms preprocess, 7584.3ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
21


0: 1024x1280 3 persons, 7473.9ms
Speed: 19.5ms preprocess, 7473.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
22


0: 1024x1280 3 persons, 7535.1ms
Speed: 23.5ms preprocess, 7535.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
23


0: 1024x1280 3 persons, 7250.2ms
Speed: 20.5ms preprocess, 7250.2ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
24


0: 1024x1280 3 persons, 7383.7ms
Speed: 11.3ms preprocess, 7383.7ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
25


0: 1024x1280 3 persons, 7479.7ms
Speed: 12.0ms preprocess, 7479.7ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
26


0: 1024x1280 3 persons, 7395.9ms
Speed: 12.2ms preprocess, 7395.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
27


0: 1024x1280 3 persons, 7497.6ms
Speed: 13.3ms preprocess, 7497.6ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
28


0: 1024x1280 3 persons, 8032.5ms
Speed: 13.5ms preprocess, 8032.5ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
29


0: 1024x1280 3 persons, 7412.9ms
Speed: 17.5ms preprocess, 7412.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
30


0: 1024x1280 3 persons, 7341.8ms
Speed: 23.9ms preprocess, 7341.8ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
31


0: 1024x1280 3 persons, 7858.1ms
Speed: 14.2ms preprocess, 7858.1ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
32


0: 1024x1280 3 persons, 8919.5ms
Speed: 13.4ms preprocess, 8919.5ms inference, 3.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
33


0: 1024x1280 3 persons, 8265.2ms
Speed: 15.0ms preprocess, 8265.2ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
34


0: 1024x1280 3 persons, 7615.3ms
Speed: 13.6ms preprocess, 7615.3ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
35


0: 1024x1280 3 persons, 7437.1ms
Speed: 12.3ms preprocess, 7437.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
36


0: 1024x1280 3 persons, 7266.3ms
Speed: 13.5ms preprocess, 7266.3ms inference, 4.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
37


0: 1024x1280 3 persons, 7311.3ms
Speed: 13.0ms preprocess, 7311.3ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
38


0: 1024x1280 3 persons, 7343.5ms
Speed: 15.0ms preprocess, 7343.5ms inference, 0.6ms postprocess per image at shape (1, 3, 1024, 1280)







violence
39


0: 1024x1280 3 persons, 7377.1ms
Speed: 15.0ms preprocess, 7377.1ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
40


0: 1024x1280 3 persons, 7397.3ms
Speed: 12.5ms preprocess, 7397.3ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







violence
41


0: 1024x1280 3 persons, 7362.7ms
Speed: 12.4ms preprocess, 7362.7ms inference, 4.0ms postprocess per image at shape (1, 3, 1024, 1280)


violence
42


In [20]:
cap=cv2.VideoCapture('no471_xvid.avi')
v=violence()
count=1
res1=[]
while True:
    print(count)
    ret,frame=cap.read()
    prev=np.zeros_like(frame)
    if ret:
        pose=v.pose(frame)
        opt=v.change(prev,frame)
        fus=v.fusion(pose,opt)
        fus=fus[32:-32,68:-68]
        #fus=fus[128:-128,208:-208]
        fus=fus.reshape(1,224,224,3)
        fus=vgg_model_final(fus)
        res1.append(fus[0])
        count+=1
        cv2.imshow('video',frame)
        if count>=21:
            res1=np.array(res1)
            res1=np.array(res1).reshape(1,20,4096)
            res=new_model.predict(res1)
            res1=res1[0,1:,:]
            res1 = res1.tolist()
            if res[0][0]>0.9906:
                print('violence')
                for i in range(5):
                    trigger_alarm()
            else:
                print("Non-violence")
    else:
        break
    if cv2.waitKey(1) & 0xFF == ord('q'): 
        break
    prev=frame
cap.release() 
cv2.destroyAllWindows() 




1


0: 1024x1280 5 persons, 7054.6ms
Speed: 15.2ms preprocess, 7054.6ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



2


0: 1024x1280 5 persons, 7276.5ms
Speed: 13.5ms preprocess, 7276.5ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



3


0: 1024x1280 4 persons, 7591.1ms
Speed: 15.4ms preprocess, 7591.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



4


0: 1024x1280 4 persons, 7466.8ms
Speed: 17.0ms preprocess, 7466.8ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)



5


0: 1024x1280 5 persons, 7444.0ms
Speed: 12.2ms preprocess, 7444.0ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)



6


0: 1024x1280 4 persons, 7406.1ms
Speed: 16.1ms preprocess, 7406.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



7


0: 1024x1280 2 persons, 7557.5ms
Speed: 15.5ms preprocess, 7557.5ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



8


0: 1024x1280 1 person, 7440.6ms
Speed: 14.6ms preprocess, 7440.6ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



9


0: 1024x1280 3 persons, 7595.1ms
Speed: 18.5ms preprocess, 7595.1ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)



10


0: 1024x1280 3 persons, 7467.3ms
Speed: 13.9ms preprocess, 7467.3ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



11


0: 1024x1280 1 person, 7651.0ms
Speed: 13.7ms preprocess, 7651.0ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



12


0: 1024x1280 1 person, 7583.3ms
Speed: 16.7ms preprocess, 7583.3ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



13


0: 1024x1280 2 persons, 7559.1ms
Speed: 18.4ms preprocess, 7559.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



14


0: 1024x1280 2 persons, 7505.7ms
Speed: 14.1ms preprocess, 7505.7ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



15


0: 1024x1280 3 persons, 7691.9ms
Speed: 14.7ms preprocess, 7691.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



16


0: 1024x1280 2 persons, 7663.1ms
Speed: 14.8ms preprocess, 7663.1ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



17


0: 1024x1280 2 persons, 7556.9ms
Speed: 14.0ms preprocess, 7556.9ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)



18


0: 1024x1280 2 persons, 7592.9ms
Speed: 22.0ms preprocess, 7592.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



19


0: 1024x1280 3 persons, 7580.2ms
Speed: 14.0ms preprocess, 7580.2ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)



20


0: 1024x1280 4 persons, 7618.4ms
Speed: 16.4ms preprocess, 7618.4ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
21


0: 1024x1280 3 persons, 7506.2ms
Speed: 13.5ms preprocess, 7506.2ms inference, 3.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
22


0: 1024x1280 3 persons, 7629.2ms
Speed: 14.0ms preprocess, 7629.2ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
23


0: 1024x1280 3 persons, 7735.4ms
Speed: 19.1ms preprocess, 7735.4ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
24


0: 1024x1280 3 persons, 7612.8ms
Speed: 18.6ms preprocess, 7612.8ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
25


0: 1024x1280 3 persons, 7582.2ms
Speed: 13.6ms preprocess, 7582.2ms inference, 4.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
26


0: 1024x1280 3 persons, 7571.9ms
Speed: 14.5ms preprocess, 7571.9ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
27


0: 1024x1280 3 persons, 7540.6ms
Speed: 14.0ms preprocess, 7540.6ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
28


0: 1024x1280 3 persons, 7604.6ms
Speed: 15.0ms preprocess, 7604.6ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
29


0: 1024x1280 4 persons, 7648.5ms
Speed: 13.7ms preprocess, 7648.5ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
30


0: 1024x1280 3 persons, 7665.7ms
Speed: 13.4ms preprocess, 7665.7ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
31


0: 1024x1280 2 persons, 7740.3ms
Speed: 13.0ms preprocess, 7740.3ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
32


0: 1024x1280 3 persons, 7606.7ms
Speed: 12.4ms preprocess, 7606.7ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
33


0: 1024x1280 3 persons, 7474.2ms
Speed: 20.0ms preprocess, 7474.2ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
34


0: 1024x1280 4 persons, 7573.5ms
Speed: 14.7ms preprocess, 7573.5ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
35


0: 1024x1280 4 persons, 8048.0ms
Speed: 16.8ms preprocess, 8048.0ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
36


0: 1024x1280 4 persons, 7673.9ms
Speed: 14.2ms preprocess, 7673.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
37


0: 1024x1280 4 persons, 7729.9ms
Speed: 13.4ms preprocess, 7729.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
38


0: 1024x1280 5 persons, 7474.4ms
Speed: 13.2ms preprocess, 7474.4ms inference, 4.5ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
39


0: 1024x1280 5 persons, 7640.9ms
Speed: 18.9ms preprocess, 7640.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
40


0: 1024x1280 4 persons, 7733.3ms
Speed: 14.1ms preprocess, 7733.3ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1280)







Non-violence
41


0: 1024x1280 4 persons, 8201.5ms
Speed: 17.0ms preprocess, 8201.5ms inference, 2.5ms postprocess per image at shape (1, 3, 1024, 1280)


Non-violence
42


In [14]:
cap=cv2.VideoCapture(0)
v=violence()
count=1
res1=[]
while True:
    print(count)
    ret,frame=cap.read()
    prev=np.zeros_like(frame)
    if ret:
        pose=v.pose(frame)
        opt=v.change(prev,frame)
        fus=v.fusion(pose,opt)
        fus=fus[128:-128,208:-208]
        print(fus.shape)
        fus=fus.reshape(1,224,224,3)
        fus=vgg_model_final(fus)
        print(fus.shape)
        res1.append(fus[0])
        count+=1
        cv2.imshow('video',frame)
        if count>=21:
            res1=np.array(res1)
            print(res1.shape)
            res1=np.array(res1).reshape(1,20,4096)
            res=new_model.predict(res1)
            res1=res1[0,1:,:]
            res1 = res1.tolist()
            print(res)
            if res[0][0]>0.9907:
                print('violence')
                print(res)
                for i in range(5):
                     trigger_alarm()
            else:
                print("Non-violence")
    else:
        break
    if cv2.waitKey(1) & 0xFF == ord('q'): 
        break
    prev=frame
cap.release() 
cv2.destroyAllWindows() 

1



0: 960x1280 1 person, 15977.1ms
Speed: 39.1ms preprocess, 15977.1ms inference, 0.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
2


0: 960x1280 1 person, 16299.8ms
Speed: 21.9ms preprocess, 16299.8ms inference, 5.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
3


0: 960x1280 1 person, 16529.9ms
Speed: 31.6ms preprocess, 16529.9ms inference, 2.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
4


0: 960x1280 1 person, 15972.5ms
Speed: 36.5ms preprocess, 15972.5ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
5


0: 960x1280 1 person, 16221.9ms
Speed: 25.2ms preprocess, 16221.9ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
6


0: 960x1280 1 person, 16139.9ms
Speed: 22.2ms preprocess, 16139.9ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
7


0: 960x1280 1 person, 16165.3ms
Speed: 19.8ms preprocess, 16165.3ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
8


0: 960x1280 1 person, 16079.3ms
Speed: 22.4ms preprocess, 16079.3ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
9


0: 960x1280 1 person, 15670.2ms
Speed: 22.5ms preprocess, 15670.2ms inference, 2.2ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
10


0: 960x1280 1 person, 16350.4ms
Speed: 22.5ms preprocess, 16350.4ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
11


0: 960x1280 1 person, 16329.8ms
Speed: 21.0ms preprocess, 16329.8ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
12


0: 960x1280 1 person, 16071.0ms
Speed: 27.2ms preprocess, 16071.0ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
13


0: 960x1280 1 person, 16188.7ms
Speed: 21.1ms preprocess, 16188.7ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
14


0: 960x1280 1 person, 16104.0ms
Speed: 32.7ms preprocess, 16104.0ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
15


0: 960x1280 1 person, 15587.4ms
Speed: 37.5ms preprocess, 15587.4ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
16


0: 960x1280 1 person, 16081.9ms
Speed: 24.1ms preprocess, 16081.9ms inference, 4.3ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
17


0: 960x1280 1 person, 15766.2ms
Speed: 25.4ms preprocess, 15766.2ms inference, 1.3ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
18


0: 960x1280 1 person, 12371.6ms
Speed: 22.5ms preprocess, 12371.6ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
19


0: 960x1280 1 person, 15394.9ms
Speed: 20.9ms preprocess, 15394.9ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)





(1, 4096)
20


0: 960x1280 1 person, 16104.9ms
Speed: 20.6ms preprocess, 16104.9ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99001   0.0099854]]
Non-violence
21


0: 960x1280 1 person, 15661.5ms
Speed: 26.2ms preprocess, 15661.5ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99051   0.0094865]]
Non-violence
22


0: 960x1280 1 person, 16075.4ms
Speed: 25.1ms preprocess, 16075.4ms inference, 2.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99054   0.0094606]]
Non-violence
23



0: 960x1280 1 person, 15984.6ms
Speed: 22.9ms preprocess, 15984.6ms inference, 2.5ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[     0.9908   0.0091961]]
violence
[[     0.9908   0.0091961]]





24


0: 960x1280 1 person, 15458.9ms
Speed: 35.6ms preprocess, 15458.9ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99072   0.0092844]]
violence
[[    0.99072   0.0092844]]





25


0: 960x1280 1 person, 16229.3ms
Speed: 34.8ms preprocess, 16229.3ms inference, 3.9ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99098   0.0090195]]
violence
[[    0.99098   0.0090195]]





26


0: 960x1280 1 person, 15970.7ms
Speed: 37.6ms preprocess, 15970.7ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99101   0.0089946]]
violence
[[    0.99101   0.0089946]]





27


0: 960x1280 1 person, 15938.8ms
Speed: 36.4ms preprocess, 15938.8ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[      0.991   0.0090012]]
violence
[[      0.991   0.0090012]]





28


0: 960x1280 1 person, 16065.5ms
Speed: 28.2ms preprocess, 16065.5ms inference, 2.4ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[     0.9907   0.0093038]]
Non-violence
29


0: 960x1280 1 person, 15679.5ms
Speed: 23.8ms preprocess, 15679.5ms inference, 3.4ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99079   0.0092136]]
violence
[[    0.99079   0.0092136]]





30


0: 960x1280 1 person, 15991.7ms
Speed: 36.0ms preprocess, 15991.7ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99091   0.0090888]]
violence
[[    0.99091   0.0090888]]





31


0: 960x1280 (no detections), 16188.0ms
Speed: 41.3ms preprocess, 16188.0ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[     0.9906   0.0093993]]
Non-violence
32


0: 960x1280 (no detections), 16743.6ms
Speed: 22.9ms preprocess, 16743.6ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99059   0.0094129]]
Non-violence
33


0: 960x1280 (no detections), 15386.8ms
Speed: 26.3ms preprocess, 15386.8ms inference, 3.5ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[     0.9905   0.0094989]]
Non-violence
34


0: 960x1280 1 person, 15992.1ms
Speed: 18.7ms preprocess, 15992.1ms inference, 2.3ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99046   0.0095395]]
Non-violence
35


0: 960x1280 1 person, 15708.3ms
Speed: 26.7ms preprocess, 15708.3ms inference, 2.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99057   0.0094284]]
Non-violence





36


0: 960x1280 1 person, 16260.5ms
Speed: 21.7ms preprocess, 16260.5ms inference, 2.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99079   0.0092148]]
violence
[[    0.99079   0.0092148]]





37


0: 960x1280 1 person, 15757.6ms
Speed: 35.2ms preprocess, 15757.6ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99093   0.0090692]]
violence
[[    0.99093   0.0090692]]





38


0: 960x1280 1 person, 16103.4ms
Speed: 41.6ms preprocess, 16103.4ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[     0.9906   0.0093966]]
Non-violence
39


0: 960x1280 1 person, 15105.0ms
Speed: 20.9ms preprocess, 15105.0ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99097   0.0090311]]
violence
[[    0.99097   0.0090311]]





40


0: 960x1280 1 person, 16415.0ms
Speed: 35.3ms preprocess, 16415.0ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99087   0.0091259]]
violence
[[    0.99087   0.0091259]]





41


0: 960x1280 (no detections), 16145.0ms
Speed: 45.2ms preprocess, 16145.0ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99054   0.0094643]]
Non-violence
42


0: 960x1280 (no detections), 16040.6ms
Speed: 20.3ms preprocess, 16040.6ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99051   0.0094852]]
Non-violence
43


0: 960x1280 (no detections), 15551.6ms
Speed: 20.8ms preprocess, 15551.6ms inference, 2.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99059   0.0094077]]
Non-violence
44


0: 960x1280 1 person, 15941.7ms
Speed: 16.3ms preprocess, 15941.7ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99084   0.0091644]]
violence
[[    0.99084   0.0091644]]





45


0: 960x1280 1 person, 16066.3ms
Speed: 42.5ms preprocess, 16066.3ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99093   0.0090681]]
violence
[[    0.99093   0.0090681]]





46


0: 960x1280 1 person, 15855.0ms
Speed: 35.5ms preprocess, 15855.0ms inference, 4.6ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99053   0.0094652]]
Non-violence
47


0: 960x1280 1 person, 15564.3ms
Speed: 17.4ms preprocess, 15564.3ms inference, 3.5ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99057   0.0094268]]
Non-violence
48



0: 960x1280 1 person, 15880.0ms
Speed: 21.7ms preprocess, 15880.0ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99029   0.0097052]]





Non-violence
49


0: 960x1280 1 person, 15848.1ms
Speed: 25.1ms preprocess, 15848.1ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99055   0.0094503]]
Non-violence
50



0: 960x1280 1 person, 15812.4ms
Speed: 34.7ms preprocess, 15812.4ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99051   0.0094875]]
Non-violence
51


0: 960x1280 1 person, 16042.5ms
Speed: 27.8ms preprocess, 16042.5ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99052   0.0094751]]
Non-violence
52


0: 960x1280 1 person, 15811.9ms
Speed: 23.3ms preprocess, 15811.9ms inference, 2.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99069   0.0093137]]
Non-violence
53



0: 960x1280 2 persons, 15710.9ms
Speed: 26.5ms preprocess, 15710.9ms inference, 5.1ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99075   0.0092539]]
violence
[[    0.99075   0.0092539]]





54


0: 960x1280 2 persons, 15145.0ms
Speed: 24.4ms preprocess, 15145.0ms inference, 3.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99096   0.0090401]]
violence
[[    0.99096   0.0090401]]





55


0: 960x1280 1 person, 15700.5ms
Speed: 45.8ms preprocess, 15700.5ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99041   0.0095868]]
Non-violence
56


0: 960x1280 1 person, 15687.8ms
Speed: 20.8ms preprocess, 15687.8ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99047   0.0095298]]
Non-violence
57



0: 960x1280 (no detections), 12982.7ms
Speed: 20.4ms preprocess, 12982.7ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)





[[    0.99061   0.0093873]]
Non-violence
58


0: 960x1280 1 person, 13747.0ms
Speed: 19.7ms preprocess, 13747.0ms inference, 4.0ms postprocess per image at shape (1, 3, 960, 1280)


(224, 224, 3)
(1, 4096)
(20, 4096)
[[    0.99092   0.0090838]]
violence
[[    0.99092   0.0090838]]


SerialException: WriteFile failed (PermissionError(13, 'The device does not recognize the command.', None, 22))