In [1]:
import cv2
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy



# Number of consecutive frames read from the start of each video clip.
num_frames = 16
# Number of videos taken from each class directory (Violence / NonViolence).
num_samples = 500





## Preparing dataset for training

In [7]:
# Shared buffer for both classes: rows [0, num_samples) are filled by this
# cell (Violence, label 1); rows [num_samples, 2 * num_samples) are filled by
# the NonViolence cell that follows.
video_data = np.zeros((2 * num_samples, num_frames, 128, 128, 3), dtype=np.float32)
output_data = []

video_path = 'Real Life Violence Dataset/Violence'
list1 = os.listdir(video_path)[:num_samples]
if len(list1) < num_samples:
    # Fail with a clear message instead of an IndexError inside the loop.
    raise ValueError(
        f"expected at least {num_samples} videos in {video_path!r}, found {len(list1)}"
    )

for i in range(num_samples):
    video = cv2.VideoCapture(os.path.join(video_path, list1[i]))
    frames = np.zeros((num_frames, 128, 128, 3), dtype=np.float32)

    try:
        for j in range(num_frames):
            ret, frame = video.read()
            if not ret:
                # No frame was decoded (clip shorter than num_frames or
                # unreadable): leave this slot zero-filled, as the
                # NonViolence loader does, instead of passing None to resize.
                continue
            # Resize to 128x128 and scale pixel values to [0, 1].
            frames[j] = cv2.resize(frame, (128, 128)).astype(np.float32) / 255.0
    finally:
        # Free the decoder handle even if a frame fails to process.
        video.release()

    video_data[i] = frames
    output_data.append(1)



In [8]:
# Load the NonViolence training clips into the second half of `video_data`
# (rows num_samples .. 2 * num_samples - 1) and label each clip 0.
vid_path = 'Real Life Violence Dataset/NonViolence'
list2 = os.listdir(vid_path)[:num_samples]
if len(list2) < num_samples:
    # Fail with a clear message instead of an IndexError inside the loop.
    raise ValueError(
        f"expected at least {num_samples} videos in {vid_path!r}, found {len(list2)}"
    )

for i in range(num_samples):
    video = cv2.VideoCapture(os.path.join(vid_path, list2[i]))
    frames = np.zeros((num_frames, 128, 128, 3), dtype=np.float32)

    try:
        for j in range(num_frames):
            ret, frame = video.read()
            if not ret:
                # No frame decoded: keep this slot zero-filled.
                continue
            # Resize to 128x128 and scale pixel values to [0, 1].
            frames[j] = cv2.resize(frame, (128, 128)).astype(np.float32) / 255.0
    finally:
        # Free the decoder handle; the original never released the capture.
        video.release()

    video_data[i + num_samples] = frames
    output_data.append(0)



In [9]:
# Features and labels exactly as produced by the two loading cells.
X, y = video_data, output_data

# Standardise with a single scalar mean / std taken over the whole array;
# `mean` and `std` are reused later to scale the test set.
mean, std = np.mean(X), np.std(X)
X = (X - mean) / std

print(X.shape)
#print(y.shape)





(1000, 16, 128, 128, 3)


In [10]:
# Labels as a float32 vector, one entry per loaded clip.
y = np.asarray(output_data, dtype=np.float32)
print(y.shape)

(1000,)


## Model Training


In [6]:
from tensorflow.keras.layers import Reshape ,BatchNormalization,Dropout

# Binary clip classifier: two Conv3D/MaxPooling3D stages, a dense projection
# to 128 features, then two stacked LSTMs over a length-1 sequence and a
# sigmoid output unit.
model = tf.keras.models.Sequential([
    Conv3D(128, kernel_size=(3, 3, 3), activation='relu',
           input_shape=(num_frames, 128, 128, 3)),
    MaxPooling3D(pool_size=(2, 2, 2)),

    Conv3D(64, kernel_size=(3, 3, 3), activation='relu'),
    MaxPooling3D(pool_size=(2, 2, 2)),
    BatchNormalization(),

    Dropout(0.5),
    Flatten(),
    Dense(128, activation='relu'),

    # Present the 128-d vector to the LSTMs as a single time step.
    Reshape((1, 128)),
    LSTM(128, return_sequences=True),
    LSTM(64),

    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 14, 126, 126, 128  10496     
                             )                                   
                                                                 
 max_pooling3d (MaxPooling3D  (None, 7, 63, 63, 128)   0         
 )                                                               
                                                                 
 conv3d_1 (Conv3D)           (None, 5, 61, 61, 64)     221248    
                                                                 
 max_pooling3d_1 (MaxPooling  (None, 2, 30, 30, 64)    0         
 3D)                                                             
                                                                 
 batch_normalization (BatchN  (None, 2, 30, 30, 64)    256       
 ormalization)                                          

In [11]:
# Keep the per-clip frame axis. The network is declared with
# input_shape=(num_frames, 128, 128, 3); flattening to (-1, 128, 128, 3)
# would yield num_frames times as many rank-4 samples as there are labels,
# which model.fit rejects.
X = np.asarray(X).reshape(-1, num_frames, 128, 128, 3)

y = np.asarray(y)

if X.shape[0] != y.shape[0]:
    raise ValueError(
        f"sample count mismatch: X has {X.shape[0]} clips, y has {y.shape[0]} labels"
    )

In [None]:
# Train for 10 passes over the data in mini-batches of 32 clips.
model.fit(x=X, y=y, batch_size=32, epochs=10)

## Preparing dataset for testing

In [19]:
# Shared test buffer for both classes: rows [0, num_samples) are filled by
# this cell (Violence, label 1); rows [num_samples, 2 * num_samples) are
# filled by the NonViolence cell that follows.
video_data = np.zeros((2 * num_samples, num_frames, 128, 128, 3), dtype=np.float32)
output_data = []

video_path = 'threat/real life violence situations/Violence'
list1 = os.listdir(video_path)[:num_samples]
if len(list1) < num_samples:
    # Fail with a clear message instead of an IndexError inside the loop.
    raise ValueError(
        f"expected at least {num_samples} videos in {video_path!r}, found {len(list1)}"
    )

for i in range(num_samples):
    video = cv2.VideoCapture(os.path.join(video_path, list1[i]))
    frames = np.zeros((num_frames, 128, 128, 3), dtype=np.float32)

    try:
        for j in range(num_frames):
            ret, frame = video.read()
            if not ret:
                # No frame was decoded (clip shorter than num_frames or
                # unreadable): leave this slot zero-filled, as the
                # NonViolence loader does, instead of passing None to resize.
                continue
            frame = cv2.resize(frame, (128, 128))
            # Scale pixel values to [0, 1].
            frames[j] = frame.astype(np.float32) / 255.0
    finally:
        # Free the decoder handle even if a frame fails to process.
        video.release()

    video_data[i] = frames
    output_data.append(1)



In [20]:
# Load the NonViolence test clips into the second half of `video_data`
# (rows num_samples .. 2 * num_samples - 1) and label each clip 0.
vid_path = 'threat/real life violence situations/NonViolence'
list2 = os.listdir(vid_path)[:num_samples]
if len(list2) < num_samples:
    # Fail with a clear message instead of an IndexError inside the loop.
    raise ValueError(
        f"expected at least {num_samples} videos in {vid_path!r}, found {len(list2)}"
    )

for i in range(num_samples):
    video = cv2.VideoCapture(os.path.join(vid_path, list2[i]))
    frames = np.zeros((num_frames, 128, 128, 3), dtype=np.float32)

    try:
        for j in range(num_frames):
            ret, frame = video.read()
            if not ret:
                # No frame decoded: keep this slot zero-filled.
                continue
            frame = cv2.resize(frame, (128, 128))
            # Scale pixel values to [0, 1].
            frames[j] = np.array(frame, dtype=np.float32) / 255.0
    finally:
        # Free the decoder handle; the original never released the capture.
        video.release()

    video_data[i + num_samples] = frames
    output_data.append(0)

In [21]:
# Scale the test clips with the `mean` / `std` computed on the training array,
# so both sets share the same standardisation.
X_test = (video_data - mean) / std
y_test = np.asarray(output_data, dtype=np.float32)

for arr in (X_test, y_test):
    print(arr.shape)

(1000, 16, 128, 128, 3)
(1000,)


## Model Evaluation

In [22]:
# Evaluate the model trained above on the test clips. `md` is only created by
# the load_model cell further down, so referring to it here raises NameError
# when the notebook is run top to bottom on a fresh kernel.
eva = model.evaluate(X_test, y_test)



In [15]:
# Predict a single test clip (index 250). A leading batch axis is added
# because predict expects a batch of clips.
sele = np.expand_dims(X_test[250], axis=0)
print(sele.shape)
# Use `model`: `md` is not defined until the load_model cell further down, so
# it is unavailable here when the notebook is run in order.
y_pred = model.predict(sele)
print(y_pred)

(1, 16, 128, 128, 3)
[[0.9597803]]


In [16]:
# Turn the sigmoid score into a hard 0/1 decision by rounding to the nearest
# integer.
pred = np.rint(y_pred)
print(pred)

[[1.]]


In [17]:
# Ground-truth label for test clip 250, the same index used for the
# single-clip prediction above.
print(y_test[250])

1.0


In [16]:
# Write the in-session model to the file "mdl_presentation.h5".
model.save("mdl_presentation.h5")

In [3]:
from tensorflow.keras.models import load_model
# Reload the model stored in 'mdl_presentation.h5' and bind it to `md`, the
# name the video-prediction cell below uses.
md = load_model('mdl_presentation.h5')

In [4]:
# Print the layer-by-layer summary of the reloaded model.
md.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_2 (Conv3D)           (None, 14, 126, 126, 128  10496     
                             )                                   
                                                                 
 max_pooling3d_2 (MaxPooling  (None, 7, 63, 63, 128)   0         
 3D)                                                             
                                                                 
 conv3d_3 (Conv3D)           (None, 5, 61, 61, 64)     221248    
                                                                 
 max_pooling3d_3 (MaxPooling  (None, 2, 30, 30, 64)    0         
 3D)                                                             
                                                                 
 batch_normalization_1 (Batc  (None, 2, 30, 30, 64)    256       
 hNormalization)                                      

## Violence Prediction


In [27]:
# Classify a single video file with the reloaded model `md`.
# Other sample clips that were tried here: NV_98.mp4, NV_99.mp4, V_997.mp4,
# v2.mp4, tf1.mp4, vl1.mp4.
video = cv2.VideoCapture('v3.mp4')
if not video.isOpened():
    # Without this check a missing file surfaces as a cv2.resize error on None.
    raise IOError("could not open video file 'v3.mp4'")

fram = np.zeros((num_frames, 128, 128, 3), dtype=np.float32)

try:
    for j in range(num_frames):
        ret, fra = video.read()
        if not ret:
            # Clip ended before num_frames frames: remaining slots stay zero.
            break

        fra = cv2.resize(fra, (128, 128))
        fra = np.array(fra, dtype=np.float32) / 255.0
        fram[j] = fra
        cv2.imshow("fra", fra)
        # imshow only paints once the GUI event loop is pumped.
        cv2.waitKey(1)
finally:
    # Free the decoder handle even if a frame fails to process.
    video.release()

# Add the batch axis expected by predict.
sele = np.expand_dims(fram, axis=0)

# NOTE(review): the training and test arrays are standardised with the
# dataset-wide `mean` / `std`, whereas this clip is standardised with its own
# statistics. Behaviour is kept as-is; confirm this mismatch is intended.
mn = np.mean(sele)
st = np.std(sele)
if st == 0:
    # Constant clip (e.g. all black): avoid dividing by zero and producing NaN.
    st = 1.0

sele = (sele - mn) / st
print(sele.shape)
y_pred = md.predict(sele)
print(y_pred)
pred = np.round(y_pred)
print(pred)

# Compare a scalar rather than relying on the truthiness of a (1, 1) array.
decision = float(pred[0, 0])
if decision == 0:
    print("non violent")
elif decision == 1:
    print("violent")

            
    

(1, 16, 128, 128, 3)
[[0.05814742]]
[[0.]]
non violent


# Threat Classification In Live Video

In [None]:
import cv2
import numpy as np
import os
import tensorflow as tf

# Length of the sliding window (in frames) passed to the model per prediction.
num_frames = 16


model = tf.keras.models.load_model('mdl_presentation.h5')

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Otherwise every read fails and the loop below would spin forever.
    raise RuntimeError("could not open camera 0")

frames = np.zeros((num_frames, 128, 128, 3), dtype=np.float32)
j = 0  # index of the next free slot in `frames`

try:
    while True:
        ret, frame = cap.read()

        if ret:
            frame = cv2.resize(frame, (128, 128))
            frame = np.array(frame, dtype=np.float32) / 255.0
            frames[j] = frame
            j += 1

            # Predict only once all num_frames slots hold fresh frames. The
            # previous counter logic fired after 15 frames and never wrote the
            # last slot, so every window contained one all-zero frame.
            if j == num_frames:
                sele = np.expand_dims(frames, axis=0)

                # NOTE(review): per-window statistics are used here, while the
                # training cells standardise with dataset-wide mean / std.
                # Behaviour is kept as-is; confirm this is intended.
                mn = np.mean(sele)
                st = np.std(sele)
                if st == 0:
                    # Constant window (e.g. covered lens): avoid NaN inputs.
                    st = 1.0

                sele = (sele - mn) / st
                y_pred = model.predict(sele)
                pred = np.round(y_pred)
                print(y_pred)
                print("pred", pred)

                if pred[0, 0] == 0:
                    label, clr = 'N-V', (0, 255, 0)
                else:
                    label, clr = 'V', (0, 0, 255)

                cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, clr, 2)
                cv2.imshow('frame', frame)
                j = 0

        # Poll the keyboard on every iteration, including after a failed read,
        # so 'q' can always end the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close windows even if prediction raises.
    cap.release()
    cv2.destroyAllWindows()
