In [1]:
# Importing essential libraries

import tensorflow as tf
import keras
from keras.layers import *
from keras.models import Sequential, load_model
from tensorflow.keras.utils import to_categorical, plot_model
from sklearn.model_selection import train_test_split

import os
import cv2
import math
import numpy as np
import matplotlib.pyplot as plt
from collections import deque
import datetime as dt
import shutil
import random

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")  # Configures Seaborn plots
%matplotlib inline



In [3]:
VIDEO_FRAME_HEIGHT = 64
VIDEO_FRAME_WIDTH = 64

FRAMES_IN_SEQUENCE = 16

DATASET_DIRECTORY = "D:/New folder/violence Detection/archive/real life violence situations/Real Life Violence Dataset"

CLASS_LABELS = ["NonViolence", "Violence"]


In [6]:
def extract_frames(video_path, sequence_length=FRAMES_IN_SEQUENCE, image_height=VIDEO_FRAME_HEIGHT, image_width=VIDEO_FRAME_WIDTH):

    frames_list = []
    video_reader = cv2.VideoCapture(video_path)
    total_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    skip_frames_window = max(total_frames // sequence_length, 1)

    for frame_index in range(0, total_frames, skip_frames_window):
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        frame_found, frame = video_reader.read()
        if not frame_found:
            break
        resized_frame = cv2.resize(frame, (image_height, image_width))
        normalized_frame = resized_frame / 255 
        frames_list.append(normalized_frame)
    video_reader.release()
    return frames_list


In [7]:
def create_dataset():
    features = []
    labels = []
    video_files_paths = []
    for class_index, class_name in enumerate(CLASS_LABELS):
        print(f'Extracting Data of Class: {class_name}')
        class_dir = os.path.join(DATASET_DIRECTORY, class_name)
        files_list = os.listdir(class_dir)
        for file_name in files_list:
            video_file_path = os.path.join(class_dir, file_name)
            frames = extract_frames(video_file_path)
            if len(frames) == FRAMES_IN_SEQUENCE:
                features.append(frames)
                labels.append(class_index)
                video_files_paths.append(video_file_path)
    features = np.array(features)
    labels = np.array(labels)  
    return features, labels, video_files_paths

In [8]:
# Create the dataset.
features, labels, video_files_paths = create_dataset()

Extracting Data of Class: NonViolence
Extracting Data of Class: Violence


In [9]:
# Saving the extracted data

np.save("features.npy",features)
np.save("labels.npy",labels)
np.save("video_files_paths.npy",video_files_paths)

In [10]:
features = np.load("features.npy")
labels = np.load("labels.npy")
video_files_paths = np.load("video_files_paths.npy")

In [11]:
# convert labels into one-hot-encoded vectors
one_hot_encoded_labels = to_categorical(labels)
one_hot_encoded_labels

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.]])

In [12]:
# Split the Data into Train ( 90% ) and Test Set ( 10% ).

features_train, features_test, labels_train, labels_test = train_test_split(
    features, 
    one_hot_encoded_labels,
    test_size=0.1,
    shuffle=True,
    random_state=42
)


In [13]:
from keras.applications.mobilenet_v2 import MobileNetV2

mobilenet = MobileNetV2(include_top=False , weights="imagenet")

mobilenet.trainable=True
for layer in mobilenet.layers[:-50]:
  layer.trainable=False

  mobilenet = MobileNetV2(include_top=False , weights="imagenet")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [14]:

def construct_model():
    model = Sequential()
    model.add(Input(shape=(FRAMES_IN_SEQUENCE, VIDEO_FRAME_HEIGHT, VIDEO_FRAME_WIDTH, 3)))
    model.add(TimeDistributed(mobilenet))
    model.add(Dropout(0.25))
    model.add(TimeDistributed(Flatten()))
    lstm_fw = LSTM(units=32)
    lstm_bw = LSTM(units=32, go_backwards=True)
    model.add(Bidirectional(lstm_fw, backward_layer=lstm_bw))
    dense_units = [256, 128, 64, 32]
    for units in dense_units:
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.25))
    model.add(Dense(len(CLASS_LABELS), activation='softmax'))
    model.summary()
    return model

In [15]:
# Constructing the Model

my_model = construct_model()

In [16]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

early_stopping_callback = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=0.000001, verbose=1)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

my_model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

MobBiLSTM_model_history = my_model.fit(x=features_train, y=labels_train, epochs=50, batch_size=8,
                                             shuffle=True, validation_split=0.2,
                                             callbacks=[early_stopping_callback, reduce_lr])


Epoch 1/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 5s/step - accuracy: 0.6472 - loss: 0.6698 - val_accuracy: 0.7273 - val_loss: 0.6549 - learning_rate: 1.0000e-04
Epoch 2/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 944ms/step - accuracy: 0.5297 - loss: 0.6735 - val_accuracy: 0.8182 - val_loss: 0.6351 - learning_rate: 1.0000e-04
Epoch 3/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 867ms/step - accuracy: 0.6306 - loss: 0.6441 - val_accuracy: 0.8182 - val_loss: 0.6105 - learning_rate: 1.0000e-04
Epoch 4/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 863ms/step - accuracy: 0.6936 - loss: 0.6254 - val_accuracy: 0.8182 - val_loss: 0.5909 - learning_rate: 1.0000e-04
Epoch 5/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 842ms/step - accuracy: 0.7403 - loss: 0.5480 - val_accuracy: 0.8182 - val_loss: 0.5651 - learning_rate: 1.0000e-04
Epoch 6/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [17]:
model_evaluation_history = my_model.evaluate(features_test, labels_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299ms/step - accuracy: 0.5000 - loss: 0.7215


In [18]:
my_model.save('Model.h5')



In [19]:
my_model = load_model('Model.h5')
my_model.summary()



In [20]:
def predict_video_class(video_file_path, SEQUENCE_LENGTH=16):
    try:
        video_reader = cv2.VideoCapture(video_file_path)
        if not video_reader.isOpened():
            print("Error: Unable to open video file.")
            return
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frames_list = []
        skip_frames_window = max(int(video_frames_count / SEQUENCE_LENGTH), 1)
        for frame_counter in range(SEQUENCE_LENGTH):
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
            frame_found, frame = video_reader.read()
            if not frame_found:
                print("Error: Unable to read frame from video.")
                return
            resized_frame = cv2.resize(frame, (VIDEO_FRAME_HEIGHT, VIDEO_FRAME_WIDTH))
            normalized_frame = resized_frame / 255
            frames_list.append(normalized_frame)
        predicted_labels_probabilities = my_model.predict(np.expand_dims(frames_list, axis=0))[0]
        predicted_label_index = np.argmax(predicted_labels_probabilities)
        predicted_class_name = CLASS_LABELS[predicted_label_index]
        prediction_confidence = predicted_labels_probabilities[predicted_label_index]
        return (predicted_class_name, prediction_confidence)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    finally:
        if video_reader:
            video_reader.release()


In [21]:
random_class = random.choice(CLASS_LABELS)
path = os.path.join(DATASET_DIRECTORY, random_class)
random_video = random.choice(os.listdir(path))

# Specifying video to be predicted
input_video_file_path = os.path.join(path, random_video)

# Perform Single Prediction on the Test Video.
predicted_class_name, prediction_confidence = predict_video_class(input_video_file_path, FRAMES_IN_SEQUENCE)

# Output
print(f'Predicted Class: {predicted_class_name}')
print(f'Confidence: {prediction_confidence}')

print("Prediction is ",predicted_class_name == random_class)

print(f"\nFor Referene: Choosen Video = {random_video}\nPath: \'{input_video_file_path}\'")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19s/step
Predicted Class: Violence
Confidence: 0.504985511302948
Prediction is  False

For Referene: Choosen Video = NV_329.mp4
Path: 'D:/New folder/violence Detection/archive/real life violence situations/Real Life Violence Dataset\NonViolence\NV_329.mp4'
