# **Violence Detection Model**
A deep learning model that can detect violence in videos and live feeds.

To use only the GUI part, first save the model at the correct location, then proceed with the GUI.
The model is available here:

https://drive.google.com/drive/folders/1eYFkV4TVnYlCwhSINmhCt9l4nWoLaMUQ?usp=sharing
Please download it and keep it at the correct path first.
#### By Aryan Mathur

Importing Required Libraries


In [1]:
import os
import shutil
import cv2
import math
import random
import numpy as np
import datetime as dt
import tensorflow
import keras
from collections import deque
import matplotlib.pyplot as plt
plt.style.use("seaborn")
from decimal import Decimal


from sklearn.model_selection import train_test_split

from keras.layers import *
from keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from IPython.display import HTML
from base64 import b64encode
import tensorflow as tf
from IPython.display import HTML
from base64 import b64encode
import mimetypes


  plt.style.use("seaborn")


Mount The Drive

#Frame Extraction


In [2]:
from google.colab import drive

# Mount Google Drive
#drive.mount('/content/drive')


In [3]:

# Target size, in pixels, that every video frame is resized to before it is used.
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64

# How many frames of a video make up one sequence fed to the model.
SEQUENCE_LENGTH = 16


In [4]:
def frames_extraction(video_path, SEQUENCE_LENGTH):
    """Sample up to ``SEQUENCE_LENGTH`` evenly spaced, pre-processed frames from a video.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.
        SEQUENCE_LENGTH: Maximum number of frames to sample.

    Returns:
        A list of frames, each resized to ``IMAGE_WIDTH`` x ``IMAGE_HEIGHT`` and
        divided by 255. The list is shorter than ``SEQUENCE_LENGTH`` (possibly
        empty) when a frame cannot be read.
    """
    frames_list = []

    video_reader = cv2.VideoCapture(video_path)
    try:
        # Total number of frames reported for the video.
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Distance between two sampled frames; at least 1 so short videos still advance.
        skip_frames_window = max(video_frames_count // SEQUENCE_LENGTH, 1)

        for frame_counter in range(SEQUENCE_LENGTH):
            # Jump to the next sampling position.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

            success, frame = video_reader.read()
            if not success:
                break

            # cv2.resize expects dsize as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))

            # Normalize the pixel values and keep the frame.
            frames_list.append(resized_frame / 255)
    finally:
        # Release the capture even when reading or resizing raises.
        video_reader.release()

    return frames_list


In [5]:
import cv2
import numpy as np
from collections import deque

def predict_frames(video_file_path, output_file_path, SEQUENCE_LENGTH,
                   model_path='/content/drive/MyDrive/aryxn/aryxn_model.h5'):
    """Write a copy of a video with the predicted class drawn on its frames.

    A sliding window of the last ``SEQUENCE_LENGTH`` pre-processed frames is fed
    to the model; once the window is full, every frame gets a
    ``Predicted: <class>`` label before it is written. Earlier frames are
    written unlabelled.

    Args:
        video_file_path: Path of the input video.
        output_file_path: Path of the annotated video to write (``mp4v`` codec).
        SEQUENCE_LENGTH: Number of frames in one model input sequence.
        model_path: Path of the Keras model to load.
            NOTE(review): this default differs from the '/content/aryxn_model.h5'
            path used by the other functions in this file - confirm which is right.

    Returns:
        None.
    """
    video_reader = cv2.VideoCapture(video_file_path)
    video_writer = None
    try:
        model = tf.keras.models.load_model(model_path)

        # The output video keeps the input's frame size and frame rate.
        original_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                       video_reader.get(cv2.CAP_PROP_FPS), (original_width, original_height))

        # Sliding window holding the most recent pre-processed frames.
        frames_queue = deque(maxlen=SEQUENCE_LENGTH)

        # Used only for progress reporting.
        total_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frames_processed = 0

        while video_reader.isOpened():
            ok, frame = video_reader.read()
            if not ok:
                break

            frames_processed += 1

            # The reported frame count can be 0; avoid dividing by it.
            if total_frames > 0:
                percentage_processed = (frames_processed / total_frames) * 100
                print(f"Percentage of video processed: {percentage_processed:.2f}%")
            else:
                print(f"Frames processed: {frames_processed}")

            # cv2.resize expects dsize as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            normalized_frame = resized_frame / 255
            frames_queue.append(normalized_frame)

            # Predict only once the window holds a full sequence.
            if len(frames_queue) == SEQUENCE_LENGTH:
                model_input = np.expand_dims(np.array(frames_queue), axis=0)
                predicted_labels_probabilities = model.predict(model_input)[0]
                predicted_label = np.argmax(predicted_labels_probabilities)
                predicted_class_name = CLASSES_LIST[predicted_label]

                # Draw the label near the bottom-right corner of the frame:
                # (0, 0, 255) for "Violence", (0, 255, 0) otherwise.
                text = f'Predicted: {predicted_class_name}'
                text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.3, 2)[0]
                text_x = max(original_width - text_size[0] - 20, 0)
                text_y = max(original_height - text_size[1] - 20, 0)
                color = (0, 0, 255) if predicted_class_name == "Violence" else (0, 255, 0)
                cv2.putText(frame, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1.3, color, 2)

            video_writer.write(frame)
    finally:
        # Release both handles even when loading the model or predicting raises.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()


In [6]:
def show_pred_frames(pred_video_path, SEQUENCE_LENGTH):
    """Show up to 12 randomly chosen frames of a video in a matplotlib grid.

    Frames are drawn from index ``SEQUENCE_LENGTH`` onwards (presumably because
    earlier frames of a predicted video carry no label - confirm). When the
    video has no frames beyond the first ``SEQUENCE_LENGTH``, a message is
    printed and nothing is plotted.

    Args:
        pred_video_path: Path of the video to sample frames from.
        SEQUENCE_LENGTH: Index of the first frame that may be selected.

    Returns:
        None.
    """
    video_reader = cv2.VideoCapture(pred_video_path)
    try:
        frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Show at most 12 frames, fewer when the video is short.
        max_frames_to_select = min(frames_count - SEQUENCE_LENGTH, 12)

        if max_frames_to_select <= 0:
            # Nothing lies in range(SEQUENCE_LENGTH, frames_count), so there is nothing to draw.
            print(f"The output video has {frames_count} frames, none beyond the first "
                  f"{SEQUENCE_LENGTH}. There are no frames to show.")
            return

        plt.figure(figsize=(20, 15))

        random_frames = sorted(
            random.sample(range(SEQUENCE_LENGTH, frames_count), max_frames_to_select)
        )

        for counter, random_index in enumerate(random_frames, 1):
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, random_index)
            success, frame = video_reader.read()
            if not success:
                break

            # Create the subplot only after a successful read so no empty axes are left behind.
            plt.subplot(5, 4, counter)
            # Convert from BGR to RGB before handing the frame to matplotlib.
            plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        plt.tight_layout()
    finally:
        # Release the capture even when reading or plotting raises.
        video_reader.release()

    plt.show()

In [7]:
def predict_video(video_file_path, SEQUENCE_LENGTH, model_path='/content/aryxn_model.h5'):
    """Classify a whole video from evenly spaced sample frames.

    Args:
        video_file_path: Path of the video to classify.
        SEQUENCE_LENGTH: Number of frames to sample for the model input.
        model_path: Path of the Keras model to load.

    Returns:
        A tuple ``(predicted_class_name, Confi)`` where ``Confi`` is a text of
        the form ``"Predicted: <class>\\nConfidence: <probability>"``.

    Raises:
        ValueError: If no frame could be read from the video.
    """
    frames_list = []

    video_reader = cv2.VideoCapture(video_file_path)
    try:
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Distance between two sampled frames; at least 1 so short videos still advance.
        skip_frames_window = max(video_frames_count // SEQUENCE_LENGTH, 1)

        for frame_counter in range(SEQUENCE_LENGTH):
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

            success, frame = video_reader.read()
            if not success:
                break

            # Use the shared size constants; cv2.resize expects dsize as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            frames_list.append(resized_frame / 255.0)
    finally:
        # The capture is not needed for inference; release it even on errors.
        video_reader.release()

    if not frames_list:
        # Fail with a clear message instead of an opaque shape error from the model.
        raise ValueError(f"Could not read any frames from video: {video_file_path!r}")

    model = tf.keras.models.load_model(model_path)

    # Add a leading batch dimension and predict the class probabilities.
    frames_array = np.array(frames_list)
    predicted_labels_probabilities = model.predict(np.expand_dims(frames_array, axis=0))[0]

    # Pick the most probable class and look up its name.
    predicted_label = np.argmax(predicted_labels_probabilities)
    predicted_class_name = CLASSES_LIST[predicted_label]

    # Text summary of the predicted class and its probability.
    Confi = (f'Predicted: {predicted_class_name}\nConfidence: {predicted_labels_probabilities[predicted_label]}')

    return predicted_class_name, Confi


In [8]:
# Directory for test videos; it is created when missing.
test_videos_directory = 'test_videos'
os.makedirs(test_videos_directory, exist_ok=True)

# Location of the annotated output video inside that directory.
output_video_file_path = '/'.join((test_videos_directory, 'Output-Test-Video.mp4'))

Predict a Violent Video

In [9]:
# Class names, looked up with the argmax index of the model's predicted probabilities.
# NOTE(review): the order presumably has to match the label order used in training - confirm.
CLASSES_LIST = ["NonViolence","Violence"]

In [10]:
pip install gradio

Collecting gradio
  Downloading gradio-4.28.3-py3-none-any.whl (12.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.2/12.2 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.2-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.9/91.9 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.16.0 (from gradio)
  Downloading gradio_client-0.16.0-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.4/314.4 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━

In [11]:
pip install moviepy



In [12]:
import cv2
import numpy as np

def viola_frames(video_file_path, output_file_path, SEQUENCE_LENGTH,
                 model_path='/content/aryxn_model.h5'):
    """Write only the frames predicted as "Violence" to a new video.

    A sliding window of the last ``SEQUENCE_LENGTH`` pre-processed frames is fed
    to the model; the current frame is written whenever the window is full and
    the predicted class is "Violence".

    Note: a later cell in this file defines another ``viola_frames`` that
    replaces this one when it is run.

    Args:
        video_file_path: Path of the input video.
        output_file_path: Path of the video to write (``mp4v`` codec).
        SEQUENCE_LENGTH: Number of frames in one model input sequence.
        model_path: Path of the Keras model to load.

    Returns:
        ``output_file_path``, unchanged.
    """
    video_reader = cv2.VideoCapture(video_file_path)
    video_writer = None
    try:
        model = tf.keras.models.load_model(model_path)

        # The output video keeps the input's frame size and frame rate.
        original_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                       video_reader.get(cv2.CAP_PROP_FPS), (original_width, original_height))

        # Sliding window holding the most recent pre-processed frames.
        frames_queue = deque(maxlen=SEQUENCE_LENGTH)

        while video_reader.isOpened():
            ok, frame = video_reader.read()
            if not ok:
                break

            # cv2.resize expects dsize as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            frames_queue.append(resized_frame / 255)

            # Predict only once the window holds a full sequence.
            if len(frames_queue) != SEQUENCE_LENGTH:
                continue

            model_input = np.expand_dims(np.array(frames_queue), axis=0)
            predicted_labels_probabilities = model.predict(model_input)[0]
            predicted_class_name = CLASSES_LIST[np.argmax(predicted_labels_probabilities)]

            # Keep only frames classified as "Violence".
            if predicted_class_name == "Violence":
                video_writer.write(frame)
    finally:
        # Release both handles even when loading the model or predicting raises.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

    return output_file_path


In [None]:
import cv2
import numpy as np
import gradio as gr
from collections import deque
import os
import time
from moviepy.editor import VideoFileClip

def viola_frames(video_file_path, output_file_path, SEQUENCE_LENGTH,
                 model_path='/content/aryxn_model.h5'):
    """Write the frames predicted as "Violence" to a new video and report their share.

    A sliding window of the last ``SEQUENCE_LENGTH`` pre-processed frames is fed
    to the model; the current frame is written and counted whenever the window
    is full and the predicted class is "Violence".

    Args:
        video_file_path: Path of the input video.
        output_file_path: Path of the video to write (``mp4v`` codec).
        SEQUENCE_LENGTH: Number of frames in one model input sequence.
        model_path: Path of the Keras model to load.

    Returns:
        A tuple ``(output_file_path, violent_percentage)`` where
        ``violent_percentage`` is the number of violent frames relative to the
        total frame count, in percent (0.0 when the video has no frames).

    Raises:
        FileNotFoundError: If the output file does not exist after writing.
    """
    video_reader = cv2.VideoCapture(video_file_path)
    video_writer = None
    violent_frames = 0
    processed_frames = 0
    total_frames = 0
    try:
        model = tf.keras.models.load_model(model_path)

        # The output video keeps the input's frame size and frame rate.
        original_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                       video_reader.get(cv2.CAP_PROP_FPS), (original_width, original_height))

        # Sliding window holding the most recent pre-processed frames.
        frames_queue = deque(maxlen=SEQUENCE_LENGTH)

        # Frame count reported for the video; it may be 0.
        total_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        while video_reader.isOpened():
            ok, frame = video_reader.read()
            if not ok:
                break

            # cv2.resize expects dsize as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            frames_queue.append(resized_frame / 255)

            # Predict only once the window holds a full sequence.
            if len(frames_queue) == SEQUENCE_LENGTH:
                model_input = np.expand_dims(np.array(frames_queue), axis=0)
                predicted_labels_probabilities = model.predict(model_input)[0]
                predicted_class_name = CLASSES_LIST[np.argmax(predicted_labels_probabilities)]

                # Count and keep only frames classified as "Violence".
                if predicted_class_name == "Violence":
                    violent_frames += 1
                    video_writer.write(frame)

            processed_frames += 1

            # Report progress; skip the percentage when the frame count is unknown.
            if total_frames > 0:
                progress_percentage = (processed_frames / total_frames) * 100
                print(f"Processing: {progress_percentage:.2f}%")
    finally:
        # Release both handles even when loading the model or predicting raises.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

    # Wait until the output file has not been modified for 5 seconds, but give up
    # after 30 seconds so a file that was never created cannot block forever.
    deadline = time.monotonic() + 30
    while time.monotonic() < deadline:
        if os.path.exists(output_file_path):
            if time.time() - os.path.getmtime(output_file_path) > 5:
                break
            print("Video is still being saved")
        time.sleep(1)

    if not os.path.exists(output_file_path):
        raise FileNotFoundError(f"Output video was not created: {output_file_path!r}")

    # Fall back to the number of frames actually read when the reported count is 0.
    frame_total = total_frames if total_frames > 0 else processed_frames
    violent_percentage = (violent_frames / frame_total) * 100 if frame_total else 0.0

    return output_file_path, violent_percentage

def predict_and_playback(input_video_file_path):
    """Run both analyses on an uploaded video and return the results for the UI.

    Calls ``viola_frames`` to write the violent frames to 'output.mp4' and
    ``predict_video`` to classify the video as a whole.

    Args:
        input_video_file_path: The uploaded video, forwarded unchanged to both helpers.

    Returns:
        A tuple of the output video path and a text combining the percentage of
        violent frames with the overall prediction text.
    """
    sequence_length = 16

    # Extract the violent frames and get their share of the video.
    highlight_path, violent_share = viola_frames(input_video_file_path, 'output.mp4', sequence_length)

    # Only the text summary of the whole-video prediction is used here.
    _, confidence_text = predict_video(input_video_file_path, sequence_length)

    summary = f"Percentage of violent frames: {violent_share:.2f}% and {confidence_text}"
    return highlight_path, summary

# Gradio interface: takes an uploaded file and returns the processed video plus a text summary.
iface = gr.Interface(
    fn=predict_and_playback,
    inputs=["file"],
    outputs=["video", "text"],  # A video component for the result and a text component for the summary
    title="Fight Scene Detection for Highlight Generation in Movies",
    description="Upload a video file and see the processed video.",
    allow_flagging="never",  # Disable flagging; the string form replaces the deprecated boolean
)

# Launch the interface
iface.launch(debug=True)





Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://95f477e6e39c62702b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Processing: 0.10%
Processing: 0.21%
Processing: 0.31%
Processing: 0.41%
Processing: 0.52%
Processing: 0.62%
Processing: 0.72%
Processing: 0.82%
Processing: 0.93%
Processing: 1.03%
Processing: 1.13%
Processing: 1.24%
Processing: 1.34%
Processing: 1.44%
Processing: 1.55%
Processing: 1.65%
Processing: 1.75%
Processing: 1.86%
Processing: 1.96%
Processing: 2.06%
Processing: 2.16%
Processing: 2.27%
Processing: 2.37%
Processing: 2.47%
Processing: 2.58%
Processing: 2.68%
Processing: 2.78%
Processing: 2.89%
Processing: 2.99%
Processing: 3.09%
Processing: 3.20%
Processing: 3.30%
Processing: 3.40%
Processing: 3.51%
Processing: 3.61%
Processing: 3.71%
Processing: 3.81%
Processing: 3.92%
Processing: 4.02%
Processing: 4.12%
Processing: 4.23%
Processing: 4.33%
Processing: 4.43%
Processing: 4.54%
Processing: 4.64%
Processing: 4.74%
Processing: 4.85%
Processing: 4.95%
Processing: 5.05%
Processing: 5.15%
Processing: 5.26%
Processing: 5.36%
Processing: 5.46%
Processing: 5.57%
Processing: 5.67%
Processing


