In [None]:
import tensorflow as tf

tf.random.set_seed(73)
TPU_INIT = False

if TPU_INIT:
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
        tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

    except ValueError:
        raise BaseException('ERROR: Not connected to a TPU runtime!')
else:
    !nvidia-smi
;
print("Tensorflow version " + tf.__version__)

In [None]:
# Ask TensorFlow which physical GPU devices it can see, then report how many.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below /kaggle/input and print
# them one per line, in the order os.walk yields them.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import os.path
from pathlib import Path

# Root folder of the dataset inside the Kaggle input directory.
DATASET_PATH = Path("../input/real-life-violence-situations-dataset/Real Life Violence Dataset")

# Recursively collect every clip: all .mp4 matches first, then all .avi matches.
video_files = [
    clip
    for pattern in (r"**/*.mp4", r"**/*.avi")
    for clip in DATASET_PATH.glob(pattern)
]

# Report what was found, handling the two failure cases up front.
if not DATASET_PATH.exists():
    print(f"The path {DATASET_PATH} does not exist.")
elif not video_files:
    print("No video files found in the specified path.")
else:
    print(f"Found {len(video_files)} video files (including .mp4 and .avi).")
    print("Sample files:")
    # Only a handful of paths are shown to keep the output short.
    for file in video_files[:5]:
        print(file)

In [None]:
# Fail fast when the dataset root itself is missing.
if not os.path.exists(DATASET_PATH):
    raise FileNotFoundError(f"Dataset path is invalid: {DATASET_PATH}")

# One sub-directory per class; os.path.join keeps the separators portable.
NonViolenceVideos_Dir = os.path.join(DATASET_PATH, "NonViolence")
ViolenceVideos_Dir = os.path.join(DATASET_PATH, "Violence")

# Validate the class directories BEFORE listing them: os.listdir raises on a
# missing directory, so checking afterwards would make this branch unreachable.
if not os.path.isdir(NonViolenceVideos_Dir) or not os.path.isdir(ViolenceVideos_Dir):
    raise FileNotFoundError("One or both class directories are missing!")

# Names of all entries present in each class directory.
NonViolence_files_names_list = os.listdir(NonViolenceVideos_Dir)
Violence_files_names_list = os.listdir(ViolenceVideos_Dir)

# Both classes must contribute at least one entry.
if not NonViolence_files_names_list or not Violence_files_names_list:
    raise ValueError("One or both class directories are empty!")

In [None]:
# Overall size is the sum of the two per-class listings.
total_videos = sum(map(len, (NonViolence_files_names_list, Violence_files_names_list)))
print(f"Total Dataset Size: {total_videos} videos")
print(f"Non-Violence Videos: {len(NonViolence_files_names_list)}")
print(f"Violence Videos: {len(Violence_files_names_list)}")

# Show the first five file names of each class, NonViolence first.
class_previews = (
    ("\nFirst 5 Non-Violence Videos:", NonViolence_files_names_list),
    ("\nFirst 5 Violence Videos:", Violence_files_names_list),
)
for preview_heading, preview_names in class_previews:
    print(preview_heading)
    print("\n".join(preview_names[:5]))

In [None]:
import random
from IPython.display import Image, HTML

from base64 import b64encode

def play_video(filepath):
    """Return an inline HTML5 player for the video stored at ``filepath``.

    The file is read in full, base64-encoded and embedded as a ``data:`` URI,
    so the returned object carries the whole video payload.

    Args:
        filepath: Path of the video file to embed.

    Returns:
        An ``IPython.display.HTML`` object wrapping a ``<video>`` element.
    """
    # Use a context manager so the file handle is closed right after reading.
    with open(filepath, 'rb') as video_file:
        video = video_file.read()
    src = 'data:video/mp4;base64,' + b64encode(video).decode()
    html = '<video width=640 muted controls autoplay loop><source src="%s" type="video/mp4"></video>' % src
    return HTML(html)

# Pick one random clip per class. The NonViolence draw happens first so the
# sequence of random.choice calls stays the same.
Random_NonViolence_Video = random.choice(NonViolence_files_names_list)
random_nonviolence_video_path = os.path.join(NonViolenceVideos_Dir, Random_NonViolence_Video)

Random_Violence_Video = random.choice(Violence_files_names_list)
random_violence_video_path = os.path.join(ViolenceVideos_Dir, Random_Violence_Video)

# The cell's final expression is what gets rendered: the NonViolence sample.
play_video(random_nonviolence_video_path)

In [None]:
# Spatial resolution each frame is resized to. Consider 224x224 when the
# footage contains small objects or intricate actions.
IMG_HEIGHT = IMG_WIDTH = 112
# Number of frames (sequence length) kept per video.
FRAMES = 10
# Target frame size as (height, width).
FRAME_SIZE = (IMG_HEIGHT, IMG_WIDTH)
# Fixed frame-skip setting for frame extraction.
FRAME_SKIP = 10

# Training settings.
BATCH_SIZE = 16
EPOCHS = 20
LEARNING_RATE = 1e-4

COLOR_CHANNELS = 3
# Number of parallel workers.
NUM_WORKERS = 8
# Class names; the position in the list is the integer label.
CLASSES_LIST = ["NonViolence", "Violence"]

In [None]:
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

def normalize_frame(frames):
    """Run ``frames`` through MobileNetV2's ``preprocess_input`` and return the result."""
    normalized_frames = preprocess_input(frames)
    return normalized_frames

def process_video(video_path, label, frame_count=FRAMES, frame_size=FRAME_SIZE):
    """Sample ``frame_count`` frames spread across a video and preprocess them.

    Every ``total_frames // frame_count``-th frame is kept (at least every
    frame), resized to ``frame_size``, converted from BGR to RGB and finally
    passed through ``normalize_frame``. If the video yields fewer than
    ``frame_count`` frames, the last kept frame is repeated as padding.

    Args:
        video_path: Path of the video file (``str`` or path-like).
        label: Label returned unchanged alongside the frames.
        frame_count: Number of frames to return for the video.
        frame_size: Target frame size as ``(height, width)``.

    Returns:
        Tuple ``(frames, label)`` where ``frames`` is the normalized float32
        array of shape ``(frame_count, frame_size[0], frame_size[1], 3)``.

    Raises:
        ValueError: If ``frame_count`` is smaller than 1, the video reports no
            frames, or no frame could be decoded.
    """
    if frame_count < 1:
        raise ValueError(f"frame_count must be at least 1, got {frame_count}")

    cap = cv2.VideoCapture(str(video_path))
    frames = []  # Frames kept for the output sequence
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            raise ValueError(f"Video {video_path} has zero frames!")

        # Sampling interval so the kept frames span the whole clip (at least 1).
        frame_skip = max(1, total_frames // frame_count)
        # cv2.resize expects (width, height) while frame_size is (height, width).
        target_size = (frame_size[1], frame_size[0])

        # frame_index counts frames READ; len(frames) counts frames KEPT. The
        # loop must stop on the latter, otherwise only the first few frames of
        # the video are ever inspected.
        frame_index = 0
        while len(frames) < frame_count:
            success, frame = cap.read()
            if not success:
                break  # End of stream or decode failure
            if frame_index % frame_skip == 0:
                frame = cv2.resize(frame, target_size)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
                frames.append(frame)
            frame_index += 1
    finally:
        # Always free the capture handle, including on the error paths.
        cap.release()

    if not frames:
        # Without this guard the padding below would fail on frames[-1].
        raise ValueError(f"Could not decode any frame from {video_path}!")

    # Pad with the last frame (instead of black frames) when the clip is short.
    while len(frames) < frame_count:
        frames.append(frames[-1])

    frames = np.array(frames, dtype=np.float32)
    frames = normalize_frame(frames)
    return frames, label

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
from tqdm import tqdm  # Progress bar

# Videos are decoded in parallel with a ProcessPoolExecutor, one process_video
# call per (path, label) pair.
def prepare_data(paths=None, targets=None, max_workers=None):
    """Decode and preprocess all videos in parallel.

    Args:
        paths: Sequence of video paths. Defaults to the module-level
            ``video_paths`` list.
        targets: Sequence of labels aligned with ``paths``. Defaults to the
            module-level ``labels`` list.
        max_workers: Number of worker processes. Defaults to ``NUM_WORKERS``.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the stacked per-video frame arrays
        returned by ``process_video`` and the matching labels.

    Raises:
        ValueError: If ``paths`` and ``targets`` differ in length.
    """
    # Resolve defaults at call time so the lists may be defined after this function.
    if paths is None:
        paths = video_paths
    if targets is None:
        targets = labels
    if max_workers is None:
        max_workers = NUM_WORKERS

    # executor.map would silently stop at the shorter input, dropping samples.
    if len(paths) != len(targets):
        raise ValueError(
            f"Got {len(paths)} video paths but {len(targets)} labels; they must match."
        )

    X, y = [], []
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # map() yields results in input order, keeping X and y aligned.
        results = tqdm(executor.map(process_video, paths, targets), total=len(paths))
        for frames, label in results:
            X.append(frames)
            y.append(label)

    return np.array(X), np.array(y)

In [None]:
video_paths = []
labels = []

# Violence clips get label 1, NonViolence clips label 0 (Violence listed first).
# os.listdir returns names in arbitrary order, so they are sorted here to keep
# the sample order, and therefore the seeded train/val/test split, reproducible.
for class_dir, class_label in ((ViolenceVideos_Dir, 1), (NonViolenceVideos_Dir, 0)):
    for video_file in sorted(os.listdir(class_dir)):
        video_paths.append(os.path.join(class_dir, video_file))
        labels.append(class_label)

In [None]:
import cv2
from sklearn.model_selection import train_test_split

# Decode and preprocess every listed video.
X, y = prepare_data()

# Summarise the result: sample count, then the shapes of the frame array
# (one entry per video) and of the label vector.
print(
    f"Dataset Size: {len(X)} videos",
    f"X Shape: {X.shape}",
    f"y Shape: {y.shape}",
    sep="\n",
)

In [None]:
# 70 / 15 / 15 split: first hold out 30% of the data, then cut that hold-out
# in half for validation and test. Both cuts are stratified on the labels and
# use the same fixed seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

print(f"Train: {len(X_train)}, Validation: {len(X_val)}, Test: {len(X_test)}")

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Stop training once val_loss has not improved for 5 epochs and roll the model
# back to its best weights (val_accuracy could be monitored instead).
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Learning-rate scheduler: scale the learning rate by 0.1 whenever val_loss
# has stalled for 3 epochs, never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

# Keep only the best model (lowest val_loss) seen during training on disk.
model_checkpoint = ModelCheckpoint(
    "cnn_best_model.keras",
    monitor="val_loss",
    save_best_only=True,
)

In [None]:
from keras.applications import VGG16, VGG19, inception_v3, InceptionV3, MobileNetV2
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv3D, MaxPooling3D, Conv2D, MaxPool2D, BatchNormalization, MaxPooling2D,\
                        Permute, TimeDistributed, Bidirectional,GRU, SimpleRNN, LSTM, LeakyReLU, \
                        GlobalAveragePooling2D, SeparableConv2D, ZeroPadding2D, Convolution2D, ZeroPadding2D,Reshape, Conv2DTranspose, Conv1D, AveragePooling1D, MaxPooling1D
import keras
from keras.optimizers import RMSprop, Adam, Optimizer, SGD, AdamW

def build_MobileNet_model(num_trainable_layers=4):
    """Build the MobileNetV2 + bidirectional LSTM video classifier.

    A MobileNetV2 backbone (ImageNet weights, no classification head) is
    applied to every frame, each frame's feature map is average-pooled, the
    resulting sequence is fed to a bidirectional LSTM, and a softmax layer
    outputs one probability per entry of ``CLASSES_LIST``.

    Args:
        num_trainable_layers: How many of the backbone's final layers are left
            trainable for fine-tuning. Use 0 to freeze the whole backbone.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # Load pretrained MobileNetV2 without its fully connected top layers.
    base_model = MobileNetV2(include_top=False, weights="imagenet", input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

    if num_trainable_layers > 0:
        # Setting `trainable` on the model applies to all of its layers, so the
        # backbone is unfrozen first and then every layer except the last
        # `num_trainable_layers` is frozen individually. Freezing the whole
        # model up front would leave nothing trainable.
        base_model.trainable = True
        for layer in base_model.layers[:-num_trainable_layers]:
            layer.trainable = False
    else:
        # No fine-tuning requested: keep the entire backbone frozen.
        base_model.trainable = False

    model = Sequential([
        # Apply the backbone to each frame of the input sequence.
        TimeDistributed(base_model, input_shape=(FRAMES, IMG_HEIGHT, IMG_WIDTH, 3)),

        # Collapse each frame's feature map to a single feature vector.
        TimeDistributed(GlobalAveragePooling2D()),

        # Model the frame sequence in both temporal directions.
        Bidirectional(LSTM(64, return_sequences=False)),

        # Dropout for regularization to reduce overfitting.
        Dropout(0.5),

        # Final classification layer, one unit per class.
        Dense(len(CLASSES_LIST), activation="softmax"),
    ])

    return model

In [None]:
# Instantiate the MobileNetV2 + BiLSTM network.
MoBiLSTM_model = build_MobileNet_model()

# Sparse categorical cross-entropy is used together with Adam at the
# configured learning rate; accuracy is tracked as the metric.
MoBiLSTM_model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer overview of the model.
MoBiLSTM_model.summary()

In [None]:
# Callbacks applied during training, in the order they are passed to fit().
training_callbacks = [early_stopping, model_checkpoint, lr_scheduler]

# Train on the training split and validate on the validation split each epoch.
MobBiLSTM_model_history = MoBiLSTM_model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
    callbacks=training_callbacks,
)

In [None]:
from tensorflow.keras.models import load_model

# Single place for the output file name used by both the save and the reload.
saved_model_path = "mobileNetV2_violence_detection_model.h5"

# Write the trained model to disk, then read it back from the same file.
MoBiLSTM_model.save(saved_model_path)
loaded_model = load_model(saved_model_path)

In [None]:
# Evaluate on the held-out test split. evaluate() must be called on the model
# itself; MobBiLSTM_model_history is only the value returned by fit().
test_loss, test_acc = MoBiLSTM_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")