In [1]:
import tensorflow as tf

tf.random.set_seed(73)
TPU_INIT = False

if TPU_INIT:
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
        tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

    except ValueError:
        raise BaseException('ERROR: Not connected to a TPU runtime!')
else:
    !nvidia-smi
;
print("Tensorflow version " + tf.__version__)

Tue Feb 11 17:13:38 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   40C    P8              9W /   70W |       1MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  2


In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/real-life-violence-situations-dataset/real life violence situations/Real Life Violence Dataset/NonViolence/NV_759.mp4
/kaggle/input/real-life-violence-situations-dataset/real life violence situations/Real Life Violence Dataset/NonViolence/NV_564.mp4
/kaggle/input/real-life-violence-situations-dataset/real life violence situations/Real Life Violence Dataset/NonViolence/NV_126.mp4
/kaggle/input/real-life-violence-situations-dataset/real life violence situations/Real Life Violence Dataset/NonViolence/NV_601.mp4
/kaggle/input/real-life-violence-situations-dataset/real life violence situations/Real Life Violence Dataset/NonViolence/NV_397.mp4
/kaggle/input/real-life-violence-situations-dataset/real life violence situations/Real Life Violence Dataset/NonViolence/NV_782.mp4
/kaggle/input/real-life-violence-situations-dataset/real life violence situations/Real Life Violence Dataset/NonViolence/NV_143.mp4
/kaggle/input/real-life-violence-situations-dataset/real life violence situa

In [4]:
import os
import os.path
from pathlib import Path

DATASET_PATH = Path("../input/real-life-violence-situations-dataset/Real Life Violence Dataset")

In [5]:
# Check if the path exists
if not os.path.exists(DATASET_PATH):
    raise FileNotFoundError(f"Dataset path is invalid: {DATASET_PATH}")

# Classes Directories, os.path.join is better than concat '/'
NonViolenceVideos_Dir = os.path.join(DATASET_PATH, "NonViolence")
ViolenceVideos_Dir = os.path.join(DATASET_PATH, "Violence")

# Retrieve the list of all the video files present in the Class Directory.
NonViolence_files_names_list = os.listdir(NonViolenceVideos_Dir)
Violence_files_names_list = os.listdir(ViolenceVideos_Dir)

# Check if Video Directories Exist
if not os.path.exists(NonViolenceVideos_Dir) or not os.path.exists(ViolenceVideos_Dir):
    raise FileNotFoundError("One or both class directories are missing!")

# Ensure the Dataset is Not Empty and contains videos
if not NonViolence_files_names_list or not Violence_files_names_list:
    raise ValueError("One or both class directories are empty!")

In [6]:
total_videos = len(NonViolence_files_names_list) + len(Violence_files_names_list)
print(f"Total Dataset Size: {total_videos} videos")
print(f"Non-Violence Videos: {len(NonViolence_files_names_list)}")
print(f"Violence Videos: {len(Violence_files_names_list)}")

# Print first few file names (head of each directory)
print("\nFirst 5 Non-Violence Videos:")
print("\n".join(NonViolence_files_names_list[:5]))

print("\nFirst 5 Violence Videos:")
print("\n".join(Violence_files_names_list[:5]))

Total Dataset Size: 2000 videos
Non-Violence Videos: 1000
Violence Videos: 1000

First 5 Non-Violence Videos:
NV_759.mp4
NV_564.mp4
NV_126.mp4
NV_601.mp4
NV_397.mp4

First 5 Violence Videos:
V_465.mp4
V_313.mp4
V_753.mp4
V_963.mp4
V_751.mp4


In [7]:
import random
from IPython.display import Image, HTML

from base64 import b64encode

def play_video(filepath):
    html = ''
    video = open(filepath,'rb').read()
    src = 'data:video/mp4;base64,' + b64encode(video).decode()
    html += '<video width=640 muted controls autoplay loop><source src="%s" type="video/mp4"></video>' % src
    return HTML(html)

# Randomly select a video file from the Classes Directory.
Random_NonViolence_Video = random.choice(NonViolence_files_names_list)
Random_Violence_Video = random.choice(Violence_files_names_list)

random_nonviolence_video_path = os.path.join(NonViolenceVideos_Dir, Random_NonViolence_Video)
random_violence_video_path = os.path.join(ViolenceVideos_Dir, Random_Violence_Video)

play_video(random_nonviolence_video_path)

In [8]:
IMG_HEIGHT, IMG_WIDTH = 112, 112 # Small objects or intricate actions, increasing it to 224x224
FRAMES = 20  # Number of frames || sequence per video
FRAME_SIZE = (IMG_HEIGHT, IMG_WIDTH)  # Target frame size (resize) (IMG_HEIGHT, IMG_WIDTH)
FRAME_SKIP = 10  # Frame skip for extracting frames
BATCH_SIZE = 16
EPOCHS = 20
LEARNING_RATE = 0.0001
COLOR_CHANNELS = 3
NUM_WORKERS = 8  # Number of parallel workers
CLASSES_LIST = ["NonViolence","Violence"]

In [9]:
def normalize_frame(frames):
  return frames / 255.0 # Scale pixel values to [0, 1]

def process_video(video_path, label, frame_count=FRAMES, frame_size=FRAME_SIZE):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # Get the number of frames in the video

    if total_frames == 0:
        cap.release()
        raise ValueError(f"Video {video_path} has zero frames!")
    # print(f"Total frames in video: {total_frames}")

    # Calculate the interval after which frames will be added to the list
    frame_skip = max(1, total_frames // frame_count) # fixed size of frames (FRAME_SKIP) or dynamically, ensure at least 1

    frames = [] # Declare a list to store video frames we will extract
    extracted_frames = 0 # Track the number of successfully extracted frames

    while cap.isOpened() and extracted_frames < FRAMES:
        # FRAME_SKIPS condition
        # frame_id = extracted_frames * frame_skip  # Compute frame index

        # skipping frames to avoid duplications
        # if frame_id >= total_frames:
        #     break  # Stop if exceeding total frames

        # cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id) # handles skipping no need to if frame_count % frame_skip == 0:

        success, frame = cap.read()
        if not success:
            break  # Stop if frame cannot be read
        if extracted_frames % frame_skip == 0:
            frame = cv2.resize(frame, frame_size)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
            # frame = frame / 255.0  # Normalize
        # IMAGE AUGMENTATION (if needed)
        # frame = augment_frame(frame)

            frames.append(frame)
        extracted_frames += 1

    cap.release()

    # Select a single frame representation:
    # middle_index = len(frames) // 2
    # selected_frame = frames[middle_index]  # Use the middle frame

    # Pad with black frames if not enough frames extracted -> can lead to bias
    while len(frames) < frame_count:
        # frames.append(np.zeros((*frame_size, 3), dtype=np.float32))
        frames.append(frames[-1])  # Pad with the last frame

    frames = np.array(frames, dtype=np.float32)
    frames = normalize_frame(frames)
    return frames, label  # Shape: (frame_count, 112, 112, 3)

In [10]:
from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
from tqdm import tqdm  # Progress bar

# Process videos in parallel using ThreadPoolExecutor for faster processing
# Use ProcessPoolExecutor for better parallelism with CPU-bound tasks
def prepare_data(num_frames=FRAMES):
    X, y = [], []
    with ThreadPoolExecutor(max_workers=8) as executor:
        results = list(tqdm(executor.map(process_video, video_paths, labels), total=len(video_paths)))
        for frames, label in results:
            X.append(frames)
            y.append(label)

    return np.array(X), np.array(y)

In [11]:
video_paths = []
labels = []

for video_file in os.listdir(ViolenceVideos_Dir):
      video_paths.append(os.path.join(ViolenceVideos_Dir, video_file))
      labels.append(1)  # Violence label

for video_file in os.listdir(NonViolenceVideos_Dir):
    video_paths.append(os.path.join(NonViolenceVideos_Dir, video_file))
    labels.append(0)  # NonViolence label

In [12]:
import cv2
# Run the processing function
X, y = prepare_data()

# Print dataset shape
print(f"Dataset Size: {len(X)} videos")
print(f"X Shape: {X.shape}")  # Expected: (num_videos, FRAMES, 224, 224, 3)
print(f"y Shape: {y.shape}")  # Expected: (num_videos,)

100%|██████████| 2000/2000 [00:37<00:00, 52.82it/s]


Dataset Size: 2000 videos
X Shape: (2000, 20, 112, 112, 3)
y Shape: (2000,)


In [13]:
from sklearn.model_selection import train_test_split

# Train-validation-test split (70-15-15)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print(f"Train: {len(X_train)}, Validation: {len(X_val)}, Test: {len(X_test)}")

Train: 1400, Validation: 300, Test: 300


In [14]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Early stopping to prevent overfitting, can monitor val_accuracy
early_stopping = EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

# Learning rate schedular
# Create ReduceLROnPlateau Callback to reduce overfitting by decreasing learning
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6, verbose=1
)

# Save the best model during training
model_checkpoint = ModelCheckpoint(
    "best_model.keras", monitor="val_loss", save_best_only=True
)

In [16]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, TimeDistributed, LSTM 
from tensorflow.keras.models import Sequential

# Load InceptionV3 and modify for 3D input
def build_model():
    # Load pre-trained InceptionV3 (excluding the classification layer)
    base_model = InceptionV3(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)
    )
    base_model.trainable = False # Freeze pre-trained weights

    model = Sequential([
    TimeDistributed(base_model, input_shape=(FRAMES, IMG_HEIGHT, IMG_WIDTH, 3)),  # Apply CNN to each frame
    TimeDistributed(GlobalAveragePooling2D()),  # Convert feature maps to 1D feature vectors
    LSTM(256, return_sequences=False),  # Process sequence of frames
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax')  # Output: Violence (1) or Non-Violence (0)
])

    return model
    
model = build_model()

model.compile(optimizer=Adam(learning_rate=LEARNING_RATE), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(**kwargs)


In [17]:
history = model.fit(
    # train_ds.prefetch(tf.data.AUTOTUNE),
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs = EPOCHS,
    batch_size = BATCH_SIZE,
    callbacks = [early_stopping, model_checkpoint, lr_scheduler]
)

Epoch 1/20
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m213s[0m 985ms/step - accuracy: 0.5253 - loss: 0.7992 - val_accuracy: 0.7133 - val_loss: 0.6197 - learning_rate: 1.0000e-04
Epoch 2/20
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 437ms/step - accuracy: 0.6613 - loss: 0.6173 - val_accuracy: 0.7033 - val_loss: 0.5847 - learning_rate: 1.0000e-04
Epoch 3/20
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 438ms/step - accuracy: 0.7186 - loss: 0.5564 - val_accuracy: 0.7033 - val_loss: 0.5374 - learning_rate: 1.0000e-04
Epoch 4/20
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 434ms/step - accuracy: 0.7592 - loss: 0.4889 - val_accuracy: 0.7233 - val_loss: 0.5352 - learning_rate: 1.0000e-04
Epoch 5/20
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 438ms/step - accuracy: 0.7781 - loss: 0.4560 - val_accuracy: 0.7167 - val_loss: 0.5151 - learning_rate: 1.0000e-04
Epoch 6/20
[1m88/88[0m [32m━━━━━━━━━━━━━━

In [18]:
model.save("inceptionV3_violence_detection_model.h5")

from tensorflow.keras.models import load_model

loaded_model = load_model("inceptionV3_violence_detection_model.h5")

In [19]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 470ms/step - accuracy: 0.7883 - loss: 0.4700
Test Accuracy: 0.8200
