In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split

# Pin the global NumPy and TensorFlow random generators to one shared seed so
# that repeated runs start from the same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)


In [None]:

# Upload kaggle.json file
# Colab-only step: `google.colab` is importable only inside a Colab runtime.
# files.upload() prompts for a file; the shell commands below expect the upload
# to land in the current working directory under the name kaggle.json.
from google.colab import files
files.upload()

# Set up Kaggle API credentials
# Create ~/.kaggle if needed, move the uploaded token there, and restrict it to
# owner read/write (mode 600) so the API key is not readable by other users.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


Saving kaggle.json to kaggle.json


In [None]:
# Download dataset from Kaggle
# Fetches real-life-violence-situations-dataset.zip into the working directory
# (about 3.58 GB according to the logged download output).
!kaggle datasets download -d mohamedmustafa/real-life-violence-situations-dataset

# Extract dataset
# Unpacks the whole archive under ./violence_dataset; re-running the cell
# extracts everything again.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
import zipfile
with zipfile.ZipFile("real-life-violence-situations-dataset.zip", "r") as zip_ref:
    zip_ref.extractall("violence_dataset")

# Verify dataset structure
# Recursive listing of the extracted folder; it prints one entry per video, so
# the cell output is long.
!ls -R "violence_dataset/Real Life Violence Dataset"


Dataset URL: https://www.kaggle.com/datasets/mohamedmustafa/real-life-violence-situations-dataset
License(s): copyright-authors
Downloading real-life-violence-situations-dataset.zip to /content
100% 3.57G/3.58G [00:42<00:00, 137MB/s]
100% 3.58G/3.58G [00:42<00:00, 90.5MB/s]
'violence_dataset/Real Life Violence Dataset':
NonViolence  Violence

'violence_dataset/Real Life Violence Dataset/NonViolence':
NV_1000.mp4  NV_212.mp4  NV_325.mp4  NV_438.mp4  NV_550.mp4  NV_663.mp4  NV_776.mp4  NV_889.avi
NV_100.mp4   NV_213.mp4  NV_326.mp4  NV_439.mp4  NV_551.mp4  NV_664.mp4  NV_777.mp4  NV_88.mp4
NV_101.mp4   NV_214.mp4  NV_327.mp4  NV_43.mp4	 NV_552.mp4  NV_665.mp4  NV_778.mp4  NV_890.avi
NV_102.mp4   NV_215.mp4  NV_328.mp4  NV_440.mp4  NV_553.mp4  NV_666.mp4  NV_779.mp4  NV_891.avi
NV_103.mp4   NV_216.mp4  NV_329.mp4  NV_441.mp4  NV_554.mp4  NV_667.mp4  NV_77.mp4   NV_892.avi
NV_104.mp4   NV_217.mp4  NV_32.mp4   NV_442.mp4  NV_555.mp4  NV_668.mp4  NV_780.mp4  NV_893.avi
NV_105.mp4   NV_218.mp

In [None]:
def load_video_frames(video_path, frame_count=16, frame_size=(64, 64)):
    """Sample `frame_count` evenly spaced frames from a video file.

    The sampling step is ``total_frames // frame_count`` (at least 1), so the
    selected frames are spread uniformly from the start of the video.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.
        frame_count: Number of frames to sample.
        frame_size: ``(width, height)`` passed to ``cv2.resize`` for every
            sampled frame.

    Returns:
        A NumPy array stacking the `frame_count` resized frames (pixel values
        are not rescaled here), or ``None`` when `frame_count` is not positive,
        the video cannot be opened, or fewer than `frame_count` frames could
        be read.
    """
    # A non-positive request can never be satisfied (and 0 would divide by zero below).
    if frame_count <= 0:
        return None

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            return None

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_gap = max(1, total_frames // frame_count)  # Select frames uniformly

        for i in range(frame_count):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i * frame_gap)
            success, frame = cap.read()
            if not success:
                # Ran past the end of the video (or hit a decode failure).
                break
            frames.append(cv2.resize(frame, frame_size))
    finally:
        # Always free the capture handle, even if reading or resizing raised.
        cap.release()

    return np.array(frames) if len(frames) == frame_count else None


In [None]:

violence_path = "violence_dataset/Real Life Violence Dataset/Violence"
non_violence_path = "violence_dataset/Real Life Violence Dataset/NonViolence"

X, y, timestamps = [], [], []

# Process all available videos.
# os.listdir() returns entries in arbitrary, filesystem-dependent order; sorting
# fixes the sample order so the seeded train/test split picks the same videos
# on every machine and run.
violence_videos = sorted(os.listdir(violence_path))
non_violence_videos = sorted(os.listdir(non_violence_path))

print(f"🔍 Processing {len(violence_videos)} violence videos...")
print(f"🔍 Processing {len(non_violence_videos)} non-violence videos...")

def load_video_frames_with_timestamps(video_path, frame_count=16, frame_size=(64, 64)):
    """Sample evenly spaced frames from a video together with their timestamps.

    The sampling step is ``total_frames // frame_count`` (at least 1). Each
    timestamp is the sampled frame index divided by the FPS the container
    reports.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.
        frame_count: Number of frames to sample.
        frame_size: ``(width, height)`` passed to ``cv2.resize`` for every
            sampled frame.

    Returns:
        A ``(frames, timestamps)`` tuple. ``frames`` is a NumPy array stacking
        the `frame_count` resized frames, or ``None`` when fewer frames could
        be read (or `frame_count` is not positive). ``timestamps`` lists the
        time in seconds of every frame that was read, even when ``frames`` is
        ``None``; entries are ``nan`` when the reported FPS is not positive.
    """
    frames = []
    timestamps = []

    # A non-positive request can never be satisfied (and 0 would divide by zero below).
    if frame_count <= 0:
        return None, timestamps

    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_rate = cap.get(cv2.CAP_PROP_FPS)  # Get video FPS
        frame_gap = max(1, total_frames // frame_count)  # Uniform frame selection

        for i in range(frame_count):
            frame_index = i * frame_gap
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            success, frame = cap.read()
            if not success:
                break
            frames.append(cv2.resize(frame, frame_size))
            # Convert frame number to seconds; a reported FPS of 0 would raise
            # ZeroDivisionError, so the time is recorded as unknown instead.
            timestamps.append(frame_index / frame_rate if frame_rate > 0 else float("nan"))
    finally:
        # Always free the capture handle, even if reading or resizing raised.
        cap.release()

    clip = np.array(frames) if len(frames) == frame_count else None
    return clip, timestamps

# Process videos and store timestamps.
# Results are gathered in fresh local lists so this cell can be re-run on its
# own: X and y are rebound to arrays further down, after which appending to
# them would fail.
clips, labels, clip_timestamps = [], [], []

for folder, videos, label in (
    (violence_path, violence_videos, 1),          # 1 = Violence label
    (non_violence_path, non_violence_videos, 0),  # 0 = Non-Violence label
):
    for video in videos:
        video_path = os.path.join(folder, video)
        frames, time_stamps = load_video_frames_with_timestamps(video_path, frame_count=16)
        # Videos that could not supply all 16 frames come back as None and are skipped.
        if frames is not None:
            clips.append(frames)
            labels.append(label)
            clip_timestamps.append(time_stamps)

# Convert to NumPy arrays and normalize pixel values to [0, 1].
# float32 is requested explicitly: dividing the raw frames by 255.0 directly
# would produce a float64 array twice the size.
X = np.asarray(clips, dtype=np.float32) / 255.0
y = np.array(labels)
timestamps = clip_timestamps

# Split dataset; stratifying on y keeps the class ratio identical in the
# training and test portions.
X_train, X_test, y_train, y_test, timestamps_train, timestamps_test = train_test_split(
    X, y, timestamps, test_size=0.2, random_state=42, stratify=y
)

print(f"✅ Dataset ready: {X.shape}")


🔍 Processing 1000 violence videos...
🔍 Processing 1000 non-violence videos...
✅ Dataset ready: (2000, 16, 64, 64, 3)


In [None]:
model = Sequential()

# Declare the clip shape with an explicit Input object instead of passing
# `input_shape=` to the first Conv3D; Keras emits a warning for the latter in
# Sequential models. Shape is (frames, height, width, channels).
model.add(tf.keras.Input(shape=(16, 64, 64, 3)))

# 3D Convolution Layers with "same" padding
model.add(Conv3D(64, (3, 3, 3), activation='relu', padding="same"))
# Pool only the two spatial axes here so all 16 frames survive the first stage
model.add(MaxPooling3D(pool_size=(1, 2, 2)))

model.add(Conv3D(128, (3, 3, 3), activation='relu', padding="same"))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

model.add(Conv3D(256, (3, 3, 3), activation='relu', padding="same"))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

model.add(Flatten())

# Fully Connected Layers
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
# Single sigmoid unit: the output is a score in [0, 1] for the label-1 (violence) class
model.add(Dense(1, activation='sigmoid'))  # Binary classification

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Monitor training on a validation slice carved out of the training data
# (Keras takes the last 20% of X_train / y_train, before shuffling) so that
# X_test / y_test remain unseen until the final model.evaluate() call and the
# reported accuracy is a genuine hold-out estimate.
history = model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.2)


Epoch 1/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 251ms/step - accuracy: 0.5312 - loss: 1.0638 - val_accuracy: 0.7975 - val_loss: 0.4776
Epoch 2/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 213ms/step - accuracy: 0.8040 - loss: 0.4461 - val_accuracy: 0.8125 - val_loss: 0.3770
Epoch 3/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 220ms/step - accuracy: 0.8663 - loss: 0.3441 - val_accuracy: 0.8100 - val_loss: 0.7173
Epoch 4/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 216ms/step - accuracy: 0.8471 - loss: 0.3657 - val_accuracy: 0.8525 - val_loss: 0.4081
Epoch 5/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 218ms/step - accuracy: 0.8733 - loss: 0.3078 - val_accuracy: 0.8575 - val_loss: 0.4404
Epoch 6/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 211ms/step - accuracy: 0.8972 - loss: 0.2401 - val_accuracy: 0.8550 - val_loss: 0.4279
Epoch 7/10

In [None]:
# Score the trained model on the X_test / y_test split. evaluate() returns the
# loss followed by the metrics given to compile(), here only accuracy.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"📊 Model Accuracy: {accuracy * 100:.2f}%")


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 156ms/step - accuracy: 0.8540 - loss: 0.5428
📊 Model Accuracy: 84.75%


In [None]:
def predict_violence(video_path, threshold=0.5):
    """Classify a single video with the trained model and print the verdict.

    Args:
        video_path: Path of the video to classify.
        threshold: Score above which the clip is reported as violent.

    Returns:
        None. The outcome (or a warning when the video yields fewer than 16
        frames) is printed.
    """
    frames = load_video_frames(video_path, frame_count=16)
    if frames is None:
        print("⚠️ Not enough frames in the video")
        return

    # The model was trained on pixel values divided by 255.0, so inference must
    # apply the same scaling; raw 0-255 frames do not match the training inputs.
    batch = np.expand_dims(frames.astype("float32") / 255.0, axis=0)  # Add batch dimension
    prediction = model.predict(batch)[0][0]

    if prediction > threshold:
        print("🚨 Violence Detected!")
    else:
        print("✅ No Violence Detected")

# Example Usage: classify one clip from the dataset's Violence folder.
# NOTE(review): this file lives in the directory the dataset was built from, so
# it may have been part of the training data and is not an independent check.
video_path = "violence_dataset/Real Life Violence Dataset/Violence/V_101.mp4"
predict_violence(video_path)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
🚨 Violence Detected!
