# Violence Detection Iteration-1

## Constant information to set the data and engineering resources

In [None]:
# Path information to load videos and annotations
import os

ROOT_PATH = r"C:\Users\margo\OneDrive\UOC\projects\thesis"
DATA_PATH = os.path.join(ROOT_PATH, 'data', 'kranok-nv')
ANNOTATIONS_PATH = os.path.join(DATA_PATH, "Annotations")
VIDEOS_PATH = os.path.join(DATA_PATH, "Videos")
INFO_PATH = os.path.join(DATA_PATH, "Info")

MODELS_PATH = os.path.join(ROOT_PATH, 'models')

LOGS_PATH = os.path.join(ROOT_PATH, "logs")

DATASET_TRAIN_PATH = os.path.join(DATA_PATH, INFO_PATH, "violence_detection_train.hdf5")
DATASET_VAL_PATH = os.path.join(DATA_PATH, INFO_PATH, "violence_detection_val.hdf5")
DATASET_TEST_PATH = os.path.join(DATA_PATH, INFO_PATH, "violence_detection_test.hdf5")

# This command is to run all the cells on testing process
CREATE_DATASET = False
IS_TEST_RUN = False
INSTALL_STUFF = False
LOAD_MODEL = False

EPOCHS = 10
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 5

MAX_NUM_FRAMES = 15000

# Dataset Description

* Folder Videos is composed by a list of videos by name: [v1, v2, v3]
* Folder Annotations is composed by a list of jsons by name: [v1, v2, v3]

The name of each video corresponds to the name of each annotation

The annotation json has the following structure:

```json
{
  "Frame_0000096": {
    "numberOfPeople": 1,
    "pedestriansData": [
      [
        "640",
        "90",
        "710",
        "198",
        "Normal"
      ]
    ]
  }
}
```

# Load/Extract data

I will create a function to extract the data information per each video-annotation pair that will contain the following structure

```
input: video-name
output: VideoInformation

class Label(enum):
    NORMAL: "Normal"
    VIOLENT: "Violent"

class PersonInfo:
    ax: int
    ay: int
    bx: int
    by: int
    # action of the person
    label: LabelEnum

class VideoInfo:
    frame_name: str
    n_people: int
    people_info: list[PersonInfo]
    label: Label

# dictionary of paris by frame_name, frame_numpy_array
frame_info: Dict[str, np.ndarray] = dict()
``` 

In [None]:
from enum import Enum


# class syntax
class Label(Enum):
    VIOLENT = "Violent"
    NORMAL = "Normal"

In [None]:
class PersonInfo:
    def __init__(self, ax: int, ay: int, bx: int, by: int, label: str):
        self.ax: int = ax
        self.ay: int = ay
        self.bx: int = bx
        self.by: int = by
        self.label: Label = Label(label)

    def __str__(self):
        return f"A: ({self.ax}, {self.ay}), B: ({self.bx}, {self.by}), Label: {self.label.value}"

    def __repr__(self):
        return f"A: ({self.ax}, {self.ay}), B: ({self.bx}, {self.by}), Label: {self.label.value}"

In [None]:
class VideoInfo:
    def __init__(self, category: Label, frame_name: str, n_people: int, people_info: list[PersonInfo]) -> None:
        self.frame_name: str = frame_name
        self.n_people: int = n_people
        self.people_info: List[PersonInfo] = people_info
        self.category = category
        self.label: int = 1 if category == Label.VIOLENT else 0

    def __str__(self):
        return f"Category: {self.category.value}, Frame: {self.frame_name}, Persons: {self.n_people}\n{[ppl for ppl in self.people_info]}\n"

    def __repr__(self):
        return f"Category: {self.category.value}, Frame: {self.frame_name}, Persons: {self.n_people}\n{[ppl for ppl in self.people_info]}\n"

In [None]:
# test videoInfo class
p = PersonInfo(0, 0, 10, 10, "Violent")
lp = [p]
vi = VideoInfo(Label.NORMAL, "Frame_0001324", 1, lp)
print(vi)
vi = VideoInfo(Label.VIOLENT, "Frame_0001324", 1, lp)
print(vi)


---

# Store Video Information

In [None]:
from typing import Tuple
import numpy as np
import os


# Define a dictionary to store frame data and category within HDF5 datasets
def create_hdf5_dataset(hdf5_file, name, shape: Tuple = (100, IMG_WIDTH, IMG_HEIGHT, 1), dtype=np.float32):
    dataset = hdf5_file.create_dataset(name, shape, dtype=dtype)
    return dataset

In [None]:
from typing import List, Dict
import json
import cv2
import numpy as np


def load_video_data(name: str, category: Label, target_size: tuple = (IMG_WIDTH, IMG_HEIGHT)) -> (
        List[VideoInfo], Dict[str, np.ndarray]):
    annotation_path = os.path.join(ANNOTATIONS_PATH, f"{name}.json")
    video_path = os.path.join(VIDEOS_PATH, f"{name}.mp4")
    video_infos = []
    frame_infos = {}

    with open(annotation_path) as f:
        data_list = json.load(f)

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error opening video: {video_path}")
        return video_infos, frame_infos

    for frame_name in data_list.keys():
        n_people = data_list[frame_name]["numberOfPeople"]
        pedestrians = data_list[frame_name]["pedestriansData"]
        people_info = []
        for ax, ay, bx, by, label in pedestrians:
            people_info.append(PersonInfo(ax, ay, bx, by, label))

        # Read the frame based on frame name convention: Frame_0000119
        frame_id = int(frame_name.split("_")[-1])
        ret, frame = cap.read(frame_id)  # Read frame by index

        # Normalize the frame
        frame = frame.astype(np.float32) / 255.0

        # Resize all frames to same dimension
        frame = cv2.resize(frame, dsize=target_size)

        # Convert to grayscale
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Include data in storage        
        frame_infos[frame_name] = frame
        video_infos.append(VideoInfo(category, frame_name, n_people, people_info))

    cap.release()
    return video_infos, frame_infos


# Test function
vi, fi = load_video_data("Normal_00001", Label.NORMAL)
print(vi)
print(fi)

In [None]:
# Function to process a single video and its annotation
def process_video(name, category, hdf5_file):
    # Load Video Info and Frame Info
    video_infos, frame_infos = load_video_data(name, category)
    frames_dataset = create_hdf5_dataset(hdf5_file, f"{name}/frames",
                                         (len(frame_infos), IMG_WIDTH, IMG_HEIGHT, 1))  # Create dataset for frames
    labels_dataset = create_hdf5_dataset(hdf5_file, f"{name}/labels", (len(frame_infos),),
                                         np.integer)  # Create dataset for labels

    # Iterate through processed frames
    frame_count = 0
    for frame_info, video_info in zip(frame_infos.items(), video_infos):
        frame_name, frame = frame_info
        frame = frame.reshape(IMG_WIDTH, IMG_HEIGHT, 1)  # Reshape to add a single channel for grayscale
        frame = frame.reshape(1, IMG_WIDTH, IMG_HEIGHT,
                              1)  # Reshape again to add the first dimension for a single frame

        # Use indexing with dynamic dimension
        frames_dataset[frame_count] = frame
        labels_dataset[frame_count] = video_info.label
        frame_count += 1

    return frames_dataset, labels_dataset

In [None]:
def percentage_processed(l: Label, v_processed: int, n_processed: int, total_processed: int) -> float:
    if l == Label.VIOLENT:
        return v_processed / total_processed

    return n_processed / total_processed


def is_train_set(l: Label, v_processed: int, n_processed: int, total_processed: int) -> bool:
    return percentage_processed(l, v_processed, n_processed, total_processed) <= 0.8


def is_val_set(l: Label, v_processed: int, n_processed: int, total_processed: int) -> bool:
    return percentage_processed(l, v_processed, n_processed, total_processed) <= 1


In [None]:
import h5py


# Function to create HDF5 file (handles potential overwrite)
def create_hdf5_file(filepath):
    try:
        hdf5_file = h5py.File(filepath, "w")  # Try creating in write mode
        return hdf5_file
    except OSError:  # Handle potential overwrite error
        os.remove(filepath)  # Remove existing file
        print(f"Removed existing file: {filepath}")
        hdf5_file = h5py.File(filepath, "w")  # Retry creating
        return hdf5_file

In [None]:
import random
import glob


def save_datasets():
    # Create empty HDF5 file for each data split (train, validation, test)
    train_hdf5_file = create_hdf5_file(DATASET_TRAIN_PATH)
    val_hdf5_file = create_hdf5_file(DATASET_VAL_PATH)
    # test_hdf5_file = create_hdf5_file(DATASET_TEST_PATH)
    try:
        # Loop through videos and annotations, processing each and storing in appropriate HDF5 file
        videos = os.listdir(str(os.path.join(VIDEOS_PATH)))
        violent_videos = [os.path.splitext(v)[0] for v in videos if v.startswith(Label.VIOLENT.value)]
        normal_videos = [os.path.splitext(v)[0] for v in videos if v.startswith(Label.NORMAL.value)]

        # To balance the data I'll drop some data
        minimum_size = min(len(violent_videos), len(normal_videos))

        # load less volume for testing purposes
        if IS_TEST_RUN:
            minimum_size = 10

        violent_videos = random.sample(violent_videos, k=minimum_size)
        normal_videos = random.sample(normal_videos, k=minimum_size)

        total = len(violent_videos) + len(normal_videos)
        processed = 0
        for violent_video, normal_video in zip(violent_videos, normal_videos):
            if processed / total < 0.8:
                process_video(violent_video, Label.VIOLENT, train_hdf5_file)
                process_video(normal_video, Label.NORMAL, train_hdf5_file)
            else:
                process_video(violent_video, Label.VIOLENT, val_hdf5_file)
                process_video(normal_video, Label.NORMAL, val_hdf5_file)
            # else: -- no test yet
            #     process_video(name, lab, test_hdf5_file)

            print(f'Videos processed: {violent_video} {normal_video}')
            print(f'Percentage processed: {(processed / total) * 100}%')
            processed += 2
    except Exception as e:
        print(f'ERROR: {e}')

    finally:
        # Close the HDF5 files after processing all videos
        train_hdf5_file.close()
        val_hdf5_file.close()
        # test_hdf5_file.close()

    print("Datasets are saved in HDF5 files correctly")
    print(f"Train dataset: {DATASET_TRAIN_PATH}")
    print(f"Val dataset: {DATASET_VAL_PATH}")


if CREATE_DATASET:
    save_datasets()

---

# Model-1: Simple CNN for Violence Detection
This is a basic CNN structure suitable for initial exploration. We'll break it down step-by-step:

## Input Layer:
Takes a single frame as input, assuming a shape of (224, 224, 1) (grayscale).

## Convolutional Layer:
Applies a set of filters (kernels) to the input frame to extract features.
Typical choices for the first layer could be:

* Number of filters: 32
* Kernel size: 3x3
* Activation function: ReLU (Rectified Linear Unit)

## Pooling Layer (Optional):
Reduces the dimensionality of the data extracted by the convolutional layer.
Options include MaxPooling or AveragePooling with a kernel size of 2x2 and a stride of 2.

## Flatten Layer:
Converts the output from the convolutional layers (usually a 3D array) into a 1D vector suitable for feeding into a fully-connected layer.

## Fully-Connected Layer:
Performs classification based on the extracted features.
I will use a single neuron with a sigmoid activation for binary classification (violence vs. non-violence).

In [None]:
# Execute this if you are not able to install tensorflow properly
if INSTALL_STUFF:
    !pip install tensorflow[and-cuda]
    !pip install pydot
    !pip install graphviz

In [None]:
from tensorflow import keras
from keras import Sequential
from keras.utils import plot_model
from keras.layers import Conv2D

def get_basic_model():
    return Sequential([
      Conv2D(32, (3, 3), activation="relu", input_shape=(224, 224, 1)),  # Convolutional layer
      Flatten(),
      Dense(1, activation="sigmoid")
    ])

# Visualize model
test_model = get_basic_model()
test_model.summary()
plot_model(test_model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
from tensorflow import keras
from keras import Sequential
from keras.utils import plot_model
from keras.layers import Conv3D, MaxPooling3D, Flatten, LSTM, Dense, Reshape, Dropout

def get_lstm_model():
    model = Sequential()

    # 1st layer group
    model.add(Conv3D(32, (3, 3, 3), strides = 1, input_shape=(MAX_NUM_FRAMES, IMG_WIDTH, IMG_HEIGHT, 1), activation='relu', padding='valid'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=2))
    
    model.add(Conv3D(64, (3, 3, 3), activation='relu', strides=1))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=2))
    
    model.add(Conv3D(128, (3, 3, 3), activation='relu', strides=1))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=2))
    shape = model.layers[-1].output_shape
    model.add(Reshape((shape[-1],shape[1]*shape[2]*shape[3])))
    
    # LSTM - Recurrent Network Layer
    model.add(LSTM(32, return_sequences=True))
    model.add(Dropout(.5))
    
    model.add((Flatten()))
    
    # FC layers group
    model.add(Dense(2048, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    
    model.add(Dense(2, activation='softmax'))
        
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
    model.summary()

    return model



# Visualize model
test_model = get_lstm_model()
test_model.summary()
plot_model(test_model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

# Train the model

## Load the data

In [None]:
# Function to explore the datasets
def traverse_datasets(hdf_file):
    """Traverse all datasets across all groups in HDF5 file."""

    import h5py

    def h5py_dataset_iterator(g, prefix=''):
        for key in g.keys():
            item = g[key]
            path = '{}/{}'.format(prefix, key)
            if isinstance(item, h5py.Dataset):  # test for dataset
                yield (path, item)
            elif isinstance(item, h5py.Group):  # test for group (go down)
                yield from h5py_dataset_iterator(item, path)

    with h5py.File(hdf_file, 'r') as f:
        for (path, dset) in h5py_dataset_iterator(f):
            print(path, dset)

    return None


# Check content of dataset
print("Checking on the train dataset")
traverse_datasets(DATASET_TRAIN_PATH)
print("Checking on the val dataset")
traverse_datasets(DATASET_VAL_PATH)
# print("Checking on the test dataset")
# traverse_datasets(DATASET_TEST_PATH)

In [323]:
from tensorflow.keras.utils import Sequence
import h5py
import numpy as np


class HDF5DataGenerator(Sequence):
    """
    Custom data generator for HDF5 data with video fragments and labels per video.
    """

    def __init__(self, hdf5_path, batch_size, target_size=(IMG_HEIGHT, IMG_WIDTH), class_mode="binary"):
        self.hdf5_path = hdf5_path
        self.batch_size = batch_size
        self.target_size = target_size
        self.class_mode = class_mode

        # Load data information from the HDF5 file
        with h5py.File(self.hdf5_path, "r") as hdf5_file:
            self.num_videos = len(hdf5_file.keys())
            print(f"Found {self.num_videos} videos in {self.hdf5_path}.")

        self.indexes = np.arange(self.num_videos)  # Create index list for shuffling

    def __len__(self):
        # Return the number of batches per epoch
        return int(np.ceil(self.num_videos / self.batch_size))

    def __getitem__(self, index):
        # Generate a single batch of data and labels
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Load data and labels for the current batch of videos
        X, y = self._load_batch_data(indexes)

        return X, y

    def _load_batch_data(self, indexes):
        X = []
        y = []
        frame_lengths = []
        with h5py.File(self.hdf5_path, "r") as hdf5_file:
            video_names = list(hdf5_file.keys())
            for i in indexes:
                # Access frames and labels datasets for the specific video
                frames = hdf5_file[f"{video_names[i]}/frames"][:]
                label = hdf5_file[f"{video_names[i]}/labels"][:]
                
                # TODO pad the frame data here to avoid homogenous issue

                # Append data and label to batch lists
                X.append(frames)
                y.append(label)
                frame_lengths.append(frames.shape[0])

        return np.array(X), np.array(y), np.array(frame_lengths)

    def on_epoch_end(self):
        # shuffle indexes for each epoch
        np.random.shuffle(self.indexes)

In [329]:
def load_dataset_data(dataset_name):
    X = []
    y = []
    frame_lengths = []
    with h5py.File(dataset_name, "r") as hdf5_file:
        video_names = list(hdf5_file.keys())
        for video_name in video_names:
            # Access frames and labels datasets for the specific video
            frames = hdf5_file[f"{video_name}/frames"][:]
            label = hdf5_file[f"{video_name}/labels"][:]
            
            # TODO pad the frame data here to avoid homogenous issue

            # Append data and label to batch lists
            X.append(frames)
            y.append(label)
            frame_lengths.append(frames.shape[0])

    return np.array(X), np.array(y), np.array(frame_lengths)

In [None]:
# Difference of number of frames per video
max_frames = 0
min_frames = 100
with h5py.File(DATASET_TRAIN_PATH, "r") as hdf5_file:
    MAX_FRAGMENT_LENGTH = 500
    video_names = list(hdf5_file.keys())
    for video in video_names:
        n_f = len(hdf5_file[f'{video}/frames'])
        max_frames = max(max_frames, n_f)
        min_frames = min(min_frames, n_f)
print("MAX_FRAMES", max_frames)
print("MIN FRAMES", min_frames)

In [None]:
if INSTALL_STUFF:
    !pip install tensorboard

# launch this command in your terminal if you want to see the tensorboard
# !tensorboard --logdir=C:\Users\margo\OneDrive\UOC\projects\thesis\logs

# Open http://localhost:6006 in your browser to access tensorboard

In [None]:
def show_model_metrics(train_history):
    # Access training and validation loss/accuracy
    train_loss = train_history.history["loss"]
    val_loss = train_history.history["val_loss"]
    train_acc = train_history.history["accuracy"]
    val_acc = train_history.history["val_accuracy"]

    # Plot loss
    plt.plot(train_loss, label="Training Loss")
    plt.plot(val_loss, label="Validation Loss")
    plt.legend()
    plt.show()

    # Plot accuracy
    plt.plot(train_acc, label="Training Accuracy")
    plt.plot(val_acc, label="Validation Accuracy")
    plt.legend()
    plt.show()

In [325]:
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

import matplotlib.pyplot as plt


def train_model_using_generators(model, epochs=EPOCHS):
    """Trains a model on the provided data.
    
    Args:
      model: The Keras model to be trained.
      epochs: Number of training epochs (default 10).
    
    Returns:
      The trained model.
    """
    # Generators
    # Define data generators for training and validation
    train_generator = HDF5DataGenerator(DATASET_TRAIN_PATH, BATCH_SIZE, target_size=(IMG_HEIGHT, IMG_WIDTH))
    val_generator = HDF5DataGenerator(DATASET_VAL_PATH, BATCH_SIZE, target_size=(IMG_HEIGHT, IMG_WIDTH))
    
    
    # Declare callbacks
    my_callbacks = [
        EarlyStopping(patience=2),
        ModelCheckpoint(filepath=os.path.join(MODELS_PATH, 'checkpoints', 'model.{epoch:02d}-{val_loss:.2f}.h5')),
        TensorBoard(log_dir=LOGS_PATH),
    ]
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    train_history = model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),  # Number of batches per epoch for training data
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=len(val_generator),  # Number of batches per epoch for validation data
        callbacks=my_callbacks
    )
    show_model_metrics(train_history)

    return model

In [327]:
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

def train_model(model, epochs=EPOCHS):
    """Trains a model on the provided data.
    
    Args:
      model: The Keras model to be trained.
      epochs: Number of training epochs (default 10).
    
    Returns:
      The trained model.
    """
    X_train, y_train, n_frames_train = load_dataset_data(DATASET_TRAIN_PATH)
    X_val, y_val, n_frames_val = load_dataset_data(DATASET_TRAIN_PATH)
    
    # Declare callbacks
    my_callbacks = [
        EarlyStopping(patience=2),
        ModelCheckpoint(filepath=os.path.join(MODELS_PATH, 'checkpoints', 'model.{epoch:02d}-{val_loss:.2f}.h5')),
        TensorBoard(log_dir=LOGS_PATH),
    ]
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    train_history = model.fit(X_train, y_train, epochs=epochs, validation_data=(X_val, y_val), callbacks=my_callbacks)
    show_model_metrics(train_history)

    return model

In [331]:
from keras.models import load_model

# Train the model
model = get_basic_model()
if LOAD_MODEL:
    trained_model = load_model(MODELS_PATH, 'basic_model.keras')
else:
    trained_model = train_model(model, epochs=10)

model.save(os.path.join(MODELS_PATH, 'basic_model.keras'))

MemoryError: Unable to allocate 28.3 MiB for an array with shape (148, 224, 224, 1) and data type float32