In [3]:
# Importing necessary modules and libraries
from glob import glob 
import shutil
import random
import sys
import os 
import pandas as pd
import numpy as np


## Libraries for Data Augmentation
import pytorchvideo
from pytorchvideo.data import LabeledVideoDataset, Kinetics, make_clip_sampler, labeled_video_dataset

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    UniformTemporalSubsample,
    Permute
)
from torchvision.transforms import(
    Compose, 
    Lambda,
    RandomCrop,
    RandomHorizontalFlip,
    Resize
)

from torchvision.transforms._transforms_video import(
    CenterCropVideo, 
    NormalizeVideo
)

from torch.utils.data import DataLoader


# Import necessary libraries for real-life violence detection

import torch.nn as nn
import torch
from torch.optim import lr_scheduler
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from pytorch_lightning import LightningModule, seed_everything, Trainer
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import classification_report
import torchmetrics


In [18]:
# Define a video transformation pipeline

video_transform = Compose([
    ApplyTransformToKey(key = 'video',
    transform = Compose([
        UniformTemporalSubsample(20), 
        Lambda(lambda x:x/255),
        Normalize((0.45,0.45,0.45), (0.225,0.225,0.225)),
        RandomShortSideScale(min_size = 248, max_size = 256),
        CenterCropVideo(224),
        RandomHorizontalFlip(p = .5)
    ]) ),
])

In [19]:
# path of the data
data_path = '/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset'

In [20]:

def gen_the_local_dataset(path_to_dir, train_s=0.8, val=0.2, test=0.1):
    """
    Generate the local dataset by splitting the files in the given directory into train, validation, and test sets.
    
    Args:
        path_to_dir (str): The path to the directory containing the files.
        train_s (float): The proportion of files to be included in the train set (default is 0.8).
        val (float): The proportion of files to be included in the validation set (default is 0.2).
        test (float): The proportion of files to be included in the test set (default is 0.1).
    
    Returns:
        tuple: A tuple containing the paths to the train, validation, and test sets.
    """
    
    # Create a dictionary to store the proportions for each set
    koef = {0: train_s, 1: val, 2: test}
    
    # Iterate over the keys (subdirectories) in the given directory
    for key in os.listdir(path_to_dir):
        source_path = os.path.join(path_to_dir, key)
        paths = os.listdir(source_path)
        
        # Define the destination paths for each set
        destination_path = [
            f"/kaggle/working/train/{key}",
            f"/kaggle/working/validation/{key}",
            f"/kaggle/working/test/{key}"
        ]
        
        # Create the destination directories if they don't exist
        for path_new in destination_path:
            if not os.path.exists(path_new):
                os.makedirs(path_new)
        
        i = 0
        # Iterate over the destination paths and copy the selected files
        for path_new in destination_path:
            selected_files_ = random.sample(paths, int(len(paths) * koef[i]))
            
            for file_name in selected_files_:
                destination_file = os.path.join(path_new, file_name)
                source_file = os.path.join(source_path, file_name)
                shutil.copy(source_file, destination_file)
            
            i += 1
    
    # Set the paths for the train, validation, and test sets
    train_path = '/kaggle/working/train/'
    val_path = '/kaggle/working/validation/'
    test_path = '/kaggle/working/test/'
    
    return train_path, val_path, test_path

In [21]:
train_path, val_path, test_path = gen_the_local_dataset(data_path)

In [22]:

def return_loader(train_path, val_path, test_path, video_transform, batch_size=4, numworker=3, pin_memory=True):
    """
    Returns data loaders for training, validation, and testing datasets.
    
    Args:
        train_path (str): Path to the training data.
        val_path (str): Path to the validation data.
        test_path (str): Path to the testing data.
        video_transform (torchvision.transforms.Compose): Transformations to be applied to the video frames.
        batch_size (int, optional): Number of samples per batch. Defaults to 4.
        numworker (int, optional): Number of worker threads for data loading. Defaults to 3.
        pin_memory (bool, optional): If True, the data loader will copy tensors into pinned memory. Defaults to True.
    
    Returns:
        tuple: A tuple containing the training, validation, and testing data loaders.
    """
    
    # Create training dataset
    train_dataset = pytorchvideo.data.Kinetics(
        data_path=train_path,
        clip_sampler=pytorchvideo.data.make_clip_sampler("random", 2),
        transform=video_transform, 
        decode_audio=False
    )
    
    # Create training data loader
    train_loader = DataLoader(
        train_dataset, batch_size,
        num_workers=numworker,
        pin_memory=pin_memory
    )
    
    #---------------------------------------------------------------------------------------------------------------------------------
    
    # Create validation dataset
    val_dataset = pytorchvideo.data.Kinetics(
        data_path=val_path,
        clip_sampler=pytorchvideo.data.make_clip_sampler("random", 2),
        transform=video_transform, 
        decode_audio=False
    )
    
    # Create validation data loader
    val_loader = DataLoader(
        val_dataset, batch_size,
        num_workers=numworker,
        pin_memory=pin_memory
    )
    
    #------------------------------------------------------------------------------------------------------------------------------------
    
    # Create testing dataset
    test_dataset = pytorchvideo.data.Kinetics(
        data_path=test_path,
        clip_sampler=pytorchvideo.data.make_clip_sampler("random", 2),
        transform=video_transform, 
        decode_audio=False
    )
    
    # Create testing data loader
    test_loader = DataLoader(
        test_dataset, batch_size,
        num_workers=numworker,
        pin_memory=pin_memory
    )
    
    return train_loader, val_loader, test_loader

In [23]:
train_loader, val_loader, test_loader = return_loader(train_path, val_path, test_path, video_transform, batch_size = 4, numworker = 3, pin_memory = True)

In [42]:
class Model(LightningModule):
    
    def __init__(self):
        super(Model, self).__init__()
        
        # Load the pre-trained video model
        self.video_model = torch.hub.load('facebookresearch/pytorchvideo','efficient_x3d_xs', pretrained = True)
        
        # Activation function
        self.relu = nn.ReLU()
        
        # Linear layer for classification
        self.Linear = nn.Linear(400, 1)

        # Learning rate
        self.lr = 1e-3

        # Metrics for evaluation
        self.metrics = torchmetrics.Accuracy(task='binary')
        
        # Loss function
        self.criterion = nn.BCEWithLogitsLoss()
        
    def forward(self, x):
        x = self.relu(self.video_model(x))
        x = self.Linear(x)
        return x
    
    def configure_optimizers(self):
        # Configure the optimizer and learning rate scheduler
        optimizer = torch.optim.AdamW(params = self.parameters(), lr = self.lr)
        scheduler = lr_scheduler.StepLR(optimizer, step_size = 2, gamma = 0.1)
        
        return {
            'optimizer': optimizer,
            'lr_scheduler': scheduler
        }
        
    def _common_step(self, batch, batch_idx):
        video, label = batch['video'], batch['label']
        label = label.unsqueeze(1)
        scores = self.forward(video)
        
        loss = self.criterion(scores, label.to(torch.float32))
        metric = self.metrics(scores, label.to(torch.int64))
        
        return loss, scores, label
    
    def training_step(self, batch, batch_idx):
        loss, scores, labels = self._common_step(batch, batch_idx)
        metric = self.metrics(scores, labels.to(torch.int64))
        
        # Log training loss and accuracy
        self.log_dict({'train_loss': loss, 'training_accuracy':metric},
                       on_step = False, on_epoch = True, prog_bar = True)
        
        return {'loss': loss, 'metric': metric.detach()}
        
    def validation_step(self, batch, batch_idx):
        loss, scores, labels = self._common_step(batch, batch_idx)
        metric = self.metrics(scores, labels.to(torch.int64))
        
        # Log validation loss and accuracy
        self.log_dict({'val_loss': loss, 'val_accuracy':metric},
                       on_step = False, on_epoch = True, prog_bar = True)
        
        return {'loss': loss, 'metric': metric.detach()}
    
    def test_step(self, batch, batch_idx):
        loss, scores, labels = self._common_step(batch, batch_idx)
        metric = self.metrics(scores, labels.to(torch.int64))
        
        # Log test loss and accuracy
        self.log_dict({'test_loss': loss, 'test_accuracy':metric},
                       on_step = False, on_epoch = True, prog_bar = True)
        
        return {'loss': loss, 'metric': metric.detach()}
    
    def predict_step(self, batch, batch_idx):
        video, label = batch['video'], batch['batch']
        label = label.unsqueeze(1)
        scores = self.forward(video)
        preds = torch.argmax(scores, dim = 1)
        
        return preds

    

In [43]:
checkpoint_callback = ModelCheckpoint(monitor = 'val_loss', dirpath = 'checkpoints',
                                     filename = 'file', save_last = True)

lr_monitor = LearningRateMonitor(logging_interval = 'epoch')

In [44]:
%%time
model = Model()

trainer = Trainer(accelerator = 'gpu',
                 devices = [0],
                 min_epochs = 1,
                 max_epochs = 4,
                 callbacks = [lr_monitor, checkpoint_callback])

trainer.fit(model, train_dataloaders = train_loader, val_dataloaders = val_loader)

Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

/opt/conda/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 3. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

CPU times: user 8min 39s, sys: 51.9 s, total: 9min 31s
Wall time: 1h 1min 12s


In [45]:
trainer.validate(model, val_loader )

Validation: |          | 0/? [00:00<?, ?it/s]

[{'val_loss': 0.061311185359954834, 'val_accuracy': 0.9796748161315918}]

In [46]:
trainer.test(model, test_loader )

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.059371467679739, 'test_accuracy': 0.9836065769195557}]

In [48]:

def save_model(model, filepath):
    """
    Save the state dictionary of the model to the specified filepath.
    
    Args:
        model (nn.Module): The model to save.
        filepath (str): The filepath to save the model state dictionary.
    """
    torch.save(model.state_dict(), filepath)

#============================================================

class Model_test(nn.Module):
    """
    A custom model for testing real-life violence detection.
    """

    def __init__(self):
        super(Model_test, self).__init__()
        self.video_model = torch.hub.load('facebookresearch/pytorchvideo', 'efficient_x3d_xs', pretrained=False)
        self.relu = nn.ReLU()
        self.Linear = nn.Linear(400, 1)

    def forward(self, x):
        """
        Forward pass of the model.
        
        Args:
            x (torch.Tensor): Input tensor.
        
        Returns:
            torch.Tensor: Output tensor.
        """
        x = self.relu(self.video_model(x))
        x = self.Linear(x)
        return x

#============================================================

model_test = Model_test()

Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main


In [49]:
model_test.load_state_dict(torch.load('/kaggle/working/model.pth'))

<All keys matched successfully>

In [51]:
from pytorchvideo.data.encoded_video import EncodedVideo

video = EncodedVideo.from_path('/kaggle/working/test/NonViolence/NV_424.mp4')

video_data = video.get_clip(0,2)

video_data = video_transform(video_data)

video_data['video'].shape

torch.Size([3, 20, 224, 224])

In [52]:
model = model.cuda()

inputs = video_data['video'].cuda()

inputs = torch.unsqueeze(inputs, 0 )
inputs.shape

torch.Size([1, 3, 20, 224, 224])

In [53]:


preds = model(inputs)

preds = preds.detach().cpu().numpy()

preds



array([[-7.669764]], dtype=float32)

In [54]:
preds = np.where(preds>0.5,1,0)

In [55]:
preds

array([[0]])