In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.io as io
import os

# Define your device (for GPU processing)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: ", device)

transform = transforms.Compose([
    # transforms.ToPILImage(),
    transforms.Resize((128, 128)),  # Resize frames to 128x128
    transforms.ToTensor(),  # Convert frames to tensor format
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the RGB channels
])

Device:  cuda


In [2]:
from sklearn.model_selection import train_test_split
import os

class VideoDataset(Dataset):
    def __init__(self, root_dir, transform=None, max_frames=240, train=True, test_size=0.2):
        self.root_dir = root_dir
        self.transform = transform
        self.max_frames = max_frames
        self.classes = ['non_violent', 'violent']
        self.data = []

        # Load all data
        for label, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            for cam_folder in os.listdir(class_dir):
                cam_dir = os.path.join(class_dir, cam_folder)
                if os.path.isdir(cam_dir):
                    for video_name in os.listdir(cam_dir):
                        if video_name.endswith('.mp4'):
                            video_path = os.path.join(cam_dir, video_name)
                            self.data.append((video_path, label))

        # Split data into training and testing sets
        train_data, test_data = train_test_split(self.data, test_size=test_size, stratify=[x[1] for x in self.data])

        # Select either train or test data
        if train:
            self.data = train_data
        else:
            self.data = test_data

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        video_path, label = self.data[idx]
        
        # Load the video and process frames as you already do
        video, _, _ = io.read_video(video_path, pts_unit='sec')

        # Perform the padding or truncating of frames
        total_frames = video.shape[0]
        if total_frames < self.max_frames:
            padded_video = torch.zeros((self.max_frames, video.shape[1], video.shape[2], video.shape[3]), dtype=video.dtype)
            padded_video[:total_frames] = video
            video = padded_video
        else:
            video = video[:self.max_frames]

        # Apply transformations
        pil_transform = transforms.ToPILImage()
        frames = [self.transform(pil_transform(frame.permute(2, 0, 1))) for frame in video] if self.transform else video
        video = torch.stack(frames).permute(1, 0, 2, 3).to(device)
        label = torch.tensor(label).to(device)
        return video, label


In [3]:
path ='/home/subru/projects/CNN/A-Dataset-for-Automatic-Violence-Detection-in-Videos-master/violence-detection-dataset'
# train_dataset = VideoDataset(root_dir=path, transform=transform)

# Create data loaders
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=5, shuffle=True)
train_dataset = VideoDataset(root_dir=path, transform=transform, train=True, test_size=0.2)
test_dataset = VideoDataset(root_dir=path, transform=transform, train=False, test_size=0.2)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# Iterate through the data loader
i = 1
for videos, labels in train_loader:
    print(videos.shape)  # Should print shape [batch_size, frames, channels, height, width]
    print(labels)        # Should print corresponding labels (0 for non-violent, 1 for violent)
    print(i*5)
    i+=1
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)


torch.Size([4, 3, 240, 128, 128])
tensor([1, 1, 0, 0], device='cuda:0')
5
torch.Size([4, 3, 240, 128, 128])
tensor([1, 0, 1, 1], device='cuda:0')
10
torch.Size([4, 3, 240, 128, 128])
tensor([0, 1, 0, 0], device='cuda:0')
15
torch.Size([4, 3, 240, 128, 128])
tensor([0, 1, 0, 1], device='cuda:0')
20
torch.Size([4, 3, 240, 128, 128])
tensor([0, 1, 0, 1], device='cuda:0')
25
torch.Size([4, 3, 240, 128, 128])
tensor([0, 1, 1, 0], device='cuda:0')
30
torch.Size([4, 3, 240, 128, 128])
tensor([0, 1, 0, 1], device='cuda:0')
35
torch.Size([4, 3, 240, 128, 128])
tensor([0, 1, 1, 0], device='cuda:0')
40
torch.Size([4, 3, 240, 128, 128])
tensor([0, 1, 1, 0], device='cuda:0')
45
torch.Size([4, 3, 240, 128, 128])
tensor([1, 1, 1, 0], device='cuda:0')
50
torch.Size([4, 3, 240, 128, 128])
tensor([1, 1, 1, 1], device='cuda:0')
55
torch.Size([4, 3, 240, 128, 128])
tensor([0, 0, 1, 1], device='cuda:0')
60
torch.Size([4, 3, 240, 128, 128])
tensor([1, 0, 0, 0], device='cuda:0')
65
torch.Size([4, 3, 240, 128

##### Total Frames: 59139
##### Average frames per video: 169
##### Time to load 350 videos: 43m 26.0s
##### VRAM used: 1190MB

In [4]:
import torch.nn as nn
from torchvision.models.video import r3d_18

# Load the pre-trained ResNet3D-18 model
model = r3d_18(pretrained=True)

# Modify the last fully connected layer for 2 classes (binary classification)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the final fully connected layer
for param in model.fc.parameters():
    param.requires_grad = True
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)




In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)  # Only train final layer
# train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

In [6]:
for epoch in range(25):
    model.train()
    running_loss = 0.0
    for videos, labels in train_loader:
        videos, labels = videos.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(videos)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{25}], Loss: {running_loss/len(train_loader)}")

OutOfMemoryError: CUDA out of memory. Tried to allocate 960.00 MiB. GPU 0 has a total capacity of 3.81 GiB of which 581.06 MiB is free. Including non-PyTorch memory, this process has 3.24 GiB memory in use. Of the allocated memory 3.11 GiB is allocated by PyTorch, and 61.42 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for videos, labels in test_loader:
        videos, labels = videos.to(device), labels.to(device)
        outputs = model(videos)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy: {100 * correct / total}%')
