## Data Loading

In [1]:
import cv2
import numpy as np


fight_path_template = 'dataset/fight/fi{num:03d}.mp4'
nofight_path_template = 'dataset/noFight/nofi{num:03d}.mp4'

class DataLoading() :
    def load_video_with_resizing_and_frame_handling(self,video_path, target_frames=32, target_size=(112, 112)):
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Error: Could not open video {video_path}")
            return None

        frames = []
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            resized_frame = cv2.resize(frame, target_size)
            normalized_frame = resized_frame / 255.0
            frames.append(normalized_frame)

        cap.release()
        video_tensor = np.array(frames)

        num_frames = video_tensor.shape[0]
        if num_frames > target_frames:
            indices = np.linspace(0, num_frames - 1, target_frames).astype(int)
            video_tensor = video_tensor[indices]
        elif num_frames < target_frames:
            pad_length = target_frames - num_frames
            padding = np.zeros((pad_length, target_size[0], target_size[1], 3))
            video_tensor = np.concatenate((video_tensor, padding), axis=0)
        return video_tensor # numpy array
     
    def get_data(self) :
        num_samples=150
        data=[]
        labels=[]
        count=0
        for i in range(1, num_samples + 1):
                    video_path = fight_path_template.format(num=i)
                    video_tensor = self.load_video_with_resizing_and_frame_handling(video_path)
                    if video_tensor is not None:
                        count+=1
                        data.append(video_tensor)
                        labels.append(1)


        for i in range(1, num_samples + 1):
                    video_path = nofight_path_template.format(num=i)
                    video_tensor = self.load_video_with_resizing_and_frame_handling(video_path)
                    if video_tensor is not None:
                        count+=1
                        data.append(video_tensor)
                        labels.append(0)
        return data,labels

instance=DataLoading()
data,labels=instance.get_data()

In [2]:
data=np.array(data)

In [3]:
labels=np.array(labels)

In [4]:
import torch
import torch.nn as nn
import torchvision
from torchvision import models
from torch.utils.data import DataLoader, Dataset
device='cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [5]:
tensor_X=torch.tensor(data)

In [6]:
tensor_Y=torch.tensor(labels)

In [7]:
print(tensor_X.shape,tensor_Y.shape)

torch.Size([300, 32, 112, 112, 3]) torch.Size([300])


## visualization

In [8]:
# import matplotlib.pyplot as plt
# image=tensor_X[0][0]
# plt.imshow(image)
# plt.title('1')

In [9]:
from sklearn.model_selection import train_test_split
train_x,test_x,train_y,test_y=train_test_split(data,labels,test_size=0.2,shuffle=True)

## Writing dataloader function 

In [10]:
import torch
from torch.utils.data import DataLoader, Dataset

# Assuming tensor_X has shape [300, 32, 112, 112, 3] and tensor_Y has shape [300]
# Convert tensor_X to the required PyTorch format (batch_size, frames, channels, height, width)
train_x=torch.tensor(train_x)
test_x=torch.tensor(test_x)
train_x_new = train_x.permute(0, 1, 4, 2, 3)  # Now shape will be [300, 32, 3, 112, 112]
test_x_new = test_x.permute(0, 1, 4, 2, 3)  # Now shape will be [300, 32, 3, 112, 112]

class VideoDataset(Dataset):
    def __init__(self, videos, labels):
        self.videos = videos  # Tensor containing video data
        self.labels = labels  # Tensor containing labels

    def __len__(self):
        return len(self.videos)

    def __getitem__(self, idx):
        video = self.videos[idx]  # Get the video tensor
        label = self.labels[idx]  # Get the corresponding label
        return video, label

# Create the dataset
dataset_train = VideoDataset(train_x_new.float(), tensor_Y.float())

# Create the DataLoader
data_loader_train = DataLoader(dataset_train, batch_size=2, shuffle=True, num_workers=4)

# Create the dataset
dataset_test = VideoDataset(test_x_new.float(), tensor_Y.float())

# Create the DataLoader
data_loader_test= DataLoader(dataset_test, batch_size=2, shuffle=True, num_workers=4)


batch_videos, batch_labels = next(iter(data_loader_train))
print(batch_labels.shape)  # This will output the shape of the batch of videos


torch.Size([2])


In [11]:
# x,y=next(iter(data_loader_train))
# print((x[0][0][0][0]))
# print(y)

## Defining the 3D CNN model 

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Simple3DCNN(nn.Module):
    def __init__(self):
        super(Simple3DCNN, self).__init__()
        self.conv1 = nn.Conv3d(in_channels=32, out_channels=16, kernel_size=3, stride=1, padding=1)  # Output: [16, 32, 112, 112]
        self.pool = nn.MaxPool3d(kernel_size=2, stride=2)  # Halves the spatial dimensions
        self.conv2 = nn.Conv3d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)  # Output: [32, 16, 56, 56]
        self.adaptive_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.fc1 = nn.Linear(32 * 16 * 56 * 56, 128)   # Fully connected layer
        self.fc2 = nn.Linear(128, 1)  # Output layer for binary classification

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Convolution + Activation + Pooling
        x = self.pool(F.relu(self.conv2(x)))  # Convolution + Activation + Pooling
        x=self.adaptive_pool(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = F.relu(self.fc1(x))  # Fully connected layer
        x = self.fc2(x)  # Output layer
        return x


## Writing the training loop

In [13]:
# # Hyperparameters
# num_epochs = 20
# # Training Loop
# for epoch in range(num_epochs):
#     for batch_videos, batch_labels in data_loader_test:
#         print(batch_videos.shape)
#         print(batch_labels.shape)


In [14]:
import torch.optim as optim
import matplotlib.pyplot as plt
from tqdm.auto import tqdm 
# Hyperparameters
num_epochs = 20
batch_size = 2
learning_rate = 0.01

# Create the model
model = Simple3DCNN()
model=model.to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Assuming you have a DataLoader called 'data_loader' from the previous setup
train_losses = []
test_losses = []

# Training Loop
for epoch in tqdm(range(num_epochs)):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    for batch_videos, batch_labels in data_loader_train:
        batch_videos = batch_videos.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(batch_videos)  # Forward pass
        loss = criterion(outputs.squeeze(), batch_labels.squeeze())  # Calculate loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights
        running_loss += loss.item() * batch_videos.size(0)

    epoch_loss = running_loss / len(data_loader_train.dataset)
    train_losses.append(epoch_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

# Testing Loop (assuming you have a separate test DataLoader)
model.eval()  # Set the model to evaluation mode
running_loss = 0.0
with torch.no_grad():
    for batch_videos, batch_labels in data_loader_test:  # Use your test DataLoader here
        batch_videos = batch_videos.to(device)
        batch_labels = batch_labels.to(device)
        outputs = model(batch_videos)
        loss = criterion(outputs.squeeze(), batch_labels.squeeze())
        running_loss += loss.item() * batch_videos.size(0)

test_loss = running_loss / len(data_loader_test.dataset)  # Replace with the test DataLoader
test_losses.append(test_loss)
print(f'Test Loss: {test_loss:.4f}')

# Visualize the training and testing loss
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Train Loss')
plt.plot(test_losses, label='Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Test Loss')
plt.legend()
plt.show()


  from .autonotebook import tqdm as notebook_tqdm
  0%|          | 0/20 [00:01<?, ?it/s]


RuntimeError: Given input size: (32x1x56x56). Calculated output size: (32x0x28x28). Output size is too small