In [1]:
import operator
import os
import random

import cv2
import torch
import torchvision.io as io
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torchvision.datasets import UCF101

In [2]:

# Prefer the CUDA device when PyTorch can see one; otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [17]:
# Dataset locations. Both paths hang off a single base directory instead of a
# machine-specific absolute path, so the notebook runs wherever the data is
# checked out. Set the UCF101_DATA_DIR environment variable to point elsewhere.
UCF101_DATA_DIR = os.environ.get("UCF101_DATA_DIR", "./Datasets/UCF101")
ucf101_root = f"{UCF101_DATA_DIR}/UCF-101"  # video files
ucf101_annot = f"{UCF101_DATA_DIR}/UCF101TrainTestSplits/UCF_annotations"  # train/test split lists


In [18]:
# Clip-level preprocessing (layout fix, resize, scaling, normalization).
# torchvision's UCF101 hands each clip to `transform` as a uint8 tensor laid out
# (T, H, W, C) by default, so ToTensor() (PIL image / ndarray only) cannot be
# used here and the channel axis has to be moved before Resize/Normalize.
# operator.methodcaller is used instead of a lambda so the pipeline stays
# picklable when DataLoader worker processes are spawned.
transform = transforms.Compose([
    operator.methodcaller("permute", 0, 3, 1, 2),   # (T, H, W, C) -> (T, C, H, W)
    transforms.Resize((224, 224)),                  # Resize frames to 224x224
    transforms.ConvertImageDtype(torch.float32),    # uint8 [0, 255] -> float [0, 1]
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # Per-channel mean/std
])

In [19]:
# UCF101 dataset using the prebuilt class in torchvision
train_dataset = UCF101(root=ucf101_root, annotation_path=ucf101_annot, train=True, transform=transform, frames_per_clip=16)
test_dataset = UCF101(root=ucf101_root, annotation_path=ucf101_annot, train=False, transform=transform, frames_per_clip=16)

# Split train dataset into train/val sets
# NOTE(review): this splits at clip level, so overlapping clips cut from the
# same video can land in both train and val — consider splitting by video.
train_idx, val_idx = train_test_split(list(range(len(train_dataset))), test_size=0.2, random_state=42)
train_split = torch.utils.data.Subset(train_dataset, train_idx)
val_split = torch.utils.data.Subset(train_dataset, val_idx)


def _drop_audio_collate(batch):
    """Collate UCF101 samples into ``(videos, labels)``, discarding the audio.

    Each sample is a ``(video, audio, label)`` triple. The audio tensors do not
    share a common size across clips, so stacking them with the default collate
    fails; only the first (video) and last (label) elements are kept.
    """
    video_label_pairs = [(sample[0], sample[-1]) for sample in batch]
    return torch.utils.data.default_collate(video_label_pairs)


# Worker processes are started with "spawn" on Windows and cannot import
# callables defined inside a notebook, so load in-process there.
NUM_WORKERS = 0 if os.name == "nt" else 4
_loader_kwargs = dict(
    batch_size=8,
    num_workers=NUM_WORKERS,
    pin_memory=True,
    collate_fn=_drop_audio_collate,
)

# DataLoader for batch processing (only the training split is shuffled)
train_loader = DataLoader(train_split, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_split, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)


100%|██████████| 833/833 [04:45<00:00,  2.92it/s]
100%|██████████| 833/833 [04:08<00:00,  3.36it/s]


In [20]:
# Frame-level augmentation built on torchvision's functional API
def augment_frame_pytorch(frame):
    """Return ``frame`` horizontally flipped about half of the time.

    One value is drawn from the global ``random`` generator per call; the
    input is flipped only when that draw exceeds 0.5, otherwise it is
    returned unchanged.
    """
    should_flip = random.random() > 0.5
    return F.hflip(frame) if should_flip else frame

# Function to apply preprocessing on the videos using GPU
def process_videos_pytorch(loader):
    """Move every batch from ``loader`` to ``device`` and augment its clips.

    Args:
        loader: Sized iterable of batches. Each batch is a sequence whose first
            element is the video tensor and whose last element is the label
            tensor, so both ``(videos, labels)`` pairs and the
            ``(videos, audio, labels)`` triples produced by torchvision's
            UCF101 with default collation are accepted.

    Returns:
        None. One progress line is printed per batch.
    """
    total_batches = len(loader)
    for batch_idx, batch in enumerate(loader):
        # Index instead of tuple-unpacking so an extra audio element is ignored.
        videos, labels = batch[0], batch[-1]
        # non_blocking lets the copy overlap with compute when memory is pinned.
        videos = videos.to(device, non_blocking=True)  # Transfer videos to GPU
        labels = labels.to(device, non_blocking=True)
        # Apply the augmentation clip by clip, then rebuild one batch tensor.
        augmented_videos = torch.stack([augment_frame_pytorch(video) for video in videos])
        # You can now pass the augmented_videos to your model
        print(f"Processed batch {batch_idx+1}/{total_batches}")

In [None]:
# Run the preprocessing pass over each split in order: training, validation, test
for _banner, _split_loader in (
    ("Processing training dataset...", train_loader),
    ("Processing validation dataset...", val_loader),
    ("Processing test dataset...", test_loader),
):
    print(_banner)
    process_videos_pytorch(_split_loader)

Processing training dataset...


In [22]:
# Tensor-input preprocessing: the clips arrive as uint8 tensors laid out
# (T, H, W, C), so ToTensor() is not used. Normalize() only accepts float
# tensors and, like Resize(), expects (..., C, H, W), hence the permute and
# dtype conversion below. operator.methodcaller keeps the pipeline picklable
# for DataLoader worker processes (a lambda would not be).
# Note: rebinding `transform` does not change datasets that were already
# constructed with the previous object — rebuild them to pick this up.
transform = transforms.Compose([
    operator.methodcaller("permute", 0, 3, 1, 2),   # (T, H, W, C) -> (T, C, H, W)
    transforms.Resize((224, 224)),  # Resizing frames
    transforms.ConvertImageDtype(torch.float32),    # uint8 [0, 255] -> float [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [23]:
def __getitem__(self, idx):
    """Fetch item ``idx`` from ``self.dataset``, print its type, and return it transformed.

    The type is printed before ``self.transform`` runs so the raw item's class
    is visible even if the transform then fails on it.
    """
    raw_item = self.dataset[idx]  # Load the video
    # Debug aid: shows whether the raw item is already a tensor or an ndarray
    print(type(raw_item))
    return self.transform(raw_item)
