In [1]:
import os

In [5]:
video_dir = '../WLASL/start_kit/videos'

In [4]:
labels = os.listdir(video_dir)


In [1]:
pip list

Package                       Version
----------------------------- -----------
anyio                         3.6.2
appnope                       0.1.3
argon2-cffi                   21.1.0
asttokens                     2.2.1
attrs                         22.2.0
Babel                         2.12.1
backcall                      0.2.0
backports.functools-lru-cache 1.6.4
beautifulsoup4                4.12.2
bleach                        6.0.0
brotlipy                      0.7.0
certifi                       2022.12.7
cffi                          1.15.1
charset-normalizer            3.1.0
colorama                      0.4.6
cryptography                  39.0.1
decorator                     5.1.1
defusedxml                    0.7.1
entrypoints                   0.4
executing                     1.2.0
fastjsonschema                2.16.3
fsspec                        2023.4.0
future                        0.18.3
idna                          3.4
importlib-metadata            6.6.0
importlib

In [21]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

class VideoDataset(Dataset):
    def __init__(self, data_dir, split="train", test_size=0.1, val_size=0.1, random_state=429):
        self.labels = os.listdir(data_dir)
        self.label_to_idx = {label: idx for idx, label in enumerate(self.labels)}
        self.data = []
        for label in self.labels:
            label_dir = os.path.join(data_dir, label)
            video_files = os.listdir(label_dir)
            for video_file in video_files:
                video_path = os.path.join(label_dir, video_file)
                self.data.append((video_path, label))
                
        train_val_data, test_data = train_test_split(self.data, test_size=test_size, random_state=random_state)

        if split == "train":
            train_data, val_data = train_test_split(train_val_data, test_size=val_size/(1-test_size), random_state=random_state)
            self.data = train_data
        elif split == "val":
            train_data, val_data = train_test_split(train_val_data, test_size=val_size/(1-test_size), random_state=random_state)
            self.data = val_data
        elif split == "test":
            self.data = test_data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        video_path, label = self.data[idx]
        video = cv2.VideoCapture(video_path)
        frames = []
        while True:
            ret, frame = video.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (224, 224))
            frame = transforms.ToTensor()(frame)
            frames.append(frame)
        video_tensor = torch.stack(frames, dim=0)
        label_idx = self.label_to_idx[label]
        return video_tensor, label_idx


In [22]:
train_data = VideoDataset(video_dir, split="train")
train_loader = DataLoader(train_data, batch_size=4, shuffle=True)

val_data = VideoDataset(video_dir, split="val")
val_loader = DataLoader(val_data, batch_size=4, shuffle=True)

test_data = VideoDataset(video_dir, split="test")
test_loader = DataLoader(test_data, batch_size=4, shuffle=True)

UnboundLocalError: local variable 'test_data' referenced before assignment

In [19]:
val_data.data

[('../WLASL/start_kit/videos/book/07088.mp4', 'book'),
 ('../WLASL/start_kit/videos/book/07075.mp4', 'book'),
 ('../WLASL/start_kit/videos/book/07074.mp4', 'book'),
 ('../WLASL/start_kit/videos/before/05732.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05733.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05727.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05731.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05724.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05730.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05734.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05750.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05744.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05746.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05747.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05743.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05742.mp4', 'before'),
 ('../WLASL/start_kit/videos/before/05740.mp4', 'before'),
 ('../WLA