In [1]:
from typing import Callable

import os
import cv2
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm

from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset

import torchvision.transforms as transforms


In [2]:
N_FRAMES = 16

In [3]:
df = pd.read_csv('data.csv')
df.rename(columns={'target': 'label'}, inplace=True)

df['time_of_event'].replace({float('nan'): None}, inplace=True)

df['time_of_alert'].replace({float('nan'): None}, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['time_of_event'].replace({float('nan'): None}, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['time_of_alert'].replace({float('nan'): None}, inplace=True)


In [4]:
train_df: pd.DataFrame
val_df: pd.DataFrame
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

In [5]:
train_df['id'] = train_df['id'].astype(str).str.zfill(5)
val_df['id'] = val_df['id'].astype(str).str.zfill(5)

In [6]:
train_df['vid_path'] = "data/" + train_df['id'] + '.mp4'
val_df['vid_path'] = "data/" + val_df['id'] + '.mp4'

In [7]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(), # ToTensor : [0, 255] -> [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(), # ToTensor : [0, 255] -> [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [None]:
class CustomDataset(Dataset):
    def __init__(self, df: pd.DataFrame, n_frames = 16, transform: Callable= lambda x: x):
        self.item_id = df['id'].astype(str).str.zfill(5).tolist()
        self.vid_path = df['vid_path'].tolist()

        self.time_of_event = df['time_of_event'].tolist()

        self.transform = transform
        self.n_frames = n_frames


    def extract_frames(self, idx: int):
        path = self.vid_path[idx]

        cap = cv2.VideoCapture(path)

        fps = int(cap.get(cv2.CAP_PROP_FPS))
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        duration = frame_count / fps
        
        time = self.time_of_event[idx]

        if time and time != float('nan'):
            sample_idx = np.linspace(0, frame_count-1, self.n_frames, dtype=int)
            f = int(round(time * fps))
            k = np.searchsorted(sample_idx, f, side='right') - 1
        else:
            k = None

        
        step = max(frame_count // self.n_frames, 1)
        frames = []

        for i in range(self.n_frames):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i * step)
            ret, frame = cap.read()

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            frame = Image.fromarray(frame)
            frame = self.transform(frame)

            frames.append(frame)

        cap.release()

        return torch.stack(frames), k

    def __len__(self):
        return len(self.vid_path)

    def __getitem__(self, idx):
        item_id = self.item_id[idx]
        frames, k = self.extract_frames(idx)

        return item_id, frames, k

In [9]:
train_ds = CustomDataset(train_df, n_frames=N_FRAMES, transform=train_transform)

In [10]:
val_ds = CustomDataset(val_df, n_frames=N_FRAMES, transform=val_transform)

In [None]:
os.makedirs("train_tensor", exist_ok=True)
os.makedirs("val_tensor", exist_ok=True)

In [None]:
### Train

train_k = []
for item_id, frames, k in tqdm(train_ds):
    torch.save(frames, os.path.join('train_tensor', item_id + '.pt'))
    if k is None:
        k = -1
    train_k.append(k)

### Validation
val_k = []
for item_id, frames, k in tqdm(val_ds):
    torch.save(frames, os.path.join('val_tensor', item_id + '.pt'))
    if k is None:
        k = -1
    val_k.append(k)

100%|██████████| 1200/1200 [26:55<00:00,  1.35s/it]
100%|██████████| 300/300 [06:39<00:00,  1.33s/it]


In [15]:
train_df['event_frame'] = train_k
val_df['event_frame'] = val_k

In [31]:
train_df.sort_values(by='id', inplace=True)
val_df.sort_values(by='id', inplace=True)

In [32]:
train_df['tensor_path'] = "train_tensor/" + train_df['id'] + '.pt'
val_df['tensor_path'] = "val_tensor/" + val_df['id'] + '.pt'

In [33]:
train_df.to_csv('train.csv', index=False)
val_df.to_csv('val.csv', index=False)