In [29]:
import torch.nn.functional as F
import torch
import pytorch_lightning as pl
import warnings
import torch.nn as nn

# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and library warnings
# are hidden as well — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [30]:
from detr_model import attention_detr
from slow_model import create_model

In [31]:
class test_model(pl.LightningModule):
    """LightningModule that augments a video clip with an attention mask.

    ``attention_detr`` produces a mask from the clip, the mask is concatenated
    onto the clip along dim 1, and the result is passed to the backbone built
    by ``create_model``.  Training, validation and test all report the
    mean-squared error between the backbone output and the label.

    Args:
        batch_size: Stored on the module and forwarded to ``attention_detr``.
    """

    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size
        self.attention_weights = attention_detr(self.batch_size)
        self.backbone = create_model()

    def forward(self, x):
        """Run the attention branch and the backbone on one batch of clips.

        Args:
            x: 5-D tensor indexed as ``bcfhw`` by the einsum below
                (presumably batch, channel, frame, height, width — confirm
                against the data pipeline).

        Returns:
            Whatever ``self.backbone`` returns for the mask-augmented clip.
        """
        # The attention branch receives the clip with dims 1 and 2 swapped.
        attention_input = torch.einsum('bcfhw -> bfchw', x)
        attention_mask = self.attention_weights(attention_input)
        # Insert a singleton axis at dim 1 so the mask can be concatenated with
        # ``x`` along that axis (assumes the remaining mask dims match ``x`` —
        # TODO confirm against attention_detr).
        attention_mask = attention_mask.unsqueeze(1)
        input_x = torch.cat((x, attention_mask), dim=1)

        return self.backbone(input_x)

    def _shared_step(self, batch, log_name):
        """Compute the MSE loss for ``batch`` and log it under ``log_name``."""
        video_data, label = batch
        pred = self(video_data)
        # Functional form of nn.MSELoss() with its default mean reduction;
        # avoids building a new loss module on every step.
        loss = F.mse_loss(pred, label)

        self.log(log_name, loss)

        return loss

    def training_step(self, batch, batch_idx):
        """Return the training MSE loss; logged as ``train_loss``."""
        return self._shared_step(batch, "train_loss")

    def validation_step(self, batch, batch_idx):
        """Return the validation MSE loss; logged as ``val_loss``."""
        return self._shared_step(batch, "val_loss")

    def test_step(self, batch, batch_idx):
        """Return the test MSE loss; logged as ``test_loss``."""
        return self._shared_step(batch, "test_loss")

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return the model output for ``batch``; the label is ignored.

        ``dataloader_idx`` defaults to 0 so the hook can be invoked when only
        a single predict dataloader is in use and no index is supplied.
        """
        video_data, _ = batch

        return self(video_data)

    def configure_optimizers(self):
        """Adam (lr=0.001) with a StepLR schedule (step_size=10, gamma=0.5)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        # Halves the learning rate every 10 scheduler steps.
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
        return [optimizer], [scheduler]

In [32]:
import torch

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample
)
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.data.encoded_video import EncodedVideo
import os
import pandas as pd

class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(video, label)`` pairs listed in a CSV file.

    The CSV is read with ``pandas.read_csv`` and must provide a ``paths``
    column (video file locations) and a ``label`` column (numeric target).

    Args:
        csv_dataset: Path (or anything ``pandas.read_csv`` accepts) of the CSV.
    """

    def __init__(self, csv_dataset) -> None:
        super().__init__()

        self.df_dataset = pd.read_csv(csv_dataset)

        side_size = 1024
        mean = [0.45, 0.45, 0.45]
        std = [0.225, 0.225, 0.225]
        crop_size = 1024
        num_frames = 8
        sampling_rate = 8
        frames_per_second = 30

        # Applied to the "video" entry of the clip dict, in this order:
        # temporal subsample -> scale by 1/255 -> normalise -> resize short
        # side -> centre crop.
        self.transform = ApplyTransformToKey(
            key="video",
            transform=Compose(
                [
                    UniformTemporalSubsample(num_frames),
                    Lambda(lambda x: x/255.0),
                    NormalizeVideo(mean, std),
                    ShortSideScale(
                        size=side_size
                    ),
                    CenterCropVideo(crop_size=(crop_size, crop_size))
                ]
            ),
        )
        # Kept for reference only: __getitem__ decodes a fixed 0-10 s window
        # and does not read this value.
        self.clip_duration = (num_frames * sampling_rate)/frames_per_second

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.df_dataset)

    def __getitem__(self, idx):
        """Decode, transform and return the sample at row ``idx``.

        Returns:
            Tuple ``(inputs, label)`` where ``inputs`` is the transformed
            ``"video"`` entry of the clip and ``label`` is a float tensor of
            shape ``(1,)`` holding the CSV label multiplied by 0.01.

        Raises:
            RuntimeError: If no video frames could be decoded from the file.
        """
        row = self.df_dataset.iloc[idx]  # single row lookup for both columns
        video_path = row["paths"]

        label = torch.tensor(row["label"], dtype=torch.float)
        label = label.unsqueeze(0)
        label = label * 0.01

        video = EncodedVideo.from_path(video_path)
        try:
            video_data = video.get_clip(start_sec=0, end_sec=10)
        finally:
            # Release the decoder/container even when decoding fails; otherwise
            # one open video is leaked per sample fetched.
            video.close()

        if video_data["video"] is None:
            raise RuntimeError(f"No video frames could be decoded from {video_path!r}")

        video_data = self.transform(video_data)
        inputs = video_data["video"]

        return inputs, label
    
class CustomDataModule(pl.LightningDataModule):
    """Data module building train/val/test loaders from CSV files.

    Each CSV name is joined onto the current working directory at the moment
    the corresponding dataloader is requested.

    Args:
        batch_size: Batch size used by every dataloader.
        num_workers: Worker process count used by every dataloader.
        train_csv: CSV listing the training videos.  The original code read
            ``test.csv`` here, i.e. it trained on the held-out test split;
            pass ``train_csv='test.csv'`` explicitly to reproduce that for a
            smoke run.
        val_csv: CSV listing the validation videos.
        test_csv: CSV listing the test videos.
    """

    def __init__(self, batch_size, num_workers,
                 train_csv='train.csv', val_csv='val.csv', test_csv='test.csv'):
        super().__init__()

        self._BATCH_SIZE = batch_size
        self._NUM_WORKERS = num_workers
        self._train_csv = train_csv
        self._val_csv = val_csv
        self._test_csv = test_csv

    def _make_dataloader(self, csv_name, shuffle=False):
        """Build a ``DataLoader`` over the ``CustomDataset`` for ``csv_name``."""
        dataset = CustomDataset(csv_dataset=os.path.join(os.getcwd(), csv_name))
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=self._BATCH_SIZE,
            num_workers=self._NUM_WORKERS,
            shuffle=shuffle,
            # Incomplete final batches are dropped for every split.
            # NOTE(review): presumably required because the model is built for
            # a fixed batch size — confirm; it also means val/test metrics skip
            # the trailing samples when the split size is not a multiple.
            drop_last=True
        )

    def train_dataloader(self):
        """Shuffled loader over the training CSV."""
        return self._make_dataloader(self._train_csv, shuffle=True)

    def val_dataloader(self):
        """Loader over the validation CSV, in file order."""
        return self._make_dataloader(self._val_csv)

    def test_dataloader(self):
        """Loader over the test CSV, in file order."""
        return self._make_dataloader(self._test_csv)

In [36]:
# The model forwards its batch size to the attention branch, so the loaders
# must emit the same size: define it once and share it.
BATCH_SIZE = 1

model = test_model(batch_size=BATCH_SIZE)
data_module = CustomDataModule(batch_size=BATCH_SIZE, num_workers=0)
type(model)

Using cache found in C:\Users\JungSungYeon/.cache\torch\hub\facebookresearch_detr_main
Using cache found in C:\Users\JungSungYeon/.cache\torch\hub\facebookresearch_pytorchvideo_main


__main__.test_model

In [38]:
# One-epoch run on a single CPU device; the data module is handed over by
# keyword so its role in the call is explicit.
trainer = pl.Trainer(max_epochs=1, accelerator='cpu', devices=1)
trainer.fit(model, datamodule=data_module)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | attention_weights | attention_detr | 41.5 M
1 | backbone          | Net            | 31.6 M
-----------------------------------------------------
72.9 M    Trainable params
222 K     Non-trainable params
73.2 M    Total params
292.658   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 268435456 bytes.

tensor([[-0.0824]], grad_fn=<ViewBackward0>)