In [6]:
from libs.config import get_config
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import os
import numpy as np
from logging import getLogger


__all__ = ["get_dataset"]
logger = getLogger(__name__)
config = get_config('/mnt/sda1/Summarization/SurgSum/result/fps_sampling=1-batch_size=128-img_size=224-out_features=6-lr=0.0001-loss_fn=ib_focal-max_epoch=20-aug_ver=1/config.yaml')

## Create DataFrames

In [4]:
csv_path = Path(config.dataset_dir) / "csv"
dirs = list(csv_path.glob('*csv'))

In [8]:
all_df = pd.DataFrame()
for i, path in enumerate(tqdm(sorted(dirs))):
    file_name = path.stem
    # if file_name != 'video01':
    #     continue
    tmp = pd.read_csv(path)
    print(f'{file_name} before subsampling: {len(tmp)}')

    tmp['video_idx'] = int(file_name[-2:])  
    img_path = os.path.join(config.dataset_dir, 'video_split', file_name)
    tmp['file_name'] = sorted(os.listdir(img_path)[:len(tmp)])
    
    if config.val_vid_idx == int(file_name[-2:]):
        tmp['stage'] = 'val'
        factor = int(30 / config.fps_sampling_test)
    else:
        tmp['stage'] = 'train'
        factor = int(30 / config.fps_sampling)
    tmp = tmp.iloc[::factor]
    print(f'{file_name} after subsampling: {len(tmp)}')
    all_df = pd.concat([all_df, tmp], axis=0)


 20%|██        | 1/5 [00:00<00:00,  7.85it/s]

video00 before subsampling: 131840
video00 after subsampling: 4395
video01 before subsampling: 57063
video01 after subsampling: 1903
video02 before subsampling: 117343


 60%|██████    | 3/5 [00:00<00:00, 10.69it/s]

video02 after subsampling: 3912
video03 before subsampling: 146863
video03 after subsampling: 4896


100%|██████████| 5/5 [00:00<00:00,  8.53it/s]

video05 before subsampling: 126155
video05 after subsampling: 4206





In [6]:
all_df

Unnamed: 0,Frame,time,field,phase,summary,video_idx,file_name,stage
0,0,0:00:00.00,False,irrelevant,0.0,0,video00_000001.png,train
30,30,0:00:01.00,False,irrelevant,-1.0,0,video00_000031.png,train
60,60,0:00:02.00,False,irrelevant,-1.0,0,video00_000061.png,train
90,90,0:00:03.00,False,irrelevant,-1.0,0,video00_000091.png,train
120,120,0:00:04.00,False,irrelevant,-1.0,0,video00_000121.png,train
...,...,...,...,...,...,...,...,...
126030,126030,1:10:01.00,False,irrelevant,-1.0,5,video05_173704.png,train
126060,126060,1:10:02.00,False,irrelevant,-1.0,5,video05_173742.png,train
126090,126090,1:10:03.00,False,irrelevant,-1.0,5,video05_173787.png,train
126120,126120,1:10:04.00,False,irrelevant,-1.0,5,video05_173826.png,train


In [13]:
all_df.head()

Unnamed: 0,Frame,time,field,phase,summary,video_idx,file_name,stage
0,0,0:00:00.00,False,irrelevant,0.0,0,video00_000001.png,train
30,30,0:00:01.00,False,irrelevant,-1.0,0,video00_000031.png,train
60,60,0:00:02.00,False,irrelevant,-1.0,0,video00_000061.png,train
90,90,0:00:03.00,False,irrelevant,-1.0,0,video00_000091.png,train
120,120,0:00:04.00,False,irrelevant,-1.0,0,video00_000121.png,train


## dataset

In [60]:
from logging import getLogger

import torch
from PIL import Image
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
import os

__all__ = ["get_dataloader"]

logger = getLogger(__name__)


class ExtractorDataset(torch.utils.data.Dataset):
    def __init__(self, df, config, stage="train"):
        self.stage = stage
        self.df = df
        self.df = self.df[self.df["stage"] == self.stage]
        self.config = config
        self.class_labels = self.get_labels()

    def __getitem__(self, index):
        row = self.df.iloc[index]
        video_name = "video" + str(row.video_idx).zfill(2)
        data_path = os.path.join(self.config.dataset_dir, "video_split", video_name, row.file_name)
        img = Image.open(data_path)
        img = np.array(img)
        if self.stage == 'train':
            img = self.transform()(image=img)["image"]
        label = torch.tensor(self.class_labels[row.phase])

        return img.float(), label.float()
        


    def __len__(self):
        return len(self.df)
    
    def get_labels(self):
        class_labels = {}
        for i,label in enumerate(self.df.phase.unique()):
            class_labels[label] = i
            print(label, i)
        return class_labels

    def transform(self):
        transforms = [
                A.Normalize(mean=(0,0,0), std=(1,1,1)),
        ]
        
        if self.stage == 'train':
            if self.config.aug_ver == 1:
                transforms += [
                A.RandomResizedCrop(always_apply=False, p=1.0, height=self.img_size, width=self.img_size, scale=(0.7, 1.2), ratio=(0.75, 1.3), interpolation=1),
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                ]
            elif self.config.aug_ver == 2:
                transforms += [
                    A.HorizontalFlip(p=0.3),
                    A.VerticalFlip(p=0.3),
                ]
            
        transforms.append(ToTensorV2(p=1))

        return A.Compose(transforms)


In [61]:
ds = ExtractorDataset(all_df, config, 'train')

irrelevant 0
design 1
anesthesia 2
incision 3
hemostasis 4
dissection 5
closure 6
others 7


In [62]:
img, label = ds.__getitem__(0)

In [63]:
img.shape

torch.Size([3, 250, 250])

In [64]:
label

tensor(0.)

## datamodule

In [3]:
preds = np.load('/mnt/sda1/Summarization/SurgSum/result/fps_sampling=1-val_vid_idx=1-batch_size=128-img_size=224-out_features=6-lr=0.0001-loss_fn=ib_focal-max_epoch=1-aug_ver=1/preds.npy')

In [4]:
features = np.load('/mnt/sda1/Summarization/SurgSum/result/fps_sampling=1-val_vid_idx=1-batch_size=128-img_size=224-out_features=6-lr=0.0001-loss_fn=ib_focal-max_epoch=1-aug_ver=1/features.npy')

In [5]:
len(preds)

4695

In [8]:
len(features)

4695

In [6]:
df = pd.read_csv('/mnt/sda1/Summarization/SurgSum/result/fps_sampling=1-val_vid_idx=1-batch_size=128-img_size=224-out_features=6-lr=0.0001-loss_fn=ib_focal-max_epoch=1-aug_ver=1/processed_df.csv')

In [7]:
df

Unnamed: 0,Frame,time,field,phase,summary,video_idx,file_name,stage,y
0,3330,0:01:51.00,True,design,0.0,0,video00_003331.png,train,0
1,3360,0:01:52.00,True,design,0.0,0,video00_003361.png,train,0
2,3390,0:01:53.00,True,design,0.0,0,video00_003391.png,train,0
3,3420,0:01:54.00,True,design,0.0,0,video00_003421.png,train,0
4,3450,0:01:55.00,True,design,0.0,0,video00_003451.png,train,0
...,...,...,...,...,...,...,...,...,...
3452,131430,1:13:01.00,True,closure,0.0,0,video00_131432.png,train,5
3453,131460,1:13:02.00,True,closure,0.0,0,video00_131462.png,train,5
3454,131490,1:13:03.00,True,closure,0.0,0,video00_131492.png,train,5
3455,131520,1:13:04.00,True,closure,0.0,0,video00_131522.png,train,5


In [36]:
df = pd.read_csv('/mnt/sda1/Summarization/SurgSum/SummarizationDataset/csv/video06.csv')

In [37]:
len(os.listdir('/mnt/sda1/Summarization/SurgSum/SummarizationDataset/video_split/video06')), len(df)

(75555, 80526)

In [44]:
75555 * 30 /25

90666.0

In [42]:
df.phase[-10000:].value_counts()

closure       9999
irrelenant       1
Name: phase, dtype: int64

In [35]:
df.phase.value_counts()

dissection          56116
closure             35518
incision            13913
irrelevant_frame    12613
anesthesia          10402
hemostasis           5708
design               3700
Name: phase, dtype: int64

In [33]:
110 * 60 * 30

198000

In [17]:
2684*3

8052

# PGL_SUM

In [1]:
from libs.models import get_model
from libs.config import get_config
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import os
import numpy as np
from logging import getLogger


__all__ = ["get_dataset"]
logger = getLogger(__name__)
config = get_config('/mnt/sda1/Summarization/SurgSum/result/fps_sampling=1-batch_size=128-img_size=224-out_features=6-lr=0.0001-loss_fn=ib_focal-max_epoch=20-aug_ver=1/config.yaml')

In [26]:
data_dir = 'fps_sampling=1-batch_size=128-img_size=224-out_features=6-lr=0.0001-loss_fn=ib_focal-max_epoch=20-aug_ver=1'
feature_path = os.path.join('../result',data_dir)
fe_df = pd.read_csv(os.path.join(feature_path,'processed_df.csv'))
features = np.load(os.path.join(feature_path,'features.npy'))

df = pd.DataFrame({"video_idx":[0,1,2,3]})
start = []
end = []
for i in range(4):
    start.append(fe_df[fe_df.video_idx==i].index[0])
    end.append(fe_df[fe_df.video_idx==i].index[-1] + 1)
df['start_idx'] = start
df['end_idx'] = end

Unnamed: 0,video_idx,start_idx,end_idx
0,0,0,3457
1,1,3457,4695
2,2,4695,7443
3,3,7443,10401


In [3]:
import torch
from PIL import Image
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.utils import class_weight
import os
import pandas as pd



In [9]:
import torch
from PIL import Image
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.utils import class_weight
import os
import pandas as pd

class SumDataset(torch.utils.data.Dataset):
    def __init__(self, config, stage="train"):
        self.stage = stage
        self.config = config
        self.data_dir = 'fps_sampling=1-batch_size=128-img_size=224-out_features=6-lr=0.0001-loss_fn=ib_focal-max_epoch=20-aug_ver=1'
        train_vid_ids = [0,1,2]
        val_vid_ids = [3]
        feature_path = os.path.join('../result',self.data_dir)
        self.fe_df = pd.read_csv(os.path.join(feature_path,'processed_df.csv'))
        self.features = np.load(os.path.join(feature_path,'features.npy'))
        self.gts = self.fe_df.summary
        if self.stage=='train':
            self.vid_ids = train_vid_ids
        else:
            self.vid_ids = val_vid_ids
            
        self.df = self.get_df()
        
        

    def __getitem__(self, index):
        row = self.df.iloc[index]
        start = row.start_idx
        end = row.end_idx
        
        features = torch.Tensor(self.features[start:end])
        gts = torch.Tensor(self.gts[start:end])

        return features, gts

    def __len__(self):
        return len(self.df)
    
    def get_df(self):
        df = pd.DataFrame({"video_idx":self.vid_ids})
        start = []
        end = []
        split = []
        for i in self.vid_ids:
            start.append(self.fe_df[self.fe_df.video_idx==i].index[0])
            end.append(self.fe_df[self.fe_df.video_idx==i].index[-1] + 1)
        df['start_idx'] = start
        df['end_idx'] = end
        return df



In [10]:
ds = SumDataset(config)

In [11]:
ds.__len__()

3

In [12]:
feats, gts = ds.__getitem__(0)

In [16]:
feats.shape

torch.Size([3457, 2048])

In [20]:
gts.unsqueeze(1).shape

torch.Size([3457, 1])

In [None]:
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
import torchmetrics
from torchvision import transforms as transforms
from logging import getLogger
import numpy as np

__all__ = ["lightningmodule"]

logger = getLogger(__name__)


class ExtractorLitModule(pl.LightningModule):
    # ネットワークモジュールなどの定義
    def __init__(self, config, model=None, loss_fn=None):
        super().__init__()
        self.config = config
        self.model = model
        self.loss_fn = loss_fn
        self.learning_rate = self.config.lr
        self.features = np.zeros((0,2048))
        self.preds = np.zeros((0))
        self.init_metrics()



    def init_metrics(self):
        self.acc_phase = torchmetrics.Accuracy(task='multiclass',num_classes=self.config.out_features)
        self.f1_phase = torchmetrics.F1Score(num_classes=self.config.out_features,task='multiclass',average='macro')

    # オプティマイザの定義
    def configure_optimizers(self):
        optimizer = torch.optim.RAdam(self.parameters(), lr=self.learning_rate, weight_decay=self.config.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max = self.config.max_epoch,
                eta_min = self.config.lr_min,
                last_epoch = -1
            )

        # lr_scheduler_dict = {"scheduler": scheduler, "intervalf1"step"}
        return {"optimizer": optimizer, "lr_scheduler": scheduler}

    # ==================================================================
    def forward(self, batch):
        imgs = batch
        stem, preds = self.model(imgs)
        return stem, preds

    def training_step(self, batch, batch_idx):
        preds, loss, acc, f1 = self._shared_step(batch)

        self.log("train_loss", loss, on_step=True, on_epoch=True, logger=True)
        self.log("train_acc", acc, on_step=False, on_epoch=True, logger=True)
        self.log("train_f1", f1, on_step=False, on_epoch=True, logger=True)
        
        if batch_idx % 100 == 0:
            logger.info(f'train_acc {batch_idx}: {acc}')
            logger.info(f'train_f1 {batch_idx}: {f1}')

        return loss

    def validation_step(self, batch, batch_idx):
        preds, loss, acc, f1 = self._shared_step(batch)

        self.log("val_loss", loss, on_step=True, on_epoch=True, logger=True)
        self.log("val_acc", acc, on_step=False, on_epoch=True, logger=True)
        self.log("val_f1", f1, on_step=False, on_epoch=True, logger=True)
        
        if batch_idx % 50 == 0:
            logger.info(f'val_acc {batch_idx}: {acc}')
            logger.info(f'val_f1 {batch_idx}: {f1}')

        return loss
    
    def test_step(self, batch, batch_idx):
        with torch.no_grad():
            imgs, labels = batch
            stem, preds = self.model(imgs)
            preds = F.softmax(preds)
            preds = torch.argmax(preds, dim=1)
        logger.info(self.features.shape)
        logger.info(self.preds.shape)
        self.features = np.concatenate([self.features,stem.cpu().detach().numpy()],0)
        self.preds = np.concatenate([self.preds,np.asarray(preds.cpu()).squeeze()],0)


    def _shared_step(self, batch):
        imgs, labels = batch
        stem, preds = self.model(imgs)
        if self.config.loss_fn == 'ib_focal':
            loss = self.loss_fn(preds, labels, stem)
        else:
            loss = self.loss_fn(preds, labels)
        acc = self.acc_phase(preds, labels)
        f1 = self.f1_phase(preds, labels)
        
        for param_group in self.trainer.optimizers[0].param_groups:
            lr = param_group["lr"]
        self.log("lr", lr, on_step=True, on_epoch=False, prog_bar=True)
        

        return preds, loss, acc, f1