In [1]:
from torch.utils.data import DataLoader
from omegaconf import OmegaConf
from tqdm.auto import tqdm

import os
import torch
import torchmetrics
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import matplotlib.pyplot as plt

from utils.etc import collate_fn, get_image_size_from_decoder_blocks
from pl_script.unet_smp import LitUnetSmp
from utils.augmentation import get_study_transform
from dataset import COVIDDataset

# Fix the global random seed through PyTorch Lightning before any data or
# model object is created, so repeated runs of the notebook start from the
# same RNG state.
pl.seed_everything(42)

Global seed set to 42


42

In [2]:
# Notebook-wide settings.
# CV: fold marker string; it matches the default `fold_name` of get_fold_num.
#     NOTE(review): not referenced by any cell visible here — confirm it is still needed.
CV = "sgkf"
# ROOT: dataset root directory handed to COVIDDataset.
ROOT = "./data-640"
# DEVICE: torch device string the model and input batches are moved to.
DEVICE = "cuda:2"

def get_fold_num(filename, fold_name="sgkf"):
  """Extract the fold index encoded in a checkpoint file name.

  The name is split on "-" and the first token containing `fold_name` is
  parsed, e.g. "unet-sgkf3-epoch=10.ckpt" -> 3.

  Args:
    filename: File name (not a full path) to inspect.
    fold_name: Marker that precedes the fold index inside one of the
      "-"-separated tokens.

  Returns:
    The fold index as an int, or None when no token yields a fold number.
  """
  import re  # local import keeps this cell runnable on its own

  for token in filename.split("-"):
    if fold_name not in token:
      continue
    try:
      # Fast path: the token is exactly "<fold_name><number>".
      return int(token.replace(fold_name, ""))
    except ValueError:
      # The token carries extra characters (for example a ".ckpt" suffix when
      # the fold is the last part of the name); take the digits that directly
      # follow the marker instead of failing.
      match = re.search(re.escape(fold_name) + r"(\d+)", token)
      if match:
        return int(match.group(1))
  return None

def load_config():
  """Build the configuration object used to restore the U-Net checkpoints.

  Loads "config.yaml" and "model/unet_smp.yaml", merges them (values from the
  second file take precedence in the merge), then overrides the model name
  and three `unet_smp` fields with the values hard-coded below.

  Returns:
    The merged OmegaConf configuration.
  """
  base_cfg = OmegaConf.load("config.yaml")
  model_cfg = OmegaConf.load("model/unet_smp.yaml")
  merged_cfg = OmegaConf.merge(base_cfg, model_cfg)

  # Overrides applied on top of whatever the YAML files contain.
  merged_cfg.model = "unet_smp"
  merged_cfg.unet_smp.backbone_name = "tu-tf_efficientnetv2_m"
  merged_cfg.unet_smp.classes = 2
  merged_cfg.unet_smp.neck_type = "F"
  return merged_cfg

def get_batch(batch):
  """Turn one collated batch into stacked tensors plus identifiers.

  Args:
    batch: A 7-element sequence. Only positions 0 (images), 1 (image ids),
      2 (study ids) and 5 (study-level targets) are used; the rest are ignored.

  Returns:
    A tuple `(images, study_targets, image_id, study_id)` where `images` is
    the per-sample images cast to float and stacked along a new leading
    dimension, and `study_targets` is the stacked study-level targets.
  """
  raw_images, image_id, study_id, _skip_a, _skip_b, raw_targets, _skip_c = batch

  images = torch.stack([image.float() for image in raw_images])
  study_targets = torch.stack(raw_targets)

  return images, study_targets, image_id, study_id

In [3]:
# Run every fold checkpoint over the CSV split stored next to it and collect
# one (image_id, study_id, softmax_probabilities) tuple per image in `results`.
results = []
checkpoint_dir = "./pretrained/42"

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of `results` reproducible between runs.
for state_dict_filename in sorted(os.listdir(checkpoint_dir)):
  if ".ckpt" not in state_dict_filename:
    continue
  config = load_config()

  # The fold number in the checkpoint name selects the matching "<fold>.csv".
  fold = get_fold_num(state_dict_filename)
  val_df = pd.read_csv(os.path.join(checkpoint_dir, f"{fold}.csv"))
  _, transform = get_study_transform(640)
  mask_img_size = get_image_size_from_decoder_blocks(config.unet_smp.decoder_blocks, config.img_size)
  dataset = COVIDDataset(root=ROOT, df=val_df, img_size=640, mask="both", transform=transform, mask_img_size=mask_img_size)

  checkpoint_path = os.path.join(checkpoint_dir, state_dict_filename)
  pl_model = LitUnetSmp.load_from_checkpoint(checkpoint_path, config=config)
  model = pl_model.model
  model.eval()
  model.to(DEVICE)

  dataloader = DataLoader(dataset, batch_size=4,
                        collate_fn=collate_fn, pin_memory=True, num_workers=16)

  # Inference only: disable autograd so no graph is built or kept per batch.
  with torch.no_grad():
    for batch in tqdm(dataloader):
      inputs, _, image_id, study_id = get_batch(batch)
      inputs = inputs.to(DEVICE)

      outputs = model(inputs)

      # Softmax over dim 1, converted to plain Python lists on the CPU.
      probabilities = torch.softmax(outputs, dim=1).cpu().tolist()
      results.extend(zip(image_id, study_id, probabilities))



  0%|          | 0/283 [00:01<?, ?it/s]

  0%|          | 0/301 [00:02<?, ?it/s]

  0%|          | 0/309 [00:02<?, ?it/s]

  0%|          | 0/297 [00:02<?, ?it/s]

  0%|          | 0/326 [00:02<?, ?it/s]

In [4]:
# Flatten the collected (image_id, study_id, probabilities) tuples into one row
# per image, with one named column per entry of the probability list.
psl_columns = [
  "psl_Negative for Pneumonia",
  "psl_Typical Appearance",
  "psl_Indeterminate Appearance",
  "psl_Atypical Appearance",
]
psl = pd.DataFrame(
  [(image_id, study_id, *probabilities) for image_id, study_id, probabilities in results],
  columns=["id", "study_id", *psl_columns],
)
psl

Unnamed: 0,id,study_id,psl_Negative for Pneumonia,psl_Typical Appearance,psl_Indeterminate Appearance,psl_Atypical Appearance
0,bb4b1da810f3_image,0051d9b12e72_study,0.234519,0.281015,0.334334,0.150132
1,bf1f75117093_image,00908ffd2d08_study,0.891906,0.037111,0.063512,0.007471
2,92552b44c70c_image,00febcfee50b_study,0.059702,0.043282,0.132088,0.764928
3,55e22c0c5de0_image,0142feaef82f_study,0.783585,0.090400,0.111680,0.014335
4,204f98b6eaa0_image,015707c0e73f_study,0.062214,0.526540,0.294439,0.116806
...,...,...,...,...,...,...
6049,bb484ab7a8c6_image,ff0bf9cae3b3_study,0.097310,0.592212,0.250608,0.059870
6050,49c596c8b1a1_image,ff3bc8d91f5b_study,0.127436,0.689356,0.161916,0.021292
6051,bcdfd2f72125_image,ff3fc3faf1ca_study,0.733449,0.119203,0.106908,0.040441
6052,aabf3a11ea75_image,ff7ffd9c6c80_study,0.918736,0.023258,0.037839,0.020168


In [7]:
# Load the merged training table and discard the "Unnamed: 0" column that the
# CSV carries.
# NOTE(review): this is an absolute path, unlike the relative ROOT used
# elsewhere — confirm both point at the same data directory.
train_df = (
  pd.read_csv("/root/siim-covid/data-640/merged_drop_multi_image_study.csv")
  .drop(columns="Unnamed: 0")
)
train_df

Unnamed: 0,id,boxes,label,StudyInstanceUID,Negative for Pneumonia,Typical Appearance,Indeterminate Appearance,Atypical Appearance,PatientID
0,65761e66de9f_image,"[{'x': 720.65215, 'y': 636.51048, 'width': 332...",opacity 1 720.65215 636.51048 1052.84563 1284....,00086460a852_study,0,1,0,0,7d3bf0aef747
1,51759b5579bc_image,,none 1 0 0 1 1,000c9c05fd14_study,0,0,0,1,2c00dc1ead80
2,f6293b1c49e2_image,,none 1 0 0 1 1,00292f8c37bd_study,1,0,0,0,f942cf0989ff
3,3019399c31f4_image,,none 1 0 0 1 1,005057b3f880_study,1,0,0,0,f1894c76ef7b
4,bb4b1da810f3_image,"[{'x': 812.54698, 'y': 1376.41291, 'width': 62...",opacity 1 812.54698 1376.41291 1435.14793 1806...,0051d9b12e72_study,0,0,0,1,e2dbb0ae5e50
...,...,...,...,...,...,...,...,...,...
6049,84ed5f7f71bf_image,"[{'x': 1721.27651, 'y': 974.09667, 'width': 12...",opacity 1 1721.27651 974.09667 2999.21998 2681...,ffcb4630f46f_study,0,1,0,0,e67df835b17d
6050,e6215d0188e5_image,"[{'x': 364.93056, 'y': 870.04017, 'width': 731...",opacity 1 364.93056 870.04017 1096.13908 2053....,ffe4d6e8fbb0_study,0,1,0,0,a66dba7d222a
6051,7d27b1bb3987_image,"[{'x': 28.48292, 'y': 828.48474, 'width': 1116...",opacity 1 28.48292 828.48474 1145.01081 2296.7...,ffe94fcb14fa_study,0,1,0,0,1af09f44436b
6052,52478e480a15_image,"[{'x': 425.81211, 'y': 424.86147, 'width': 528...",opacity 1 425.81211 424.86147 953.95118 1579.3...,ffebf1ef4a9c_study,0,1,0,0,95680c66c89a


In [9]:
# Attach the predicted probabilities to the training table by image id and
# save the result. The join is an inner join (the pandas default), so only ids
# present in both frames are written; columns that clash would get "_dup"
# appended on the `psl` side.
train_psl = train_df.merge(psl, how="inner", on="id", suffixes=('', '_dup'))
train_psl.to_csv("./data-640/train_psl.csv", index=False)