In [1]:
from torch.utils.data import DataLoader
from omegaconf import OmegaConf
from tqdm.auto import tqdm

import os
import torch
import torchmetrics
import torch.nn.functional as F
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import matplotlib.pyplot as plt

from utils.etc import collate_fn, get_image_size_from_decoder_blocks
from pl_script.unet_smp import LitUnetSmp
from utils.augmentation import get_study_transform
from dataset import COVIDDataset

pl.seed_everything(42)

Global seed set to 42


42

In [2]:
CV = "sgkf"
ROOT = "./data-640-pseudo"
DEVICE = "cuda:2"

def get_fold_num(filename, fold_name="sgkf"):
  for i in filename.split("-"):
    if fold_name in i:
      return int(i.replace(fold_name, ""))

def load_config():
  backbone_name = "tu-tf_efficientnetv2_m"
  config_dict = OmegaConf.merge(OmegaConf.load("config.yaml"), OmegaConf.load("model/unet_smp.yaml"))
  config_dict.model = "unet_smp"
  config_dict.unet_smp.backbone_name = backbone_name
  config_dict.unet_smp.classes = 2
  config_dict.unet_smp.neck_type = "F"
  return config_dict

def get_batch(batch):
  imgs, image_id, study_id, _, _, targets_study, _ = batch
  imgs = [img.float() for img in imgs]
  
  imgs = torch.stack(imgs)
  targets_study = torch.stack(targets_study)
  
  return imgs, targets_study, image_id, study_id

In [3]:
models = []
for state_dict_filename in os.listdir("./pretrained/42"):
  if ".ckpt" not in state_dict_filename:
    continue
  config = load_config()
  
  checkpoint_path = os.path.join("./pretrained/42", state_dict_filename)
  pl_model = LitUnetSmp.load_from_checkpoint(checkpoint_path, config=config)
  model = pl_model.model
  model.eval()
  model.to(DEVICE)
  models.append(model)

In [4]:
all_files = list(filter(lambda x: ".png" in x, os.listdir("./data-640-pseudo/train")))
temp = pd.read_csv("./data-640/train_image_level.csv")["id"].apply(lambda x: x.replace("_image",".png"))
all_files = list(set(all_files) - set(temp))
df = pd.DataFrame({"id": list(map(lambda x: x.replace(".png", ""),all_files))})

In [5]:
df

Unnamed: 0,id
0,7dc52532f465
1,413344e1032c
2,94ddca484ebf
3,2be1c227dbf2
4,b4a7026fe17c
...,...
2145,f10f8cc6f465
2146,a34e31fef465
2147,f35e5994f465
2148,05a70a1c16c1


In [6]:
df["StudyInstanceUID"] = ""
df[["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]] = 0.0
df["boxes"] = ""
df["label"] = ""
df["PatientID"] = ""

In [12]:
pd.concat((
  pd.read_csv("./data-640/meta.csv"),
  pd.DataFrame({
    "image_id": df["id"],
    "dim0": [0] * len(df["id"]),
    "dim1": [0] * len(df["id"]),
    "split": ["train"] * len(df["id"]),
  }))).to_csv("./data-640-pseudo/meta.csv", index=False)
pd.concat((
  pd.read_csv("./data-640/meta.csv"),
  pd.DataFrame({
    "image_id": df["id"],
    "dim0": [0] * len(df["id"]),
    "dim1": [0] * len(df["id"]),
    "split": ["train"] * len(df["id"]),
  }))).to_csv("./data-640-pseudo2/meta.csv", index=False)

In [9]:
merge_df = pd.read_csv("./data-640-merge/train_psl_none.csv")
merge_df = merge_df[merge_df["merge"]==True]
merge_df = merge_df[["id", "Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]]
ricord_label_dict = dict(zip(merge_df["id"], merge_df[["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]].to_numpy()))

In [10]:
results_psl = []
results = []
# fold = get_fold_num(state_dict_filename)
# val_df = pd.read_csv(f"./pretrained/42/{fold}.csv")
_, transform = get_study_transform(640)
mask_img_size = get_image_size_from_decoder_blocks(config.unet_smp.decoder_blocks, config.img_size)
dataset = COVIDDataset(root=ROOT, df=df, img_size=640, mask="both", transform=transform, mask_img_size=mask_img_size)


dataloader = DataLoader(dataset, batch_size=4,
                      collate_fn=collate_fn, pin_memory=True, num_workers=16)

test_map = torchmetrics.BinnedAveragePrecision(num_classes=4)
test_map = test_map.to(DEVICE)
for batch in tqdm(dataloader):
  inputs, _, image_id, study_id = get_batch(batch)
  inputs = inputs.to(DEVICE)
  # targets = targets.to(DEVICE)
  
  outputs = model(inputs)
  results += list(zip(image_id, study_id, (F.one_hot(torch.argmax(outputs, dim=1), num_classes=4).cpu().detach().numpy().tolist())))

  ricord_labels = []
  for ii in image_id:
    if f"{ii}_image" in ricord_label_dict:
      ricord_labels.append(ricord_label_dict[f"{ii}_image"])
    else:
      ricord_labels.append([0,0,0,0])
  ricord_labels = torch.tensor(ricord_labels).to(DEVICE)
  results_psl += list(zip(image_id, study_id, (F.one_hot(torch.argmax(torch.softmax(outputs, dim=1) + ricord_labels, dim=1), num_classes=4).cpu().detach().numpy().tolist())))
  # results_psl += list(zip(image_id, study_id, (torch.softmax(outputs, dim=1).cpu().detach().numpy().tolist())))
  
  # test_map(torch.softmax(outputs, dim=1), targets)
# ap = test_map.compute()
# mean_ap = sum(ap) / len(ap)
# print(mean_ap)



  0%|          | 0/538 [00:01<?, ?it/s]

KeyboardInterrupt: 

In [None]:
psl = pd.DataFrame(results).rename(columns={0: "id", 1: "study_id"})
psl[["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]] = psl.pop(2).tolist()
psl["id"] += "_image"
# for col in ["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]:
#   print(psl[col].value_counts())
  
merge_df = pd.read_csv("./data-640-merge/train_psl_none.csv")
merge_df = merge_df[merge_df["merge"]==True]
# for col in ["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]:
#   print(merge_df[col].value_counts())
merge_df = merge_df[merge_df["Indeterminate Appearance"] == 1][["id", "Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]]
ia_dict = dict(zip(merge_df["id"], [[0,0,1,0]] * len(merge_df["id"])))

new_psl = []
for idx, row in psl.iterrows():
  if row["id"] in ia_dict:
    row[["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]] = ia_dict[row["id"]]
  new_psl.append(row)
new_psl = pd.DataFrame(new_psl)
new_psl.to_csv("./data-640-pseudo/train_psl_none.", index=False)

train_df = pd.read_csv("./data-640/train_psl_none.csv")

new_psl["label"] = "none 1 0 0 1 1"
new_psl["StudyInstanceUID"] = new_psl["id"].apply(lambda x: x.replace("_image", "_study"))
new_psl["PatientID"] = new_psl["StudyInstanceUID"]
new_psl["label"] = "none 1 0 0 1 1"

train_df = pd.concat((train_df, new_psl))
train_df["merge"] = train_df["PatientID"] == train_df["StudyInstanceUID"]
train_df.to_csv("./data-640-pseudo/train_psl_none.csv", index=False)

In [None]:
psl = pd.DataFrame(results_psl).rename(columns={0: "id", 1: "study_id"})
psl[["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]] = psl.pop(2).tolist()
psl["id"] += "_image"
# for col in ["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]:
#   print(psl[col].value_counts())
  
merge_df = pd.read_csv("./data-640-merge/train_psl_none.csv")
merge_df = merge_df[merge_df["merge"]==True]
# for col in ["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]:
#   print(merge_df[col].value_counts())
merge_df = merge_df[merge_df["Indeterminate Appearance"] == 1][["id", "Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]]
ia_dict = dict(zip(merge_df["id"], [[0,0,1,0]] * len(merge_df["id"])))

new_psl = []
for idx, row in psl.iterrows():
  if row["id"] in ia_dict:
    row[["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]] = ia_dict[row["id"]]
  new_psl.append(row)
new_psl = pd.DataFrame(new_psl)
new_psl.to_csv("./data-640-pseudo/train_psl_none.", index=False)

train_df = pd.read_csv("./data-640/train_psl_none.csv")

new_psl["label"] = "none 1 0 0 1 1"
new_psl["StudyInstanceUID"] = new_psl["id"].apply(lambda x: x.replace("_image", "_study"))
new_psl["PatientID"] = new_psl["StudyInstanceUID"]
new_psl["label"] = "none 1 0 0 1 1"

train_df = pd.concat((train_df, new_psl))
train_df["merge"] = train_df["PatientID"] == train_df["StudyInstanceUID"]
train_df.to_csv("./data-640-pseudo2/train_psl_none.csv", index=False)