### TODOs

- [x] Inference Loop
- [ ] Oracle?
- [ ] Finetune
- [ ] Spottune
- [x] Voxelization NaN to num
- [x] Precompute 3rd order sample voxel spacing upfront


#### Augs?
- [ ] LAMB
- [ ] Label Smoothing
- [ ] Stochastic depth
- [ ] CutMix / MixUp
- [ ] Horizontal flip? (probably not)
- [ ] RandomResizedCrop
- [ ] Rand Augment


In [2]:
from spottunet.dataset.cc359 import *
from spottunet.split import one2all
from spottunet.torch.module.unet import UNet2D
from spottunet.utils import sdice
from dpipe.im.metrics import dice_score

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import autocast
from torch.cuda.amp import GradScaler 

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

from PIL import Image

from monai import transforms as T
from monai.transforms import Compose, apply_transform
from fastprogress.fastprogress import master_bar, progress_bar

import json
import nibabel as nib
import pandas as pd
import numpy as np
from scipy import ndimage
from dpipe.im.shape_ops import zoom
import cv2
import os
import gc
from collections import defaultdict
from pathlib import Path
import segmentation_models_pytorch as smp

import matplotlib.pyplot as plt

### Config & Logging

In [3]:
import wandb
from configs.config import CFG
from utils import *

def class2dict(f):
    """Return the attributes of ``f`` as a plain ``{name: value}`` dict.

    Every name reported by ``dir(f)`` is kept unless it starts with a double
    underscore; each value is read with ``getattr``. Names with a single
    leading underscore are therefore included.
    """
    kept_names = [attr for attr in dir(f) if not attr.startswith('__')]
    return {attr: getattr(f, attr) for attr in kept_names}



In [4]:
from dataset.dataloader import *
from dataset.loader import *
from dataset.dataloader_utils import *

In [4]:
"""cc359_df = pd.read_csv(f"{CFG.dataset_path}/meta.csv",delimiter=",")
seed = 0xBadCafe
val_size = 4
n_experiments = len(cc359_df.fold.unique())
split = one2all(df=cc359_df,val_size=val_size)[:n_experiments]
train_df = cc359_df.iloc[split[0][0]].reset_index()

train_dataset = CC359_Dataset(df=train_df,root_dir=CFG.dataset_path,
                              voxel_spacing=CFG.voxel_spacing,transforms=tfms,
                              mode="train", cache=True)

valid_df = cc359_df.iloc[split[0][1]].reset_index()

valid = CC359_Dataset(df=valid_df,root_dir=CFG.dataset_path,
                              voxel_spacing=CFG.voxel_spacing,transforms=None,
                              mode="val", cache=False)"""

'cc359_df = pd.read_csv(f"{CFG.dataset_path}/meta.csv",delimiter=",")\nseed = 0xBadCafe\nval_size = 4\nn_experiments = len(cc359_df.fold.unique())\nsplit = one2all(df=cc359_df,val_size=val_size)[:n_experiments]\ntrain_df = cc359_df.iloc[split[0][0]].reset_index()\n\ntrain_dataset = CC359_Dataset(df=train_df,root_dir=CFG.dataset_path,\n                              voxel_spacing=CFG.voxel_spacing,transforms=tfms,\n                              mode="train", cache=True)\n\nvalid_df = cc359_df.iloc[split[0][1]].reset_index()\n\nvalid = CC359_Dataset(df=valid_df,root_dir=CFG.dataset_path,\n                              voxel_spacing=CFG.voxel_spacing,transforms=None,\n                              mode="val", cache=False)'

In [5]:
"""x,y,_id = next(iter(valid))
plt.imshow(x[180].squeeze(), "gray")
plt.show()
plt.imshow(y[180])
plt.show()"""

'x,y,_id = next(iter(valid))\nplt.imshow(x[180].squeeze(), "gray")\nplt.show()\nplt.imshow(y[180])\nplt.show()'

In [6]:
"""x,y = next(iter(train_dataset))
plt.imshow(train_dataset.shared_array[520], "gray")
plt.show()
plt.imshow(train_dataset.shared_array2[520])
plt.show()"""

'x,y = next(iter(train_dataset))\nplt.imshow(train_dataset.shared_array[520], "gray")\nplt.show()\nplt.imshow(train_dataset.shared_array2[520])\nplt.show()'

### Training

In [7]:
from trainer import Trainer
from dataset.loader import *

In [8]:

def run_fold(fold):
    """Prepare one fold and run a learning-rate range test on its training data.

    Starts a wandb run and a TensorBoard ``SummaryWriter`` under
    ``CFG.results_dir + "/mode_<fold>"``, builds the ``UNet2D`` model, the
    optimizer and the criterion from ``CFG``, splits ``meta.csv`` with
    ``one2all`` into train/valid/test frames for ``fold``, wraps them in
    datasets and loaders, and runs ``LRFinder.range_test`` on the training
    loader.

    The training / testing code is currently disabled: it is kept as a string
    literal at the end of the function and is never executed.

    Args:
        fold: index into the list of splits returned by ``one2all``; also used
            to name the result directory and the wandb run.
    """
    result_dir = CFG.results_dir + "/mode_"+str(fold)
    os.makedirs(result_dir, exist_ok=True)
    #wandb.tensorboard.patch(root_logdir=result_dir+"/logs")
    run = wandb.init(project="domain_shift",
                     group=CFG.model_name,
                     name=f"mode_{str(fold)}",
                     job_type="rand_slice3",
                     config=class2dict(CFG),
                     reinit=True,
                     sync_tensorboard=True)

    writer = None
    try:
        writer = SummaryWriter(log_dir=result_dir+"/logs")

        cc359_df = pd.read_csv(f"{CFG.dataset_path}/meta.csv",delimiter=",")

        #model = smp.Unet(encoder_name="resnet50", encoder_weights="imagenet", in_channels=CFG.n_chans_in,classes=CFG.n_chans_out)

        model = UNet2D(n_chans_in=CFG.n_chans_in, n_chans_out=CFG.n_chans_out, n_filters_init=CFG.n_filters)
        model.to(CFG.device)

        #optimizer = CFG.optim(model.parameters(),lr=CFG.lr,weight_decay=CFG.wd)
        optimizer = CFG.optim(model.parameters(), lr=CFG.lr, weight_decay=CFG.wd, betas=(.9, .999), adam=False)
        #scheduler = CFG.scheduler(optimizer, lr_lambda=lambda epoch: CFG.scheduler_multi_lr_fact )
        criterion = CFG.crit

        # NOTE(review): `seed` is assigned but never passed to anything in this
        # function -- confirm whether one2all is expected to receive it.
        seed = 0xBadCafe
        val_size = 4
        n_experiments = len(cc359_df.fold.unique())
        split = one2all(df=cc359_df,val_size=val_size)[:n_experiments]

        # split[fold] is indexed as (train, valid, test) row positions.
        train_df = cc359_df.iloc[split[fold][0]].reset_index()
        valid_df = cc359_df.iloc[split[fold][1]].reset_index()
        test_df  = cc359_df.iloc[split[fold][2]].reset_index()

        print("Caching Train Data ...")

        sa_x,sa_y = create_shared_arrays(CFG,train_df,root_dir=CFG.dataset_path)
        # `tfms` is not defined in this notebook; presumably it comes from one
        # of the star imports -- TODO confirm.
        train_dataset = CC359_Dataset(CFG,df=train_df,root_dir=CFG.dataset_path,
                                      voxel_spacing=CFG.voxel_spacing,transforms=tfms,
                                      mode="train", cache=True, cached_x=sa_x, cached_y=sa_y)

        valid_dataset = CC359_Dataset(CFG,df=valid_df,root_dir=CFG.dataset_path,
                                      voxel_spacing=CFG.voxel_spacing,
                                      transforms=None,mode="val", cache=False)
        test_dataset = CC359_Dataset(CFG,df=test_df,root_dir=CFG.dataset_path,
                                     voxel_spacing=CFG.voxel_spacing,
                                     transforms=None,mode="test", cache=False)

        train_loader = PrefetchLoader(DataLoader(train_dataset,
                                                 batch_size=CFG.bs,
                                                 shuffle=True,
                                                 num_workers=CFG.num_workers,
                                                 sampler=None,
                                                 collate_fn=fast_collate,
                                                 pin_memory=False,
                                                 drop_last=True),
                                      fp16=CFG.fp16)
        # valid_loader / test_dataloader are only consumed by the disabled
        # training block at the end of the function.
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=1,shuffle=False,
                                  num_workers=1,pin_memory=True)
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=1,shuffle=False,
                                     num_workers=1,pin_memory=False)

        from torch_lr_finder import LRFinder
        # Use the same device the model was moved to instead of a hard-coded "cuda".
        lr_finder = LRFinder(model, optimizer, criterion, device=CFG.device)
        # NOTE(review): train_loader is a PrefetchLoader wrapper, not a plain
        # DataLoader -- confirm that range_test accepts it.
        lr_finder.range_test(train_loader, end_lr=100, num_iter=100)
        lr_finder.plot() # to inspect the loss-learning rate graph
        lr_finder.reset()
    finally:
        # The only close()/finish() calls used to live inside the disabled
        # block below, so the writer and the wandb run were never released.
        if writer is not None:
            writer.close()
        run.finish()

    # Disabled training / evaluation code, kept verbatim. It references
    # `scheduler`, which is commented out above. When re-enabling it, move it
    # into the `try` block so it runs before the writer and run are closed.
    """trainer = Trainer(CFG,
                      model=model, 
                      device=CFG.device, 
                      optimizer=optimizer,
                      scheduler=scheduler,
                      criterion=criterion,
                      writer=writer,
                      fold=fold,
                      max_norm=CFG.max_norm)
    
    history = trainer.fit(
            CFG.epochs, 
            train_loader, 
            valid_loader, 
            f"{result_dir}/", 
            CFG.epochs,
        )
    trainer.test(test_dataloader,result_dir)
    td_sdice = get_target_domain_metrics(CFG.dataset_path,Path(CFG.results_dir),fold)
    #writer.add_hparams(class2dict(CFG),td_sdice)
    wandb.log(td_sdice)
    writer.close()
    run.finish()

    del trainer
    del train_loader
    del valid_loader
    del train_dataset
    del valid_dataset
    gc.collect()"""

In [None]:
# Run the per-fold pipeline once for every fold index listed in CFG.fold.
# The loop variable `fold` stays defined in the notebook namespace afterwards.
for fold in CFG.fold:
    run_fold(fold)

In [None]:
# Leftover interactive aid: opens the post-mortem debugger on the last exception.
# It blocks "Run All" waiting for input, so remove it once debugging is done.
%debug

In [None]:
# Rebuild the network and restore a saved checkpoint for interactive inspection.
fold = 0
model = UNet2D(n_chans_in=CFG.n_chans_in, n_chans_out=CFG.n_chans_out, n_filters_init=CFG.n_filters)
model.to(CFG.device)

# map_location puts the saved tensors on CFG.device rather than on whatever
# device they were saved from, so loading does not depend on that device.
# The checkpoint path is hard-coded to one specific fold-0 file.
model.load_state_dict(
    torch.load("baseline6/mode_0/e1-loss3.043.pth", map_location=CFG.device)["model_state_dict"]
)

cc359_df = pd.read_csv(f"{CFG.dataset_path}/meta.csv",delimiter=",")
# NOTE(review): `seed` is assigned but not passed to one2all below -- confirm intent.
seed = 0xBadCafe
val_size = 4
n_experiments = len(cc359_df.fold.unique())
split = one2all(df=cc359_df,val_size=val_size)[:n_experiments]

In [None]:
# Build the cached training dataset and loader for `fold`. This relies on
# `cc359_df`, `split` and `fold` already being defined in the notebook namespace.
train_df = cc359_df.iloc[split[fold][0]].reset_index()

print("Caching Train Data ...")

# CFG is passed as the first argument, matching every other call to
# create_shared_arrays / CC359_Dataset in this notebook.
sa_x,sa_y = create_shared_arrays(CFG,train_df,root_dir=CFG.dataset_path)
# `tfms` is not defined in this notebook; presumably it comes from one of the
# star imports -- TODO confirm.
train_dataset = CC359_Dataset(CFG,df=train_df,root_dir=CFG.dataset_path,
                              voxel_spacing=CFG.voxel_spacing,transforms=tfms,
                              mode="train", cache=True, cached_x=sa_x, cached_y=sa_y)


train_loader = PrefetchLoader(DataLoader(train_dataset,
                                         batch_size=CFG.bs,
                                         shuffle=True,
                                         num_workers=CFG.num_workers,
                                         sampler=None,
                                         collate_fn=fast_collate,
                                         pin_memory=False,
                                         drop_last=True),
                              fp16=CFG.fp16)

In [None]:
def test_run(fold):
    """Evaluate the saved best-epoch checkpoint of ``fold`` on that fold's test split.

    Loads ``mode_<fold>_best_epoch_model.pth`` from
    ``CFG.results_dir + "/mode_<fold>"`` into a fresh ``UNet2D``, rebuilds the
    ``one2all`` split from ``meta.csv``, wraps the test rows in a
    ``CC359_Dataset`` / ``DataLoader`` and calls ``Trainer.test`` with the
    result directory.

    Args:
        fold: index into the list of splits returned by ``one2all``; also
            selects the result directory and checkpoint file.
    """
    result_dir = CFG.results_dir + "/mode_"+str(fold)
    os.makedirs(result_dir, exist_ok=True)

    cc359_df = pd.read_csv(f"{CFG.dataset_path}/meta.csv",delimiter=",")

    model = UNet2D(n_chans_in=CFG.n_chans_in, n_chans_out=CFG.n_chans_out, n_filters_init=CFG.n_filters)
    #model = smp.Unet(encoder_name="resnet50", encoder_weights="swsl", in_channels=CFG.n_chans_in,classes=CFG.n_chans_out)
    # The model is still on the CPU here, so load the checkpoint onto the CPU
    # too. Without map_location, torch.load restores tensors to the device
    # they were saved from and fails if that device is unavailable.
    checkpoint = torch.load(f"{result_dir}/mode_{fold}_best_epoch_model.pth", map_location="cpu")
    model.load_state_dict(checkpoint["model_state_dict"])

    model.to(CFG.device)

    # NOTE(review): `seed` is assigned but not passed to one2all below -- confirm intent.
    seed = 0xBadCafe
    val_size = 4
    n_experiments = len(cc359_df.fold.unique())
    split = one2all(df=cc359_df,val_size=val_size)[:n_experiments]

    # Position 2 of a split entry holds the test rows.
    test_df  = cc359_df.iloc[split[fold][2]].reset_index()

    test_dataset = CC359_Dataset(CFG,df=test_df,root_dir=CFG.dataset_path,
                                 voxel_spacing=CFG.voxel_spacing,transforms=None,
                                 mode="test", cache=False)
    # The four None arguments are presumably optimizer, scheduler, criterion
    # and writer (going by the keyword call in run_fold's disabled block);
    # verify against Trainer's signature.
    trainer = Trainer(CFG,model,CFG.device, None,None,None,None,fold,CFG.max_norm)

    test_dataloader = DataLoader(test_dataset,
                                 batch_size=1,shuffle=False,
                                 num_workers=0,pin_memory = False)

    trainer.test(test_dataloader, result_dir)
    
# Evaluate only fold 4. The disabled loop header below would instead iterate
# over every fold in CFG.fold (the call would then need to use `fold`).
#for fold in CFG.fold:
test_run(4)

In [10]:
from pathlib import Path

import numpy as np
import pandas as pd

from dpipe.io import load

# Build the source-domain x target-domain table of mean surface-Dice scores
# from the per-fold `sdice_score.json` files under `path_base`.
path_base = Path('baseline_results/baseline16')

# Per-scan metadata indexed by scan id; the columns read below are
# 'fold', 'tomograph_model' and 'tesla_value'.
meta = pd.read_csv("meta.csv", delimiter=",", index_col='id')

folds = sorted(meta['fold'].unique())

# Resolve each fold's label and scan ids once, rather than re-filtering `meta`
# on every inner-loop iteration.
fold_names = {}
fold_ids = {}
for fold_id in folds:
    fold_rows = meta[meta['fold'] == fold_id]
    first_row = fold_rows.iloc[0]
    # Label = tomograph model followed by the tesla value, taken from the first row of the fold.
    fold_names[fold_id] = first_row['tomograph_model'] + str(first_row['tesla_value'])
    fold_ids[fold_id] = fold_rows.index

records = []
for s in folds:
    res_row = {}

    # one2all results:
    sdices = load(path_base / f'mode_{s}/sdice_score.json')
    #sdices = dict(sorted(sdices.items()))
    for t in folds:
        if t == s:
            continue  # the source fold's own column stays empty (NaN in the table)
        ids_t = fold_ids[t]
        # Mean score over the scans that belong to target fold `t`.
        res_row[fold_names[t]] = np.mean([sdsc for _id, sdsc in sdices.items() if _id in ids_t])
    print(res_row)

    # Disabled: in-domain (source -> source) score.
    #sdices = {}
    #for n_val in range(3):
    #    sdices = {**sdices,
    #              **load(path_base / f'mode_{s}/sdice_score.json')}
    #res_row[source_name] = np.mean(list(sdices.values()))

    # The ' ' key holds the source label and becomes the DataFrame index.
    res_row[' '] = fold_names[s]
    records.append(res_row)
df = pd.DataFrame.from_records(records, index=' ')
# Reorder the columns to follow the row order so the table is square.
df[df.index]

{'siemens3': 0.8862069239613072, 'ge15': 0.7441230970897291, 'ge3': 0.8912394862975026, 'philips15': 0.8970851070655189, 'philips3': 0.8286721599408774}
{'siemens15': 0.793814857791286, 'ge15': 0.5653082755803079, 'ge3': 0.8225196980954298, 'philips15': 0.8261902887021051, 'philips3': 0.7110882322227293}
{'siemens15': 0.8891796363359588, 'siemens3': 0.8272426106209404, 'ge3': 0.8278275451776236, 'philips15': 0.910006792778337, 'philips3': 0.8410205023556002}
{'siemens15': 0.7821464758318191, 'siemens3': 0.7399309079510205, 'ge15': 0.5524823019288387, 'philips15': 0.8087147676361184, 'philips3': 0.4670436739161707}
{'siemens15': 0.8873857290765282, 'siemens3': 0.8701553290248403, 'ge15': 0.734153952054563, 'ge3': 0.8284189264340683, 'philips3': 0.7408041083601975}
{'siemens15': 0.8689110423814996, 'siemens3': 0.846955769289682, 'ge15': 0.863288075647754, 'ge3': 0.8199882577969172, 'philips15': 0.8481256074157847}


Unnamed: 0,siemens15,siemens3,ge15,ge3,philips15,philips3
,,,,,,
siemens15,,0.886207,0.744123,0.891239,0.897085,0.828672
siemens3,0.793815,,0.565308,0.82252,0.82619,0.711088
ge15,0.88918,0.827243,,0.827828,0.910007,0.841021
ge3,0.782146,0.739931,0.552482,,0.808715,0.467044
philips15,0.887386,0.870155,0.734154,0.828419,,0.740804
philips3,0.868911,0.846956,0.863288,0.819988,0.848126,
