# LOO

## import libraries

In [1]:
import os
import pickle
import random
from glob import glob
from time import sleep

import numpy as np
import pandas as pd
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from albumentations import (CenterCrop, Compose, HorizontalFlip, Normalize,
                            RandomCrop, VerticalFlip)
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, RandomSampler
from tqdm import tqdm

from clmodel.dataset import CLModelDataset
from clmodel.evaluate import Metrics, macro_auroc, macro_balanced_accuracy
from clmodel.model import FrozenEffnetB4Model
from clmodel.train import train_loop
from clmodel.utils import AverageValue, Logger, fix_seed

## set seeds

In [2]:
seed = 123
fix_seed(seed)


def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


g = torch.Generator()
g.manual_seed(seed)


<torch._C.Generator at 0x7f738f524eb0>

## load data

In [3]:
df = pd.read_csv("../../data/TGGATEs/processed/train_val_for_model_training.csv")


In [4]:
ft_list = list(df.columns[3:11])
ft_list


['Proliferation, bile duct',
 'Ground glass appearance',
 'Increased mitosis',
 'Inclusion body, intracytoplasmic',
 'Deposit, pigment',
 'Single cell necrosis',
 'Vacuolization, cytoplasmic',
 'Swelling']

## image preprocessing

In [5]:
image_size = 512

tr_transform = Compose(
    [
        # RandomCrop(image_size, image_size),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        Normalize(),
        ToTensorV2(),
    ]
)
vl_transform = Compose(
    [Normalize(), ToTensorV2()]  # CenterCrop(image_size, image_size),
)


## create image_dict

In [6]:
n_epochs = 10

tr = df[df["fold"] != 2]
vl = df[df["fold"] == 2]

ft_dict = {v[0]: v[1:] for v in df[["path"] + ft_list].to_numpy()}

train_dataset = CLModelDataset(
    tr["path"].values, None, transform=RandomCrop(image_size, image_size)
)
train_loader = DataLoader(
    train_dataset,
    num_workers=4,
    batch_size=16,
    sampler=RandomSampler(
        train_dataset, num_samples=len(train_dataset) // 10, replacement=True
    ),
    pin_memory=True,
    drop_last=True,
    worker_init_fn=seed_worker,
    generator=g,
)

new_tr = []
image_dict = {}
count = 0
for epoch in range(n_epochs):
    for x, pathes in tqdm(train_loader):
        for i in range(16):
            image_dict[f"dummy/dummy/{count}"] = x[i].numpy()
            new_tr.append([f"dummy/dummy/{count}"] + list(ft_dict[pathes[i]]))
            count += 1
    # with open("../../temp/cache_image_dict_new_tr.pickle", "wb") as f:
    #     pickle.dump((image_dict, new_tr), f)
new_tr = pd.DataFrame(new_tr, columns=["path"] + ft_list)


100%|██████████| 321/321 [24:20<00:00,  4.55s/it]
100%|██████████| 321/321 [21:46<00:00,  4.07s/it]
100%|██████████| 321/321 [19:52<00:00,  3.71s/it]
100%|██████████| 321/321 [17:33<00:00,  3.28s/it]
100%|██████████| 321/321 [16:27<00:00,  3.08s/it]
100%|██████████| 321/321 [14:51<00:00,  2.78s/it]
100%|██████████| 321/321 [13:23<00:00,  2.50s/it]
100%|██████████| 321/321 [12:26<00:00,  2.32s/it]
100%|██████████| 321/321 [13:52<00:00,  2.59s/it]
100%|██████████| 321/321 [14:59<00:00,  2.80s/it]


In [7]:
valid_dataset = CLModelDataset(
    vl["path"].values,
    None,
    transform=CenterCrop(image_size, image_size),
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=32,
    drop_last=False,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)
new_vl = []
for x, pathes in tqdm(valid_loader):
    for i in range(len(x)):
        image_dict[f"dummy/dummy/{count}"] = x[i].numpy()
        new_vl.append([f"dummy/dummy/{count}"] + list(ft_dict[pathes[i]]))
        count += 1
new_vl = pd.DataFrame(new_vl, columns=["path"] + ft_list)


100%|██████████| 536/536 [1:22:35<00:00,  9.25s/it]
  7%|▋         | 40/536 [07:11<1:29:06, 10.78s/it]


KeyboardInterrupt: 

In [9]:
new_vl = pd.DataFrame(new_vl[:len(vl)], columns=["path"] + ft_list)


## train

In [14]:
for ft in ft_list[4:]:
    print(f"==============={ft}=============")
    loo_ft_list = [f for f in ft_list if f != ft]
    train_dataset = CLModelDataset(
        new_tr["path"].values,
        new_tr[loo_ft_list].values >= 0.5,
        image_dict,
        transform=tr_transform,
        length=len(tr) // 10,
        cache_mode=True,
    )
    valid_dataset = CLModelDataset(
        new_vl["path"].values,
        new_vl[loo_ft_list].values >= 0.5,
        image_dict,
        transform=vl_transform,
    )

    train_loader = DataLoader(
        train_dataset,
        num_workers=4,
        batch_size=16,
        shuffle=False,
        pin_memory=True,
        drop_last=False,
    )
    valid_loader = DataLoader(
        valid_dataset,
        num_workers=4,
        batch_size=32,
        shuffle=False,
        pin_memory=True,
        drop_last=False,
    )

    criterion = nn.BCEWithLogitsLoss()

    n_epochs = 10

    out_dir = f"../../outputs/230305TGGATEs_ft_loo_{ft}_seed123"
    os.system(f'mkdir "{out_dir}"')

    for depth in range(9):
        print(f"=================Depth {depth}===================")
        if depth >= 8:
            model = timm.create_model(
                "tf_efficientnet_b4_ns", pretrained=True, num_classes=len(loo_ft_list)
            )
            if depth == 9:
                depth = "8_10epochs"
                n_epochs = 10
        else:
            model = FrozenEffnetB4Model(depth, len(loo_ft_list))

        model.to("cuda")
        optimizer = optim.Adam(model.parameters(), lr=5e-4)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 10, 1e-6)

        metrics = [
            Metrics("macro AUROC", macro_auroc, "+"),
            Metrics("macro balanced accuracy", macro_balanced_accuracy, "+"),
        ]
        os.system(f'mkdir "{out_dir}/{depth}"')
        res = train_loop(
            model,
            train_loader,
            valid_loader,
            0,
            criterion,
            optimizer,
            "cuda",
            n_epochs,
            scheduler,
            metrics,
            f"{out_dir}/{depth}",
            f"effnetb4_freeze{depth}",
            preprocess=lambda x: x.sigmoid(),
            verbose=100,
            logger=Logger(),
        )

        with open(f"{out_dir}/{depth}/result.pickle", "wb") as f:
            pickle.dump(res, f)
        sleep(100)



mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123’: File exists


Epoch 1


mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123/0’: File exists


Step: 1/321 Loss: 0.6942 Elapsed time 3.0 Rest time 965.8
Step: 101/321 Loss: 0.2928 Elapsed time 8.3 Rest time 18.0
Step: 201/321 Loss: 0.2573 Elapsed time 13.2 Rest time 7.9
Step: 301/321 Loss: 0.2385 Elapsed time 19.6 Rest time 1.3
Step: 321/321 Loss: 0.2360 Elapsed time 20.9 Rest time 0.0
Step: 1/536 Loss: 0.2402 Elapsed time 0.7 Rest time 361.4
Step: 101/536 Loss: 0.2201 Elapsed time 15.5 Rest time 66.8
Step: 201/536 Loss: 0.2169 Elapsed time 30.6 Rest time 51.0
Step: 301/536 Loss: 0.2176 Elapsed time 45.1 Rest time 35.2
Step: 401/536 Loss: 0.2177 Elapsed time 59.6 Rest time 20.1
Step: 501/536 Loss: 0.2171 Elapsed time 74.2 Rest time 5.2
Step: 536/536 Loss: 0.2169 Elapsed time 79.1 Rest time 0.0
macro AUROC : 0.8355
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.624
This is best macro balanced accuracy.
saved model.
loss : 0.2169
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1961 Elapsed time 0.3 Rest time 110.0
Step: 101/321 Loss: 0.1921 Elapsed 

mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123/1’: File exists


Step: 1/321 Loss: 0.6935 Elapsed time 0.4 Rest time 120.6
Step: 101/321 Loss: 0.2732 Elapsed time 5.7 Rest time 12.3
Step: 201/321 Loss: 0.2377 Elapsed time 10.9 Rest time 6.5
Step: 301/321 Loss: 0.2168 Elapsed time 16.4 Rest time 1.1
Step: 321/321 Loss: 0.2141 Elapsed time 17.4 Rest time 0.0
Step: 1/536 Loss: 0.2287 Elapsed time 0.5 Rest time 257.6
Step: 101/536 Loss: 0.2072 Elapsed time 15.0 Rest time 64.5
Step: 201/536 Loss: 0.2028 Elapsed time 29.5 Rest time 49.1
Step: 301/536 Loss: 0.2041 Elapsed time 44.0 Rest time 34.4
Step: 401/536 Loss: 0.2043 Elapsed time 58.6 Rest time 19.7
Step: 501/536 Loss: 0.2042 Elapsed time 73.1 Rest time 5.1
Step: 536/536 Loss: 0.2037 Elapsed time 78.1 Rest time 0.0
macro AUROC : 0.8588
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.648
This is best macro balanced accuracy.
saved model.
loss : 0.2037
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1494 Elapsed time 0.4 Rest time 119.0
Step: 101/321 Loss: 0.1639 Elapsed 

mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123/2’: File exists


Step: 1/321 Loss: 0.6895 Elapsed time 0.4 Rest time 132.4
Step: 101/321 Loss: 0.2590 Elapsed time 11.3 Rest time 24.6
Step: 201/321 Loss: 0.2270 Elapsed time 18.8 Rest time 11.2
Step: 301/321 Loss: 0.2067 Elapsed time 26.3 Rest time 1.7
Step: 321/321 Loss: 0.2043 Elapsed time 27.8 Rest time 0.0
Step: 1/536 Loss: 0.2074 Elapsed time 0.5 Rest time 264.4
Step: 101/536 Loss: 0.1902 Elapsed time 15.0 Rest time 64.6
Step: 201/536 Loss: 0.1877 Elapsed time 29.5 Rest time 49.2
Step: 301/536 Loss: 0.1889 Elapsed time 44.1 Rest time 34.4
Step: 401/536 Loss: 0.1890 Elapsed time 58.6 Rest time 19.7
Step: 501/536 Loss: 0.1884 Elapsed time 73.2 Rest time 5.1
Step: 536/536 Loss: 0.1879 Elapsed time 78.2 Rest time 0.0
macro AUROC : 0.8854
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.6682
This is best macro balanced accuracy.
saved model.
loss : 0.1879
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1506 Elapsed time 0.4 Rest time 122.3
Step: 101/321 Loss: 0.1502 Elaps

mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123/3’: File exists


Step: 1/321 Loss: 0.6777 Elapsed time 0.4 Rest time 139.2
Step: 101/321 Loss: 0.2521 Elapsed time 12.7 Rest time 27.7
Step: 201/321 Loss: 0.2185 Elapsed time 21.7 Rest time 12.9
Step: 301/321 Loss: 0.1987 Elapsed time 30.6 Rest time 2.0
Step: 321/321 Loss: 0.1965 Elapsed time 32.3 Rest time 0.0
Step: 1/536 Loss: 0.2243 Elapsed time 0.5 Rest time 257.4
Step: 101/536 Loss: 0.2055 Elapsed time 15.0 Rest time 64.6
Step: 201/536 Loss: 0.2023 Elapsed time 29.5 Rest time 49.2
Step: 301/536 Loss: 0.2034 Elapsed time 44.0 Rest time 34.4
Step: 401/536 Loss: 0.2031 Elapsed time 58.7 Rest time 19.7
Step: 501/536 Loss: 0.2028 Elapsed time 73.2 Rest time 5.1
Step: 536/536 Loss: 0.2020 Elapsed time 78.1 Rest time 0.0
macro AUROC : 0.8914
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.7006
This is best macro balanced accuracy.
saved model.
loss : 0.202
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1395 Elapsed time 0.4 Rest time 135.0
Step: 101/321 Loss: 0.1428 Elapse

mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123/4’: File exists


Step: 1/321 Loss: 0.6934 Elapsed time 0.5 Rest time 146.2
Step: 101/321 Loss: 0.2514 Elapsed time 10.8 Rest time 23.5
Step: 201/321 Loss: 0.2170 Elapsed time 21.3 Rest time 12.7
Step: 301/321 Loss: 0.1989 Elapsed time 31.6 Rest time 2.1
Step: 321/321 Loss: 0.1967 Elapsed time 33.7 Rest time 0.0
Step: 1/536 Loss: 0.1779 Elapsed time 0.5 Rest time 262.3
Step: 101/536 Loss: 0.1803 Elapsed time 15.0 Rest time 64.6
Step: 201/536 Loss: 0.1791 Elapsed time 29.5 Rest time 49.2
Step: 301/536 Loss: 0.1808 Elapsed time 44.1 Rest time 34.4
Step: 401/536 Loss: 0.1811 Elapsed time 58.6 Rest time 19.7
Step: 501/536 Loss: 0.1808 Elapsed time 73.1 Rest time 5.1
Step: 536/536 Loss: 0.1801 Elapsed time 78.1 Rest time 0.0
macro AUROC : 0.8896
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.6769
This is best macro balanced accuracy.
saved model.
loss : 0.1801
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1519 Elapsed time 0.4 Rest time 139.5
Step: 101/321 Loss: 0.1383 Elaps

mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123/5’: File exists


Step: 1/321 Loss: 0.6924 Elapsed time 0.5 Rest time 153.6
Step: 101/321 Loss: 0.2499 Elapsed time 16.1 Rest time 35.1
Step: 201/321 Loss: 0.2152 Elapsed time 28.6 Rest time 17.1
Step: 301/321 Loss: 0.1988 Elapsed time 41.0 Rest time 2.7
Step: 321/321 Loss: 0.1967 Elapsed time 43.6 Rest time 0.0
Step: 1/536 Loss: 0.1769 Elapsed time 0.5 Rest time 261.7
Step: 101/536 Loss: 0.1799 Elapsed time 15.0 Rest time 64.7
Step: 201/536 Loss: 0.1805 Elapsed time 29.5 Rest time 49.2
Step: 301/536 Loss: 0.1812 Elapsed time 44.1 Rest time 34.4
Step: 401/536 Loss: 0.1817 Elapsed time 58.6 Rest time 19.7
Step: 501/536 Loss: 0.1811 Elapsed time 73.2 Rest time 5.1
Step: 536/536 Loss: 0.1802 Elapsed time 78.1 Rest time 0.0
macro AUROC : 0.889
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.6765
This is best macro balanced accuracy.
saved model.
loss : 0.1802
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1370 Elapsed time 0.7 Rest time 240.0
Step: 101/321 Loss: 0.1401 Elapse

mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123/6’: File exists


Step: 1/321 Loss: 0.6911 Elapsed time 0.5 Rest time 163.3
Step: 101/321 Loss: 0.2530 Elapsed time 19.3 Rest time 42.0
Step: 201/321 Loss: 0.2193 Elapsed time 35.8 Rest time 21.4
Step: 301/321 Loss: 0.1991 Elapsed time 54.7 Rest time 3.6
Step: 321/321 Loss: 0.1971 Elapsed time 58.0 Rest time 0.0
Step: 1/536 Loss: 0.1785 Elapsed time 0.5 Rest time 262.3
Step: 101/536 Loss: 0.1835 Elapsed time 15.0 Rest time 64.7
Step: 201/536 Loss: 0.1816 Elapsed time 29.7 Rest time 49.5
Step: 301/536 Loss: 0.1821 Elapsed time 44.4 Rest time 34.7
Step: 401/536 Loss: 0.1826 Elapsed time 58.9 Rest time 19.8
Step: 501/536 Loss: 0.1821 Elapsed time 74.0 Rest time 5.2
Step: 536/536 Loss: 0.1812 Elapsed time 79.1 Rest time 0.0
macro AUROC : 0.8973
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.6892
This is best macro balanced accuracy.
saved model.
loss : 0.1812
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1603 Elapsed time 0.5 Rest time 163.3
Step: 101/321 Loss: 0.1404 Elaps

mkdir: cannot create directory ‘../../outputs/230305TGGATEs_ft_loo_Deposit, pigment_seed123/7’: File exists


Step: 1/321 Loss: 0.6955 Elapsed time 0.5 Rest time 167.9
Step: 101/321 Loss: 0.2518 Elapsed time 19.5 Rest time 42.5
Step: 201/321 Loss: 0.2194 Elapsed time 38.9 Rest time 23.2
Step: 301/321 Loss: 0.2024 Elapsed time 57.0 Rest time 3.8
Step: 321/321 Loss: 0.1994 Elapsed time 62.1 Rest time 0.0
Step: 1/536 Loss: 0.1686 Elapsed time 0.7 Rest time 374.0
Step: 101/536 Loss: 0.1868 Elapsed time 15.3 Rest time 65.7
Step: 201/536 Loss: 0.1846 Elapsed time 29.8 Rest time 49.7
Step: 301/536 Loss: 0.1863 Elapsed time 44.4 Rest time 34.6
Step: 401/536 Loss: 0.1873 Elapsed time 59.1 Rest time 19.9
Step: 501/536 Loss: 0.1870 Elapsed time 73.7 Rest time 5.1
Step: 536/536 Loss: 0.1860 Elapsed time 78.6 Rest time 0.0
macro AUROC : 0.8874
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.7087
This is best macro balanced accuracy.
saved model.
loss : 0.186
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1444 Elapsed time 0.5 Rest time 166.5
Step: 101/321 Loss: 0.1441 Elapse

## train with the all pathological findings

In [16]:
train_dataset = CLModelDataset(
    new_tr["path"].values,
    new_tr[ft_list].values >= 0.5,
    image_dict,
    transform=tr_transform,
    length=len(tr) // 10,
    cache_mode=True,
)
valid_dataset = CLModelDataset(
    new_vl["path"].values,
    new_vl[ft_list].values >= 0.5,
    image_dict,
    transform=vl_transform,
)

train_loader = DataLoader(
    train_dataset,
    num_workers=4,
    batch_size=16,
    shuffle=False,
    pin_memory=True,
    drop_last=False,
)
valid_loader = DataLoader(
    valid_dataset,
    num_workers=4,
    batch_size=32,
    shuffle=False,
    pin_memory=True,
    drop_last=False,
)

criterion = nn.BCEWithLogitsLoss()

n_epochs = 10

out_dir = "../../outputs/230305TGGATEs_model_seed123"
os.system(f'mkdir "{out_dir}"')

for depth in range(9):
    print(f"=================Depth {depth}===================")
    if depth >= 8:
        model = timm.create_model(
            "tf_efficientnet_b4_ns", pretrained=True, num_classes=len(ft_list)
        )
        if depth == 9:
            depth = "8_10epochs"
            n_epochs = 10
    else:
        model = FrozenEffnetB4Model(depth, len(ft_list))

    model.to("cuda")
    optimizer = optim.Adam(model.parameters(), lr=5e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 10, 1e-6)

    metrics = [
        Metrics("macro AUROC", macro_auroc, "+"),
        Metrics("macro balanced accuracy", macro_balanced_accuracy, "+"),
    ]
    os.system(f'mkdir "{out_dir}/{depth}"')
    res = train_loop(
        model,
        train_loader,
        valid_loader,
        0,
        criterion,
        optimizer,
        "cuda",
        n_epochs,
        scheduler,
        metrics,
        f"{out_dir}/{depth}",
        f"effnetb4_freeze{depth}",
        preprocess=lambda x: x.sigmoid(),
        verbose=100,
        logger=Logger(),
    )

    with open(f"{out_dir}/{depth}/result.pickle", "wb") as f:
        pickle.dump(res, f)
    sleep(100)

mkdir: cannot create directory ‘../../outputs/230305TGGATEs_model_seed123’: File exists


Epoch 1


mkdir: cannot create directory ‘../../outputs/230305TGGATEs_model_seed123/0’: File exists


Step: 1/321 Loss: 0.6897 Elapsed time 0.4 Rest time 122.7
Step: 101/321 Loss: 0.2808 Elapsed time 5.0 Rest time 10.8
Step: 201/321 Loss: 0.2456 Elapsed time 9.5 Rest time 5.7
Step: 301/321 Loss: 0.2279 Elapsed time 14.0 Rest time 0.9
Step: 321/321 Loss: 0.2255 Elapsed time 14.9 Rest time 0.0
Step: 1/536 Loss: 0.2227 Elapsed time 0.5 Rest time 270.3
Step: 101/536 Loss: 0.2034 Elapsed time 14.9 Rest time 64.3
Step: 201/536 Loss: 0.2005 Elapsed time 29.4 Rest time 49.0
Step: 301/536 Loss: 0.2008 Elapsed time 44.2 Rest time 34.5
Step: 401/536 Loss: 0.2011 Elapsed time 58.8 Rest time 19.8
Step: 501/536 Loss: 0.2005 Elapsed time 73.4 Rest time 5.1
Step: 536/536 Loss: 0.2004 Elapsed time 78.3 Rest time 0.0
macro AUROC : 0.8477
This is best macro AUROC.
saved model.
macro balanced accuracy : 0.6155
This is best macro balanced accuracy.
saved model.
loss : 0.2004
This is best loss.
saved model.
Epoch 2
Step: 1/321 Loss: 0.1777 Elapsed time 0.4 Rest time 122.1
Step: 101/321 Loss: 0.1838 Elapsed 