# RSNA 2023 Abdominal Trauma Detection: baseline K-fold training

In [None]:
import copy
import datetime
import random
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from iterstrat.ml_stratifiers import (
    MultilabelStratifiedKFold
)
from sklearn.metrics import log_loss
from sklearn.utils.class_weight import compute_class_weight

# import PyTorch Modules
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.optim import lr_scheduler
from torch.utils.data import (
    Subset
)

# import PyTorch Modules for ComputerVision.
import torchvision
torchvision.disable_beta_transforms_warning()
import torchvision.transforms.v2 as transforms
import torchvision.transforms.v2.functional as F
from torchvision.io import read_image
from torchvision.models import resnet18, efficientnet_v2_s, vit_b_16, swin_v2_s
from torcheval.metrics import MulticlassAUROC

# Make the project's own modules under ../src/ importable from this notebook.
sys.path.append("../src/")
# Trim surrounding whitespace whenever a figure is saved to disk.
plt.rcParams["savefig.bbox"] = 'tight'
plt.ion()   # switch matplotlib to interactive mode
# Let cuDNN benchmark and pick convolution algorithms.
# NOTE(review): autotuned kernel selection can differ between runs, which may
# work against the reproducibility the seeding helper aims for - confirm the
# speed/reproducibility trade-off is intended.
cudnn.benchmark = True
from timm.scheduler import CosineLRScheduler


In [None]:
# my modules for RSNA2023.
from dataset_handler import AbdominalTraumaDataset, make_datalodaers
from model import MultiAbdominalTraumaClassifier
from metric import score, add_sample_weights, make_submission_file


In [None]:
# Train Parameters #
# Everything a reader may want to tune for a run lives in this cell.

# Minute-resolution timestamp; each run writes into its own output folder.
start_time = datetime.datetime.now().strftime("%Y%m%d%H%M")
save_dir = Path(rf"D:\RSNA2023\data\out\models\baseline\{start_time}")
save_dir.mkdir(parents=True, exist_ok=True)

# Use the first GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

n_split = 4        # number of cross-validation folds
random_state = 0   # shared seed for the splitter and the RNG helper

# for image
stride = 4
imgsize = 384

# Training pipeline: resize, random flips / sharpening / small square erasing,
# then dtype conversion and an identity normalisation (mean 0, std 1).
train_transform = nn.Sequential(*[
    transforms.Resize((imgsize, imgsize), antialias=True),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomAdjustSharpness(1.80, p=0.5),
    transforms.RandomErasing(p=0.5, scale=(0.001, 0.005), ratio=(1, 1)),
    transforms.ConvertDtype(),
    transforms.Normalize([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]),
])

# Validation pipeline: same resize / conversion steps, no random augmentation.
valid_transform = nn.Sequential(*[
    transforms.Resize((imgsize, imgsize), antialias=True),
    transforms.ConvertDtype(),
    transforms.Normalize([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]),
])

# Per-target weights handed to `model.fit(..., criterions_weight=...)`.
criterions_weights = {
    "bowel_injury": 1,
    "extravasation_injury": 3,
    "kidney_injury": 3,
    "liver_injury": 3,
    "spleen_injury": 3,
    "any_injury": 0.5,
    "incomplete_organ": 0.5,
}

# Optimisation schedule.
num_epoch = 30
learning_rate = 1e-4
batch_size = 32

# Warm-up settings forwarded to the cosine LR scheduler.
warmup_t = 3
warmup_lr_init = 1e-8
warmup_prefix = True

# Optimizer class and backbone factory; both are instantiated once per fold.
_optimizer = optim.AdamW
backbone = resnet18
b_weights = "IMAGENET1K_V1"

# ---------------- #


In [None]:
# utilities.
def torch_fix_seed(seed:int=0):
    """Seed every RNG used by the notebook and request deterministic kernels.

    Args:
        seed: Value fed to Python's ``random``, NumPy's legacy global RNG and
            PyTorch (CPU and every CUDA device).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers all GPUs, not only the current one; harmless without CUDA.
    torch.cuda.manual_seed_all(seed)
    # `use_deterministic_algorithms` is a function. Assigning `True` to it (as
    # the previous version did) enabled nothing and replaced the torch API with
    # a bool. `warn_only=True` keeps training running when an op has no
    # deterministic implementation and emits a warning instead of raising.
    torch.use_deterministic_algorithms(True, warn_only=True)


# Seed all RNGs once, before any data splitting or model construction happens.
torch_fix_seed(random_state)


In [None]:
# Raw inputs: the image root folder plus the two CSV tables that are merged
# and fed to the dataset in the cells below.
train_image_root_dir = Path(r"D:\RSNA2023\data\train_images")

patient_info = pd.read_csv(r"D:\RSNA2023\data\train.csv")
patient_series = pd.read_csv(r"D:\RSNA2023\data\train_series_meta.csv")


In [None]:
# Compute "balanced" class weights for every prediction target.
# Join the two tables on whatever columns they share.
data = pd.merge(patient_series, patient_info)

# Collapse each organ's low/high flag pair into one grade column:
# low + 2 * high (0 / 1 / 2 assuming the two flags are never both set).
for grade_col, low_col, high_col in (
    ("kidney_injury", "kidney_low", "kidney_high"),
    ("liver_injury", "liver_low", "liver_high"),
    ("spleen_injury", "spleen_low", "spleen_high"),
):
    data[grade_col] = data[low_col] + (data[high_col] * 2)

target_labels = [
    "bowel_injury",
    "extravasation_injury",
    "kidney_injury",
    "liver_injury",
    "spleen_injury",
    "any_injury",
    "incomplete_organ",
]

# One float32 weight tensor per target, ordered by ascending class value.
class_weights = {}
for label in target_labels:
    label_values = data.loc[:, label]
    balanced = compute_class_weight(
        class_weight="balanced",
        classes=np.sort(label_values.unique()),
        y=label_values.values,
    )
    class_weights[label] = torch.from_numpy(balanced.astype(np.float32))

    formatted = ", ".join(str(w.item()) for w in class_weights[label])
    print(f"{label} weights: " + formatted)


In [None]:
# Build the full training dataset from the two metadata tables and the image
# root folder. `stride` and `has_pseudo3D_img` are forwarded unchanged to the
# project's dataset class (their exact meaning is defined in dataset_handler).
dataset = AbdominalTraumaDataset(
    patient_info,
    patient_series,
    train_image_root_dir,
    img_extension=".png",
    has_pseudo3D_img=True,
    stride=stride
)

# K-fold splitter that stratifies on several label columns at once; shuffling
# is seeded with `random_state` so the fold assignment is repeatable.
data_spliter = MultilabelStratifiedKFold(
    n_splits=n_split,
    shuffle=True,
    random_state=random_state,
)


In [None]:
def calc_multiclasses_roc(y_pred, y_true, classes, show_score):
    """Compute one multiclass AUROC per prediction head.

    Args:
        y_pred: 2-D predictions whose columns hold the heads' class scores
            back to back, in the iteration order of ``classes``.
        y_true: 2-D ground truth; column ``i`` is read as the target of the
            ``i``-th head.
        classes: Mapping of head name to its number of classes.
        show_score: When truthy, print each head's score as it is computed.

    Returns:
        List of AUROC values (Python floats), one per entry of ``classes``.
    """
    aurocs = []
    offset = 0  # first column of the current head inside y_pred

    for col, (name, n_cls) in enumerate(classes.items()):
        auroc = MulticlassAUROC(num_classes=n_cls)
        auroc.update(y_pred[:, offset:offset + n_cls], y_true[:, col])
        value = auroc.compute().item()
        aurocs.append(value)
        offset += n_cls

        if show_score:
            print(f"{name}' ROC: {value}")

    return aurocs


# 📚 Train...

In [None]:
# Per-fold results: `scores` collects the return value of `score(...)`,
# `rocs` the per-head AUROC lists from `calc_multiclasses_roc`.
scores, rocs = [], []

# `Subset` stores a reference to the dataset it wraps, not a copy. Setting
# `train_dataset.dataset.transform` and then `valid_dataset.dataset.transform`
# therefore wrote to the same object twice, and training silently ran with
# `valid_transform` (i.e. without any augmentation). Shallow copies give each
# split its own `transform` attribute while sharing the underlying data.
# Built once here because they do not depend on the fold.
# NOTE: the global `dataset` itself is no longer modified by this cell.
train_source = copy.copy(dataset)
train_source.transform = train_transform
valid_source = copy.copy(dataset)
valid_source.transform = valid_transform

for k, (train_indices, valid_indices) in enumerate(
    data_spliter.split(dataset.labels, dataset.labels)
):
    save_sub_dir = save_dir / f"fold#{k:02}"
    save_sub_dir.mkdir(parents=True, exist_ok=True)

    # ----- Train ----- #
    # Fresh model, losses, optimizer and scheduler for every fold.
    model = MultiAbdominalTraumaClassifier(
        backbone=backbone(weights=b_weights)
    )
    criterions = {
        classname: nn.CrossEntropyLoss(weight=weights.to(device))
        for classname, weights in class_weights.items()
    }

    optimizer = _optimizer(model.parameters(), lr=learning_rate)

    scheduler = CosineLRScheduler(
        optimizer,
        t_initial=num_epoch,
        warmup_t=warmup_t,
        warmup_lr_init=warmup_lr_init,
        warmup_prefix=warmup_prefix
    )

    train_dataset = Subset(train_source, train_indices)
    valid_dataset = Subset(valid_source, valid_indices)

    dataloaders = make_datalodaers(
        train_dataset,
        valid_dataset,
        batch_size=batch_size
    )

    history = model.fit(
        dataloaders,
        criterions,
        optimizer,
        scheduler,
        num_epoch,
        save_dir=save_sub_dir,
        criterions_weight=criterions_weights
    )

    # Transposed so that rows are the keys of `history` (presumably epochs -
    # verify against `model.fit`) and columns the logged quantities.
    train_history = pd.DataFrame(history).T
    train_history.to_csv(save_sub_dir / "history.csv")

    fig, ax = plt.subplots(1, 1, figsize=(8, 3))
    ax.set_title(f"Fold#{k:02}: Train Losses")
    ax.set_xlabel("Epoch")
    train_history.loc[:, ["train_average_loss",
                          "valid_average_loss"]].plot(ax=ax)
    ax.grid()

    # Close this specific figure so figures do not pile up across folds;
    # it is still rendered once through `display`.
    plt.close(fig)
    display(fig)

    # ----- Scoring with Validation Data -----#
    # Pick the epoch with the lowest weighted mean of the five per-target
    # validation columns; every column except bowel is weighted x3.
    selection_cols = [
        "valid_bowel_injury",
        "valid_extravasation_injury",
        "valid_kidney_injury",
        "valid_liver_injury",
        "valid_spleen_injury",
    ]
    weighted_cols = [
        "valid_extravasation_injury",
        "valid_kidney_injury",
        "valid_liver_injury",
        "valid_spleen_injury"
    ]

    # Work on a copy so `train_history` keeps the values written to disk.
    selection_loss = train_history.loc[:, selection_cols].copy()
    selection_loss[weighted_cols] = selection_loss[weighted_cols] * 3
    best_epoch = selection_loss.mean(axis=1).idxmin()

    model.load_model_state(save_sub_dir / f"E{best_epoch:03}.pt", device)
    print(f"fold#{k:02}'s best epoch: E{best_epoch:03}")

    # Rebuild the held-out part of the metadata for patient-level scoring.
    # Assumes position i in `dataset` corresponds to row i of
    # `patient_series` - TODO confirm against AbdominalTraumaDataset.
    test_patient_series = patient_series.iloc[
        valid_indices, :].reset_index(drop=True)

    test_patient_info = patient_info.set_index("patient_id").loc[
        test_patient_series.patient_id.unique(), :
    ].reset_index()

    test_dataset = AbdominalTraumaDataset(
        test_patient_info,
        test_patient_series,
        train_image_root_dir,
        img_extension=".png",
        # Same resize / dtype / normalise pipeline as validation.
        transform=valid_transform,
        has_pseudo3D_img=True,
        stride=stride
    )

    pred_test_series = model.predict(test_dataset)
    y_pred = make_submission_file(
        pred_test_series, test_patient_series.reset_index(drop=True)
    )
    y_true = add_sample_weights(test_patient_info)

    scores.append(score(y_true.copy(), y_pred.copy(), "patient_id"))
    rocs.append(
        calc_multiclasses_roc(
            torch.tensor(pred_test_series),
            test_dataset.labels,
            {
                "bowel_injury": 2,
                "extravasation_injury": 2,
                "kidney_injury": 3,
                "liver_injury": 3,
                "spleen_injury": 3,
            },
            True
        )
    )
    # Drop the reference so the fold's model can be freed before the next one.
    del model


In [None]:
# Tabulate the per-fold scores: one row per fold, one column per target.
score_columns = ["bowel", "extravasation", "kidney", "liver", "spleen", "any"]
scores = pd.DataFrame(scores, columns=score_columns)

# Row-wise mean across the targets, then a final row with the mean over folds
# (which also averages the new "Average" column).
scores = scores.assign(Average=scores.mean(axis=1))
scores.loc["CV-Average"] = scores.mean(axis=0)

display(scores)
scores.to_csv(save_dir / "scores.csv")


In [None]:
# Tabulate the per-fold AUROC values: one row per fold, one column per head,
# in the order the heads were passed to `calc_multiclasses_roc`.
roc_columns = [
    "bowel_injury",
    "extravasation_injury",
    "kidney_injury",
    "liver_injury",
    "spleen_injury",
]
rocs = pd.DataFrame(rocs, columns=roc_columns)

display(rocs)
rocs.to_csv(save_dir / "AUROC.csv")
