# exp011_eval
[Notion](https://www.notion.so/exp011-4a459065840b4614b0381125851ca87e?pvs=4)

Copied from: exp009_eval.ipynb

In [1]:
import os
import random
import sys
import warnings
warnings.filterwarnings('ignore')
from collections import defaultdict
from typing import Tuple, Any

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

# Move to the repository top: the relative `data/...` paths (and presumably the
# `src.*` imports below) rely on the working directory being 'rsna-2023'.
while os.path.basename(os.getcwd()) != 'rsna-2023':
    # A directory that is its own parent is a filesystem root ('/' on POSIX,
    # a drive root such as 'C:\\' on Windows): there is nothing left to climb,
    # so stop instead of looping forever.
    if os.path.dirname(os.getcwd()) == os.getcwd():
        raise Exception('Could not find project root directory.')
    os.chdir('../')

from src.classification.dataset import TestDatasetBowelExtra as TestDataset
from src.classification.dataset import load_df
from src.image_processing import windowing
from src.classification.model import load_models
from src.classification.trainer import evaluate
from src.metrics import logloss

# Config

In [2]:
class CFG:
    """Experiment configuration for this evaluation notebook.

    The class itself (not an instance) is handed to the project helpers called
    in this notebook: `load_df`, `load_models`, `TestDataset` and `evaluate`.
    How those helpers interpret each field is not visible here.
    """
    exp_name = 'exp_011'
    # model config
    # timm backbone
    backbone = 'efficientnet_b4'
    # n_ch: number of slices along the z axis
    n_ch = 1 # support only 1
    expand_ch_dim = False
    # n_class: bowel_injury, extravasation
    n_class = 2
    label_smoothing = None # Optional[float]
    # hyper params
    # NOTE(review): most of these look like training-time settings carried over
    # from the training config; presumably unused during evaluation -- confirm.
    init_lr = 5e-5
    min_lr = 1e-6
    weight_decay = 1e-4
    image_size = (512, 512)
    # Also used as the DataLoader batch size in `evaluate_series`.
    batch_size = 64
    amp = True
    n_epoch = 20
    iteration_per_epoch = 100
    pretrain = True
    freeze_epochs = 1
    noaug_epochs = 1
    # fold config
    n_fold = 6
    include_evaluation = False
    train_folds = 1
    # path (relative; the notebook changes directory to the repository top first)
    image_dir = "data/dataset001"
    model_save_dir = "outputs"
    # other config
    seed = 42
    # Also used as the DataLoader worker count in `evaluate_series`.
    num_workers = 0
    num_gpus = 2
    progress_bar = True
    # Use CUDA when it is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Label dict: class index -> label name.
label_index_dict_inv = dict(enumerate(('bowel_injury', 'extravasation')))

# Load the competition tables (paths are relative to the repository top).
df_train = pd.read_csv(
    'data/rsna-2023-abdominal-trauma-detection/train.csv'
)
df_train_image_level = pd.read_csv(
    'data/rsna-2023-abdominal-trauma-detection/image_level_labels.csv'
)
df_train_series_meta = pd.read_csv(
    'data/rsna-2023-abdominal-trauma-detection/train_series_meta.csv'
)

In [4]:
# Build the project dataframe and keep only the rows assigned to fold 0.
df = load_df(CFG)
df_eval = df.loc[df["fold"].eq(0)].reset_index(drop=True)
df_eval.head()

Unnamed: 0,patient_id,series_id,image_id,bowel,extravasation,fold,weight
0,10007,47578,25,0,0,0,1.0
1,10007,47578,26,0,0,0,1.0
2,10007,47578,27,0,0,0,1.0
3,10007,47578,28,0,0,0,1.0
4,10007,47578,29,0,0,0,1.0


In [5]:
def apply_preprocess(image: np.ndarray)-> np.ndarray:
    """Preprocess one CT image; customize as needed.

    Args:
        image (numpy.ndarray): CT image in HU values.
    Returns:
        numpy.ndarray: Result of `windowing` with wl=0, ww=400 in "float32" mode
            (per the original note: windowed and normalized to 0~1).
    """
    window_params = {"wl": 0, "ww": 400, "mode": "float32"}
    return windowing(image, **window_params)

# Load models

In [6]:
# Load the models via the project helper (mode="final", timm framework).
models = load_models(CFG, mode="final", framework='timm')

# Evaluation

In [7]:
def evaluate_series(CFG: Any, df: pd.DataFrame, models: list, pid: int, sid: int) -> dict:
    """Run inference on one series of one patient.

    Args:
        CFG (Any): Config.
        df (pd.DataFrame): Table with `patient_id` and `series_id` columns;
            only the rows matching both `pid` and `sid` are evaluated.
        models (list): List of trained models.
        pid (int): Patient ID.
        sid (int): Series ID.
    Returns:
        dict: Whatever `evaluate` returns for this series.
    """
    # Restrict the table to the requested patient / series pair.
    in_series = (df["patient_id"] == pid) & (df["series_id"] == sid)
    series_rows = df[in_series].reset_index(drop=True)

    # Unshuffled loader over the series, then inference.
    loader = DataLoader(
        TestDataset(CFG, series_rows, preprocess=apply_preprocess),
        batch_size=CFG.batch_size,
        num_workers=CFG.num_workers,
        shuffle=False,
    )
    return evaluate(CFG, models, loader)

In [8]:
# Smoke run: evaluate the series that the first row of df_eval belongs to.
pid = df_eval.loc[0, "patient_id"]
sid = df_eval.loc[0, "series_id"]
result = evaluate_series(CFG, df_eval, models, pid, sid)

In [43]:
def calc_metric(metrics: list)->None:
    """Print the logloss over all collected series.

    Args:
        metrics (list): One entry per series; `entry[0]` holds the predictions
            and `entry[1]` the labels.
    """
    preds = [entry[0] for entry in metrics]
    labels = [entry[1] for entry in metrics]
    score = logloss(np.array(preds), np.array(labels), norm=False)
    print(f"logloss: {score:.4f}")

def print_result(pid: int, sid: int, result: dict, label_names: dict = None) -> list:
    """Print and return the per-class maxima of one series.

    For every class index in `label_names`, takes the largest value found in
    that column of the labels and of the predictions, prints both, and
    collects them. No accuracy is computed here.

    Args:
        pid (int): Patient ID (only printed).
        sid (int): Series ID (only printed).
        result (dict): Must provide 2-D indexable arrays under the keys
            "label" and "pred"; column `index` belongs to class `index`.
        label_names (dict, optional): Mapping of class index -> label name.
            When omitted (None), the notebook-level `label_index_dict_inv`
            is used.
    Returns:
        list: `[max_preds, max_labels]`, each a list with one value per class,
            in the iteration order of `label_names`.
    """
    if label_names is None:
        # Fall back to the notebook-level mapping (original behavior).
        label_names = label_index_dict_inv
    print(f"pid: {pid}, sid: {sid}")
    labels = result["label"]
    preds = result["pred"]
    max_preds, max_labels = [], []
    for index, label_name in label_names.items():
        # Series-level value = maximum over all rows of this class column.
        label_max = max(labels[:, index])
        pred_max = max(preds[:, index])
        print(f"{label_name} max: {label_max}, pred max: {pred_max:.3f}")
        max_preds.append(pred_max)
        max_labels.append(label_max)
    return [max_preds, max_labels]


In [44]:
# One row per (patient_id, series_id) pair of the evaluation fold.
df_eval_unique = df_eval.drop_duplicates(subset=["patient_id", "series_id"]).reset_index(drop=True)
# Evaluate at most the first 100 series. Capping by the table length avoids a
# KeyError when the fold holds fewer than 100 unique series.
n_series = min(100, len(df_eval_unique))
metrics = []
for i in range(n_series):
    pid, sid = df_eval_unique["patient_id"][i], df_eval_unique["series_id"][i]
    result = evaluate_series(CFG, df_eval, models, pid, sid)
    metrics.append(print_result(pid, sid, result))
calc_metric(metrics)

pid: 10007, sid: 47578
bowel_injury max: 0.0, pred max: 0.186
extrabasation max: 0.0, pred max: 0.181
pid: 10205, sid: 65236
bowel_injury max: 0.0, pred max: 0.904
extrabasation max: 0.0, pred max: 0.302
pid: 10275, sid: 14254
bowel_injury max: 0.0, pred max: 0.010
extrabasation max: 0.0, pred max: 0.104
pid: 10275, sid: 37142
bowel_injury max: 0.0, pred max: 0.018
extrabasation max: 0.0, pred max: 0.086
pid: 10430, sid: 47009
bowel_injury max: 0.0, pred max: 0.007
extrabasation max: 0.0, pred max: 0.235
pid: 10430, sid: 52259
bowel_injury max: 0.0, pred max: 0.017
extrabasation max: 0.0, pred max: 0.104
pid: 10494, sid: 65369
bowel_injury max: 0.0, pred max: 0.424
extrabasation max: 1.0, pred max: 0.360
pid: 10696, sid: 62306
bowel_injury max: 0.0, pred max: 0.018
extrabasation max: 0.0, pred max: 0.052
pid: 10987, sid: 5611
bowel_injury max: 0.0, pred max: 0.224
extrabasation max: 0.0, pred max: 0.184
pid: 11130, sid: 8511
bowel_injury max: 0.0, pred max: 0.040
extrabasation max: 0.0