# exp005  
[Notion](https://www.notion.so/exp005-acda5e00fb014299a3e4b822e7bba837?pvs=4)  
3D-CNNによる固形臓器(Liver, Spleen, Kidney)損傷の検出のために、セグメンテーションした臓器を切り抜き保存。  
提出時にこの切り抜きを再現できるように、コードをスクリプト化。  
本来リークを考慮し学習データはoofで推論すべきだが、全臓器に対するDice係数の平均が0.95を超えているので、あまり大きな問題にはならないと捉え、そのままリークさせて推論・保存する。  
新たにデータセットのディレクトリ(`dataset002`)を作成する。

In [139]:
import os
import random
import sys
import warnings
warnings.filterwarnings('ignore')
from typing import Any, Tuple
from collections import defaultdict

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import cv2
import torch
import seaborn as sns
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

import matplotlib
from matplotlib import animation, rc
rc('animation', html='jshtml')
matplotlib.rcParams['animation.embed_limit'] = 70

# Move up to the repository root: keep stepping to the parent directory
# until the working directory is named 'rsna-2023'.
while True:
    if os.path.basename(os.getcwd()) == 'rsna-2023':
        break
    os.chdir('../')
    # Reaching the filesystem root means no ancestor had the expected name.
    if os.getcwd() == '/':
        raise Exception('Could not find project root directory.')
    
from src.segmentation.dataset import TestDataset, load_df
from src.image_processing import windowing
from src.visualization import apply_colormap_to_multilabel_images, animate, print_injury
from src.metrics import calc_cfm_metrics
from src.segmentation.model import load_models
from src.segmentation.trainer import evaluate

# Segmentation Config

In [3]:
class CFG:
    """Configuration for the segmentation model used in this notebook.

    The class itself is passed to ``load_models``, ``TestDataset`` and
    ``evaluate``; which fields each of them reads is defined in ``src`` and
    is not visible here. Within this notebook only ``device``,
    ``batch_size``, ``num_workers`` and ``image_size`` are read directly.
    """
    exp_name = 'exp_004'
    # model config
    backbone = 'efficientnet-b3'
    n_ch = 1
    n_class = 4 # left and right kidneys are not distinguished during training, so 5 -> 4
    # hyper params
    init_lr = 1e-3
    min_lr = 1e-6
    weight_decay = 1e-4
    image_size = (512, 512)
    batch_size = 32
    amp = True
    n_epoch = 20
    iteration_per_epoch = 200
    pretrain = True
    freeze_epochs = 1
    noaug_epochs = 1
    # fold config
    n_fold = 6
    include_evaluation = False
    train_folds = 1
    # path
    image_dir = "data/dataset001/train_images"
    mask_dir = "data/dataset001/segmentations"
    model_save_dir = "outputs"
    # other config
    seed = 42
    num_workers = 0
    num_gpus = 2
    progress_bar = True
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load DataFrame

In [35]:
# Competition metadata tables (paths are relative to the repository root).
df_train = pd.read_csv('data/rsna-2023-abdominal-trauma-detection/train.csv')
df_train_image_level = pd.read_csv('data/rsna-2023-abdominal-trauma-detection/image_level_labels.csv')
df_train_series_meta = pd.read_csv('data/rsna-2023-abdominal-trauma-detection/train_series_meta.csv')
base_dir = "data/rsna-2023-abdominal-trauma-detection"
# Output root under which inference() saves the cropped organ volumes.
dataset_dir = "data/dataset002"

# get label correspondences
# Mask channel index -> organ name.
organ_index_dict_inv = {
    0: 'liver',
    1: 'spleen',
    2: 'kidney',
    3: 'bowel'
}
# Inverse mapping: organ name -> mask channel index.
organ_index_dict = {v: k for k, v in organ_index_dict_inv.items()}

In [40]:
def get_dataframe():
    """Build a slice-level DataFrame for every series in ``df_train_series_meta``.

    For each (patient_id, series_id) row, the directory
    ``data/dataset001/train_images/{pid}/{sid}`` is listed and its files are
    ordered by the integer obtained from the file name with ``.npy`` removed.
    The directory layout is dataset specific, so this function has to be
    adapted when the dataset changes.

    Returns:
        pd.DataFrame: one row per slice file with the columns
        ``image_path``, ``patient_id`` and ``series_id``.
    """
    def slice_order(filename):
        # Numeric slice index first; the name itself breaks ties.
        return int(filename.replace(".npy", "")), filename

    image_paths, pid_list, sid_list = [], [], []
    id_pairs = df_train_series_meta[["patient_id", "series_id"]].to_numpy()
    for raw_pid, raw_sid in id_pairs:
        pid, sid = int(raw_pid), int(raw_sid)
        series_dir = f"data/dataset001/train_images/{pid}/{sid}"
        filenames = sorted(os.listdir(series_dir), key=slice_order)
        image_paths.extend(os.path.join(series_dir, name) for name in filenames)
        pid_list.extend([pid] * len(filenames))
        sid_list.extend([sid] * len(filenames))

    # Assemble the per-slice table.
    return pd.DataFrame({
        'image_path': image_paths,
        'patient_id': pid_list,
        'series_id': sid_list,
    })

# Slice-level table of all training images.
df = get_dataframe()
# Add an empty column named mask_path.
df["mask_path"] = None

# Load models

In [5]:
# Load the trained segmentation models described by CFG.
models = load_models(CFG, mode="final")

# Inference Code  
- シリーズごとの推論の準備
- セグメンテーションの前処理及び後処理
- 臓器抽出後の分割・保存

In [273]:
def load_series_from_dataset(dir_: str)-> np.ndarray:
    """Load every slice file of one series and stack the slices into one array.

    Args:
        dir_ (str): directory whose entries are named ``<integer>.npy``.
    Returns:
        numpy.ndarray: the loaded slices, ordered by the integer in the file
        name, combined with ``np.array``.
    """
    def slice_order(filename):
        # Numeric slice index first; the name itself breaks ties.
        return int(filename.replace(".npy", "")), filename

    ordered_names = sorted(os.listdir(dir_), key=slice_order)
    return np.array([np.load(os.path.join(dir_, name)) for name in ordered_names])

def apply_preprocess(image: np.ndarray, mask: np.ndarray)-> Tuple[np.ndarray, np.ndarray]:
    """Preprocessing hook handed to the dataset; customise as needed.

    Args:
        image (numpy.ndarray): CT image (HU values, per the original notes).
        mask (numpy.ndarray): channel-last mask image.
    Returns:
        image (numpy.ndarray): result of ``windowing`` with ``wl=0``,
            ``ww=400`` and ``mode="float32"`` (normalised to 0-1 per the
            original notes; the helper lives in ``src.image_processing``).
        mask (numpy.ndarray): the mask, returned unchanged.
    """
    windowed = windowing(image, wl=0, ww=400, mode="float32")
    return windowed, mask

def morpho_pytorch(masks):
    """Apply a 3x3x3 morphological closing to each channel of ``masks`` in place.

    Each channel (last axis) is moved to ``CFG.device`` as float32, dilated
    with a 3x3x3 max-pool and then eroded (max-pool of the negated volume),
    and written back into ``masks`` as uint8.

    Args:
        masks (numpy.ndarray): channel-last mask volume; modified in place.
    Returns:
        numpy.ndarray: the same ``masks`` object.
    """
    pool = torch.nn.MaxPool3d(kernel_size=3, stride=1, padding=1)
    n_channels = masks.shape[-1]
    with torch.no_grad():
        for ch in range(n_channels):
            # Leading axis of size 1 acts as the channel dimension for the pooling layer.
            volume = torch.tensor(masks[..., ch][np.newaxis]).to(CFG.device).to(torch.float32)
            dilated = pool(volume)
            # Erosion expressed as a max-pool over the negated volume.
            closed = -pool(-dilated)
            masks[..., ch] = closed.squeeze(0).cpu().numpy().astype(np.uint8)
    return masks

# Per-organ area thresholds used by area_0fill: a contour whose area is at or
# below its organ's threshold is erased from the mask.
area_th = {
    "liver":50,
    "spleen":20,
    "kidney":20,
    "bowel":30,
}

def area_0fill(masks):
    """Erase small regions from every slice of a multi-channel mask, in place.

    For each slice and each organ listed in ``area_th``, the external
    contours of that organ's channel are extracted, and every contour whose
    area is less than or equal to the organ's threshold is filled with 0.

    Args:
        masks (numpy.ndarray): mask volume indexed as
            ``masks[slice, :, :, channel]``, with channels ordered as in
            ``organ_index_dict``. Presumably uint8, as produced by
            ``morpho_pytorch`` - confirm against the caller.
    Returns:
        numpy.ndarray: the same ``masks`` object, modified in place.
    """
    for idx in range(len(masks)):
        for organ, th in area_th.items():
            c_idx = organ_index_dict[organ]
            found = cv2.findContours(
                masks[idx, :, :, c_idx], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )
            # OpenCV 3.x returns (image, contours, hierarchy); 2.x and 4.x
            # return (contours, hierarchy). Pick the contour list either way.
            contours = found[0] if len(found) == 2 else found[1]
            small = [con for con in contours if cv2.contourArea(con) <= th]
            if not small:
                continue
            # Draw on a contiguous copy, then write the cleaned plane back once.
            plane = masks[idx, :, :, c_idx].copy()
            cv2.drawContours(plane, small, -1, 0, -1)
            masks[idx, :, :, c_idx] = plane
    return masks

def apply_postprocess(mask: np.ndarray)-> np.ndarray:
    """Post-process the predicted organ masks.

    Only the morphological step (``morpho_pytorch``) is applied; the
    small-area removal step (``area_0fill``) is currently disabled.

    Args:
        mask (numpy.ndarray): mask volume of shape (Z, H, W, C).
    Returns:
        numpy.ndarray: the array returned by ``morpho_pytorch``.
    """
    return morpho_pytorch(mask)

def evaluate_series(CFG: Any, df: pd.DataFrame, models: list, pid: int, sid: int) -> dict:
    """Run segmentation inference on a single series of a single patient.

    Args:
        CFG (Any): Config.
        df (pd.DataFrame): slice-level table built by ``get_dataframe``.
        models (list): trained models.
        pid (int): patient ID.
        sid (int): series ID.
    Returns:
        dict: whatever ``evaluate`` returns for this series.
    Raises:
        ValueError: if ``df`` has no row for the given (pid, sid).
    """
    # Restrict the table to the requested series.
    is_target = (df["patient_id"] == pid) & (df["series_id"] == sid)
    series_df = df[is_target].reset_index(drop=True)
    if not len(series_df):
        raise ValueError(f"pid:{pid}, sid:{sid} is not found.")

    # Slices are fed in table order (no shuffling).
    loader = DataLoader(
        TestDataset(CFG, series_df, preprocess=apply_preprocess),
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=CFG.num_workers,
    )
    return evaluate(CFG, models, loader)

def crop_organ(
    image: np.ndarray,
    mask: np.ndarray,
    p: float = 98,
    margin: int = 10,
    z_margin: int = 5,
) -> Tuple[np.ndarray, np.ndarray]:
    """Crop ``image`` and ``mask`` to a padded box around the mask foreground.

    Along each axis the box spans the ``100 - p`` to ``p`` percentiles of the
    foreground voxel indices (truncated to int) instead of the absolute
    min/max, so a small fraction of foreground voxels at either end may fall
    outside the un-padded range. The range is then widened by the margin and
    clamped to the volume.

    Args:
        image (numpy.ndarray): 3-D volume indexed (z, h, w).
        mask (numpy.ndarray): 3-D mask of the same layout; non-zero = organ.
        p (float): upper percentile of the index range (lower is ``100 - p``).
        margin (int): padding added on both sides along h and w.
        z_margin (int): padding added on both sides along z.
    Returns:
        Tuple[numpy.ndarray, numpy.ndarray]: ``(cropped_image, cropped_mask)``.
    Raises:
        ValueError: if ``mask`` has no non-zero voxel.
    """
    axis_indices = np.nonzero(mask)
    z_indices, h_indices, w_indices = axis_indices
    if z_indices.size == 0:
        # Percentiles of an empty index set are undefined; fail with a clear message.
        raise ValueError("crop_organ: mask contains no foreground voxels.")

    window = []
    pads = (z_margin, margin, margin)
    for indices, pad, size in zip(axis_indices, pads, image.shape):
        low = int(np.percentile(indices, 100 - p))
        high = int(np.percentile(indices, p))
        # The upper bound is inclusive, hence the +1 on the slice stop.
        window.append(slice(max(0, low - pad), min(size, high + pad) + 1))
    window = tuple(window)

    return image[window], mask[window]

def kidney_split(image: np.ndarray, mask: np.ndarray)-> Tuple[np.ndarray, np.ndarray]:
    """Split a kidney crop into two halves and re-crop each half.

    Intended to receive the output of ``crop_organ`` for the kidney channel:
    the volume is cut at the midpoint of its last (width) axis and each half
    is passed through ``crop_organ`` again.

    Note:
        "left" and "right" here refer to the left/right side of the image
        array, not to the patient's anatomy.

    Args:
        image (numpy.ndarray): cropped kidney volume indexed (z, h, w).
        mask (numpy.ndarray): kidney mask with the same layout.
    Returns:
        Tuple[numpy.ndarray, numpy.ndarray]: ``(left_image, right_image)``.
    """
    w_half = image.shape[2] // 2
    halves = []
    for columns in (slice(None, w_half), slice(w_half, None)):
        half_image, _ = crop_organ(image[:, :, columns], mask[:, :, columns])
        halves.append(half_image)
    left_image, right_image = halves
    return left_image, right_image

def resize_volume(mask: np.ndarray, hw_shape: tuple)-> np.ndarray:
    """Resize every slice of ``mask`` so it matches the image's height and width.

    Used when the source image is not 512x512.

    Args:
        mask (numpy.ndarray): mask volume; iterated along its first axis.
        hw_shape (tuple): target ``(height, width)``.
    Returns:
        numpy.ndarray: the resized slices stacked along a new first axis.
    """
    # cv2.resize expects the target size as (width, height).
    # NOTE(review): cv2.resize's default interpolation is used - confirm this
    # is what is wanted for binary masks.
    target_wh = hw_shape[::-1]
    return np.stack([cv2.resize(plane, target_wh) for plane in mask])

In [305]:
def inference() -> None:
    """Segment every training series and save the cropped organ CT volumes.

    For each series in ``df_train_series_meta`` the prediction is thresholded
    at 0.5, post-processed, resized to the image's height/width when that is
    not ``CFG.image_size``, and used to crop the kidney, liver and spleen out
    of the CT volume. Crops are written to
    ``{dataset_dir}/{pid}/{sid}/{organ}.npy`` (the kidney is split into
    ``kidney_r.npy`` and ``kidney_l.npy``). Crops are saved as returned by
    ``crop_organ`` / ``kidney_split``, with no slice-position information
    attached and no resizing of the cropped volume. An organ with an empty
    predicted mask is skipped.
    """
    target_organs = ("kidney", "liver", "spleen")
    id_pairs = df_train_series_meta[["patient_id", "series_id"]].to_numpy()
    for raw_pid, raw_sid in tqdm(id_pairs):
        pid, sid = int(raw_pid), int(raw_sid)
        result = evaluate_series(CFG, df, models, pid, sid)
        image = load_series_from_dataset(f"data/dataset001/train_images/{pid}/{sid}")
        # result["pred"] is (Z, H, W, C); binarise before post-processing.
        pred = apply_postprocess((result["pred"] > 0.5).astype(np.uint8))
        # When the image is not 512x512, resize the mask rather than the image.
        if image.shape[1:3] != CFG.image_size:
            pred = resize_volume(pred, image.shape[1:])

        save_dir = os.path.join(dataset_dir, str(pid), str(sid))
        for organ in target_organs:
            organ_segment = pred[..., organ_index_dict[organ]]
            if organ_segment.sum() == 0:
                continue
            organ_cropped, mask_cropped = crop_organ(image, organ_segment)
            # Create the directory only once there is something to save.
            os.makedirs(save_dir, exist_ok=True)
            if organ != "kidney":
                np.save(os.path.join(save_dir, f"{organ}.npy"), organ_cropped)
                continue
            # kidney_split returns (image-left, image-right) halves.
            kidney_r, kidney_l = kidney_split(organ_cropped, mask_cropped)
            np.save(os.path.join(save_dir, "kidney_r.npy"), kidney_r)
            np.save(os.path.join(save_dir, "kidney_l.npy"), kidney_l)

In [306]:
# Run the crop-and-save pass over every series in df_train_series_meta.
inference()

100%|██████████| 4711/4711 [10:09:13<00:00,  7.76s/it]  


In [303]:

# Spot-check one saved crop against its source series.
pid, sid = 10082, 8139
organ = "kidney_l"
# Paths are relative to the repository root (the working directory set at the
# top of the notebook), so the cell does not depend on one machine's home dir.
ct_crop = np.load(os.path.join(dataset_dir, str(pid), str(sid), f"{organ}.npy"))
print(f"crop boxel shape: {ct_crop.shape}")
dir_ = f"data/dataset001/train_images/{pid}/{sid}"
ct = load_series_from_dataset(dir_)
print_injury(df_train, pid)

crop boxel shape: (50, 94, 102)
healty patient


In [None]:
# Window the cropped volume for display; 0 and 400 presumably correspond to
# the wl=0 / ww=400 used in apply_preprocess - confirm against windowing().
ct_crop_w = windowing(ct_crop, 0, 400)
# Set one corner pixel of every slice to 255 - presumably to pin the displayed
# intensity range across frames; TODO confirm.
ct_crop_w[:,0,0] = 255
animate(ct_crop_w)

In [None]:
# Animate the full source series, windowed with windowing()'s default arguments.
animate(windowing(ct))