# exp046
[Notion](https://www.notion.so/exp046-d797a34ea6cb467d97cebeedf7ce3f02?pvs=4)  
LSKの3Dデータのサイズについての検証  
liverなどで解像度が足りていない可能性があるため  

In [7]:
import os
from collections import defaultdict

import numpy as np
import pandas as pd

# リポジトリtopに移動
while os.path.basename(os.getcwd()) != 'rsna-2023':
    os.chdir('../')
    if os.getcwd() == '/':
        raise Exception('Could not find project root directory.')
    
from src.classification.dataset import TrainDatasetSolidOrgans as TrainDataset
from src.classification.dataset import load_image

In [4]:
# get label correspondences
organ_index_dict_inv = {
    0: 'liver',
    1: 'spleen',
    2: 'kidney',
    3: 'bowel'
}
organ_index_dict = {v: k for k, v in organ_index_dict_inv.items()}

# load dataframe
df_train = pd.read_csv('data/rsna-2023-abdominal-trauma-detection/train.csv')
df_train_image_level = pd.read_csv('data/rsna-2023-abdominal-trauma-detection/image_level_labels.csv')
df_train_series_meta = pd.read_csv('data/rsna-2023-abdominal-trauma-detection/train_series_meta.csv')

In [5]:
def get_training_dataframe():
    """データセットのDataFrameを作成する.
    データセットによって内容を書き換える必要あり.
    """
    # df_seg_info_imageをベースに、学習用データフレームを構築.
    # df_seg_info_imageは、exp002で作成されるcsvファイル.
    image_dir = "data/dataset002"
    path_dict = defaultdict(list)
    for i in range(len(df_train_series_meta)):
        sr = df_train_series_meta.iloc[i]
        pid, sid = int(sr["patient_id"]), int(sr["series_id"])

        sr_label = df_train[df_train["patient_id"] == pid].iloc[0]
        any_in_lsk = 0
        for organ in ["liver", "spleen", "kidney"]:
            image_path = os.path.join(image_dir, str(pid), str(sid), f"{organ}.npy")
            path_dict["patient_id"].append(pid)
            path_dict["series_id"].append(sid)
            path_dict["organ"].append(organ)
            path_dict["image_path"].append(image_path)
            path_dict["healthy"].append(sr_label[organ+"_healthy"])
            path_dict["low"].append(sr_label[organ+"_low"])
            path_dict["high"].append(sr_label[organ+"_high"])
            path_dict["extravasation"].append(sr_label["extravasation_injury"])


    # 画像データのDataFrameを作成
    df = pd.DataFrame(path_dict)
    
    return df

In [6]:
df = get_training_dataframe()
df.head()

Unnamed: 0,patient_id,series_id,organ,image_path,healthy,low,high,extravasation
0,10004,21057,liver,data/dataset002/10004/21057/liver.npy,1,0,0,1
1,10004,21057,spleen,data/dataset002/10004/21057/spleen.npy,0,0,1,1
2,10004,21057,kidney,data/dataset002/10004/21057/kidney.npy,0,1,0,1
3,10004,51033,liver,data/dataset002/10004/51033/liver.npy,1,0,0,1
4,10004,51033,spleen,data/dataset002/10004/51033/spleen.npy,0,0,1,1


In [9]:
def load_kidney(impath: str)-> np.ndarray:
    """kidneyの画像をロードする.
    """
    l, r = (
        impath.replace("kidney.npy", "kidney_r.npy"),
        impath.replace("kidney.npy", "kidney_l.npy"),
    )
    if os.path.exists(l):
        l = load_image(l)
    else:
        l = None
    if os.path.exists(r):
        r = load_image(r)
    else:
        r = None
    return l, r

In [22]:
result_dict = defaultdict(list)
for i in range(1000):# range(len(df)):
    sr = df.iloc[i]
    impath = sr["image_path"]
    if sr["organ"] == "kidney":
        l, r = load_kidney(impath)
        if l is None or r is None:
            continue
        for idx, axis in enumerate(["z", "y", "x"]):
            result_dict["kidney_l_"+axis].append(l.shape[idx])
            result_dict["kidney_r_"+axis].append(r.shape[idx])

    else:
        if os.path.exists(impath):
            img = load_image(impath)
        else:
            continue
        for idx, axis in enumerate(["z", "y", "x"]):
            result_dict[sr["organ"]+"_"+axis].append(img.shape[idx])

In [23]:
def print_info(col: str)->None:
    """統計情報を表示する.
    """
    arr = np.array(result_dict[col])
    print("========", col, "========")
    print(f"mean: {arr.mean():.3f}")
    print(f"std: {arr.std():.3f}")
    print(f"min: {arr.min():.3f}")
    print(f"max: {arr.max():.3f}")
    print(f"25%: {np.percentile(arr, 25):.3f}")
    print(f"50%: {np.percentile(arr, 50):.3f}")
    print(f"75%: {np.percentile(arr, 75):.3f}")

In [24]:
for col in result_dict.keys():
    print_info(col)

mean: 97.380
std: 62.621
min: 30.000
max: 319.000
25%: 53.000
50%: 67.000
75%: 141.500
mean: 203.746
std: 30.162
min: 132.000
max: 351.000
25%: 182.250
50%: 201.000
75%: 218.000
mean: 225.135
std: 38.238
min: 141.000
max: 466.000
25%: 200.250
50%: 220.000
75%: 245.000
mean: 62.183
std: 38.059
min: 15.000
max: 228.000
25%: 35.000
50%: 47.000
75%: 87.000
mean: 110.886
std: 25.166
min: 56.000
max: 194.000
25%: 93.000
50%: 109.000
75%: 124.000
mean: 116.054
std: 31.256
min: 65.000
max: 489.000
25%: 102.000
50%: 112.000
75%: 123.000
mean: 65.033
std: 38.749
min: 6.000
max: 207.000
25%: 38.000
50%: 46.000
75%: 93.500
mean: 68.000
std: 40.924
min: 6.000
max: 208.000
25%: 39.000
50%: 49.000
75%: 100.000
mean: 89.710
std: 15.652
min: 49.000
max: 189.000
25%: 79.000
50%: 89.000
75%: 97.500
mean: 86.441
std: 15.097
min: 46.000
max: 168.000
25%: 77.000
50%: 85.000
75%: 92.000
mean: 86.429
std: 13.911
min: 21.000
max: 146.000
25%: 79.000
50%: 87.000
75%: 95.000
mean: 89.795
std: 15.616
min: 21.000
