# exp_042
[Notion](https://www.notion.so/exp042-1b91460110774f9a824aae71733beea7?pvs=4)  
weighted meanで一様な予測を出した場合の各臓器に対するスコアの算出.  

In [8]:
import os
import random
import sys
import warnings
warnings.filterwarnings('ignore')
from collections import defaultdict
from typing import Tuple, Any, Dict, Optional

import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
from tqdm import tqdm

# Move to the top of the repository.
# Walk upward one directory at a time until the working directory is named
# 'rsna-2023', so the relative 'data/...' paths used below resolve.
while os.path.basename(os.getcwd()) != 'rsna-2023':
    current_dir = os.getcwd()
    parent_dir = os.path.dirname(current_dir)
    # A filesystem root is its own parent. Checking this instead of comparing
    # against '/' also terminates on platforms whose root is not '/'
    # (e.g. 'C:\\'), where the literal comparison would loop forever.
    if parent_dir == current_dir:
        raise Exception('Could not find project root directory.')
    os.chdir(parent_dir)
    
from src.classification.dataset import load_df
from src.metrics import score, create_training_solution, normalize_probabilities_to_one

In [2]:
class CFG_INF:
    """Settings for this experiment's evaluation / submission run.

    The CSV files are read while the class body executes, so the data
    frames are available as class attributes as soon as the class exists.
    """
    exp_name = 'exp_042'
    model_save_dir = "outputs"
    # 'train' for evaluation, 'test' for submission
    phase = 'train'
    base_dir = 'data/rsna-2023-abdominal-trauma-detection'
    image_dir = f'{base_dir}/{phase}_images'
    # The dataframes are kept on this config so that changing `phase`
    # is enough to switch between the two modes.
    if phase == 'test':
        df = pd.read_csv(os.path.join(base_dir, 'sample_submission.csv'))
    elif phase == 'train':
        df = pd.read_csv(os.path.join(base_dir, 'train.csv'))
    df_series_meta = pd.read_csv(os.path.join(base_dir, f'{phase}_series_meta.csv'))

class CFG_LSK:
    """Settings handed to `load_df` in the cell that builds the fold split.

    NOTE(review): `exp_name` points at exp_031, presumably so that the fold
    assignment of that experiment is reused -- confirm against `load_df`.
    """
    exp_name = 'exp_031'

    # --- paths ---
    image_dir = "data/dataset002"
    model_save_dir = "outputs"

    # --- folds ---
    n_fold = 6
    train_folds = 1
    include_evaluation = False

In [3]:
# Table returned by load_df; only its "fold" and "patient_id" columns are used here.
df_solid_organ = load_df(CFG_LSK)
# Full label table, read straight from train.csv
df_all = pd.read_csv(os.path.join(CFG_INF.base_dir, 'train.csv'))

# Fold 0 is the validation split; every other fold is training.
is_fold0 = df_solid_organ["fold"] == 0
# patient_ids of fold 0
pids = df_solid_organ.loc[is_fold0, "patient_id"].unique()
valid_pids = df_solid_organ.loc[is_fold0, "patient_id"].unique()
train_pids = df_solid_organ.loc[~is_fold0, "patient_id"].unique()

# Restrict the label table to each split and renumber the rows from 0.
in_train = df_all["patient_id"].isin(train_pids)
in_valid = df_all["patient_id"].isin(valid_pids)
df_train = df_all.loc[in_train].reset_index(drop=True)
df_valid = df_all.loc[in_valid].reset_index(drop=True)

In [6]:
# Display the validation-split label table.
df_valid

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high,any_injury
0,10007,1,0,1,0,1,0,0,1,0,0,1,0,0,0
1,10205,1,0,1,0,1,0,0,1,0,0,1,0,0,0
2,10275,1,0,1,0,1,0,0,1,0,0,1,0,0,0
3,10430,1,0,1,0,1,0,0,0,1,0,1,0,0,1
4,10494,1,0,0,1,1,0,0,1,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520,9537,1,0,1,0,1,0,0,1,0,0,1,0,0,0
521,96,1,0,1,0,1,0,0,1,0,0,1,0,0,0
522,9620,1,0,1,0,1,0,0,1,0,0,1,0,0,0
523,9835,1,0,1,0,1,0,0,1,0,0,1,0,0,0


In [10]:
# Display the current contents of `submission`.
submission

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high,any_injury
0,10007,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,1,0.082761,0.020595,1,0.062548,0.051869,0
1,10205,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,1,0.082761,0.020595,1,0.062548,0.051869,0
2,10275,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,1,0.082761,0.020595,1,0.062548,0.051869,0
3,10430,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,0,0.082761,0.020595,1,0.062548,0.051869,1
4,10494,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,1,0.082761,0.020595,1,0.062548,0.051869,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520,9537,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,1,0.082761,0.020595,1,0.062548,0.051869,0
521,96,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,1,0.082761,0.020595,1,0.062548,0.051869,0
522,9620,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,1,0.082761,0.020595,1,0.062548,0.051869,0
523,9835,0.979405,0.020595,0.936308,0.063692,1,0.035088,0.020976,1,0.082761,0.020595,1,0.062548,0.051869,0


In [11]:
# Columns that receive a constant prediction; every other column of
# df_valid (e.g. patient_id, any_injury) is carried over unchanged.
cols = [
    "bowel_healthy", "bowel_injury",
    "extravasation_healthy", "extravasation_injury",
    "kidney_healthy", "kidney_low", "kidney_high",
    "liver_healthy", "liver_low", "liver_high",
    "spleen_healthy", "spleen_low", "spleen_high",
]
# Constant baseline: each listed column is filled, for every validation row,
# with the mean of that column over the training split.
train_means = {name: df_train[name].mean() for name in cols}
submission = df_valid.assign(**train_means)

In [12]:
# Build the solution frame from the validation labels
# (original note: "add weight" -- presumably create_training_solution
# attaches the sample-weight columns; confirm in src.metrics).
solution_train = create_training_solution(df_valid)

# Score the unscaled constant baseline. Copies are passed so that the
# originals stay untouched whatever `score` does with its arguments.
no_scale_score = score(
    solution_train.copy(),
    submission.copy(),
    'patient_id',
)
print(f'Training score without scaling: {no_scale_score:.4f}')

bowel: 0.1652
extravasation: 0.8371
kidney: 0.6299
liver: 0.6521
spleen: 0.7717
any_injury: 1.5161
mean: 0.7620
Training score without scaling: 0.7620


In [16]:
# Target columns grouped by sample weight
scale_by_1 = ['bowel_injury']
scale_by_2 = ['kidney_low', 'liver_low', 'spleen_low']
scale_by_4 = ['kidney_high', 'liver_high', 'spleen_high']
scale_by_6 = ['extravasation_injury']

# Scale factor for each group, per the original note taken from the metric
# description. The first group is multiplied by 2 even though it is named
# "scale_by_1".
sf_1, sf_2, sf_4, sf_6 = 2, 2, 4, 6

# Start again from the unscaled constant prediction
y_pred = submission.copy()

# Multiply each group of target columns by its factor
for group_cols, factor in (
    (scale_by_1, sf_1),
    (scale_by_2, sf_2),
    (scale_by_4, sf_4),
    (scale_by_6, sf_6),
):
    y_pred[group_cols] = y_pred[group_cols] * factor

weight_scale_score = score(
    solution_train.copy(),
    y_pred.copy(),
    'patient_id',
)
print(f'Training score with weight scaling: {weight_scale_score:.4f}')

bowel: 0.1597
extravasation: 0.5994
kidney: 0.5549
liver: 0.6135
spleen: 0.7196
any_injury: 0.9530
mean: 0.6000
Training score with weight scaling: 0.6000
