In [1]:
import numpy as np
import pandas as pd
import pandas.api.types
import sklearn.metrics
from scipy.special import softmax
from tqdm.auto import tqdm

In [2]:
class ParticipantVisibleError(Exception):
    """Raised by `score` when its inputs fail the basic QC checks."""


def get_condition(full_location: str) -> str:
    """Return the first of 'spinal', 'foraminal', 'subarticular' contained in `full_location`.

    Example: 'spinal_canal_stenosis_l1_l2' -> 'spinal'.

    Raises:
        ValueError: if none of the three condition names occurs in `full_location`.
    """
    known_conditions = ('spinal', 'foraminal', 'subarticular')
    # Substring match, tested in the fixed order above.
    found = next((name for name in known_conditions if name in full_location), None)
    if found is None:
        raise ValueError(f'condition not found in {full_location}')
    return found


def score(
        solution: pd.DataFrame,
        submission: pd.DataFrame,
        row_id_column_name: str,
        any_severe_scalar: float
    ) -> tuple:
    '''
    Sample weighted log loss averaged over the condition groups, plus the per-group losses.

    Pseudocode:
    1. Calculate the sample weighted log loss for each medical condition.
    2. Derive a new any_severe label.
    3. Calculate the sample weighted log loss for the new any_severe label.
    4. Return the average of all of the label group log losses as the final score, normalized for the number of columns in each group.
       This mitigates the impact of spinal stenosis having only half as many columns as the other two conditions.

    Args:
        solution: ground truth with the row id column, the three target level
            columns ('normal_mild', 'moderate', 'severe') and 'sample_weight'.
        submission: predictions with the row id column and the three target
            level columns only. Rows are paired with `solution` by index label.
        row_id_column_name: name of the row id column in both frames. Row ids
            are '<study_id>_<location>' strings.
        any_severe_scalar: weight of the any_severe loss in the final average
            (each condition loss has weight 1).

    Returns:
        (final_score, condition_losses), where condition_losses maps
        'spinal', 'foraminal', 'subarticular' and 'any' to their log loss.

    Raises:
        ParticipantVisibleError: if a QC check fails or the two frames do not
            describe the same rows.
        ValueError: from get_condition, if a row id names no known condition.

    The input frames are not modified.
    '''

    target_levels = ['normal_mild', 'moderate', 'severe']

    # Run basic QC checks on the inputs
    if not pandas.api.types.is_numeric_dtype(submission[target_levels].values):
        raise ParticipantVisibleError('All submission values must be numeric')

    if not np.isfinite(submission[target_levels].values).all():
        raise ParticipantVisibleError('All submission values must be finite')

    if solution[target_levels].min().min() < 0:
        raise ParticipantVisibleError('All labels must be at least zero')
    if submission[target_levels].min().min() < 0:
        raise ParticipantVisibleError('All predictions must be at least zero')

    # Everything below pairs solution and submission rows through their index
    # labels, so make sure each label refers to the same row id in both frames
    # instead of silently scoring misaligned rows.
    if not (solution.index.is_unique and submission.index.is_unique):
        raise ParticipantVisibleError('Solution and submission must have unique index labels')
    if len(solution) != len(submission) or not submission.index.isin(solution.index).all():
        raise ParticipantVisibleError('Submission and solution must cover the same rows')
    submitted_row_ids = submission[row_id_column_name].reindex(solution.index)
    if not (submitted_row_ids.to_numpy() == solution[row_id_column_name].to_numpy()).all():
        raise ParticipantVisibleError('Submission row ids do not match the solution row ids')

    # Work on private copies so the caller's frames are left untouched.
    solution = solution.copy()
    submission = submission.copy()

    # Row ids look like '<study_id>_<location>'.
    solution['study_id'] = solution[row_id_column_name].apply(lambda x: x.split('_')[0])
    solution['location'] = solution[row_id_column_name].apply(lambda x: '_'.join(x.split('_')[1:]))
    solution['condition'] = solution[row_id_column_name].apply(get_condition)

    del solution[row_id_column_name]
    del submission[row_id_column_name]
    assert sorted(submission.columns) == sorted(target_levels)

    # Index-aligned assignment: each submission row inherits the metadata of
    # the solution row carrying the same index label.
    submission['study_id'] = solution['study_id']
    submission['location'] = solution['location']
    submission['condition'] = solution['condition']

    condition_losses = {}
    condition_weights = {}
    for condition in ['spinal', 'foraminal', 'subarticular']:
        condition_indices = solution.loc[solution['condition'] == condition].index.values
        condition_losses[condition] = sklearn.metrics.log_loss(
            y_true=solution.loc[condition_indices, target_levels].values,
            y_pred=submission.loc[condition_indices, target_levels].values,
            sample_weight=solution.loc[condition_indices, 'sample_weight'].values
        )
        condition_weights[condition] = 1

    # any_severe: per study, the maximum over its spinal rows of the 'severe'
    # label / prediction, weighted by the maximum sample weight of those rows.
    spinal_solution = solution.loc[solution['condition'] == 'spinal'].groupby('study_id')
    spinal_submission = submission.loc[submission['condition'] == 'spinal'].groupby('study_id')
    any_severe_spinal_loss = sklearn.metrics.log_loss(
        y_true=spinal_solution['severe'].max(),
        y_pred=spinal_submission['severe'].max(),
        sample_weight=spinal_solution['sample_weight'].max()
    )
    condition_losses["any"] = any_severe_spinal_loss
    condition_weights["any"] = any_severe_scalar

    final_score = np.average(
        np.asarray(list(condition_losses.values())),
        weights=np.asarray(list(condition_weights.values())),
    )
    return final_score, condition_losses

In [3]:
# Prediction file to evaluate; point this at another eval_*.csv to score a different run.
SUBMISSION_CSV = "eval_caformer_s18_ax5ch_mask0.1.csv"

# Sort by row_id and reset the index so the index labels are 0..n-1 in row_id order.
# reset_index already returns a new frame, so no extra copy is needed.
submission = (
    pd.read_csv(SUBMISSION_CSV)
    .sort_values("row_id", ascending=True)
    .reset_index(drop=True)
)

submission.head()

Unnamed: 0,row_id,normal_mild,moderate,severe
0,100206310_left_neural_foraminal_narrowing_l1_l2,0.880859,0.11554,0.00379
1,100206310_left_neural_foraminal_narrowing_l2_l3,0.595703,0.39209,0.012077
2,100206310_left_neural_foraminal_narrowing_l3_l4,0.263916,0.681152,0.054657
3,100206310_left_neural_foraminal_narrowing_l4_l5,0.047302,0.569336,0.383057
4,100206310_left_neural_foraminal_narrowing_l5_s1,0.273682,0.66748,0.05899


In [4]:
# Per-part axial classifier outputs, one CSV per validation fold, concatenated.
yu4u_ss_df = pd.concat(
    [pd.read_csv(f"../../../input/axial_val_cls_preds_baseline_fold{i}.csv") for i in range(5)]
).reset_index(drop=True)

LEVEL_NAMES = ["l1_l2", "l2_l3", "l3_l4", "l4_l5", "l5_s1"]
LEFT_LOGIT_COLUMNS = ["left0", "left1", "left2"]
RIGHT_LOGIT_COLUMNS = ["right0", "right1", "right2"]
PRED_COLUMNS = ["normal_mild", "moderate", "severe"]

# row_id -> length-3 probability vector for every subarticular row of every study.
preds_study = {}
for study_id, study_df in tqdm(yu4u_ss_df.groupby("study_id", sort=False)):
    left_preds = []
    right_preds = []
    for _, series_df in study_df.groupby("series_id", sort=False):
        # Levels without a part prediction in this series keep the uniform value 1/3.
        left_pred = np.full((len(LEVEL_NAMES), 3), 1 / 3)
        right_pred = np.full((len(LEVEL_NAMES), 3), 1 / 3)
        # part_id is used directly as the row (level) position in the 5x3 arrays.
        part_ids = series_df["part_id"].to_numpy()
        left_pred[part_ids] = softmax(series_df[LEFT_LOGIT_COLUMNS].to_numpy(), axis=1)
        right_pred[part_ids] = softmax(series_df[RIGHT_LOGIT_COLUMNS].to_numpy(), axis=1)
        left_preds.append(left_pred)
        right_preds.append(right_pred)
    # Average the per-series predictions of the study.
    left_preds = np.stack(left_preds).mean(0)
    right_preds = np.stack(right_preds).mean(0)
    for i, level in enumerate(LEVEL_NAMES):
        preds_study[f"{study_id}_left_subarticular_stenosis_{level}"] = left_preds[i]
        preds_study[f"{study_id}_right_subarticular_stenosis_{level}"] = right_preds[i]

# Overwrite the subarticular rows of `submission` with the axial predictions in one
# vectorised assignment (a missing row_id raises KeyError). To blend instead of
# replace, average the stacked values with submission.loc[row_labels, PRED_COLUMNS].
if preds_study:
    assert submission["row_id"].is_unique, "row_id must identify a single submission row"
    row_label_by_id = pd.Series(submission.index, index=submission["row_id"])
    row_labels = row_label_by_id.loc[list(preds_study)].to_numpy()
    submission.loc[row_labels, PRED_COLUMNS] = np.stack(list(preds_study.values()))
    

  0%|          | 0/1975 [00:00<?, ?it/s]

  0%|          | 0/19750 [00:00<?, ?it/s]

In [5]:
train_main = pd.read_csv("../../../input/rsna-2024-lumbar-spine-degenerative-classification/train.csv")

# Long format: one row per (study_id, label column of train.csv).
solution = train_main.melt(id_vars=["study_id"], var_name="full_label", value_name="severity")
# Vectorised string concatenation instead of a row-wise apply.
solution["row_id"] = solution["study_id"].astype(str) + "_" + solution["full_label"]
# Missing labels are treated as "Normal/Mild".
solution["severity"] = solution["severity"].fillna("Normal/Mild")

# One-hot target columns (float, 1.0 for the row's severity and 0.0 otherwise).
for severity_name, target_column in [
    ("Normal/Mild", "normal_mild"),
    ("Moderate", "moderate"),
    ("Severe", "severe"),
]:
    solution[target_column] = (solution["severity"] == severity_name).astype(float)

# Per-row weight by severity; any other severity value gets weight 0.
solution["sample_weight"] = (
    solution["severity"].map({"Normal/Mild": 1.0, "Moderate": 2.0, "Severe": 4.0}).fillna(0.0)
)

# Keep only rows that were predicted, in the same row_id order as `submission`.
solution = (
    solution[["study_id", "row_id", "normal_mild", "moderate", "severe", "sample_weight"]]
    .loc[lambda frame: frame["row_id"].isin(submission["row_id"])]
    .sort_values("row_id", ascending=True)
    .reset_index(drop=True)
)
# score() pairs rows by index label, so both frames must list the same row ids in the same order.
assert np.array_equal(
    solution["row_id"].to_numpy(), submission["row_id"].to_numpy()
), "solution and submission rows are not aligned"

solution.head()

Unnamed: 0,study_id,row_id,normal_mild,moderate,severe,sample_weight
0,100206310,100206310_left_neural_foraminal_narrowing_l1_l2,1.0,0.0,0.0,1.0
1,100206310,100206310_left_neural_foraminal_narrowing_l2_l3,0.0,1.0,0.0,2.0
2,100206310,100206310_left_neural_foraminal_narrowing_l3_l4,0.0,1.0,0.0,2.0
3,100206310,100206310_left_neural_foraminal_narrowing_l4_l5,0.0,0.0,1.0,4.0
4,100206310,100206310_left_neural_foraminal_narrowing_l5_s1,0.0,1.0,0.0,2.0


In [6]:
# Keep the columns score() reads from the solution frame, ordered by row_id
# with a fresh 0..n-1 index.
solution = (
    solution.loc[:, ["row_id", "normal_mild", "moderate", "severe", "sample_weight"]]
    .sort_values("row_id", ascending=True)
    .reset_index(drop=True)
    .copy()
)

In [7]:
# Score the full frames; copies keep the notebook-level frames untouched by the call.
s = score(
    solution=solution.copy(),
    submission=submission.copy(),
    row_id_column_name="row_id",
    any_severe_scalar=1,
)
f"Score {s}"

"Score (0.41363129899100265, {'spinal': 0.2786654722862965, 'foraminal': 0.5203661772346457, 'subarticular': 0.5807988198539735, 'any': 0.27469472658909494})"

In [8]:
# Order check 1/3: both frames sorted ascending by row_id.
s = score(
    solution=solution.sort_values("row_id").copy(),
    submission=submission.sort_values("row_id").copy(),
    row_id_column_name="row_id",
    any_severe_scalar=1,
)
f"Score {s}"

"Score (0.41363129899100265, {'spinal': 0.2786654722862965, 'foraminal': 0.5203661772346457, 'subarticular': 0.5807988198539735, 'any': 0.27469472658909494})"

In [9]:
# Order check 2/3: solution ascending, submission descending. score() pairs rows
# through their index labels, so the row order of either frame should not matter.
s = score(
    solution=solution.sort_values("row_id", ascending=True).copy(),
    submission=submission.sort_values("row_id", ascending=False).copy(),
    row_id_column_name="row_id",
    any_severe_scalar=1,
)
f"Score {s}"

"Score (0.41363129899100265, {'spinal': 0.2786654722862965, 'foraminal': 0.5203661772346457, 'subarticular': 0.5807988198539735, 'any': 0.27469472658909494})"

In [10]:
# Order check 3/3: solution descending, submission ascending.
s = score(
    solution=solution.sort_values("row_id", ascending=False).copy(),
    submission=submission.sort_values("row_id", ascending=True).copy(),
    row_id_column_name="row_id",
    any_severe_scalar=1,
)
f"Score {s}"

"Score (0.4136312989910027, {'spinal': 0.2786654722862965, 'foraminal': 0.5203661772346457, 'subarticular': 0.5807988198539736, 'any': 0.27469472658909494})"

In [11]:
from sklearn.model_selection import GroupKFold

# Assign every row of train.csv to one of 5 validation folds, grouped by study_id.
train_df = pd.read_csv(
    "../../../input/rsna-2024-lumbar-spine-degenerative-classification/train.csv"
)
train_df["fold_id"] = -1
for i, (train_index, valid_index) in enumerate(
    GroupKFold(n_splits=5).split(
        train_df, np.arange(len(train_df)), train_df.study_id
    )
):
    # valid_index holds row positions; they equal the labels of the default RangeIndex.
    train_df.loc[valid_index, "fold_id"] = i


# Rebuild the solution frame, this time keeping study_id so fold ids can be attached.
solution = train_main.melt(id_vars=["study_id"], var_name="full_label", value_name="severity")
# Vectorised string concatenation instead of a row-wise apply.
solution["row_id"] = solution["study_id"].astype(str) + "_" + solution["full_label"]
# Missing labels are treated as "Normal/Mild".
solution["severity"] = solution["severity"].fillna("Normal/Mild")

# One-hot target columns (float, 1.0 for the row's severity and 0.0 otherwise).
for severity_name, target_column in [
    ("Normal/Mild", "normal_mild"),
    ("Moderate", "moderate"),
    ("Severe", "severe"),
]:
    solution[target_column] = (solution["severity"] == severity_name).astype(float)

# Per-row weight by severity; any other severity value gets weight 0.
solution["sample_weight"] = (
    solution["severity"].map({"Normal/Mild": 1.0, "Moderate": 2.0, "Severe": 4.0}).fillna(0.0)
)

# Keep only rows that were predicted, in the same row_id order as `submission`.
solution = (
    solution[["study_id", "row_id", "normal_mild", "moderate", "severe", "sample_weight"]]
    .loc[lambda frame: frame["row_id"].isin(submission["row_id"])]
    .sort_values("row_id", ascending=True)
    .reset_index(drop=True)
)

# A left join keeps every solution row in place, and validate= fails loudly if
# study_id is duplicated in train_df (which would multiply rows and break the
# index pairing with `submission`).
solution = solution.merge(
    train_df[["study_id", "fold_id"]],
    on="study_id",
    how="left",
    validate="many_to_one",
)
# The per-fold cells mask `submission` with a mask built from `solution`, so both
# frames must list the same row ids in the same order.
assert np.array_equal(
    solution["row_id"].to_numpy(), submission["row_id"].to_numpy()
), "solution and submission rows are not aligned"
solution.head()

Unnamed: 0,study_id,row_id,normal_mild,moderate,severe,sample_weight,fold_id
0,100206310,100206310_left_neural_foraminal_narrowing_l1_l2,1.0,0.0,0.0,1.0,3
1,100206310,100206310_left_neural_foraminal_narrowing_l2_l3,0.0,1.0,0.0,2.0,3
2,100206310,100206310_left_neural_foraminal_narrowing_l3_l4,0.0,1.0,0.0,2.0,3
3,100206310,100206310_left_neural_foraminal_narrowing_l4_l5,0.0,0.0,1.0,4.0,3
4,100206310,100206310_left_neural_foraminal_narrowing_l5_s1,0.0,1.0,0.0,2.0,3


In [12]:
# Per-fold scores. The fold mask is built from `solution` and applied to
# `submission` as well, which relies on both frames sharing index labels.
for i in range(5):
    s = score(
        solution=solution.loc[solution.fold_id == i].copy(),
        submission=submission.loc[solution.fold_id == i].copy(),
        row_id_column_name="row_id",
        any_severe_scalar=1,
    )
    print(f"Score {s}")

Score (0.43483032707375946, {'spinal': 0.3039514215893382, 'foraminal': 0.5309460217058181, 'subarticular': 0.6139926979046245, 'any': 0.2904311670952572})
Score (0.3896554942222792, {'spinal': 0.24941102063810247, 'foraminal': 0.5176114343995515, 'subarticular': 0.541962404939778, 'any': 0.24963711691168475})
Score (0.3951348284298455, {'spinal': 0.2737667581558748, 'foraminal': 0.4957743707697177, 'subarticular': 0.5503718162481523, 'any': 0.2606263685456372})
Score (0.42982022931696773, {'spinal': 0.28745165649659504, 'foraminal': 0.5483035439363011, 'subarticular': 0.6133099799778515, 'any': 0.2702157368571233})
Score (0.41730620362312965, {'spinal': 0.27716127643953187, 'foraminal': 0.5089968987078949, 'subarticular': 0.5826111781661764, 'any': 0.30045546117891553})


In [13]:
# Inspect the prediction frame (row_id plus the three target level columns).
submission

Unnamed: 0,row_id,normal_mild,moderate,severe
0,100206310_left_neural_foraminal_narrowing_l1_l2,0.880859,0.115540,0.003790
1,100206310_left_neural_foraminal_narrowing_l2_l3,0.595703,0.392090,0.012077
2,100206310_left_neural_foraminal_narrowing_l3_l4,0.263916,0.681152,0.054657
3,100206310_left_neural_foraminal_narrowing_l4_l5,0.047302,0.569336,0.383057
4,100206310_left_neural_foraminal_narrowing_l5_s1,0.273682,0.667480,0.058990
...,...,...,...,...
49370,998688940_spinal_canal_stenosis_l1_l2,0.992676,0.004581,0.002865
49371,998688940_spinal_canal_stenosis_l2_l3,0.344482,0.579590,0.075989
49372,998688940_spinal_canal_stenosis_l3_l4,0.145630,0.699707,0.154419
49373,998688940_spinal_canal_stenosis_l4_l5,0.132080,0.622559,0.245605


In [14]:
# Rows whose normal_mild prediction is exactly 1/3. The axial merge cell initialises
# every level to np.ones((5, 3)) / 3, so these are presumably the subarticular
# levels that never received a part prediction — TODO confirm.
submission[submission.normal_mild == 1/3]

Unnamed: 0,row_id,normal_mild,moderate,severe
480,1040921274_left_subarticular_stenosis_l1_l2,0.333333,0.333333,0.333333
481,1040921274_left_subarticular_stenosis_l2_l3,0.333333,0.333333,0.333333
490,1040921274_right_subarticular_stenosis_l1_l2,0.333333,0.333333,0.333333
491,1040921274_right_subarticular_stenosis_l2_l3,0.333333,0.333333,0.333333
505,1047914296_left_subarticular_stenosis_l1_l2,0.333333,0.333333,0.333333
...,...,...,...,...
48965,97086905_right_subarticular_stenosis_l1_l2,0.333333,0.333333,0.333333
49130,979209761_left_subarticular_stenosis_l1_l2,0.333333,0.333333,0.333333
49140,979209761_right_subarticular_stenosis_l1_l2,0.333333,0.333333,0.333333
49355,998688940_left_subarticular_stenosis_l1_l2,0.333333,0.333333,0.333333


In [15]:
train_coord_df = pd.read_csv("train_coord_df.csv")
# Restrict both frames to studies listed in train_coord_df. The mask is built
# from `solution` and reused for `submission` (same index labels).
mask = solution["study_id"].isin(train_coord_df["study_id"])
solution, submission = solution.loc[mask], submission.loc[mask]

In [16]:
# Overall score on the frames restricted to studies listed in train_coord_df.
s = score(
    solution=solution.sort_values("row_id", ascending=False).copy(),
    submission=submission.sort_values("row_id", ascending=True).copy(),
    row_id_column_name="row_id",
    any_severe_scalar=1,
)
f"Score {s}"

"Score (0.41256621088351547, {'spinal': 0.2783180248552831, 'foraminal': 0.5184602895202596, 'subarticular': 0.5787708997709454, 'any': 0.2747156293875738})"

In [17]:
# Per-fold scores on the frames restricted to studies listed in train_coord_df.
for i in range(5):
    s = score(
        solution=solution.loc[solution.fold_id == i].copy(),
        submission=submission.loc[solution.fold_id == i].copy(),
        row_id_column_name="row_id",
        any_severe_scalar=1,
    )
    print(f"Score {s}")

Score (0.4345376837810418, {'spinal': 0.3040566903635551, 'foraminal': 0.5305699338085815, 'subarticular': 0.6132768512774619, 'any': 0.2902472596745687})
Score (0.3863045363232007, {'spinal': 0.2475044381719207, 'foraminal': 0.5099734370482171, 'subarticular': 0.5393832184929136, 'any': 0.24835705157975146})
Score (0.39322654646550786, {'spinal': 0.27405340992239047, 'foraminal': 0.4925337498283597, 'subarticular': 0.5458090229817145, 'any': 0.2605100031295668})
Score (0.42948768951601674, {'spinal': 0.2871010919100184, 'foraminal': 0.548726092387746, 'subarticular': 0.6103437260027268, 'any': 0.2717798477635758})
Score (0.41754231758844734, {'spinal': 0.2770510618244999, 'foraminal': 0.5099978666269236, 'subarticular': 0.5828176063256837, 'any': 0.30030273557668224})


In [18]:
import pandas as pd

# Load the axial keypoints table from the working directory; no cell visible
# above uses it.
axial_keypoints_df = pd.read_csv("axial_keypoints_df.csv")
