In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display

In [2]:
# Root folder holding the dataset CSVs read below.
# Set the SPINE_DATASET_PATH environment variable to point somewhere else;
# when it is unset the original location is used, so existing runs are unaffected.
dataset_path = Path(os.environ.get("SPINE_DATASET_PATH", "/Users/mkurtys/datasets/spine"))

In [3]:
# Severity label -> ordinal code.
condition_severity_map = dict(zip(("Normal/Mild", "Moderate", "Severe"), range(3)))

# Level name -> integer code, in listing order.
level_code_map = {
    level: code
    for code, level in enumerate(("l1_l2", "l2_l3", "l3_l4", "l4_l5", "l5_s1"))
}

# Every "<condition>_<level>" label in a fixed order:
# conditions vary in the outer loop, levels in the inner loop.
conditions_spec_ordered = [
    f"{condition}_{level}"
    for condition in (
        "spinal_canal_stenosis",
        "left_neural_foraminal_narrowing",
        "right_neural_foraminal_narrowing",
        "left_subarticular_stenosis",
        "right_subarticular_stenosis",
    )
    for level in level_code_map
]
# Lookups between a label and its position in conditions_spec_ordered.
condition_spec_to_idx = {spec: idx for idx, spec in enumerate(conditions_spec_ordered)}
condition_spec_from_idx = dict(enumerate(conditions_spec_ordered))

In [6]:
# Read the four CSV tables that live directly under dataset_path.
descriptions = pd.read_csv(dataset_path.joinpath("train_series_descriptions.csv"))
coordinates = pd.read_csv(dataset_path.joinpath("train_label_coordinates.csv"))
submissions = pd.read_csv(dataset_path.joinpath("sample_submission.csv"))
train = pd.read_csv(dataset_path.joinpath("train.csv"))

# Long-format view of the labels: one row per (study_id, condition_spec),
# sorted by both keys. `severity` keeps whatever value `train` holds at this point.
train_melt = train.melt(
    id_vars="study_id", var_name="condition_spec", value_name="severity"
).sort_values(["study_id", "condition_spec"])
# Labels missing from condition_severity_map (including NaN) become NaN here.
train_melt["severity_code"] = train_melt["severity"].map(condition_severity_map)
# level = the last two "_"-separated tokens of condition_spec, e.g. "l1_l2".
# Column-wise .str operations are used rather than a row-by-row apply(axis=1):
# same values, far fewer Python-level calls, and an empty frame is handled cleanly.
train_melt["level"] = (
    train_melt["condition_spec"].str.rsplit("_", n=2).str[1:].str.join("_")
)
# condition = condition_spec without the side prefix and without the level suffix.
train_melt["condition"] = (
    train_melt["condition_spec"]
    .str.replace("left_", "", regex=False)
    .str.replace("right_", "", regex=False)
    .str.rsplit("_", n=2)
    .str[0]
)

# Encode every column after the first with condition_severity_map,
# fill the remaining gaps with -1, then store those columns as integers.
encoded_columns = {
    column: train[column].map(condition_severity_map) for column in train.columns[1:]
}
train = train.assign(**encoded_columns).fillna(-1)
train = train.astype({column: int for column in encoded_columns})

# Normalise the coordinate table so its keys line up with the label names.
coordinates["instance_number"] = coordinates["instance_number"].astype(int)
# regex=False: "/" and " " are literal separators, so the result does not
# depend on the pandas version's default for `regex`.
# A level such as "L1/L2" becomes "l1_l2".
coordinates["level"] = coordinates["level"].str.lower().str.replace("/", "_", regex=False)
# condition_spec = lower-cased condition with spaces turned into "_", plus "_<level>".
coordinates["condition_spec"] = (
    coordinates["condition"].str.lower().str.replace(" ", "_", regex=False)
    + "_"
    + coordinates["level"]
)
# Overwrite `condition` with the spec stripped of its side prefix and level suffix.
coordinates["condition"] = (
    coordinates["condition_spec"]
    .str.replace("left_", "", regex=False)
    .str.replace("right_", "", regex=False)
    .str.rsplit("_", n=2)
    .str[0]
)

# Bring in the columns of `descriptions` for each (study_id, series_id);
# how="left" keeps coordinate rows that have no match.
coordinates = coordinates.merge(descriptions, on=["study_id", "series_id"], how="left")


In [8]:
# Show the first row of `coordinates` to inspect its columns.
coordinates.head(1)

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y,condition_spec,series_description
0,4003253,702807833,8,spinal_canal_stenosis,l1_l2,322.831858,227.964602,spinal_canal_stenosis_l1_l2,Sagittal T2/STIR


In [9]:
# Show the first row of `train_melt` to inspect its columns.
train_melt.head(1)

Unnamed: 0,study_id,condition_spec,severity,severity_code,level,condition
9875,4003253,left_neural_foraminal_narrowing_l1_l2,Normal/Mild,0.0,l1_l2,neural_foraminal_narrowing


In [12]:
# Left-join the coordinates onto the long label table on (study_id, condition_spec).
# Label rows without a matching coordinate row keep NaN in series_id / x / y.
ct = train_melt.loc[:, ["study_id", "condition_spec", "severity"]].merge(
    coordinates.loc[:, ["study_id", "series_id", "condition_spec", "x", "y"]],
    how="left",
    on=["study_id", "condition_spec"],
)
ct.head(1)

Unnamed: 0,study_id,condition_spec,severity,series_id,x,y
0,4003253,left_neural_foraminal_narrowing_l1_l2,Normal/Mild,1054714000.0,196.070671,126.021201


In [13]:
# Rows of the joined table whose x value is missing.
ct.loc[ct["x"].isna()]

Unnamed: 0,study_id,condition_spec,severity,series_id,x,y
405,46494080,left_subarticular_stenosis_l1_l2,,,,
406,46494080,left_subarticular_stenosis_l2_l3,,,,
415,46494080,right_subarticular_stenosis_l1_l2,,,,
416,46494080,right_subarticular_stenosis_l2_l3,,,,
605,64092030,left_subarticular_stenosis_l1_l2,,,,
...,...,...,...,...,...,...
48766,4232806580,right_subarticular_stenosis_l2_l3,,,,
48770,4232806580,spinal_canal_stenosis_l1_l2,Normal/Mild,,,
48771,4232806580,spinal_canal_stenosis_l2_l3,Normal/Mild,,,
48955,4255570773,left_subarticular_stenosis_l1_l2,,,,


In [16]:
# Number of distinct studies that have at least one row with a missing x value.
ct.loc[ct["x"].isna(), "study_id"].nunique()

186

In [17]:
# Rows that have a severity label but no x value.
ct[ct["severity"].notna() & ct["x"].isna()]

Unnamed: 0,study_id,condition_spec,severity,series_id,x,y
770,74782131,spinal_canal_stenosis_l1_l2,Normal/Mild,,,
3095,267842058,spinal_canal_stenosis_l1_l2,Moderate,,,
3120,267989673,spinal_canal_stenosis_l1_l2,Normal/Mild,,,
3395,293713262,spinal_canal_stenosis_l1_l2,Normal/Mild,,,
3396,293713262,spinal_canal_stenosis_l2_l3,Normal/Mild,,,
...,...,...,...,...,...,...
47495,4127969449,spinal_canal_stenosis_l1_l2,Normal/Mild,,,
47595,4137194670,spinal_canal_stenosis_l1_l2,Normal/Mild,,,
47795,4146959702,spinal_canal_stenosis_l1_l2,Normal/Mild,,,
48770,4232806580,spinal_canal_stenosis_l1_l2,Normal/Mild,,,
