In [1]:
import os
from glob import glob

import h5py
import numpy as np
import pandas as pd
import pydicom
from tqdm.auto import tqdm

def get_sorted_instance_number(series_id, series_description="Axial_T2"):
    """Return a series' DICOM instance numbers ordered by slice position.

    Every ``*.dcm`` file found under a directory named ``series_id`` inside
    the competition input folder is read, and the files are ordered by one
    component of their ``ImagePositionPatient`` tag, largest value first.

    Parameters
    ----------
    series_id
        Series identifier; it is interpolated into the glob pattern as a
        directory name.
    series_description : str, default "Axial_T2"
        Selects the ``ImagePositionPatient`` component used as sort key:
        index 2 for ``"Axial_T2"``, index 0 for any other value.

    Returns
    -------
    numpy.ndarray
        Instance numbers (the part of each file name before the first ``.``,
        converted to ``int``) in descending order of the chosen position
        component. Empty when no file matches.
    """
    axis = 2 if series_description == "Axial_T2" else 0
    dicom_paths = glob(
        f"../../../input/rsna-2024-lumbar-spine-degenerative-classification/**/{series_id}/*.dcm",
        recursive=True,
    )
    positions = np.asarray(
        [
            # Only a header tag is needed, so skip decoding the pixel data.
            float(
                pydicom.dcmread(dicom_path, stop_before_pixels=True)
                .ImagePositionPatient[axis]
            )
            for dicom_path in dicom_paths
        ]
    )
    # basename() copes with whatever separator glob returns on this OS,
    # unlike splitting on a literal "/".
    instance_numbers = np.asarray(
        [int(os.path.basename(path).split(".")[0]) for path in dicom_paths]
    )
    # Negating the key makes argsort yield a descending order.
    return instance_numbers[np.argsort(-positions)]

In [2]:
# Build the axial keypoint table from the per-fold validation prediction CSVs.
AXIAL_FOLDS = range(5)
AXIAL_COORD_COLS = ["x_l", "x_r", "y_l", "y_r"]
# Divisor applied to the predicted coordinates.
# NOTE(review): presumably the side length of the keypoint model's input
# image, which would make the scaled values fractions of it; confirm.
AXIAL_COORD_SCALE = 512

axial_keypoints_df = (
    pd.concat(
        [
            pd.read_csv(f"../../../input/axial_val_keypoint_preds_fold{fold}.csv")
            for fold in AXIAL_FOLDS
        ]
    )
    .reset_index(drop=True)
    # NOTE(review): right_* is renamed to *_l and left_* to *_r. Presumably
    # intentional (image side vs. patient side); confirm.
    .rename(
        columns={
            "right_x": "x_l",
            "right_y": "y_l",
            "left_x": "x_r",
            "left_y": "y_r",
            "part_id": "level",
        }
    )
    .replace(
        {"level": {0: "L1/L2", 1: "L2/L3", 2: "L3/L4", 3: "L4/L5", 4: "L5/S1"}}
    )
)

# Replace the columns instead of writing through `.loc[:, cols] = ...`:
# setting float values into existing integer columns in place triggers
# pandas' "incompatible dtype" FutureWarning and is slated to become an error.
axial_keypoints_df[AXIAL_COORD_COLS] = (
    axial_keypoints_df[AXIAL_COORD_COLS] / AXIAL_COORD_SCALE
)

# Keep only series with more than one non-null instance_number; the group
# count is computed once and reused.
instances_per_series = axial_keypoints_df.groupby("series_id")[
    "instance_number"
].count()
series_ids = instances_per_series[instances_per_series > 1].index

axial_keypoints_df = axial_keypoints_df[axial_keypoints_df.series_id.isin(series_ids)]

# Subset used for merging with the sagittal tables; the axial-specific columns
# get an "_ax" suffix.
axial_t2_df = axial_keypoints_df.loc[
    :,
    [
        "study_id",
        "series_id",
        "level",
        "x_l",
        "x_r",
        "y_l",
        "y_r",
        "instance_number",
    ],
].rename(
    columns={
        "series_id": "series_id_ax",
        "x_l": "x_l_ax",
        "y_l": "y_l_ax",
        "x_r": "x_r_ax",
        "y_r": "y_r_ax",
        "instance_number": "instance_number_ax",
    }
)

  axial_keypoints_df.loc[:, ["x_l", "x_r", "y_l", "y_r"]] = (
  axial_keypoints_df.loc[:, ["x_l", "x_r", "y_l", "y_r"]] = (
  axial_keypoints_df.loc[:, ["x_l", "x_r", "y_l", "y_r"]] = (
  axial_keypoints_df.loc[:, ["x_l", "x_r", "y_l", "y_r"]] = (


In [3]:
# Load the sagittal keypoint predictions and give their columns
# series-specific names ("st1" for Sagittal T1, "st2" for Sagittal T2/STIR).
sagittal_t1_pred_df = pd.read_csv("Sagittal_T1_pred.csv").rename(
    columns={
        "series_id_x": "series_id_st1",
        "x_x_pred": "x_st1",
        "y_x_pred": "y_st1",
    }
)
sagittal_t2_pred_df = pd.read_csv("Sagittal_T2-STIR_pred.csv").rename(
    columns={
        "series_id_y": "series_id_st2",
        "x_y_pred": "x_st2",
        "y_y_pred": "y_st2",
    }
)

# series_id -> study_id lookup, used to attach study_id to both prediction tables.
study_id = pd.read_csv(
    "../../../input/rsna-2024-lumbar-spine-degenerative-classification/train_series_descriptions.csv"
).loc[:, ["study_id", "series_id"]]
sagittal_t1_pred_df = sagittal_t1_pred_df.merge(
    study_id.rename(columns={"series_id": "series_id_st1"}), on="series_id_st1"
)
sagittal_t2_pred_df = sagittal_t2_pred_df.merge(
    study_id.rename(columns={"series_id": "series_id_st2"}), on="series_id_st2"
)

# Join labels with the sagittal T1, sagittal T2 and axial keypoints per
# (study, level). These are inner joins, so rows lacking any of the three
# are dropped.
train_coord_df = (
    pd.read_csv("../../../input/train_level_df.csv")
    .merge(sagittal_t1_pred_df, on=["study_id", "level"])
    .merge(sagittal_t2_pred_df, on=["study_id", "level"])
    .merge(axial_t2_df, on=["study_id", "level"])
)

# Keep only rows where all five condition labels are present.
train_coord_df = train_coord_df.dropna(
    subset=[
        "left_neural_foraminal_narrowing",
        "right_neural_foraminal_narrowing",
        "spinal_canal_stenosis",
        "left_subarticular_stenosis",
        "right_subarticular_stenosis",
    ]
)

# An x coordinate of -1 in either sagittal table marks an undetected keypoint.
undetected = (train_coord_df["x_st1"] == -1) | (train_coord_df["x_st2"] == -1)
fn_df = train_coord_df[undetected]

# Among the undetected keypoints, a miss at a level other than the two end
# levels breaks the distances between keypoints, so drop those studies entirely.
studies_missing_inner_level = fn_df[
    ~fn_df.level.isin(["L1/L2", "L5/S1"])
].study_id

# Also drop the individual rows whose keypoint was not detected.
train_coord_df = train_coord_df[
    ~train_coord_df.study_id.isin(studies_missing_inner_level) & ~undetected
].reset_index(drop=True)

train_coord_df

Unnamed: 0,study_id,level,spinal_canal_stenosis,left_neural_foraminal_narrowing,right_neural_foraminal_narrowing,left_subarticular_stenosis,right_subarticular_stenosis,fold_id,series_id_st1,x_st1,y_st1,series_id_st2,x_st2,y_st2,series_id_ax,x_l_ax,x_r_ax,y_l_ax,y_r_ax,instance_number_ax
0,4003253,L1/L2,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,4,1054713880,0.500000,0.332031,702807833,0.503906,0.355469,2448190387,0.550781,0.449219,0.500000,0.494141,3
1,4003253,L2/L3,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,4,1054713880,0.492188,0.441406,702807833,0.500000,0.464844,2448190387,0.552734,0.453125,0.494141,0.486328,11
2,4003253,L3/L4,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,4,1054713880,0.480469,0.554688,702807833,0.500000,0.574219,2448190387,0.541016,0.447266,0.494141,0.486328,19
3,4003253,L4/L5,Normal/Mild,Moderate,Moderate,Moderate,Normal/Mild,4,1054713880,0.484375,0.656250,702807833,0.519531,0.667969,2448190387,0.541016,0.462891,0.496094,0.494141,28
4,4003253,L5/S1,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,4,1054713880,0.507812,0.750000,702807833,0.546875,0.753906,2448190387,0.544922,0.453125,0.498047,0.496094,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10493,4290709089,L1/L2,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,0,4237840455,0.566406,0.253906,3274612423,0.562500,0.277344,3390218084,0.554688,0.455078,0.556641,0.560547,2
10494,4290709089,L2/L3,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,0,4237840455,0.539062,0.363281,3274612423,0.546875,0.386719,3390218084,0.552734,0.457031,0.568359,0.574219,5
10495,4290709089,L3/L4,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,0,4237840455,0.527344,0.476562,3274612423,0.542969,0.496094,3390218084,0.548828,0.458984,0.556641,0.564453,10
10496,4290709089,L4/L5,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,0,4237840455,0.531250,0.578125,3274612423,0.570312,0.597656,3390218084,0.544922,0.464844,0.531250,0.531250,15


In [4]:
# Replace each row's axial instance number with the relative position of that
# slice in the position-sorted series (slice index / number of slices).
#
# NOTE: this overwrites `instance_number_ax` in place, so the cell is not
# idempotent. Re-run the cell that builds `train_coord_df` before running it
# again.
new_instance_number_ax = []
# series_id -> (instance numbers sorted by position, slice count of the stored
# volume). Each series appears on several rows, so look it up only once
# instead of re-globbing and re-reading its DICOM files per row.
series_cache = {}

# Open read-only, and let the context manager close the HDF5 handle.
with h5py.File("../../../input/volume_orig_res.h5", "r") as h5f:
    for i in tqdm(range(len(train_coord_df))):
        row = train_coord_df.iloc[i]
        series_id_ax = row.series_id_ax

        if series_id_ax not in series_cache:
            series_cache[series_id_ax] = (
                get_sorted_instance_number(
                    series_id_ax, series_description="Axial_T2"
                ),
                h5f[str(series_id_ax)].shape[0],
            )
        instance_number_all, n_slices = series_cache[series_id_ax]

        if len(instance_number_all) != n_slices:
            raise ValueError(
                f"series {series_id_ax}: found {len(instance_number_all)} DICOM "
                f"files but the stored volume has {n_slices} slices"
            )
        matches = np.flatnonzero(instance_number_all == row.instance_number_ax)
        if matches.size == 0:
            raise ValueError(
                f"series {series_id_ax}: instance number "
                f"{row.instance_number_ax} not found (has this cell already "
                "been run on this DataFrame?)"
            )
        new_instance_number_ax.append(matches[0] / n_slices)

# Replace the column rather than writing through `.loc[:, col] = ...`, which
# warns when float values are set into the existing integer column.
train_coord_df["instance_number_ax"] = new_instance_number_ax

train_coord_df.to_csv("train_coord_df.csv", index=False)
train_coord_df

  0%|          | 0/10498 [00:00<?, ?it/s]

  train_coord_df.loc[:, "instance_number_ax"] = new_instance_number_ax


Unnamed: 0,study_id,level,spinal_canal_stenosis,left_neural_foraminal_narrowing,right_neural_foraminal_narrowing,left_subarticular_stenosis,right_subarticular_stenosis,fold_id,series_id_st1,x_st1,y_st1,series_id_st2,x_st2,y_st2,series_id_ax,x_l_ax,x_r_ax,y_l_ax,y_r_ax,instance_number_ax
0,4003253,L1/L2,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,4,1054713880,0.500000,0.332031,702807833,0.503906,0.355469,2448190387,0.550781,0.449219,0.500000,0.494141,0.046512
1,4003253,L2/L3,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,4,1054713880,0.492188,0.441406,702807833,0.500000,0.464844,2448190387,0.552734,0.453125,0.494141,0.486328,0.232558
2,4003253,L3/L4,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,4,1054713880,0.480469,0.554688,702807833,0.500000,0.574219,2448190387,0.541016,0.447266,0.494141,0.486328,0.418605
3,4003253,L4/L5,Normal/Mild,Moderate,Moderate,Moderate,Normal/Mild,4,1054713880,0.484375,0.656250,702807833,0.519531,0.667969,2448190387,0.541016,0.462891,0.496094,0.494141,0.627907
4,4003253,L5/S1,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,4,1054713880,0.507812,0.750000,702807833,0.546875,0.753906,2448190387,0.544922,0.453125,0.498047,0.496094,0.790698
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10493,4290709089,L1/L2,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,0,4237840455,0.566406,0.253906,3274612423,0.562500,0.277344,3390218084,0.554688,0.455078,0.556641,0.560547,0.043478
10494,4290709089,L2/L3,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,0,4237840455,0.539062,0.363281,3274612423,0.546875,0.386719,3390218084,0.552734,0.457031,0.568359,0.574219,0.173913
10495,4290709089,L3/L4,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,0,4237840455,0.527344,0.476562,3274612423,0.542969,0.496094,3390218084,0.548828,0.458984,0.556641,0.564453,0.391304
10496,4290709089,L4/L5,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,0,4237840455,0.531250,0.578125,3274612423,0.570312,0.597656,3390218084,0.544922,0.464844,0.531250,0.531250,0.608696


In [4]:
import pandas as pd

# Reload the table written to disk earlier and show the rows of one study.
train_coord_df = pd.read_csv("train_coord_df.csv")
train_coord_df.loc[train_coord_df["study_id"].eq(4219508579)]

Unnamed: 0,study_id,level,spinal_canal_stenosis,left_neural_foraminal_narrowing,right_neural_foraminal_narrowing,left_subarticular_stenosis,right_subarticular_stenosis,fold_id,series_id_st1,x_st1,y_st1,series_id_st2,x_st2,y_st2,series_id_ax,x_l_ax,x_r_ax,y_l_ax,y_r_ax,instance_number_ax
10302,4219508579,L1/L2,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,4,2446251925,0.527344,0.289062,3181934180,0.527344,0.316406,2888699611,0.566406,0.46875,0.515625,0.519531,0.133333
10303,4219508579,L2/L3,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,4,2446251925,0.488281,0.390625,3181934180,0.492188,0.421875,2888699611,0.554688,0.458984,0.521484,0.529297,0.466667
10304,4219508579,L3/L4,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,4,2446251925,0.46875,0.503906,3181934180,0.488281,0.53125,2888699611,0.548828,0.460938,0.523438,0.53125,0.8
10305,4219508579,L4/L5,Normal/Mild,Moderate,Moderate,Normal/Mild,Normal/Mild,4,2446251925,0.476562,0.601562,3181934180,0.527344,0.609375,1950252622,0.472656,0.388672,0.472656,0.466797,0.2


In [6]:
# Show every row of the studies that have exactly three rows with a non-null
# series_id_ax. The per-study count is computed once and only on the column
# that is actually inspected.
axial_rows_per_study = train_coord_df.groupby("study_id")["series_id_ax"].count()
studies_with_three_rows = axial_rows_per_study[axial_rows_per_study == 3].index
train_coord_df[train_coord_df["study_id"].isin(studies_with_three_rows)]

Unnamed: 0,study_id,level,spinal_canal_stenosis,left_neural_foraminal_narrowing,right_neural_foraminal_narrowing,left_subarticular_stenosis,right_subarticular_stenosis,fold_id,series_id_st1,x_st1,y_st1,series_id_st2,x_st2,y_st2,series_id_ax,x_l_ax,x_r_ax,y_l_ax,y_r_ax,instance_number_ax
94,46494080,L3/L4,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,1,4061588226,0.507812,0.507812,1763376930,0.515625,0.562500,1543341132,0.552734,0.462891,0.507812,0.513672,0.133333
95,46494080,L4/L5,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Moderate,1,4061588226,0.503906,0.609375,1763376930,0.527344,0.671875,1543341132,0.552734,0.458984,0.507812,0.509766,0.533333
96,46494080,L5/S1,Normal/Mild,Normal/Mild,Moderate,Moderate,Normal/Mild,1,4061588226,0.511719,0.699219,1763376930,0.554688,0.746094,1543341132,0.556641,0.466797,0.507812,0.509766,0.800000
387,159721286,L3/L4,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,Moderate,3,4204680939,0.527344,0.527344,1040667739,0.542969,0.554688,107069089,0.542969,0.462891,0.501953,0.511719,0.133333
388,159721286,L4/L5,Moderate,Moderate,Normal/Mild,Moderate,Severe,3,4204680939,0.523438,0.625000,1040667739,0.546875,0.648438,107069089,0.541016,0.474609,0.492188,0.496094,0.533333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9877,4050147189,L4/L5,Moderate,Moderate,Normal/Mild,Severe,Severe,4,779545816,0.500000,0.621094,954135845,0.523438,0.644531,824578184,0.521484,0.455078,0.521484,0.515625,0.466667
9878,4050147189,L5/S1,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,4,779545816,0.511719,0.714844,954135845,0.554688,0.718750,824578184,0.533203,0.453125,0.513672,0.505859,0.733333
10361,4232806580,L3/L4,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,3,3778706234,0.472656,0.437500,346177997,0.480469,0.460938,3008065164,0.539062,0.447266,0.525391,0.523438,0.187500
10362,4232806580,L4/L5,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Moderate,3,3778706234,0.460938,0.527344,346177997,0.480469,0.546875,3008065164,0.535156,0.458984,0.515625,0.515625,0.500000
