In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to local .py files are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np 
import pandas as pd

from pathlib import Path

In [3]:
# Root folder of the RSNA 2024 dataset on the local machine.
# Raw string: in a normal literal `\d` and `\R` are invalid escape sequences,
# which only work by accident and trigger a warning on recent Python versions.
train_dir = Path(r'E:\data\RSNA2024')

In [4]:
class CFG:
    """Notebook-wide settings: the random seed and the dataset file locations."""

    random_seed = 42

    # Every dataset path is derived from the root folder given by `train_dir`.
    ROOT_FOLDER = train_dir
    IMAGES_DIR = ROOT_FOLDER.joinpath('train_images')
    TRAIN_CSV = ROOT_FOLDER.joinpath('train.csv')
    FILES_CSV = ROOT_FOLDER.joinpath('train_files.csv')
    TRAIN_DESC_CSV = ROOT_FOLDER.joinpath('train_series_descriptions.csv')
    COORDS_CSV = ROOT_FOLDER.joinpath('train_label_coordinates.csv')

### Train data

In [5]:
# Read the label table and the series-description table in one pass.
train_df, train_desc_df = (
    pd.read_csv(csv_path) for csv_path in (CFG.TRAIN_CSV, CFG.TRAIN_DESC_CSV)
)

train_df.shape, train_desc_df.shape

((1975, 26), (6294, 3))

In [6]:
# Preview the first five rows of the label table.
train_df.head(n=5)

Unnamed: 0,study_id,spinal_canal_stenosis_l1_l2,spinal_canal_stenosis_l2_l3,spinal_canal_stenosis_l3_l4,spinal_canal_stenosis_l4_l5,spinal_canal_stenosis_l5_s1,left_neural_foraminal_narrowing_l1_l2,left_neural_foraminal_narrowing_l2_l3,left_neural_foraminal_narrowing_l3_l4,left_neural_foraminal_narrowing_l4_l5,...,left_subarticular_stenosis_l1_l2,left_subarticular_stenosis_l2_l3,left_subarticular_stenosis_l3_l4,left_subarticular_stenosis_l4_l5,left_subarticular_stenosis_l5_s1,right_subarticular_stenosis_l1_l2,right_subarticular_stenosis_l2_l3,right_subarticular_stenosis_l3_l4,right_subarticular_stenosis_l4_l5,right_subarticular_stenosis_l5_s1
0,4003253,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
1,4646740,Normal/Mild,Normal/Mild,Moderate,Severe,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Severe,Normal/Mild,Normal/Mild,Moderate,Moderate,Moderate,Normal/Mild
2,7143189,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,...,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
3,8785691,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
4,10728036,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,...,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild


In [7]:
# Show every column of the second row (positional index 1).
train_df.iloc[1, :]

study_id                                      4646740
spinal_canal_stenosis_l1_l2               Normal/Mild
spinal_canal_stenosis_l2_l3               Normal/Mild
spinal_canal_stenosis_l3_l4                  Moderate
spinal_canal_stenosis_l4_l5                    Severe
spinal_canal_stenosis_l5_s1               Normal/Mild
left_neural_foraminal_narrowing_l1_l2     Normal/Mild
left_neural_foraminal_narrowing_l2_l3     Normal/Mild
left_neural_foraminal_narrowing_l3_l4     Normal/Mild
left_neural_foraminal_narrowing_l4_l5        Moderate
left_neural_foraminal_narrowing_l5_s1        Moderate
right_neural_foraminal_narrowing_l1_l2    Normal/Mild
right_neural_foraminal_narrowing_l2_l3    Normal/Mild
right_neural_foraminal_narrowing_l3_l4       Moderate
right_neural_foraminal_narrowing_l4_l5       Moderate
right_neural_foraminal_narrowing_l5_s1    Normal/Mild
left_subarticular_stenosis_l1_l2          Normal/Mild
left_subarticular_stenosis_l2_l3          Normal/Mild
left_subarticular_stenosis_l

In [8]:
# Number of distinct studies in the label table.
train_df['study_id'].nunique()

1975

### Coordinates

In [9]:
# Read the coordinate annotations and the per-file table in one pass.
coords_df, files_df = (
    pd.read_csv(csv_path) for csv_path in (CFG.COORDS_CSV, CFG.FILES_CSV)
)

coords_df.shape, files_df.shape

((48692, 7), (147218, 5))

In [10]:
# Distinct studies, conditions and levels present in the coordinates table.
tuple(coords_df[column].nunique() for column in ('study_id', 'condition', 'level'))

(1974, 5, 5)

In [11]:
# The actual condition and level labels.
coords_df['condition'].unique(), coords_df['level'].unique()

(array(['Spinal Canal Stenosis', 'Right Neural Foraminal Narrowing',
        'Left Neural Foraminal Narrowing', 'Left Subarticular Stenosis',
        'Right Subarticular Stenosis'], dtype=object),
 array(['L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1'], dtype=object))

In [12]:
# Number of distinct series ids in the coordinates table.
coords_df['series_id'].nunique()

6291

In [13]:
# Build a string key per (study, series) pair in both tables so they can be
# matched on a single column. The key is str(study_id) followed directly by
# str(series_id), with no separator; the format is kept unchanged because
# later cells compare against literal ids of this form.
# Column-wise string concatenation replaces the row-by-row DataFrame.apply.
for frame in (coords_df, train_desc_df):
    frame['id'] = frame['study_id'].astype(str) + frame['series_id'].astype(str)

In [52]:
# Peek at two random rows; seeded from the config so a re-run shows the same rows.
coords_df.sample(2, random_state=CFG.random_seed)

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y,id,plane
20027,1784445928,3914663632,17,Left Subarticular Stenosis,L2/L3,284.432203,237.559322,17844459283914663632,Axial T2
10682,953218250,468536734,7,Left Subarticular Stenosis,L2/L3,176.642296,165.523546,953218250468536734,Axial T2


In [53]:
def _condition_initials(name):
    """Return the initials of a multi-word condition name.

    Single-word values (including already abbreviated ones such as 'SCS') are
    returned unchanged, so re-running this cell does not shrink them further.
    """
    words = name.split()
    if len(words) <= 1:
        return name
    return ''.join(word[0] for word in words)


# Replace each condition name by its initials, e.g. 'Spinal Canal Stenosis' -> 'SCS'.
coords_df['condition'] = coords_df['condition'].map(_condition_initials)

In [56]:
# The number of distinct condition values.
coords_df['condition'].nunique()

5

In [54]:
# Preview the first ten coordinate rows.
coords_df.head(n=10)

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y,id,plane
0,4003253,702807833,8,SCS,L1/L2,322.831858,227.964602,4003253702807833,Sagittal T2/STIR
1,4003253,702807833,8,SCS,L2/L3,320.571429,295.714286,4003253702807833,Sagittal T2/STIR
2,4003253,702807833,8,SCS,L3/L4,323.030303,371.818182,4003253702807833,Sagittal T2/STIR
3,4003253,702807833,8,SCS,L4/L5,335.292035,427.327434,4003253702807833,Sagittal T2/STIR
4,4003253,702807833,8,SCS,L5/S1,353.415929,483.964602,4003253702807833,Sagittal T2/STIR
5,4003253,1054713880,4,RNFN,L4/L5,187.961759,251.839388,40032531054713880,Sagittal T1
6,4003253,1054713880,4,RNFN,L5/S1,198.240918,285.613767,40032531054713880,Sagittal T1
7,4003253,1054713880,5,RNFN,L3/L4,187.227533,210.722753,40032531054713880,Sagittal T1
8,4003253,1054713880,6,RNFN,L1/L2,194.56979,127.755258,40032531054713880,Sagittal T1
9,4003253,1054713880,6,RNFN,L2/L3,191.632887,165.93499,40032531054713880,Sagittal T1


In [15]:
# coords_df[coords_df.study_id == 4003253]

In [16]:
# Look up the series description of one study/series pair through its `id` key.
id_matches = train_desc_df['id'] == '4003253702807833'
train_desc_df.loc[id_matches, 'series_description'].values[0]

'Sagittal T2/STIR'

In [17]:
# Attach the series description ("plane") to every coordinate row via the `id` key.
# A single index lookup replaces scanning all of train_desc_df once per row.
# drop_duplicates keeps the first description per id, matching the previous `.values[0]`.
plane_by_id = train_desc_df.drop_duplicates('id').set_index('id')['series_description']

# Fail fast on ids without a description (the row-wise version raised here too).
unknown_ids = ~coords_df['id'].isin(plane_by_id.index)
assert not unknown_ids.any(), f'{unknown_ids.sum()} coordinate rows have no series description'

coords_df['plane'] = coords_df['id'].map(plane_by_id)

In [18]:
# Peek at five random rows; seeded from the config so a re-run shows the same rows.
coords_df.sample(5, random_state=CFG.random_seed)

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y,id,plane
15277,1373010257,2466299608,13,Right Neural Foraminal Narrowing,L1/L2,152.81638,63.577279,13730102572466299608,Sagittal T1
15006,1341339532,2001288014,10,Right Neural Foraminal Narrowing,L5/S1,305.010183,398.036664,13413395322001288014,Sagittal T1
23766,2139287338,1271026495,26,Left Subarticular Stenosis,L2/L3,351.609105,334.568289,21392873381271026495,Axial T2
32443,2864325627,3367692930,11,Left Subarticular Stenosis,L4/L5,279.292176,300.365111,28643256273367692930,Axial T2
257,29931867,1152175603,9,Right Neural Foraminal Narrowing,L2/L3,162.322026,146.899633,299318671152175603,Sagittal T1


In [19]:
# Check that canal stenosis is not only annotated in the axial plane.
# Both spellings are accepted because an earlier cell rewrites `condition`
# to its initials ('Spinal Canal Stenosis' -> 'SCS').
canal_labels = ['Spinal Canal Stenosis', 'SCS']
non_axial_canal = coords_df[
    coords_df['condition'].isin(canal_labels) & (coords_df['plane'] != 'Axial T2')
]
# Show one example if any exists; an empty selection is displayed instead of raising.
non_axial_canal.sample(min(1, len(non_axial_canal)), random_state=CFG.random_seed)

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y,id,plane
44121,3882759508,963820603,11,Spinal Canal Stenosis,L4/L5,230.060342,306.20831,3882759508963820603,Sagittal T2/STIR


In [20]:
# Distinct annotated instance numbers (the "positive" slices) of every series.
coords_df.groupby(['study_id', 'series_id'])['instance_number'].unique()

study_id    series_id 
4003253     702807833                              [8]
            1054713880               [4, 5, 6, 11, 12]
            2448190387          [3, 4, 11, 19, 28, 35]
4646740     3201256954    [15, 16, 22, 28, 29, 34, 40]
            3486248476           [5, 6, 7, 15, 16, 17]
                                      ...             
4287160193  1507070277                             [8]
            1820446240          [4, 9, 10, 16, 22, 28]
4290709089  3274612423                             [9]
            3390218084    [2, 3, 5, 6, 10, 15, 20, 21]
            4237840455                  [4, 5, 11, 12]
Name: instance_number, Length: 6291, dtype: object

In [38]:
# Number of distinct (study, series) keys.
coords_df['id'].nunique()

6291

In [21]:
# One row per (study, series, annotated slice): collect the distinct instance
# numbers of each series, then unpack each list into individual rows.
slices_per_series = coords_df.groupby(['study_id', 'series_id'])['instance_number'].unique()
pos_slices = (
    slices_per_series
    .apply(list)
    .reset_index(name='slice')
    .explode('slice')
)
pos_slices.shape

(24546, 3)

In [22]:
# coords_df[coords_df.instance_number > 100]

In [23]:
# Display the exploded table. Index labels repeat because `explode` keeps the
# label of the row each slice was unpacked from.
pos_slices

Unnamed: 0,study_id,series_id,slice
0,4003253,702807833,8
1,4003253,1054713880,4
1,4003253,1054713880,5
1,4003253,1054713880,6
1,4003253,1054713880,11
...,...,...,...
6289,4290709089,3390218084,21
6290,4290709089,4237840455,4
6290,4290709089,4237840455,5
6290,4290709089,4237840455,11


In [24]:
# Mean count of distinct slice numbers per study, divided by 3.
# The count pools slice numbers across all series of a study, so a number that
# occurs in two series is counted once.
# NOTE(review): the divisor 3 is presumably the assumed number of series per study; confirm.
pos_slices.groupby('study_id')['slice'].nunique().mean() / 3

3.6653157716987503

### Condition and level distribution (coords_df)

In [25]:
# Print how often each condition and each level occurs in the coordinates table.
for column in ('condition', 'level'):
    print(coords_df[column].value_counts())
    print('-' * 50)
    print()

condition
Left Neural Foraminal Narrowing     9860
Right Neural Foraminal Narrowing    9859
Spinal Canal Stenosis               9753
Right Subarticular Stenosis         9612
Left Subarticular Stenosis          9608
Name: count, dtype: int64
--------------------------------------------------

level
L3/L4    9858
L4/L5    9858
L5/S1    9845
L2/L3    9661
L1/L2    9470
Name: count, dtype: int64
--------------------------------------------------



In [26]:
# Annotation counts for every condition / level combination.
pd.crosstab(index=coords_df['condition'], columns=coords_df['level'])

level,L1/L2,L2/L3,L3/L4,L4/L5,L5/S1
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Left Neural Foraminal Narrowing,1972,1972,1972,1972,1972
Left Subarticular Stenosis,1810,1892,1971,1971,1964
Right Neural Foraminal Narrowing,1972,1972,1971,1972,1972
Right Subarticular Stenosis,1812,1891,1971,1971,1967
Spinal Canal Stenosis,1904,1934,1973,1972,1970


### Files

In [33]:
# Preview the first three rows of the per-file table.
files_df.head(n=3)

Unnamed: 0,patient,series,image,rows,columns
0,100206310,1012284084,1,320,320
1,100206310,1012284084,10,320,320
2,100206310,1012284084,11,320,320


In [28]:
# Smallest and largest values of the `rows` and `columns` fields.
(
    files_df['rows'].min(),
    files_df['rows'].max(),
    files_df['columns'].min(),
    files_df['columns'].max(),
)

(192, 1024, 224, 1024)

In [34]:
# files_df.image.max(), files_df.image.mean()

In [30]:
# Image file numbers are not a running count of the files in a series:
# an image numbered 5049 exists.
files_df.loc[files_df['image'] == 5049]

Unnamed: 0,patient,series,image,rows,columns
59719,2581283971,2683794967,5049,320,320


In [31]:
# Max / mean number of images per (patient, series) pair.
# The grouping is computed once and reused for both statistics.
images_per_series = files_df.groupby(['patient', 'series'])['image'].count()
images_per_series.max(), images_per_series.mean()

(192, 23.390212901175722)

In [32]:
# Mean number of distinct annotated images per series.
coords_df.groupby(['study_id', 'series_id'])['instance_number'].nunique().mean()

3.9017644253695756

In [37]:
# Row count of the file table for every (patient, series) pair.
files_df.groupby(['patient', 'series'])['series'].count()

patient     series    
4003253     702807833     15
            1054713880    15
            2448190387    43
4646740     3201256954    54
            3486248476    17
                          ..
4287160193  1507070277    15
            1820446240    42
4290709089  3274612423    15
            3390218084    23
            4237840455    15
Name: series, Length: 6294, dtype: int64

### Analyze one example

In [46]:
# Pick one study and show its label row.
patient = 4003253
patient_labels = train_df.loc[train_df['study_id'].eq(patient)]
patient_labels.iloc[0]

study_id                                      4003253
spinal_canal_stenosis_l1_l2               Normal/Mild
spinal_canal_stenosis_l2_l3               Normal/Mild
spinal_canal_stenosis_l3_l4               Normal/Mild
spinal_canal_stenosis_l4_l5               Normal/Mild
spinal_canal_stenosis_l5_s1               Normal/Mild
left_neural_foraminal_narrowing_l1_l2     Normal/Mild
left_neural_foraminal_narrowing_l2_l3     Normal/Mild
left_neural_foraminal_narrowing_l3_l4     Normal/Mild
left_neural_foraminal_narrowing_l4_l5        Moderate
left_neural_foraminal_narrowing_l5_s1     Normal/Mild
right_neural_foraminal_narrowing_l1_l2    Normal/Mild
right_neural_foraminal_narrowing_l2_l3    Normal/Mild
right_neural_foraminal_narrowing_l3_l4       Moderate
right_neural_foraminal_narrowing_l4_l5       Moderate
right_neural_foraminal_narrowing_l5_s1    Normal/Mild
left_subarticular_stenosis_l1_l2          Normal/Mild
left_subarticular_stenosis_l2_l3          Normal/Mild
left_subarticular_stenosis_l

In [45]:
# All coordinate annotations that belong to the selected study.
coords_df.loc[coords_df['study_id'].eq(patient)]

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y,id,plane
0,4003253,702807833,8,Spinal Canal Stenosis,L1/L2,322.831858,227.964602,4003253702807833,Sagittal T2/STIR
1,4003253,702807833,8,Spinal Canal Stenosis,L2/L3,320.571429,295.714286,4003253702807833,Sagittal T2/STIR
2,4003253,702807833,8,Spinal Canal Stenosis,L3/L4,323.030303,371.818182,4003253702807833,Sagittal T2/STIR
3,4003253,702807833,8,Spinal Canal Stenosis,L4/L5,335.292035,427.327434,4003253702807833,Sagittal T2/STIR
4,4003253,702807833,8,Spinal Canal Stenosis,L5/S1,353.415929,483.964602,4003253702807833,Sagittal T2/STIR
5,4003253,1054713880,4,Right Neural Foraminal Narrowing,L4/L5,187.961759,251.839388,40032531054713880,Sagittal T1
6,4003253,1054713880,4,Right Neural Foraminal Narrowing,L5/S1,198.240918,285.613767,40032531054713880,Sagittal T1
7,4003253,1054713880,5,Right Neural Foraminal Narrowing,L3/L4,187.227533,210.722753,40032531054713880,Sagittal T1
8,4003253,1054713880,6,Right Neural Foraminal Narrowing,L1/L2,194.56979,127.755258,40032531054713880,Sagittal T1
9,4003253,1054713880,6,Right Neural Foraminal Narrowing,L2/L3,191.632887,165.93499,40032531054713880,Sagittal T1
