In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np 
import pandas as pd

from pathlib import Path

In [3]:
train_dir = Path('E:\data\RSNA2024')

In [4]:
class CFG:
    random_seed = 42

    image_size = 256
    
    ROOT_FOLDER = train_dir / 'original'
    DEST_FOLDER = train_dir
    PNG_DIR = DEST_FOLDER / f'pngs_{image_size}'
    IMAGES_DIR = ROOT_FOLDER / 'train_images'
    TRAIN_CSV = ROOT_FOLDER / 'train.csv'
    FILES_CSV = ROOT_FOLDER / 'train_files.csv'
    TRAIN_DESC_CSV = ROOT_FOLDER / 'train_series_descriptions.csv'
    COORDS_CSV = ROOT_FOLDER / 'train_label_coordinates.csv'

### Train_df

In [5]:
train_df = pd.read_csv(CFG.TRAIN_CSV)
train_desc_df = pd.read_csv(CFG.TRAIN_DESC_CSV)

train_df.shape, train_desc_df.shape

((1975, 26), (6294, 3))

In [6]:
train_desc_df['ss_id'] = train_desc_df.apply(lambda row: f'{str(row["study_id"])}_{str(row["series_id"])}', axis=1)

In [7]:
train_df.head()

Unnamed: 0,study_id,spinal_canal_stenosis_l1_l2,spinal_canal_stenosis_l2_l3,spinal_canal_stenosis_l3_l4,spinal_canal_stenosis_l4_l5,spinal_canal_stenosis_l5_s1,left_neural_foraminal_narrowing_l1_l2,left_neural_foraminal_narrowing_l2_l3,left_neural_foraminal_narrowing_l3_l4,left_neural_foraminal_narrowing_l4_l5,...,left_subarticular_stenosis_l1_l2,left_subarticular_stenosis_l2_l3,left_subarticular_stenosis_l3_l4,left_subarticular_stenosis_l4_l5,left_subarticular_stenosis_l5_s1,right_subarticular_stenosis_l1_l2,right_subarticular_stenosis_l2_l3,right_subarticular_stenosis_l3_l4,right_subarticular_stenosis_l4_l5,right_subarticular_stenosis_l5_s1
0,4003253,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
1,4646740,Normal/Mild,Normal/Mild,Moderate,Severe,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Severe,Normal/Mild,Normal/Mild,Moderate,Moderate,Moderate,Normal/Mild
2,7143189,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,...,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
3,8785691,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,...,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
4,10728036,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,...,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild


In [8]:
train_df.isna().sum(axis=1).sum()

572

In [9]:
cols = train_df.columns[1:]
# first = [c.split('_')[:-2] for c in cols]
# last = [c.split('_')[-2:] for c in cols]

cols = [c.split('_') for c in cols]
cols = [''.join([i[0] if len(i) > 2 else i for i in c]).upper() for c in cols]

cols = ['study_id'] + cols

cols[:5]

['study_id', 'SCSL1L2', 'SCSL2L3', 'SCSL3L4', 'SCSL4L5']

In [10]:
dict(zip(train_df.columns, cols))

{'study_id': 'study_id',
 'spinal_canal_stenosis_l1_l2': 'SCSL1L2',
 'spinal_canal_stenosis_l2_l3': 'SCSL2L3',
 'spinal_canal_stenosis_l3_l4': 'SCSL3L4',
 'spinal_canal_stenosis_l4_l5': 'SCSL4L5',
 'spinal_canal_stenosis_l5_s1': 'SCSL5S1',
 'left_neural_foraminal_narrowing_l1_l2': 'LNFNL1L2',
 'left_neural_foraminal_narrowing_l2_l3': 'LNFNL2L3',
 'left_neural_foraminal_narrowing_l3_l4': 'LNFNL3L4',
 'left_neural_foraminal_narrowing_l4_l5': 'LNFNL4L5',
 'left_neural_foraminal_narrowing_l5_s1': 'LNFNL5S1',
 'right_neural_foraminal_narrowing_l1_l2': 'RNFNL1L2',
 'right_neural_foraminal_narrowing_l2_l3': 'RNFNL2L3',
 'right_neural_foraminal_narrowing_l3_l4': 'RNFNL3L4',
 'right_neural_foraminal_narrowing_l4_l5': 'RNFNL4L5',
 'right_neural_foraminal_narrowing_l5_s1': 'RNFNL5S1',
 'left_subarticular_stenosis_l1_l2': 'LSSL1L2',
 'left_subarticular_stenosis_l2_l3': 'LSSL2L3',
 'left_subarticular_stenosis_l3_l4': 'LSSL3L4',
 'left_subarticular_stenosis_l4_l5': 'LSSL4L5',
 'left_subarticular_ste

In [11]:
train_df.rename(columns=dict(zip(train_df.columns, cols)), inplace=True)
train_df.shape

(1975, 26)

In [12]:
train_df.iloc[0]

study_id        4003253
SCSL1L2     Normal/Mild
SCSL2L3     Normal/Mild
SCSL3L4     Normal/Mild
SCSL4L5     Normal/Mild
SCSL5S1     Normal/Mild
LNFNL1L2    Normal/Mild
LNFNL2L3    Normal/Mild
LNFNL3L4    Normal/Mild
LNFNL4L5       Moderate
LNFNL5S1    Normal/Mild
RNFNL1L2    Normal/Mild
RNFNL2L3    Normal/Mild
RNFNL3L4       Moderate
RNFNL4L5       Moderate
RNFNL5S1    Normal/Mild
LSSL1L2     Normal/Mild
LSSL2L3     Normal/Mild
LSSL3L4     Normal/Mild
LSSL4L5        Moderate
LSSL5S1     Normal/Mild
RSSL1L2     Normal/Mild
RSSL2L3     Normal/Mild
RSSL3L4     Normal/Mild
RSSL4L5     Normal/Mild
RSSL5S1     Normal/Mild
Name: 0, dtype: object

In [13]:
train_df.study_id.nunique()

1975

In [14]:
vals = {'Normal/Mild': 'N', 'Moderate': 'M', 'Severe': 'S'}
vals

{'Normal/Mild': 'N', 'Moderate': 'M', 'Severe': 'S'}

In [15]:
train_df[cols[1:]] = train_df[cols[1:]].replace(vals)

In [16]:
train_df.sample(2)

Unnamed: 0,study_id,SCSL1L2,SCSL2L3,SCSL3L4,SCSL4L5,SCSL5S1,LNFNL1L2,LNFNL2L3,LNFNL3L4,LNFNL4L5,...,LSSL1L2,LSSL2L3,LSSL3L4,LSSL4L5,LSSL5S1,RSSL1L2,RSSL2L3,RSSL3L4,RSSL4L5,RSSL5S1
1247,2734689910,N,N,N,N,N,N,N,N,N,...,N,N,N,N,N,N,N,N,N,N
978,2178420447,N,N,N,N,N,N,N,N,N,...,N,N,N,N,N,N,N,M,M,N


### Coordinates_df

In [17]:
coords_df = pd.read_csv(CFG.COORDS_CSV)
files_df = pd.read_csv(CFG.FILES_CSV)

coords_df.shape, files_df.shape

((48692, 7), (147218, 11))

In [18]:
coords_df.rename(columns={'instance_number': 'instance'}, inplace=True)

In [19]:
coords_df.study_id.nunique(), coords_df.condition.nunique(), coords_df.level.nunique()

(1974, 5, 5)

In [20]:
coords_df.condition.unique(), coords_df.level.unique()

(array(['Spinal Canal Stenosis', 'Right Neural Foraminal Narrowing',
        'Left Neural Foraminal Narrowing', 'Left Subarticular Stenosis',
        'Right Subarticular Stenosis'], dtype=object),
 array(['L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1'], dtype=object))

In [21]:
coords_df.series_id.nunique()

6291

In [22]:
coords_df['ss_id'] = coords_df.apply(lambda row: f'{str(row["study_id"])}_{str(row["series_id"])}', axis=1)
coords_df['instance_id'] = coords_df.apply(lambda row: f'{str(row["study_id"])}_{str(row["series_id"])}_{str(row["instance"])}', axis=1)

In [23]:
train_desc_df.sample()

Unnamed: 0,study_id,series_id,series_description,ss_id
1856,1278694021,896910489,Sagittal T1,1278694021_896910489


In [24]:
coords_df.sample(2)

Unnamed: 0,study_id,series_id,instance,condition,level,x,y,ss_id,instance_id
11904,1075351916,3008943156,14,Left Neural Foraminal Narrowing,L4/L5,256.366355,265.809346,1075351916_3008943156,1075351916_3008943156_14
13620,1217004843,799439424,6,Right Neural Foraminal Narrowing,L2/L3,155.105882,138.729412,1217004843_799439424,1217004843_799439424_6


In [25]:
# rename condition
coords_df['condition'] = coords_df.apply(lambda row: ''.join([w[0] for w in row['condition'].split(' ')]), axis=1)

In [26]:
# rename level
coords_df['level'] = coords_df.level.apply(lambda l: ''.join(l.split('/')))

In [27]:
coords_df['cl'] = coords_df['condition'] + coords_df['level']

In [28]:
coords_df.condition.nunique()

5

In [29]:
coords_df.head(10)

Unnamed: 0,study_id,series_id,instance,condition,level,x,y,ss_id,instance_id,cl
0,4003253,702807833,8,SCS,L1L2,322.831858,227.964602,4003253_702807833,4003253_702807833_8,SCSL1L2
1,4003253,702807833,8,SCS,L2L3,320.571429,295.714286,4003253_702807833,4003253_702807833_8,SCSL2L3
2,4003253,702807833,8,SCS,L3L4,323.030303,371.818182,4003253_702807833,4003253_702807833_8,SCSL3L4
3,4003253,702807833,8,SCS,L4L5,335.292035,427.327434,4003253_702807833,4003253_702807833_8,SCSL4L5
4,4003253,702807833,8,SCS,L5S1,353.415929,483.964602,4003253_702807833,4003253_702807833_8,SCSL5S1
5,4003253,1054713880,4,RNFN,L4L5,187.961759,251.839388,4003253_1054713880,4003253_1054713880_4,RNFNL4L5
6,4003253,1054713880,4,RNFN,L5S1,198.240918,285.613767,4003253_1054713880,4003253_1054713880_4,RNFNL5S1
7,4003253,1054713880,5,RNFN,L3L4,187.227533,210.722753,4003253_1054713880,4003253_1054713880_5,RNFNL3L4
8,4003253,1054713880,6,RNFN,L1L2,194.56979,127.755258,4003253_1054713880,4003253_1054713880_6,RNFNL1L2
9,4003253,1054713880,6,RNFN,L2L3,191.632887,165.93499,4003253_1054713880,4003253_1054713880_6,RNFNL2L3


In [30]:
train_desc_df.series_description.unique()

array(['Sagittal T2/STIR', 'Sagittal T1', 'Axial T2'], dtype=object)

In [31]:
train_desc_df.series_description.isna().sum()

0

In [32]:
coords_df.shape

(48692, 10)

In [33]:
coords_df = pd.merge(coords_df, train_desc_df.loc[:, ['ss_id', 'series_description']],  how='inner', left_on=['ss_id'], right_on=['ss_id'])

coords_df.sample(2)

Unnamed: 0,study_id,series_id,instance,condition,level,x,y,ss_id,instance_id,cl,series_description
36907,3258126294,918902957,12,LNFN,L2L3,256.193124,186.189146,3258126294_918902957,3258126294_918902957_12,LNFNL2L3,Sagittal T1
42501,3742728457,2064757886,6,LNFN,L1L2,181.705775,99.893909,3742728457_2064757886,3742728457_2064757886_6,LNFNL1L2,Sagittal T1


In [34]:
coords_df.shape

(48692, 11)

In [35]:
files_df.sample()

Unnamed: 0,study_id,series_id,image,proj,instancenumber,rows,columns,slicethickness,spacingbetweenslices,patientposition,seriesdescription
48133,2283923187,2721038351,13,37,13,320,320,5.0,6.0,FFS,T2


In [36]:
# coords_df['plane'] = coords_df.apply(lambda row: train_desc_df[train_desc_df['ss_id'] == row['ss_id']].series_description.values[0], axis=1)

In [37]:
# coords_df.sample(5)

In [38]:
# check canal stenosis is noy only in axial plane
coords_df[(coords_df.condition == 'SCS') & (coords_df.series_description != 'Axial T2')].sample()

Unnamed: 0,study_id,series_id,instance,condition,level,x,y,ss_id,instance_id,cl,series_description
17320,1545079332,2817567169,9,SCS,L3L4,216.217207,274.920454,1545079332_2817567169,1545079332_2817567169_9,SCSL3L4,Sagittal T2/STIR


In [39]:
# get the positive slices
coords_df.groupby(['study_id','series_id']).instance.unique()

study_id    series_id 
4003253     702807833                              [8]
            1054713880               [4, 5, 6, 11, 12]
            2448190387          [3, 4, 11, 19, 28, 35]
4646740     3201256954    [15, 16, 22, 28, 29, 34, 40]
            3486248476           [5, 6, 7, 15, 16, 17]
                                      ...             
4287160193  1507070277                             [8]
            1820446240          [4, 9, 10, 16, 22, 28]
4290709089  3274612423                             [9]
            3390218084    [2, 3, 5, 6, 10, 15, 20, 21]
            4237840455                  [4, 5, 11, 12]
Name: instance, Length: 6291, dtype: object

In [40]:
coords_df.ss_id.nunique(), coords_df.instance_id.nunique()

(6291, 24546)

### Files_df

In [41]:
files_df = pd.read_csv(CFG.FILES_CSV)

In [42]:
files_df.sample(5)

Unnamed: 0,study_id,series_id,image,proj,instancenumber,rows,columns,slicethickness,spacingbetweenslices,patientposition,seriesdescription
112055,3919334786,59089913,41,-39,41,512,512,2.8,1.4,FFS,T2
32001,1850731145,2746683404,3,29,3,448,448,4.0,4.8,FFS,T2
123378,4227587668,3708432591,19,97,19,240,256,4.0,4.4,FFS,T2
91677,3397219212,1284046345,1,-37,1,512,512,4.0,5.0,HFS,T2
143197,904367529,2221011236,12,-355,12,640,640,4.0,4.4,HFS,T2


In [43]:
# files_df.rename(columns={'patient': 'study_id', 'series': 'series_id', 'image': 'instance'}, inplace=True)

In [44]:
files_df.patientposition.value_counts(), files_df.patientposition.isna().sum()

(patientposition
 HFS    118249
 FFS     28969
 Name: count, dtype: int64,
 0)

In [45]:
files_df.groupby(['study_id']).patientposition.unique().value_counts()

patientposition
[HFS]    1585
[FFS]     390
Name: count, dtype: int64

In [46]:
files_df['ss_id'] = files_df.apply(lambda row: f'{str(row["study_id"])}_{str(row["series_id"])}', axis=1)
files_df['instance_id'] = files_df.apply(lambda row: f'{str(row["study_id"])}_{str(row["series_id"])}_{str(row["instancenumber"])}', axis=1)

In [47]:
source_dir = CFG.PNG_DIR
files_df['filename'] = files_df.apply(lambda row: f'{source_dir}\\{row.study_id}_{row.series_id}_{row.image}.png', axis=1)

In [48]:
files_df.sample()

Unnamed: 0,study_id,series_id,image,proj,instancenumber,rows,columns,slicethickness,spacingbetweenslices,patientposition,seriesdescription,ss_id,instance_id,filename
55657,2484590654,3051646442,25,-126,25,640,640,4.0,4.8,HFS,,2484590654_3051646442,2484590654_3051646442_25,E:\data\RSNA2024\pngs_256\2484590654_305164644...


In [49]:
train_desc_df.sample()

Unnamed: 0,study_id,series_id,series_description,ss_id
272,181779472,2269463694,Sagittal T1,181779472_2269463694


In [50]:
coords_df = pd.merge(coords_df, files_df[['instance_id', 'rows', 'columns', 'filename','patientposition']], left_on='instance_id', right_on='instance_id')

In [51]:
coords_df.shape

(48692, 15)

In [52]:
coords_df.sample()

Unnamed: 0,study_id,series_id,instance,condition,level,x,y,ss_id,instance_id,cl,series_description,rows,columns,filename,patientposition
43069,3817394595,4179349626,8,LSS,L1L2,359.079646,345.20354,3817394595_4179349626,3817394595_4179349626_8,LSSL1L2,Axial T2,640,640,E:\data\RSNA2024\pngs_256\3817394595_417934962...,HFS


In [53]:
# TODO: make sure we match coords corectly
coords_df['x_perc'] = coords_df['x'] / coords_df['columns']
coords_df['y_perc'] = coords_df['y'] / coords_df['rows']

In [54]:
ax, non_ax = coords_df[coords_df['series_description'] == 'Axial T2'], coords_df[coords_df['series_description'] != 'Axial T2']
ax.shape, non_ax.shape, coords_df.shape

((19220, 17), (29472, 17), (48692, 17))

In [55]:
coords_df.y.min()

2.063097514340344

In [56]:
for c in [ax, non_ax]:
    print(c['x_perc'].min(), c['y_perc'].min())
    print(c['x_perc'].max(), c['y_perc'].max())
    print('/////////////')
    # print(c['x_perc'].mean(), c['y_perc'].mean())

0.2849557522123894 0.3092621664050236
0.6631964653474212 0.8078096961632665
/////////////
0.0048828125 0.0040294873326959845
0.7684669901065448 0.9117647058823529
/////////////


In [57]:
files_df.shape

(147218, 14)

In [58]:
files_df = pd.merge(files_df, train_desc_df.loc[:, ['ss_id', 'series_description']],  how='inner', left_on=['ss_id'], right_on=['ss_id'])

files_df.sample(2)

Unnamed: 0,study_id,series_id,image,proj,instancenumber,rows,columns,slicethickness,spacingbetweenslices,patientposition,seriesdescription,ss_id,instance_id,filename,series_description
128844,513538670,3328881488,15,-12,15,320,320,4.0,4.8,HFS,,513538670_3328881488,513538670_3328881488_15,E:\data\RSNA2024\pngs_256\513538670_3328881488...,Sagittal T2/STIR
134815,690849662,454779683,42,-625,42,640,640,3.5,3.5,HFS,T2,690849662_454779683,690849662_454779683_42,E:\data\RSNA2024\pngs_256\690849662_454779683_...,Axial T2


In [59]:
files_df.shape

(147218, 15)

In [60]:
train_desc_df.series_description.isna().sum(), files_df.series_description.isna().sum()

(0, 0)

In [61]:
# # TODO: set correct values for condition and cl
# # WTF??? what happened here

# files_df['cl'] = 'H'
# files_df['condition'] = 'H'

In [62]:
coords_df.instance_id.count(), coords_df.drop_duplicates(subset=['instance_id']).instance_id.count()

(48692, 24546)

In [63]:
duplicates = coords_df.instance_id.unique().tolist()

len(duplicates)

24546

In [64]:
files_df['healthy'] = True
files_df.loc[files_df.instance_id.isin(duplicates), 'healthy'] = False

In [65]:
files_df.healthy.value_counts()

healthy
True     122672
False     24546
Name: count, dtype: int64

In [66]:
files_df.shape, files_df.healthy.isna().sum()

((147218, 16), 0)

In [67]:
files_df.sample()

Unnamed: 0,study_id,series_id,image,proj,instancenumber,rows,columns,slicethickness,spacingbetweenslices,patientposition,seriesdescription,ss_id,instance_id,filename,series_description,healthy
14748,1387631768,1355036926,11,3,11,760,640,3.0,3.6,HFS,T1,1387631768_1355036926,1387631768_1355036926_11,E:\data\RSNA2024\pngs_256\1387631768_135503692...,Sagittal T1,True


In [68]:
files_df['inst_min'] = files_df.groupby('ss_id')['instancenumber'].transform('min')
files_df['inst_max'] = files_df.groupby('ss_id')['instancenumber'].transform('max')

files_df['inst'] = files_df['instancenumber'] - files_df['inst_min']
files_df['inst_perc'] = files_df['inst'] / files_df['inst_max']

files_df.head(2)

Unnamed: 0,study_id,series_id,image,proj,instancenumber,rows,columns,slicethickness,spacingbetweenslices,patientposition,seriesdescription,ss_id,instance_id,filename,series_description,healthy,inst_min,inst_max,inst,inst_perc
0,100206310,1012284084,1,-394,1,320,320,3.5,3.5,HFS,T2,100206310_1012284084,100206310_1012284084_1,E:\data\RSNA2024\pngs_256\100206310_1012284084...,Axial T2,True,1,60,0,0.0
1,100206310,1012284084,10,-427,10,320,320,3.5,3.5,HFS,T2,100206310_1012284084,100206310_1012284084_10,E:\data\RSNA2024\pngs_256\100206310_1012284084...,Axial T2,True,1,60,9,0.15


In [69]:
coords_df = pd.merge(coords_df, files_df[['instance_id','inst_perc']], left_on='instance_id', right_on='instance_id')

coords_df.sample(2)

Unnamed: 0,study_id,series_id,instance,condition,level,x,y,ss_id,instance_id,cl,series_description,rows,columns,filename,patientposition,x_perc,y_perc,inst_perc
12819,1140449293,2651120449,37,LSS,L5S1,168.95885,173.317192,1140449293_2651120449,1140449293_2651120449_37,LSSL5S1,Axial T2,320,320,E:\data\RSNA2024\pngs_256\1140449293_265112044...,HFS,0.527996,0.541616,0.818182
6012,525034566,2558329102,4,LNFN,L4L5,126.980213,161.315068,525034566_2558329102,525034566_2558329102_4,LNFNL4L5,Sagittal T1,256,256,E:\data\RSNA2024\pngs_256\525034566_2558329102...,HFS,0.496016,0.630137,0.2


In [70]:
# sort files according to immage
files_df.sort_values(['study_id', 'series_id', 'image'], ascending=[False, False, True], inplace=True)

In [71]:
files_df.head(10)

Unnamed: 0,study_id,series_id,image,proj,instancenumber,rows,columns,slicethickness,spacingbetweenslices,patientposition,seriesdescription,ss_id,instance_id,filename,series_description,healthy,inst_min,inst_max,inst,inst_perc
125671,4290709089,4237840455,1,19,1,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_1,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,0,0.0
125678,4290709089,4237840455,2,14,2,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_2,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,1,0.066667
125679,4290709089,4237840455,3,10,3,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_3,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,2,0.133333
125680,4290709089,4237840455,4,5,4,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_4,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,False,1,15,3,0.2
125681,4290709089,4237840455,5,0,5,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_5,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,False,1,15,4,0.266667
125682,4290709089,4237840455,6,-3,6,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_6,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,5,0.333333
125683,4290709089,4237840455,7,-8,7,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_7,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,6,0.4
125684,4290709089,4237840455,8,-12,8,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_8,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,7,0.466667
125685,4290709089,4237840455,9,-17,9,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_9,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,8,0.533333
125672,4290709089,4237840455,10,-22,10,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_10,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,9,0.6


In [78]:
idx1 = files_df.sort_values(['study_id', 'series_id', 'image'], ascending=[False, False, True]).index
idx2 = files_df.sort_values(['study_id', 'series_id', 'proj'], ascending=[False, False, True]).index

idx1.equals(idx2)

False

### Save results

In [73]:
train_df.to_csv(CFG.DEST_FOLDER / 'train.csv', index=False)

In [74]:
coords_df.to_csv(CFG.DEST_FOLDER / 'train_label_coordinates.csv', index=False)

In [75]:
files_df.to_csv(CFG.DEST_FOLDER / 'train_files.csv', index=False)

In [77]:
files_df.head(10)

Unnamed: 0,study_id,series_id,image,proj,instancenumber,rows,columns,slicethickness,spacingbetweenslices,patientposition,seriesdescription,ss_id,instance_id,filename,series_description,healthy,inst_min,inst_max,inst,inst_perc
125671,4290709089,4237840455,1,19,1,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_1,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,0,0.0
125678,4290709089,4237840455,2,14,2,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_2,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,1,0.066667
125679,4290709089,4237840455,3,10,3,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_3,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,2,0.133333
125680,4290709089,4237840455,4,5,4,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_4,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,False,1,15,3,0.2
125681,4290709089,4237840455,5,0,5,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_5,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,False,1,15,4,0.266667
125682,4290709089,4237840455,6,-3,6,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_6,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,5,0.333333
125683,4290709089,4237840455,7,-8,7,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_7,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,6,0.4
125684,4290709089,4237840455,8,-12,8,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_8,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,7,0.466667
125685,4290709089,4237840455,9,-17,9,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_9,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,8,0.533333
125672,4290709089,4237840455,10,-22,10,384,384,4.0,4.6,HFS,,4290709089_4237840455,4290709089_4237840455_10,E:\data\RSNA2024\pngs_256\4290709089_423784045...,Sagittal T1,True,1,15,9,0.6
