In [1]:
!pip install duckdb --no-index --find-links=file:///kaggle/input/polars-and-duckdb/kaggle/working/mysitepackages/duck_pkg

Looking in links: file:///kaggle/input/polars-and-duckdb/kaggle/working/mysitepackages/duck_pkg
Processing /kaggle/input/polars-and-duckdb/kaggle/working/mysitepackages/duck_pkg/duckdb-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Installing collected packages: duckdb
Successfully installed duckdb-0.8.1


In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import os
import polars as pl
import duckdb as dd
from tqdm import tqdm
import matplotlib.pyplot as plt
import cv2
from pydicom import dcmread
import warnings

## Conditions
### spinal_canal_stenosis
### neural_foraminal_narrowing
#### left/right
### subarticular_stenosis
#### left/right

## Levels of vertebrae
#### l1_l2
#### l2_l3
#### l3_l4
#### l4_l5
#### l5_s1

> Each Study has series of images for these three conditions. The series description tells us what type of image is in the dcm file

#### Reading the available training metadata files using polars

In [31]:
train_meta_data = pl.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train.csv'\
                              , low_memory=True)
train_series_desc = pl.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_series_descriptions.csv'\
                                , low_memory=True)
train_label_coordinates = pl.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_label_coordinates.csv'\
                                      , low_memory=True)

melt_cols = train_meta_data.columns[1:]

train_meta_data_melted = train_meta_data.melt(id_vars="study_id", value_vars=melt_cols)

In [32]:
train_meta_data_melted.columns = ['study_id', 'condition_vert_level', 'severity']
train_meta_data_melted.head()

study_id,condition_vert_level,severity
i64,str,str
4003253,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild"""
4646740,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild"""
7143189,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild"""
8785691,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild"""
10728036,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild"""


In [37]:
train_label_coordinates_formatted = dd.sql("select study_id, series_id, instance_number \
,lower(replace(condition,' ','_')||'_'||replace(level,'/','_')) as condition, x, y \
from train_label_coordinates").pl()

train_label_coordinates_formatted.head()

study_id,series_id,instance_number,condition,x,y
i64,i64,i64,str,f64,f64
4003253,702807833,8,"""spinal_canal_stenosis_l1_l2""",322.831858,227.964602
4003253,702807833,8,"""spinal_canal_stenosis_l2_l3""",320.571429,295.714286
4003253,702807833,8,"""spinal_canal_stenosis_l3_l4""",323.030303,371.818182
4003253,702807833,8,"""spinal_canal_stenosis_l4_l5""",335.292035,427.327434
4003253,702807833,8,"""spinal_canal_stenosis_l5_s1""",353.415929,483.964602


#### Checking a specific study/patient's scans and the corresponding details, only to get a better sense of the data

In [38]:
pl.Config(fmt_str_lengths=100)
pl.Config.set_tbl_rows(1000)
print(train_meta_data_melted.shape)
# print(train_meta_data.filter(pl.col('study_id')==100206310).transpose(include_header=True))
print(train_meta_data_melted.filter(pl.col('study_id')==100206310))
print(train_series_desc.filter(pl.col('study_id')==100206310))
print(train_label_coordinates_formatted.shape)
print(train_label_coordinates_formatted.filter(pl.col('study_id')==100206310))

(49375, 3)
shape: (25, 3)
┌───────────┬────────────────────────────────────────┬─────────────┐
│ study_id  ┆ condition_vert_level                   ┆ severity    │
│ ---       ┆ ---                                    ┆ ---         │
│ i64       ┆ str                                    ┆ str         │
╞═══════════╪════════════════════════════════════════╪═════════════╡
│ 100206310 ┆ spinal_canal_stenosis_l1_l2            ┆ Normal/Mild │
│ 100206310 ┆ spinal_canal_stenosis_l2_l3            ┆ Normal/Mild │
│ 100206310 ┆ spinal_canal_stenosis_l3_l4            ┆ Moderate    │
│ 100206310 ┆ spinal_canal_stenosis_l4_l5            ┆ Severe      │
│ 100206310 ┆ spinal_canal_stenosis_l5_s1            ┆ Normal/Mild │
│ 100206310 ┆ left_neural_foraminal_narrowing_l1_l2  ┆ Normal/Mild │
│ 100206310 ┆ left_neural_foraminal_narrowing_l2_l3  ┆ Moderate    │
│ 100206310 ┆ left_neural_foraminal_narrowing_l3_l4  ┆ Moderate    │
│ 100206310 ┆ left_neural_foraminal_narrowing_l4_l5  ┆ Severe      │
│ 100206

In [50]:
pl.Config(fmt_str_lengths=100)
pl.Config.set_tbl_rows(1000)

train_series_coordinates = dd.sql("select t1.study_id, t1.series_id, t1.series_description, t2.instance_number \
,t2.condition, t2.x, t2.y \
from train_series_desc t1 left join train_label_coordinates_formatted t2 \
on t1.study_id = t2.study_id and t1.series_id = t2.series_id \
").pl()

train_series_coordinates.filter(pl.col('study_id')==100206310).sort(['study_id','series_id'])

study_id,series_id,series_description,instance_number,condition,x,y
i64,i64,str,i64,str,f64,f64
100206310,1012284084,"""Axial T2""",37,"""right_subarticular_stenosis_l3_l4""",147.40625,161.142857
100206310,1012284084,"""Axial T2""",46,"""left_subarticular_stenosis_l4_l5""",168.536252,156.27907
100206310,1012284084,"""Axial T2""",20,"""left_subarticular_stenosis_l1_l2""",180.355677,165.0342
100206310,1012284084,"""Axial T2""",55,"""left_subarticular_stenosis_l5_s1""",167.660739,157.154583
100206310,1012284084,"""Axial T2""",55,"""right_subarticular_stenosis_l5_s1""",145.120536,159.428571
100206310,1012284084,"""Axial T2""",20,"""right_subarticular_stenosis_l1_l2""",145.120536,162.285714
100206310,1012284084,"""Axial T2""",46,"""right_subarticular_stenosis_l4_l5""",146.263393,160.0
100206310,1012284084,"""Axial T2""",28,"""left_subarticular_stenosis_l2_l3""",177.729138,160.218878
100206310,1012284084,"""Axial T2""",28,"""right_subarticular_stenosis_l2_l3""",145.691964,160.0
100206310,1012284084,"""Axial T2""",37,"""left_subarticular_stenosis_l3_l4""",173.351573,158.467852


In [53]:
full_training_data = dd.sql("select t1.study_id, t2.series_id, t2.series_description, t2.instance_number \
, t1.condition_vert_level, t1.severity, t2.x, t2.y \
from train_meta_data_melted t1 \
left join train_series_coordinates t2 on t1.study_id = t2.study_id \
and t1.condition_vert_level = t2.condition \
").pl()

full_training_data.shape

(49375, 8)

In [58]:
dd.sql("select study_id, count(distinct(condition_vert_level)) as cond_count, count(distinct(severity)) as sev_count \
from full_training_data group by study_id having sev_count < 3 order by 3").pl()

study_id,cond_count,sev_count
i64,i64,i64
7143189,25,1
26342422,25,1
33736057,25,1
60612428,25,1
87937369,25,1
92407737,25,1
92565488,25,1
109454808,25,1
165018059,25,1
189207389,25,1


#### Looking at the number of studies and series of images available for various scan orientations, it appears that the Saggital orientations are missing for a couple of studies

In [6]:
dd.sql("select series_description, count(distinct(study_id)) as studies, count(distinct(series_id)) as series \
from train_series_desc group by series_description").pl()

series_description,studies,series
str,i64,i64
"""Sagittal T1""",1973,1980
"""Axial T2""",1975,2340
"""Sagittal T2/STIR""",1974,1974


#### Checking whether all the different conditions for all the 5 vertebrae are available in all the studies.
#### It appears that some of the condition/vertebrae combinations are not available in some of the studies
##### e.g. The images with orientation "Axial T2" for the condition "Right Subarticular Stenosis" at the vertebra "L1/L2" is available in 1812 studies out of a total of 1975 studies

In [7]:
part_1 = list(os.listdir('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images'))
#part_1 = list(filter(lambda x: x.find('.DS') == -1, part_1))
len(part_1)

1975

In [8]:
#train_series_desc

p1 = [(x, f"/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/{x}") for x in part_1]
meta_obj = { p[0]: { 'folder_path': p[1], 
                    'SeriesInstanceUIDs': [] 
                   } 
            for p in p1 }

In [9]:
for m in meta_obj:
    meta_obj[m]['SeriesInstanceUIDs'] = list(os.listdir(meta_obj[m]['folder_path']))

In [10]:
train_series_desc.filter((pl.col('study_id') == int('100206310')) & (pl.col('series_id') == int('1792451510')))\
.select(pl.col('series_description')).item(0,0)

'Sagittal T2/STIR'

In [10]:
# grabs the correspoding series descriptions
for k in tqdm(meta_obj):
    for s in meta_obj[k]['SeriesInstanceUIDs']:
        if 'SeriesDescriptions' not in meta_obj[k]:
            meta_obj[k]['SeriesDescriptions'] = []
        try:
            meta_obj[k]['SeriesDescriptions'].append(train_series_desc.filter((pl.col('study_id') == int(k)) \
                                                                              & (pl.col('series_id') == int(s)))\
                                                     .select(pl.col('series_description')).item(0,0))
        except:
            print("Failed on", s, k)

100%|██████████| 1975/1975 [00:02<00:00, 934.42it/s]


In [14]:
# ptobj = meta_obj[list(meta_obj.keys())[10]]
ptobj = meta_obj['100206310']
ptobj

{'folder_path': '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310',
 'SeriesInstanceUIDs': ['1792451510', '1012284084', '2092806862'],
 'SeriesDescriptions': ['Sagittal T2/STIR', 'Axial T2', 'Sagittal T1'],
 'image_files': [18, 60, 18]}

In [15]:
ptobj['image_files'] = []

for idx, i in enumerate(ptobj['SeriesInstanceUIDs']):
    print(idx)
    print(i)
    print("**********")
    ptobj['image_files'].append(glob.glob(f"{ptobj['folder_path']}/{ptobj['SeriesInstanceUIDs'][idx]}/*.dcm"))
    
#len(images)

0
1792451510
**********
1
1012284084
**********
2
2092806862
**********


In [17]:
ptobj['SeriesDescriptions'][0]

'Sagittal T2/STIR'

In [18]:
ptobj['image_files'][0]

['/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/12.dcm',
 '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/18.dcm',
 '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/9.dcm',
 '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/14.dcm',
 '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/11.dcm',
 '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/10.dcm',
 '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/17.dcm',
 '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/1.dcm',
 '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/15.dcm',
 '/kaggle/in

In [16]:
ptobj

{'folder_path': '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310',
 'SeriesInstanceUIDs': ['1792451510', '1012284084', '2092806862'],
 'SeriesDescriptions': ['Sagittal T2/STIR', 'Axial T2', 'Sagittal T1'],
 'image_files': [['/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/12.dcm',
   '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/18.dcm',
   '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/9.dcm',
   '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/14.dcm',
   '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/11.dcm',
   '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1792451510/10.dcm',
   '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification

In [14]:
for k in tqdm(meta_obj):
    meta_obj[k]['image_files'] = []
    for idx, i in enumerate(meta_obj[k]['SeriesInstanceUIDs']):
        meta_obj[k]['image_files']\
        .append(len(glob.glob(f"{meta_obj[k]['folder_path']}/{meta_obj[k]['SeriesInstanceUIDs'][idx]}/*.dcm")))

100%|██████████| 1975/1975 [00:31<00:00, 62.61it/s]


In [15]:
meta_obj['100206310']

{'folder_path': '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310',
 'SeriesInstanceUIDs': ['1792451510', '1012284084', '2092806862'],
 'SeriesDescriptions': ['Sagittal T2/STIR', 'Axial T2', 'Sagittal T1'],
 'image_files': [18, 60, 18]}

In [17]:
meta_obj[list(meta_obj.keys())[1]]

{'folder_path': '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/1972129014',
 'SeriesInstanceUIDs': ['2898623075', '3324327485', '3203550406'],
 'SeriesDescriptions': ['Sagittal T2/STIR', 'Axial T2', 'Sagittal T1'],
 'image_files': [18, 25, 18]}

In [19]:
pl.Config(fmt_str_lengths=100)
pl.Config.set_tbl_rows(1000)
dd.sql(" \
select tsd.series_description, tlc.condition, tlc.level, \
count(distinct(tlc.study_id)) as studies, count(distinct(tlc.instance_number)) as label_defining_images, \
round(count(distinct(tlc.study_id))/1975, 5) as pct_of_total_studies, \
from train_series_desc tsd \
join train_label_coordinates tlc \
on tsd.study_id = tlc.study_id \
and tsd.series_id = tlc.series_id \
group by tsd.series_description, tlc.condition, tlc.level \
order by 1,2, 4 desc \
").pl()

series_description,condition,level,studies,label_defining_images,pct_of_total_studies
str,str,str,i64,i64,f64
"""Axial T2""","""Left Subarticular Stenosis""","""L4/L5""",1971,55,0.99797
"""Axial T2""","""Left Subarticular Stenosis""","""L3/L4""",1971,53,0.99797
"""Axial T2""","""Left Subarticular Stenosis""","""L5/S1""",1964,63,0.99443
"""Axial T2""","""Left Subarticular Stenosis""","""L2/L3""",1892,43,0.95797
"""Axial T2""","""Left Subarticular Stenosis""","""L1/L2""",1810,52,0.91646
"""Axial T2""","""Right Subarticular Stenosis""","""L3/L4""",1971,50,0.99797
"""Axial T2""","""Right Subarticular Stenosis""","""L4/L5""",1971,57,0.99797
"""Axial T2""","""Right Subarticular Stenosis""","""L5/S1""",1967,61,0.99595
"""Axial T2""","""Right Subarticular Stenosis""","""L2/L3""",1891,43,0.95747
"""Axial T2""","""Right Subarticular Stenosis""","""L1/L2""",1812,49,0.91747


#### Now let's check how many studies have moderate or severe levels of the condition "Right Subarticular Stenosis" at the vertebra "L1/L2"

In [20]:
dd.sql("select value, count(distinct(study_id)) from train_meta_data_melted where variable = 'right_subarticular_stenosis_l1_l2' \
group by value").pl()

value,count(DISTINCT study_id)
str,i64
"""Normal/Mild""",1680
,161
"""Moderate""",110
"""Severe""",24


In [21]:
dd.sql("select value, count(distinct(study_id)) from train_meta_data_melted where variable = 'right_subarticular_stenosis_l1_l2' \
and study_id in (select distinct(study_id) from train_label_coordinates where condition = 'Right Subarticular Stenosis'\
and level = 'L1/L2') group by value").pl()

value,count(DISTINCT study_id)
str,i64
"""Normal/Mild""",1678
"""Moderate""",110
"""Severe""",24


In [22]:
train_label_coordinates_list = dd.sql("select distinct study_id from train_meta_data_melted where variable = 'right_subarticular_stenosis_l1_l2' \
and value = 'Normal/Mild' \
and study_id in (select distinct(study_id) from train_label_coordinates where condition = 'Right Subarticular Stenosis'\
and level = 'L1/L2')").pl().to_series().unique().to_list()

train_meta_data_list = dd.sql("select distinct study_id from train_meta_data_melted where variable = 'right_subarticular_stenosis_l1_l2' and value = 'Normal/Mild' ")\
.pl().to_series().unique().to_list()

set(train_label_coordinates_list) ^ set(train_meta_data_list)

{3008676218, 3303545110}

#### The above analysis shows that there are two studies related to Right Subarticular Stenosis of L1/L2 which are not present in the train_label_coordinates file

In [23]:
dd.sql("select * from train_label_coordinates where study_id in ('3008676218','3303545110')").pl()

study_id,series_id,instance_number,condition,level,x,y
i64,i64,i64,str,str,f64,f64
3303545110,304087230,32,"""Left Subarticular Stenosis""","""L5/S1""",358.840036,288.574
3303545110,304087230,36,"""Right Subarticular Stenosis""","""L5/S1""",273.098214,299.428571
3303545110,2789152717,5,"""Right Neural Foraminal Narrowing""","""L4/L5""",252.038567,253.884298
3303545110,2789152717,5,"""Right Neural Foraminal Narrowing""","""L5/S1""",263.322314,288.440771
3303545110,2789152717,6,"""Right Neural Foraminal Narrowing""","""L1/L2""",272.490358,126.236915
3303545110,2789152717,6,"""Right Neural Foraminal Narrowing""","""L2/L3""",262.61708,164.319559
3303545110,2789152717,6,"""Right Neural Foraminal Narrowing""","""L3/L4""",256.975207,211.570248
3303545110,2789152717,13,"""Left Neural Foraminal Narrowing""","""L1/L2""",269.669421,128.352617
3303545110,2789152717,13,"""Left Neural Foraminal Narrowing""","""L2/L3""",261.911846,168.550964
3303545110,2789152717,13,"""Left Neural Foraminal Narrowing""","""L3/L4""",254.859504,211.570248


In [24]:
dd.sql("select * from train_meta_data where study_id in ('3008676218','3303545110')").pl().transpose(include_header=True)

column,column_0,column_1
str,str,str
"""study_id""","""3008676218""","""3303545110"""
"""spinal_canal_stenosis_l1_l2""",,"""Normal/Mild"""
"""spinal_canal_stenosis_l2_l3""",,"""Normal/Mild"""
"""spinal_canal_stenosis_l3_l4""",,"""Normal/Mild"""
"""spinal_canal_stenosis_l4_l5""",,"""Normal/Mild"""
"""spinal_canal_stenosis_l5_s1""",,"""Normal/Mild"""
"""left_neural_foraminal_narrowing_l1_l2""","""Normal/Mild""","""Normal/Mild"""
"""left_neural_foraminal_narrowing_l2_l3""","""Severe""","""Normal/Mild"""
"""left_neural_foraminal_narrowing_l3_l4""","""Moderate""","""Normal/Mild"""
"""left_neural_foraminal_narrowing_l4_l5""","""Moderate""","""Normal/Mild"""


#### It seems from above, that the study 3303545110 is only present for Right Subarticular Stenosis of L5/S1 and not of L1/L2 and the study 3008676218 is not present at all for any conditions/vertebrae combination
#### The questions is then how is 3303545110 labelled as Normal/Mild for Right Subarticular Stenosis of L1/L2 and 3008676218 labelled for all the conditions/vertebrae combination

#### Below are some of the images from the study_id/patient 100206310 

In [24]:
dcm_image_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1012284084/20.dcm'
left_centre_coords = (int(180.355677), int(165.0342))
right_centre_coords = (int(145.120536), int(162.285714))
radius = 10
color = (255, 0, 0)  # Red color in BGR
thickness = 2

dicom_ds = dcmread(dcm_image_path)
IMG = dicom_ds.pixel_array

"""IMG_normalized = cv2.normalize(IMG, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)

left_IMG_with_circle = cv2.circle(IMG_normalized.copy(), left_centre_coords, radius, color, thickness)
left_IMG_with_circle = cv2.cvtColor(left_IMG_with_circle, cv2.COLOR_BGR2RGB)

right_IMG_with_circle = cv2.circle(IMG_normalized.copy(), right_centre_coords, radius, color, thickness)
right_IMG_with_circle = cv2.cvtColor(right_IMG_with_circle, cv2.COLOR_BGR2RGB)"""

'IMG_normalized = cv2.normalize(IMG, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)\n\nleft_IMG_with_circle = cv2.circle(IMG_normalized.copy(), left_centre_coords, radius, color, thickness)\nleft_IMG_with_circle = cv2.cvtColor(left_IMG_with_circle, cv2.COLOR_BGR2RGB)\n\nright_IMG_with_circle = cv2.circle(IMG_normalized.copy(), right_centre_coords, radius, color, thickness)\nright_IMG_with_circle = cv2.cvtColor(right_IMG_with_circle, cv2.COLOR_BGR2RGB)'

In [23]:
IMG.shape

(320, 320)

In [None]:
plt.imshow(left_IMG_with_circle)
plt.axis('off')  # Turn off axis numbers and ticks
plt.title("Left subarticular stenosis L1/L2 - Normal/Mild")
plt.show()

In [None]:
plt.imshow(right_IMG_with_circle)
plt.axis('off')  # Turn off axis numbers and ticks
plt.title("Right subarticular stenosis L1/L2 - Normal/Mild")
plt.show()

In [None]:
dcm_image_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1012284084/46.dcm'
left_centre_coords = (int(168.536252), int(156.27907))
right_centre_coords = (int(146.263393), int(160.0))
radius = 10
color = (255, 0, 0)  # Red color in BGR
thickness = 2

dicom_ds = dcmread(dcm_image_path)
IMG = dicom_ds.pixel_array

IMG_normalized = cv2.normalize(IMG, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)

left_IMG_with_circle = cv2.circle(IMG_normalized.copy(), left_centre_coords, radius, color, thickness)
left_IMG_with_circle = cv2.cvtColor(left_IMG_with_circle, cv2.COLOR_BGR2RGB)

right_IMG_with_circle = cv2.circle(IMG_normalized.copy(), right_centre_coords, radius, color, thickness)
right_IMG_with_circle = cv2.cvtColor(right_IMG_with_circle, cv2.COLOR_BGR2RGB)

In [None]:
plt.imshow(left_IMG_with_circle)
plt.axis('off')  # Turn off axis numbers and ticks
plt.title("Left subarticular stenosis L4/L5 - Severe")
plt.show()

In [None]:
plt.imshow(right_IMG_with_circle)
plt.axis('off')  # Turn off axis numbers and ticks
plt.title("Right subarticular stenosis L4/L5 - Moderate")
plt.show()

In [None]:
dcm_image_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/2092806862/4.dcm'
right_centre_coords = (int(253.393075), int(278.680244))
radius = 10
color = (255, 0, 0)  # Red color in BGR
thickness = 2

dicom_ds = dcmread(dcm_image_path)
IMG = dicom_ds.pixel_array

IMG_normalized = cv2.normalize(IMG, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)

right_IMG_with_circle = cv2.circle(IMG_normalized.copy(), right_centre_coords, radius, color, thickness)
right_IMG_with_circle = cv2.cvtColor(right_IMG_with_circle, cv2.COLOR_BGR2RGB)

In [None]:
plt.imshow(right_IMG_with_circle)
plt.axis('off')  # Turn off axis numbers and ticks
plt.title("Right neural foraminal narrowing L4/L5 - Moderate")
plt.show()

## Aim : To select image files to create Train and Validation set
1. 25% of data will be set aside permanently as a validation set for all experiments. This set won't be used for training/testing/cross-validation
2. 75% of data will be used for all experiments
3. Images are 320 * 320 arrays
4. This seems to be a multi-class, multi-label problem

#### Pipeline to make a sample submission to test the submission data creation logic

In [None]:
submission = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/sample_submission.csv')
submission['row_id'] = 'samples'

In [25]:
import glob
import os

config = {}
config['root_file_path'] = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/'

studies = os.listdir(config['root_file_path'])

In [26]:
studies

['44036939']

In [None]:
rows = {}
weight_dict = {'normal_mild':1, 'moderate':2, 'severe':4}
conditions = ['spinal_canal_stenosis', 'neural_foraminal_narrowing', 'subarticular_stenosis']
sides = ['left', 'right']
vertebrae_levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']
severity_levels = ['normal_mild', 'moderate', 'severe']

for c in conditions:
    for v in vertebrae_levels:
        if c != 'spinal_canal_stenosis':
            for s in sides:
                for st in studies:
                    #print(st+'_'+s+'_'+c+'_'+v)
                    rows[st+'_'+s+'_'+c+'_'+v] = np.array([0.333333 * 2, 0.333333 * 2, 0.333333 * 2])
        else:
            for st in studies:
                #print(st+'_'+c+'_'+v)
                rows[st+'_'+c+'_'+v] = np.array([0.333333 * 2, 0.333333 * 2, 0.333333 * 2])

In [None]:
for row_id, feature in tqdm(rows.items()):
    feature_set_reshaped = feature.reshape(1, -1)
    predictions = np.ascontiguousarray(feature_set_reshaped/2)
    df = pd.DataFrame(predictions, columns=severity_levels)
    df.insert(loc=0, column='row_id', value=row_id)
    submission = pd.concat([submission,df]).reset_index(drop=True)
    
i = submission[(submission.row_id == 'samples')].index
submission = submission.drop(i).reset_index(drop=True)

In [None]:
submission.to_csv('submission.csv', index=False)