In [29]:
import pandas as pd
import numpy as np
import json 

import os
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from pydicom import dcmread
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.metrics import AUC, Precision, Recall

import matplotlib.pyplot as plt
import seaborn as sns 

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [31]:
# Loading the dfs
train = pd.read_csv('../data/raw/train.csv')
train_label_coordinates = pd.read_csv('../data/raw/train_label_coordinates.csv')
train_series_descriptions = pd.read_csv('../data/raw/train_series_descriptions.csv')

train.head(3)

Unnamed: 0,study_id,spinal_canal_stenosis_l1_l2,spinal_canal_stenosis_l2_l3,spinal_canal_stenosis_l3_l4,spinal_canal_stenosis_l4_l5,spinal_canal_stenosis_l5_s1,left_neural_foraminal_narrowing_l1_l2,left_neural_foraminal_narrowing_l2_l3,left_neural_foraminal_narrowing_l3_l4,left_neural_foraminal_narrowing_l4_l5,left_neural_foraminal_narrowing_l5_s1,right_neural_foraminal_narrowing_l1_l2,right_neural_foraminal_narrowing_l2_l3,right_neural_foraminal_narrowing_l3_l4,right_neural_foraminal_narrowing_l4_l5,right_neural_foraminal_narrowing_l5_s1,left_subarticular_stenosis_l1_l2,left_subarticular_stenosis_l2_l3,left_subarticular_stenosis_l3_l4,left_subarticular_stenosis_l4_l5,left_subarticular_stenosis_l5_s1,right_subarticular_stenosis_l1_l2,right_subarticular_stenosis_l2_l3,right_subarticular_stenosis_l3_l4,right_subarticular_stenosis_l4_l5,right_subarticular_stenosis_l5_s1
0,4003253,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild
1,4646740,Normal/Mild,Normal/Mild,Moderate,Severe,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Moderate,Moderate,Normal/Mild,Normal/Mild,Moderate,Moderate,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Severe,Normal/Mild,Normal/Mild,Moderate,Moderate,Moderate,Normal/Mild
2,7143189,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild,Normal/Mild


In [33]:
train_series_descriptions.head(3)

Unnamed: 0,study_id,series_id,series_description
0,4003253,702807833,Sagittal T2/STIR
1,4003253,1054713880,Sagittal T1
2,4003253,2448190387,Axial T2


In [35]:
spinal = train[['study_id', 'spinal_canal_stenosis_l1_l2', 'spinal_canal_stenosis_l2_l3', 
                'spinal_canal_stenosis_l3_l4', 'spinal_canal_stenosis_l4_l5', 'spinal_canal_stenosis_l5_s1']]

# Changing the format to long 
spinal = spinal.melt(id_vars=['study_id'], var_name='level', value_name='severity')
spinal = spinal.sort_values(['study_id', 'level']).reset_index(drop=True)
spinal = spinal.dropna()
spinal.head()

Unnamed: 0,study_id,level,severity
0,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild
1,4003253,spinal_canal_stenosis_l2_l3,Normal/Mild
2,4003253,spinal_canal_stenosis_l3_l4,Normal/Mild
3,4003253,spinal_canal_stenosis_l4_l5,Normal/Mild
4,4003253,spinal_canal_stenosis_l5_s1,Normal/Mild


In [37]:
train_label_coordinates.head(3)

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y
0,4003253,702807833,8,Spinal Canal Stenosis,L1/L2,322.831858,227.964602
1,4003253,702807833,8,Spinal Canal Stenosis,L2/L3,320.571429,295.714286
2,4003253,702807833,8,Spinal Canal Stenosis,L3/L4,323.030303,371.818182


In [49]:
train_label = train_label_coordinates[train_label_coordinates['condition']=='Spinal Canal Stenosis']
spinal_merged = pd.merge(spinal, train_label, on='study_id', how='inner')
spinal_merged = pd.merge(spinal_merged, train_series_descriptions, on=['study_id', 'series_id'], how='inner')
spinal_merged.head()

Unnamed: 0,study_id,level_x,severity,series_id,instance_number,condition,level_y,x,y,series_description
0,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L1/L2,322.831858,227.964602,Sagittal T2/STIR
1,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L2/L3,320.571429,295.714286,Sagittal T2/STIR
2,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L3/L4,323.030303,371.818182,Sagittal T2/STIR
3,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L4/L5,335.292035,427.327434,Sagittal T2/STIR
4,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L5/S1,353.415929,483.964602,Sagittal T2/STIR


In [51]:
severity_unique = spinal_merged['severity'].unique()
severity_enc = dict(zip(severity_unique, range(len(severity_unique))))

# Saving the dictionary as a json file 
json.dump(severity_enc, open(f'../data/interim/enc_severity.json', 'w'))

spinal_merged['severity_enc'] = spinal_merged['severity'].map(severity_enc)
spinal_merged.head()
len(spinal_merged)

48765

In [53]:
# Getting a list of the studies needed for our model
studies = spinal_merged['study_id'].unique()
studies = [str(study) for study in studies]

main_path = "F:/RSNA/rsna-2024-lumbar-spine-degenerative-classification/train_images"

dcm_paths = []

for study_folder in os.listdir(main_path):
    if study_folder in studies:
        study_path = os.altsep.join([main_path, study_folder])
        if os.path.isdir(study_path):
            for series_folder in os.listdir(study_path):
                series_path = os.altsep.join([study_path, series_folder])
                if os.path.isdir(series_path):
                    for dcm_file in os.listdir(series_path):
                        if dcm_file.endswith('.dcm'):      
                            dcm_paths.append({
                                'study_id': study_folder,
                                'series_id': series_folder,
                                'dcm_path': os.altsep.join([series_path, dcm_file])
                            })

dicom_df = pd.DataFrame(dcm_paths)
dicom_df = dicom_df.sort_values('study_id').reset_index(drop=True)
dicom_df['study_id'] = dicom_df['study_id'].astype('int64')
dicom_df['series_id'] = dicom_df['series_id'].astype('int64')
dicom_df.head(10)
len(dicom_df)

147131

In [44]:
spinal_merged = pd.merge(spinal_merged, dicom_df, on=['study_id', 'series_id'], how='inner')
spinal_merged.head()

Unnamed: 0,study_id,level_x,severity,series_id,instance_number,condition,level_y,x,y,series_description,severity_enc,dcm_path
0,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L1/L2,322.831858,227.964602,Sagittal T2/STIR,0,F:/RSNA/rsna-2024-lumbar-spine-degenerative-classification/train_images/4003253/702807833/12.dcm
1,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L1/L2,322.831858,227.964602,Sagittal T2/STIR,0,F:/RSNA/rsna-2024-lumbar-spine-degenerative-classification/train_images/4003253/702807833/9.dcm
2,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L1/L2,322.831858,227.964602,Sagittal T2/STIR,0,F:/RSNA/rsna-2024-lumbar-spine-degenerative-classification/train_images/4003253/702807833/8.dcm
3,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L1/L2,322.831858,227.964602,Sagittal T2/STIR,0,F:/RSNA/rsna-2024-lumbar-spine-degenerative-classification/train_images/4003253/702807833/7.dcm
4,4003253,spinal_canal_stenosis_l1_l2,Normal/Mild,702807833,8,Spinal Canal Stenosis,L1/L2,322.831858,227.964602,Sagittal T2/STIR,0,F:/RSNA/rsna-2024-lumbar-spine-degenerative-classification/train_images/4003253/702807833/6.dcm


In [21]:
classes = np.unique(spinal_merged['severity_enc'])
class_wgt = compute_class_weight(class_weight='balanced', classes=classes, y=spinal_merged['severity_enc'])
class_wgt_dict = {c:w for c, w in zip(classes, class_wgt)}

class_wgt_dict

{0: 0.3817135743996114, 1: 4.330864622803996, 2: 6.696404793608522}

In [23]:
# 1. Función para procesar archivos DICOM
def process_dicom(dcm_path, image_size=(224, 224)):
    dcm_path_str = dcm_path.numpy().decode("utf-8")
    dicom = pydicom.dcmread(dcm_path_str)
    pixel_array = dicom.pixel_array
    # Normalizar y redimensionar la imagen
    pixel_array = pixel_array / np.max(pixel_array)
    resized_image = tf.image.resize(pixel_array[..., np.newaxis], image_size)
    return tf.cast(resized_image, tf.float32)

# 2. Función para cargar y procesar imágenes
def load_and_preprocess(path, label):
    image = tf.py_function(func=process_dicom, inp=[path], Tout=tf.float32)
    image.set_shape((224, 224, 1))  # Fijo el shape pq me da error en el entrenamiento (wrong shape (224,224))
    return image, label

In [25]:
# 3. Crear tensor dataset
dataset = tf.data.Dataset.from_tensor_slices((spinal_merged["dcm_path"].values, spinal_merged["severity_enc"].values))
dataset = dataset.map(load_and_preprocess)
dataset_size = len(dataset)

In [13]:
# 5. Dividir dataset en entrenamiento y validación
train_size = int(0.8 * dataset_size)  # 80% para entrenamiento
val_size = dataset_size - train_size  # 20% para validación


train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

# Aplicar batching y prefetch después de dividir
train_dataset = train_dataset.batch(64).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(64).prefetch(tf.data.AUTOTUNE)

In [14]:
# 6. Verificar datasets
print(f"Tamaño del dataset de entrenamiento: {len(train_dataset)}")
print(f"Tamaño del dataset de validación: {len(val_dataset)}")

Tamaño del dataset de entrenamiento: 10373
Tamaño del dataset de validación: 2594


In [15]:
n_classes = 3
train_dataset = train_dataset.map(lambda x, y: (x, to_categorical(y, num_classes=n_classes)))
val_dataset = val_dataset.map(lambda x, y: (x, to_categorical(y, num_classes=n_classes)))

In [16]:
# # 7. Crear modelo CNN
model = models.Sequential([
    layers.Input(shape=(224, 224, 1)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(3, activation='softmax')  # Tres clases
])

# # 8. Compilar el modelo
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), 
                                                                          tf.keras.metrics.AUC()])

In [17]:
# 9. Entrenar el modelo
history = model.fit(train_dataset, validation_data=val_dataset, epochs=2, class_weight=class_wgt_dict)

Epoch 1/2
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27409s[0m 3s/step - accuracy: 0.1549 - auc: 0.3653 - loss: 1.7706 - precision: 0.7444 - recall: 0.0432 - val_accuracy: 0.8742 - val_auc: 0.8985 - val_loss: 1.0661 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/2
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27057s[0m 3s/step - accuracy: 0.2125 - auc: 0.5061 - loss: 1.1361 - precision: 0.1617 - recall: 6.6458e-04 - val_accuracy: 0.8742 - val_auc: 0.8978 - val_loss: 1.0648 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00


In [18]:
model.save('../models/my_model.keras')

In [42]:
from tensorflow.keras.models import load_model

model = load_model('../models/my_model.keras')

history2 = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5,                     # Train up to epoch 5
    initial_epoch=2,              # Start from epoch 2
    class_weight=class_wgt_dict   # Class weights
)

model.save('../models/my_model_epocs5.keras')

Epoch 3/5
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27313s[0m 3s/step - accuracy: 0.2292 - auc: 0.5135 - loss: 1.1300 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_accuracy: 0.8742 - val_auc: 0.8978 - val_loss: 1.0648 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 4/5
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28333s[0m 3s/step - accuracy: 0.2295 - auc: 0.5139 - loss: 1.1300 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_accuracy: 0.8742 - val_auc: 0.8978 - val_loss: 1.0648 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 5/5
[1m10373/10373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28121s[0m 3s/step - accuracy: 0.2295 - auc: 0.5139 - loss: 1.1300 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_accuracy: 0.8742 - val_auc: 0.8978 - val_loss: 1.0648 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
