In [6]:
!pip install pydicom

Collecting pydicom
  Downloading pydicom-2.4.4-py3-none-any.whl.metadata (7.8 kB)
Downloading pydicom-2.4.4-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.4.4


In [14]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import albumentations as A
import glob
import re
import pydicom
import cv2


In [15]:
# Target [height, width] that every DICOM slice is resized to.
IMG_SIZE = [512, 512]
# Depth (slice count) declared in the input shape of the network built below.
IN_CHANS = 10
# Labels predicted per study; every label gets three scores.
N_LABELS = 25
N_CLASSES = N_LABELS * 3
# Root directory of the competition data (test images live underneath it).
rd = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/"

In [16]:
class RSNA24TestDataset(Sequence):
    """Keras ``Sequence`` that serves the DICOM slices of test studies.

    Item ``idx`` is a tuple ``(imgs, study_id)``: ``study_id`` is the slice of
    ``study_ids`` that belongs to the batch, and ``imgs`` holds every resized
    slice of the FIRST study in that slice, stacked along axis 0 with a
    trailing channel axis appended.

    NOTE(review): only the first study of each batch is loaded, so a
    ``batch_size`` other than 1 skips studies — confirm before changing it.
    NOTE(review): the number of slices per item is whatever the study
    contains, while ``get_model`` in this notebook declares a fixed depth of
    ``IN_CHANS`` — confirm the loaded model accepts what is produced here.

    Args:
        df: DataFrame with ``study_id``, ``series_id`` and
            ``series_description`` columns.
        study_ids: Indexable, sliceable collection of study ids to serve.
        batch_size: Number of study ids per batch (default 1).
        phase: Stored on the instance; not read by any method of this class.
        transform: Optional callable invoked as ``transform(image=imgs)["image"]``.
    """

    def __init__(self, df, study_ids, batch_size=1, phase='test', transform=None):
        # Let the Keras base class set up its own state before ours.
        super().__init__()
        self.df = df
        self.study_ids = study_ids
        self.batch_size = batch_size
        self.transform = transform
        self.phase = phase

    def __len__(self):
        """Return the number of batches, rounding up.

        Rounding up keeps a trailing partial batch instead of silently
        dropping the studies it contains.
        """
        n_studies = len(self.study_ids)
        return (n_studies + self.batch_size - 1) // self.batch_size

    def get_img_paths(self, study_id, series_desc):
        """Collect the ``.dcm`` paths of one study for one series description.

        Every series of the study whose ``series_description`` equals
        ``series_desc`` is globbed under ``{rd}/test_images``; the files of
        each series are ordered with ``natural_keys`` (defined elsewhere in
        the notebook) and appended in DataFrame row order.

        Returns:
            list[str]: The matching file paths (empty if nothing matches).
        """
        pdf = self.df[self.df['study_id'] == study_id]
        pdf_ = pdf[pdf['series_description'] == series_desc]
        allimgs = []
        for _, row in pdf_.iterrows():
            pimgs = glob.glob(f'{rd}/test_images/{study_id}/{row["series_id"]}/*.dcm')
            pimgs = sorted(pimgs, key=natural_keys)
            allimgs.extend(pimgs)
        return allimgs

    def read_dcm_ret_arr(self, src_path):
        """Read one DICOM file and return its pixels resized and scaled.

        The pixel array is resized to ``IMG_SIZE`` (cv2 takes the target size
        as ``(width, height)``, hence the swapped indices) and divided by
        ``255.0``.

        NOTE(review): DICOM pixel data is often stored with more than 8 bits,
        in which case dividing by 255 does not map values into [0, 1] —
        confirm this matches the preprocessing used for training.
        """
        dicom_data = pydicom.dcmread(src_path)
        image = dicom_data.pixel_array
        image = cv2.resize(image, (IMG_SIZE[1], IMG_SIZE[0]))
        image = image / 255.0
        return image

    def __getitem__(self, idx):
        """Load all slices of the first study in batch ``idx``.

        Returns:
            tuple: ``(imgs, study_id)`` where ``imgs`` is the stacked slice
            array with a trailing axis of size 1 and ``study_id`` is the
            slice of ``study_ids`` for this batch.

        Raises:
            IndexError: If ``idx`` addresses no study. This also terminates
                plain ``for`` iteration over the dataset.
        """
        study_id = self.study_ids[idx * self.batch_size: (idx + 1) * self.batch_size]
        if len(study_id) == 0:
            raise IndexError(f"batch index {idx} is out of range")
        first_study = study_id[0]

        # Series descriptions present for this study, in first-seen order.
        series_desc = list(self.df[self.df['study_id'] == first_study]["series_description"].unique())
        imgs = []
        for desc in series_desc:
            for img_path in self.get_img_paths(first_study, desc):
                imgs.append(self.read_dcm_ret_arr(img_path))
        imgs = np.array(imgs)
        if self.transform:
            # NOTE(review): the transform receives the whole stacked array,
            # not one slice at a time — confirm it supports that layout.
            imgs = self.transform(image=imgs)["image"]
        # Append the single-channel axis.
        imgs = np.expand_dims(imgs, axis=-1)
        return imgs, study_id


### Model Definition

In [19]:
def get_model():
    """Build the (uncompiled) 3D-CNN classifier.

    The network is five Conv3D -> BatchNormalization -> MaxPooling3D stages
    followed by a flatten, four ReLU dense layers and a softmax layer with
    ``N_CLASSES`` units. The first four stages pool only the two spatial axes;
    the last stage pools all three axes.

    NOTE(review): the final softmax spans all ``N_CLASSES`` outputs jointly,
    while the post-processing in this notebook reshapes predictions into
    per-label triples and applies another softmax — confirm which
    normalisation is intended.

    Returns:
        A ``Sequential`` model taking input of shape
        ``(IN_CHANS, IMG_SIZE[0], IMG_SIZE[1], 1)``.
    """
    # (filters, pooling window == pooling stride) for each convolutional stage.
    conv_stages = [
        (32, (1, 2, 2)),
        (64, (1, 2, 2)),
        (128, (1, 2, 2)),
        (256, (1, 2, 2)),
        (512, (2, 2, 2)),
    ]

    net = models.Sequential()
    for stage_idx, (n_filters, pool) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = (
            {'input_shape': (IN_CHANS, IMG_SIZE[0], IMG_SIZE[1], 1)}
            if stage_idx == 0
            else {}
        )
        net.add(layers.Conv3D(n_filters, kernel_size=(3, 3, 3), strides=1,
                              padding='same', activation='relu',
                              **first_layer_kwargs))
        net.add(layers.BatchNormalization())
        net.add(layers.MaxPooling3D(pool_size=pool, strides=pool))

    net.add(layers.Flatten())
    for n_units in (256, 256, 256, 128):
        net.add(layers.Dense(n_units, activation='relu'))
    net.add(layers.Dense(N_CLASSES, activation='softmax'))

    return net

# Build a fresh network and attach optimizer, loss and metric.
model = get_model()
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Display the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_8 (Conv3D)           (None, 10, 512, 512, 32   896       
                             )                                   
                                                                 
 batch_normalization_5 (Bat  (None, 10, 512, 512, 32   128       
 chNormalization)            )                                   
                                                                 
 max_pooling3d_7 (MaxPoolin  (None, 10, 256, 256, 32   0         
 g3D)                        )                                   
                                                                 
 conv3d_9 (Conv3D)           (None, 10, 256, 256, 64   55360     
                             )                                   
                                                                 
 batch_normalization_6 (Bat  (None, 10, 256, 256, 64  

### Loading weights and creating predictions

In [20]:
# TODO: point this at the trained model file — the value below is a
# placeholder, and load_model raises OSError while it does not exist.
MODEL_PATH = '/path/to/your/tensorflow_model.h5'
# Replaces the freshly built `model` with the trained one loaded from disk.
model = load_model(MODEL_PATH)

def create_predictions(model, test_dataset):
    """Run ``model.predict`` once per dataset item and collect the results.

    Args:
        model: Object exposing ``predict(batch)``.
        test_dataset: Iterable of ``(inputs, study)`` pairs; ``study`` is not
            used here, so results are matched to studies by position only.

    Returns:
        list: One ``model.predict`` result per item, in iteration order.
    """
    return [
        # A leading axis of size 1 is prepended to act as the batch dimension.
        model.predict(np.expand_dims(sample, axis=0))
        for sample, _study in test_dataset
    ]

# Wrap the test studies with the dataset defaults (batch_size=1, no transform),
# then run the model over every item.
test_dataset = RSNA24TestDataset(df=df, study_ids=study_ids)
predictions = create_predictions(model=model, test_dataset=test_dataset)


OSError: No file or directory found at /path/to/your/tensorflow_model.h5

### Processing the predictions

In [None]:
# Reshape each raw prediction to one row per label with three scores per row,
# then normalise every row with a softmax.
# NOTE(review): get_model() in this notebook already ends in a softmax over
# all N_CLASSES outputs; if the loaded model does too, this second softmax
# flattens the probabilities — confirm the loaded model's output.
output_pred = [
    tf.nn.softmax(tf.reshape(prediction, (N_LABELS, 3)), axis=1)
    for prediction in predictions
]

# Swap the first two score columns.
# NOTE(review): presumably this maps the model's class order onto the
# submission's column order — confirm against the training labels.
predictions_f = [probs.numpy()[:, [1, 0, 2]] for probs in output_pred]

# One row id per (study, condition) pair, in the same order as the predictions.
submission_rows = [
    str(study) + "_" + condition.lower().replace("/", "_")
    for study in study_ids
    for condition in submission_columns
]
submission_df = pd.DataFrame({LABELS[0]: submission_rows})

# Stack all studies once instead of growing a DataFrame inside a loop
# (repeated pd.concat is quadratic and starts from an empty object-dtype frame).
if predictions_f:
    stacked_probs = np.concatenate(predictions_f, axis=0)
else:
    stacked_probs = np.empty((0, 3))
preds_df = pd.DataFrame(stacked_probs, columns=LABELS[1:4])

# A length mismatch would otherwise be padded with NaN by the concat below.
if len(submission_df) != len(preds_df):
    raise ValueError(
        f"row ids ({len(submission_df)}) and prediction rows ({len(preds_df)}) differ"
    )

final_submission = pd.concat([submission_df, preds_df], axis=1)
final_submission = final_submission.sort_values(by="row_id")
final_submission.to_csv("submission.csv", index=False)
pd.read_csv('submission.csv')
