# Import required libraries

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import os
import polars as pl
#import duckdb as dd
#from tqdm import tqdm
import matplotlib.pyplot as plt
#import cv2
#from pydicom import dcmread
import warnings
#from sklearn.preprocessing import LabelEncoder
import pickle
#import gc
import ctypes
import tensorflow as tf
#tf.compat.v1.disable_eager_execution()
#tf.keras.backend.clear_session()

"""for gpu in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(gpu, True)"""
    
import tensorflow_io as tfio
from tensorflow import keras

#tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [2]:
try: # detect TPUs
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError: # detect GPUs
    strategy = tf.distribute.MirroredStrategy() # for GPU or multi-GPU machines
    #strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
    #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() # for clusters of multi-GPU machines

print("Number of accelerators: ", strategy.num_replicas_in_sync)

Number of accelerators:  2


In [None]:
# strategy = tf.distribute.MirroredStrategy()
print('DEVICES AVAILABLE: {}'.format(strategy.num_replicas_in_sync))

BATCH_SIZE_PER_REPLICA = 48

#We obtain the BATCH_SIZE dividing by the number of devices. 
#BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * 2

# Feature extraction from image files
## Only if not using tensorflow
### kept here as backward compatibility with earlier work

In [None]:
metadata_root_path = '/kaggle/input/spinal-canal-stenosis-metadata'

for file in os.listdir(metadata_root_path):
    labels = []
    features = []
    data = {}
    metadata_file_path = os.path.join(metadata_root_path, file)
    print(metadata_file_path)
    metadata_df = pl.read_csv(metadata_file_path, low_memory=True)
    for j in tqdm(range(metadata_df.shape[0])):
        dcm_image_path = metadata_df.item(j,0)
        dicom_ds = dcmread(dcm_image_path)
        img_array = dicom_ds.pixel_array
        features.append(np.mean(img_array.T, axis=0))
        # features.append(dcmread(dcm_image_path).pixel_array)
        labels.append(metadata_df.item(j,2))

    print("feature list length --> ", len(features))
    print("label list length --> ", len(labels))

    #extracted_training_features = np.array([np.resize(img,(128,128)) for img in features])
    #training_labels = np.array(labels)

    #del labels
    #del features
    #gc.collect()

    extracted_training_features_file_name = "{0}_training_features".format(file.replace('_feature_metadata.csv',''))
    labels_file_name = "{0}_labels".format(file.replace('.csv',''))

    with open(extracted_training_features_file_name, "wb") as file:
        pickle.dump(features, file)
    with open(labels_file_name, "wb") as file:
        pickle.dump(labels, file)

    del labels
    del features
    gc.collect()
    libc = ctypes.CDLL("libc.so.6") # clearing cache 
    libc.malloc_trim(0)

    #data["image_array"] = features
    #data["encoded_severity"] = labels
    #extracted_training_data = pd.DataFrame(data)
    #extracted_training_data.to_csv("{0}.csv.gz".format(file.replace('.csv','')), index=False, compression='gzip')

    print('finished dumping features & labels for {0}'.format(file))

# Review pipeline with one sample image

In [None]:
image_bytes \
= tf.io.read_file('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/100206310/1012284084/1.dcm')

image = tfio.image.decode_dicom_image(image_bytes, scale='auto', dtype=tf.float32)

m, M=tf.math.reduce_min(image), tf.math.reduce_max(image)
image = (tf.image.grayscale_to_rgb(image)-m)/(M-m)
image = tf.image.resize(image, (128,128))

# Preprocessing functions

In [None]:
def read_and_parse_dicom_files(full_file_path):
    tf.config.run_functions_eagerly(True)
    raw_image = tf.io.read_file(full_file_path)
    sp = tf.strings.split(tf.gather(tf.strings.split(full_file_path, 'images/'), 1), '/')
    N = tf.size(sp)
    LEN = tf.strings.length(tf.gather(sp, 0))+tf.strings.length(tf.gather(sp, 2))
    
    # Add missing file metadata to avoid warnnigs flooding
    if   LEN==12: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==13: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==14: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==15: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==16: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==17: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==18: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x98\x00\x00\x00\x02\x00\x01\x00')
    
    #image_bytes = tf.io.read_file(full_file_path)
    #image = tfio.image.decode_dicom_image(image_bytes, scale='auto', dtype=tf.float32)
    image = tfio.image.decode_dicom_image(raw_image, scale='auto', dtype=tf.float32)
    m, M=tf.math.reduce_min(image), tf.math.reduce_max(image)
    image = (tf.image.grayscale_to_rgb(image)-m)/(M-m)
    image = tf.image.resize(image, (128,128))
    return tf.squeeze(image)

In [None]:
def load_dataset(image_path, labels):
    image = read_and_parse_dicom_files(image_path)
    return {"images": tf.cast(image, tf.float32), "labels": tf.cast(labels, tf.float32)}

def dict_to_tuple(inputs):
    return inputs["images"], inputs["labels"]

# Train, Test, Validation & holdout splits
## holdout set to be used for CV

In [2]:
from sklearn.model_selection import train_test_split

metadata_file_path = '/kaggle/input/spinal-canal-stenosis-metadata/spinal_canal_stenosis_l1_l2_feature_metadata.csv'
metadata_df = pl.read_csv(metadata_file_path, low_memory=True)

In [3]:
for_train, holdout = train_test_split(metadata_df, test_size=0.4, random_state=42)

x_train, x_test_val = train_test_split(for_train, test_size=0.3, random_state=42)
x_test, x_valid = train_test_split(x_test_val, test_size=0.2, random_state=42)

print("Training data shape : {0}".format(x_train.shape))
print("Test data shape : {0}".format(x_test.shape))
print("Validation data shape : {0}".format(x_valid.shape))
print("Holdout data shape : {0}".format(holdout.shape))

Training data shape : (61794, 3)
Test data shape : (21187, 3)
Validation data shape : (5297, 3)
Holdout data shape : (58853, 3)


In [None]:
holdout_image_filenames = pl.Series(holdout.select(pl.col('full_img_path'))).to_list()
holdout_image_labels = pl.Series(holdout.select(pl.col('encoded_severity'))).to_list()

train_image_filenames = pl.Series(x_train.select(pl.col('full_img_path'))).to_list()
train_image_labels = pl.Series(x_train.select(pl.col('encoded_severity'))).to_list()

test_image_filenames = pl.Series(x_test.select(pl.col('full_img_path'))).to_list()
test_image_labels = pl.Series(x_test.select(pl.col('encoded_severity'))).to_list()

valid_image_filenames = pl.Series(x_valid.select(pl.col('full_img_path'))).to_list()
valid_image_labels = pl.Series(x_valid.select(pl.col('encoded_severity'))).to_list()

In [None]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_image_filenames, train_image_labels))

test_dataset = tf.data.Dataset.from_tensor_slices((test_image_filenames, test_image_labels))

valid_dataset = tf.data.Dataset.from_tensor_slices((valid_image_filenames, valid_image_labels))

holdout_dataset = tf.data.Dataset.from_tensor_slices((holdout_image_filenames, holdout_image_labels))

In [None]:
train_ds = train_dataset.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.batch(batch_size=BATCH_SIZE, drop_remainder=True)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)

test_ds = test_dataset.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.batch(batch_size=BATCH_SIZE, drop_remainder=True)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)

valid_ds = valid_dataset.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
valid_ds = valid_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
valid_ds = valid_ds.batch(batch_size=BATCH_SIZE, drop_remainder=True)
valid_ds = valid_ds.prefetch(tf.data.AUTOTUNE)

holdout_ds = holdout_dataset.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
holdout_ds = holdout_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
holdout_ds = holdout_ds.batch(batch_size=BATCH_SIZE, drop_remainder=True)
holdout_ds = holdout_ds.prefetch(tf.data.AUTOTUNE)

In [None]:
"""elem = next(iter(train_ds))
elem"""

#### Create a solution dataframe with predictions as 1 or 0 depending on the labels
#### using this solution dataframe and the actual predictions and sample weights (given in the problem statement)
#### build a custom keras metric

In [4]:
pl.Config(fmt_str_lengths=1000)
x_train.head(10)

full_img_path,severity,encoded_severity
str,str,i64
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/581655354/3280289726/17.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/1666601651/108284302/27.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/2944629826/1953111568/13.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/296314829/1382866353/5.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/3039901962/171545159/15.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/594735110/408469134/21.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/1995123254/2154684591/15.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/2238966046/2610869166/9.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/801319011/1184504151/12.dcm""","""Normal/Mild""",1
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/2741835855/2526981653/18.dcm""","""Normal/Mild""",1


In [19]:
def get_study_id(full_image_path):
    return full_image_path.split('/')[-3]

x_train_studies = x_train.with_columns(
    pl.col("full_img_path")
    .map_elements(get_study_id, return_dtype=pl.String)
    .alias("study_id")
)
x_train_studies.head(10)

full_img_path,severity,encoded_severity,study_id
str,str,i64,str
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/581655354/3280289726/17.dcm""","""Normal/Mild""",1,"""581655354"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/1666601651/108284302/27.dcm""","""Normal/Mild""",1,"""1666601651"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/2944629826/1953111568/13.dcm""","""Normal/Mild""",1,"""2944629826"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/296314829/1382866353/5.dcm""","""Normal/Mild""",1,"""296314829"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/3039901962/171545159/15.dcm""","""Normal/Mild""",1,"""3039901962"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/594735110/408469134/21.dcm""","""Normal/Mild""",1,"""594735110"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/1995123254/2154684591/15.dcm""","""Normal/Mild""",1,"""1995123254"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/2238966046/2610869166/9.dcm""","""Normal/Mild""",1,"""2238966046"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/801319011/1184504151/12.dcm""","""Normal/Mild""",1,"""801319011"""
"""/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images/2741835855/2526981653/18.dcm""","""Normal/Mild""",1,"""2741835855"""


In [5]:
x_train.select([pl.col('severity'), pl.col('encoded_severity')]).unique()

severity,encoded_severity
str,i64
"""Severe""",2
"""Normal/Mild""",1
"""Moderate""",0


In [35]:
x_train.select(pl.col('encoded_severity')).unique().to_series().to_list()

[0, 1, 2]

### Code to create custom metric aligned with Competition evaluation criteria

In [3]:
train_studies_metadata_file_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train.csv'
train_studies_metadata_df = pl.read_csv(train_studies_metadata_file_path, low_memory=True)
print("before dropping nulls :", train_studies_metadata_df.shape)
train_studies_metadata_df = train_studies_metadata_df.drop_nulls()
print("after dropping nulls :", train_studies_metadata_df.shape)

studies_full = train_studies_metadata_df.select(pl.col('study_id')).unique().to_series().to_list()
print(len(studies_full))
#train_studies_metadata_df.select([pl.col('study_id'),pl.col('spinal_canal_stenosis_l1_l2')]).head(10)

before dropping nulls : (1975, 26)
after dropping nulls : (1790, 26)
1790


In [4]:
config = {}
config['root_file_path'] = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images'
config['start'] = 10
config['end'] = 110
#studies = os.listdir(config['root_file_path'])[config['start']:config['end']]
studies = studies_full[config['start']:config['end']]
print(len(studies))

test_dict = {}
for study in studies:
    image_files = []
    for dirname, _, filenames in os.walk(config['root_file_path']+'/'+str(study)):
        for filename in filenames:
            test_dict[os.path.join(dirname, filename).split('/')[-3]] = image_files
            image_files.append(os.path.join(dirname, filename))
            
print(len(test_dict))

100
100


In [21]:
def label_encoder(label):
    if label == 'Normal/Mild':
        return 2
    elif label == 'Severe':
        return 3
    else:
        return 1
    
def attach_weights(label):
    if label == 'Normal/Mild':
        return 1
    elif label == 'Severe':
        return 4
    else:
        return 2

train_studies_metadata_df_up = train_studies_metadata_df.unpivot(index="study_id")
train_studies_metadata_df_up.columns = ['study_id', 'condition', 'severity']

train_studies_metadata_df_up = train_studies_metadata_df_up.with_columns([
    pl.col("severity").map_elements(label_encoder, return_dtype=pl.Int32).alias("encoded_severity"),
    pl.col("severity").map_elements(attach_weights, return_dtype=pl.Int32).alias("sample_weight"),
    (pl.col("study_id").cast(pl.String)+'_'+pl.col("condition")).alias("row_id")
])

print(train_studies_metadata_df_up.shape)
train_studies_metadata_df_up.head(10)

(44750, 6)


study_id,condition,severity,encoded_severity,sample_weight,row_id
i64,str,str,i32,i32,str
4003253,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""4003253_spinal_canal_stenosis_…"
4646740,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""4646740_spinal_canal_stenosis_…"
7143189,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""7143189_spinal_canal_stenosis_…"
8785691,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""8785691_spinal_canal_stenosis_…"
10728036,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""10728036_spinal_canal_stenosis…"
11340341,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""11340341_spinal_canal_stenosis…"
11943292,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""11943292_spinal_canal_stenosis…"
13317052,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""13317052_spinal_canal_stenosis…"
22191399,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""22191399_spinal_canal_stenosis…"
26342422,"""spinal_canal_stenosis_l1_l2""","""Normal/Mild""",2,1,"""26342422_spinal_canal_stenosis…"


In [29]:
temp = train_studies_metadata_df_up\
.select([pl.col('study_id'), pl.col('row_id'), pl.col('encoded_severity'), pl.col('severity'), pl.col('sample_weight')])
temp.head(10)

study_id,row_id,encoded_severity,severity,sample_weight
i64,str,i32,str,i32
4003253,"""4003253_spinal_canal_stenosis_…",2,"""Normal/Mild""",1
4646740,"""4646740_spinal_canal_stenosis_…",2,"""Normal/Mild""",1
7143189,"""7143189_spinal_canal_stenosis_…",2,"""Normal/Mild""",1
8785691,"""8785691_spinal_canal_stenosis_…",2,"""Normal/Mild""",1
10728036,"""10728036_spinal_canal_stenosis…",2,"""Normal/Mild""",1
11340341,"""11340341_spinal_canal_stenosis…",2,"""Normal/Mild""",1
11943292,"""11943292_spinal_canal_stenosis…",2,"""Normal/Mild""",1
13317052,"""13317052_spinal_canal_stenosis…",2,"""Normal/Mild""",1
22191399,"""22191399_spinal_canal_stenosis…",2,"""Normal/Mild""",1
26342422,"""26342422_spinal_canal_stenosis…",2,"""Normal/Mild""",1


In [30]:
train_studies_metadata_df_final = temp.pivot("severity", index=["study_id","row_id"], values="encoded_severity")
#.select([pl.col('study_id'), pl.col('row_id'), pl.col('encoded_severity'), pl.col('severity')])\
#.pivot("severity", index=["study_id","row_id"], values="encoded_severity")

train_studies_metadata_df_final.columns = ['study_id', 'row_id', 'normal_mild', 'moderate', 'severe']

train_studies_metadata_df_final.head(10)

study_id,row_id,normal_mild,moderate,severe
i64,str,i32,i32,i32
4003253,"""4003253_spinal_canal_stenosis_…",2,,
4646740,"""4646740_spinal_canal_stenosis_…",2,,
7143189,"""7143189_spinal_canal_stenosis_…",2,,
8785691,"""8785691_spinal_canal_stenosis_…",2,,
10728036,"""10728036_spinal_canal_stenosis…",2,,
11340341,"""11340341_spinal_canal_stenosis…",2,,
11943292,"""11943292_spinal_canal_stenosis…",2,,
13317052,"""13317052_spinal_canal_stenosis…",2,,
22191399,"""22191399_spinal_canal_stenosis…",2,,
26342422,"""26342422_spinal_canal_stenosis…",2,,


In [35]:
train_studies_metadata_df_final_2 = train_studies_metadata_df_final.join(temp, on=["study_id","row_id"], how="inner")
#train_studies_metadata_df_final.columns = ['study_id', 'row_id', 'normal_mild', 'moderate', 'severe', 'sample_weight']
train_studies_metadata_df_final_2.head(10)

study_id,row_id,normal_mild,moderate,severe,encoded_severity,severity,sample_weight
i64,str,i32,i32,i32,i32,str,i32
4003253,"""4003253_spinal_canal_stenosis_…",2,,,2,"""Normal/Mild""",1
4646740,"""4646740_spinal_canal_stenosis_…",2,,,2,"""Normal/Mild""",1
7143189,"""7143189_spinal_canal_stenosis_…",2,,,2,"""Normal/Mild""",1
8785691,"""8785691_spinal_canal_stenosis_…",2,,,2,"""Normal/Mild""",1
10728036,"""10728036_spinal_canal_stenosis…",2,,,2,"""Normal/Mild""",1
11340341,"""11340341_spinal_canal_stenosis…",2,,,2,"""Normal/Mild""",1
11943292,"""11943292_spinal_canal_stenosis…",2,,,2,"""Normal/Mild""",1
13317052,"""13317052_spinal_canal_stenosis…",2,,,2,"""Normal/Mild""",1
22191399,"""22191399_spinal_canal_stenosis…",2,,,2,"""Normal/Mild""",1
26342422,"""26342422_spinal_canal_stenosis…",2,,,2,"""Normal/Mild""",1


In [36]:
train_studies_metadata_df_final_2 = train_studies_metadata_df_final_2.drop(['encoded_severity', 'severity'])

In [37]:
train_studies_metadata_df_final_2 = train_studies_metadata_df_final_2.with_columns([
    pl.when(pl.col('normal_mild').is_not_null()).then(1).otherwise(0).alias('true_normal_mild'),
    pl.when(pl.col('moderate').is_not_null()).then(1).otherwise(0).alias('true_moderate'),
    pl.when(pl.col('severe').is_not_null()).then(1).otherwise(0).alias('true_severe'),
])

train_studies_metadata_df_final_2.head(10)

study_id,row_id,normal_mild,moderate,severe,sample_weight,true_normal_mild,true_moderate,true_severe
i64,str,i32,i32,i32,i32,i32,i32,i32
4003253,"""4003253_spinal_canal_stenosis_…",2,,,1,1,0,0
4646740,"""4646740_spinal_canal_stenosis_…",2,,,1,1,0,0
7143189,"""7143189_spinal_canal_stenosis_…",2,,,1,1,0,0
8785691,"""8785691_spinal_canal_stenosis_…",2,,,1,1,0,0
10728036,"""10728036_spinal_canal_stenosis…",2,,,1,1,0,0
11340341,"""11340341_spinal_canal_stenosis…",2,,,1,1,0,0
11943292,"""11943292_spinal_canal_stenosis…",2,,,1,1,0,0
13317052,"""13317052_spinal_canal_stenosis…",2,,,1,1,0,0
22191399,"""22191399_spinal_canal_stenosis…",2,,,1,1,0,0
26342422,"""26342422_spinal_canal_stenosis…",2,,,1,1,0,0


In [38]:
train_studies_metadata_df_final_2 = train_studies_metadata_df_final_2.drop(['normal_mild', 'moderate', 'severe'])
train_studies_metadata_df_final_2.columns = ['study_id', 'row_id', 'sample_weight', 'normal_mild', 'moderate', 'severe']
train_studies_metadata_df_final_2.head(10)

study_id,row_id,sample_weight,normal_mild,moderate,severe
i64,str,i32,i32,i32,i32
4003253,"""4003253_spinal_canal_stenosis_…",1,1,0,0
4646740,"""4646740_spinal_canal_stenosis_…",1,1,0,0
7143189,"""7143189_spinal_canal_stenosis_…",1,1,0,0
8785691,"""8785691_spinal_canal_stenosis_…",1,1,0,0
10728036,"""10728036_spinal_canal_stenosis…",1,1,0,0
11340341,"""11340341_spinal_canal_stenosis…",1,1,0,0
11943292,"""11943292_spinal_canal_stenosis…",1,1,0,0
13317052,"""13317052_spinal_canal_stenosis…",1,1,0,0
22191399,"""22191399_spinal_canal_stenosis…",1,1,0,0
26342422,"""26342422_spinal_canal_stenosis…",1,1,0,0


In [39]:
train_studies_metadata_df_final_2.shape

(44750, 6)

In [55]:
solutions = train_studies_metadata_df_final_2.filter(pl.col('study_id').is_in(studies))
solutions = solutions.drop(['study_id'])
print(solutions.shape)
#solutions.head(10)

(2500, 5)


### Now generate the predictions

In [41]:
model = keras.models.\
load_model("/kaggle/input/keras_base_scs_l1_l2/tensorflow2/default/1/keras_base_spinal_canal_stenosis_l1_l2.h5")

In [42]:
def read_and_parse_dicom_files_for_inf(full_file_path):
    tf.config.run_functions_eagerly(True)
    raw_image = tf.io.read_file(full_file_path)
    sp = tf.strings.split(tf.gather(tf.strings.split(full_file_path, 'images/'), 1), '/')
    N = tf.size(sp)
    LEN = tf.strings.length(tf.gather(sp, 0))+tf.strings.length(tf.gather(sp, 2))
    
    # Add missing file metadata to avoid warnnigs flooding
    if   LEN==12: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==13: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==14: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==15: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==16: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==17: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==18: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x98\x00\x00\x00\x02\x00\x01\x00')
    
    #image_bytes = tf.io.read_file(full_file_path)
    #image = tfio.image.decode_dicom_image(image_bytes, scale='auto', dtype=tf.float32)
    image = tfio.image.decode_dicom_image(raw_image, scale='auto', dtype=tf.float32)
    m, M=tf.math.reduce_min(image), tf.math.reduce_max(image)
    image = (tf.image.grayscale_to_rgb(image)-m)/(M-m)
    image = tf.image.resize(image, (128,128))
    return tf.squeeze(image)

In [43]:
vfunc = np.vectorize(read_and_parse_dicom_files_for_inf, otypes=[object])

def get_predictions(key, model_to_use):
    final_feature_list = vfunc(test_dict[key]).tolist()
    final = np.array(final_feature_list)
    return model_to_use.predict(final)

In [44]:
from multiprocessing import cpu_count
n_cores = cpu_count()
print(f'Number of Logical CPU cores: {n_cores}')

Number of Logical CPU cores: 4


In [45]:
#from joblib import Parallel, delayed
from tqdm import tqdm

#y_proba = (Parallel(n_jobs=90)(delayed(get_predictions)(st, model) for st in tqdm(test_dict.keys())))
y_proba = [get_predictions(st, model) for st in tqdm(test_dict.keys())]

  outputs = ufunc(*inputs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 290ms/step


  1%|          | 1/100 [00:06<11:15,  6.83s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 277ms/step


  2%|▏         | 2/100 [00:10<08:23,  5.14s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 286ms/step


  3%|▎         | 3/100 [00:13<06:23,  3.96s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 272ms/step


  4%|▍         | 4/100 [00:18<07:04,  4.42s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 249ms/step


  5%|▌         | 5/100 [00:21<05:56,  3.76s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 283ms/step


  6%|▌         | 6/100 [00:24<05:44,  3.66s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 248ms/step


  7%|▋         | 7/100 [00:27<05:21,  3.45s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 263ms/step


  8%|▊         | 8/100 [00:31<05:27,  3.56s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 251ms/step


  9%|▉         | 9/100 [00:34<05:11,  3.43s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 242ms/step


 10%|█         | 10/100 [00:38<05:25,  3.62s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 288ms/step


 11%|█         | 11/100 [00:43<06:10,  4.16s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 260ms/step


 12%|█▏        | 12/100 [00:49<06:32,  4.46s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 247ms/step


 13%|█▎        | 13/100 [00:51<05:40,  3.92s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 292ms/step


 14%|█▍        | 14/100 [00:55<05:42,  3.99s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 256ms/step


 15%|█▌        | 15/100 [00:59<05:22,  3.79s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 270ms/step


 16%|█▌        | 16/100 [01:03<05:40,  4.05s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 262ms/step


 17%|█▋        | 17/100 [01:06<05:10,  3.74s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 244ms/step


 18%|█▊        | 18/100 [01:09<04:36,  3.37s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 242ms/step


 19%|█▉        | 19/100 [01:14<05:21,  3.97s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 241ms/step


 20%|██        | 20/100 [01:19<05:29,  4.12s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step


 21%|██        | 21/100 [01:21<04:49,  3.66s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step


 22%|██▏       | 22/100 [01:24<04:21,  3.35s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 261ms/step


 23%|██▎       | 23/100 [01:29<04:59,  3.89s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 262ms/step


 24%|██▍       | 24/100 [01:34<05:26,  4.30s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 264ms/step


 25%|██▌       | 25/100 [01:39<05:29,  4.39s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 244ms/step


 26%|██▌       | 26/100 [01:44<05:47,  4.69s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 288ms/step


 27%|██▋       | 27/100 [01:47<04:54,  4.03s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 251ms/step


 28%|██▊       | 28/100 [01:51<04:50,  4.04s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 264ms/step


 29%|██▉       | 29/100 [01:53<04:10,  3.53s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 242ms/step


 30%|███       | 30/100 [01:58<04:32,  3.89s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 267ms/step


 31%|███       | 31/100 [02:04<05:03,  4.40s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 249ms/step


 32%|███▏      | 32/100 [02:08<04:49,  4.26s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 241ms/step


 33%|███▎      | 33/100 [02:10<04:12,  3.76s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 249ms/step


 34%|███▍      | 34/100 [02:13<03:51,  3.51s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 259ms/step


 35%|███▌      | 35/100 [02:16<03:43,  3.43s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 239ms/step


 36%|███▌      | 36/100 [02:20<03:48,  3.57s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 249ms/step


 37%|███▋      | 37/100 [02:25<04:16,  4.07s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 268ms/step


 38%|███▊      | 38/100 [02:29<04:00,  3.89s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 255ms/step


 39%|███▉      | 39/100 [02:33<03:55,  3.87s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 258ms/step


 40%|████      | 40/100 [02:36<03:49,  3.83s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 263ms/step


 41%|████      | 41/100 [02:41<04:02,  4.12s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 239ms/step


 42%|████▏     | 42/100 [02:46<04:09,  4.31s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 283ms/step


 43%|████▎     | 43/100 [02:51<04:15,  4.49s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 261ms/step


 44%|████▍     | 44/100 [02:55<04:06,  4.41s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 263ms/step


 45%|████▌     | 45/100 [02:59<03:47,  4.13s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 240ms/step


 46%|████▌     | 46/100 [03:03<03:44,  4.16s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step


 47%|████▋     | 47/100 [03:05<03:07,  3.53s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 244ms/step


 48%|████▊     | 48/100 [03:09<03:17,  3.80s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 251ms/step


 49%|████▉     | 49/100 [03:13<03:13,  3.79s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step


 50%|█████     | 50/100 [03:16<02:51,  3.42s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 244ms/step


 51%|█████     | 51/100 [03:18<02:32,  3.12s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 238ms/step


 52%|█████▏    | 52/100 [03:23<02:52,  3.59s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 240ms/step


 53%|█████▎    | 53/100 [03:26<02:41,  3.44s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step


 54%|█████▍    | 54/100 [03:29<02:38,  3.46s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 257ms/step


 55%|█████▌    | 55/100 [03:33<02:39,  3.55s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238ms/step


 56%|█████▌    | 56/100 [03:36<02:21,  3.21s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step


 57%|█████▋    | 57/100 [03:39<02:17,  3.21s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 236ms/step


 58%|█████▊    | 58/100 [03:46<03:04,  4.40s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 234ms/step


 59%|█████▉    | 59/100 [03:50<02:57,  4.34s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 294ms/step


 60%|██████    | 60/100 [03:53<02:40,  4.00s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 235ms/step


 61%|██████    | 61/100 [03:59<02:53,  4.45s/it]

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 233ms/step


 62%|██████▏   | 62/100 [04:10<04:02,  6.38s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step


 63%|██████▎   | 63/100 [04:12<03:13,  5.23s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 232ms/step


 64%|██████▍   | 64/100 [04:19<03:21,  5.59s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step


 65%|██████▌   | 65/100 [04:22<02:47,  4.79s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 257ms/step


 66%|██████▌   | 66/100 [04:25<02:26,  4.31s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step


 67%|██████▋   | 67/100 [04:27<02:02,  3.72s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step


 68%|██████▊   | 68/100 [04:29<01:44,  3.26s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 236ms/step


 69%|██████▉   | 69/100 [04:33<01:45,  3.41s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step


 70%|███████   | 70/100 [04:35<01:30,  3.03s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 243ms/step


 71%|███████   | 71/100 [04:38<01:29,  3.08s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 239ms/step


 72%|███████▏  | 72/100 [04:41<01:22,  2.96s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 235ms/step


 73%|███████▎  | 73/100 [04:46<01:37,  3.59s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 232ms/step


 74%|███████▍  | 74/100 [04:50<01:33,  3.61s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step


 75%|███████▌  | 75/100 [04:53<01:25,  3.41s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 254ms/step


 76%|███████▌  | 76/100 [04:57<01:25,  3.55s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 246ms/step


 77%|███████▋  | 77/100 [05:04<01:47,  4.66s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step


 78%|███████▊  | 78/100 [05:07<01:30,  4.12s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 259ms/step


 79%|███████▉  | 79/100 [05:09<01:17,  3.70s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 230ms/step


 80%|████████  | 80/100 [05:14<01:19,  3.96s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step


 81%|████████  | 81/100 [05:16<01:04,  3.37s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 238ms/step


 82%|████████▏ | 82/100 [05:20<01:02,  3.45s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 234ms/step


 83%|████████▎ | 83/100 [05:23<00:57,  3.39s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step


 84%|████████▍ | 84/100 [05:25<00:49,  3.12s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 229ms/step


 85%|████████▌ | 85/100 [05:31<00:59,  3.98s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step


 86%|████████▌ | 86/100 [05:34<00:50,  3.58s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step


 87%|████████▋ | 87/100 [05:37<00:43,  3.36s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 249ms/step


 88%|████████▊ | 88/100 [05:41<00:44,  3.69s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step


 89%|████████▉ | 89/100 [05:44<00:36,  3.35s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 241ms/step


 90%|█████████ | 90/100 [05:48<00:34,  3.43s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 233ms/step


 91%|█████████ | 91/100 [05:52<00:32,  3.66s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 225ms/step


 92%|█████████▏| 92/100 [05:54<00:25,  3.18s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step


 93%|█████████▎| 93/100 [05:56<00:20,  2.98s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 232ms/step


 94%|█████████▍| 94/100 [06:01<00:20,  3.40s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step


 95%|█████████▌| 95/100 [06:04<00:16,  3.25s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 234ms/step


 96%|█████████▌| 96/100 [06:07<00:12,  3.24s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 233ms/step


 97%|█████████▋| 97/100 [06:13<00:11,  3.99s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 231ms/step


 98%|█████████▊| 98/100 [06:17<00:08,  4.12s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step


 99%|█████████▉| 99/100 [06:20<00:03,  3.79s/it]

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 241ms/step


100%|██████████| 100/100 [06:33<00:00,  3.94s/it]


In [57]:
rows = {}

for i in range(len(y_proba)):
    rows[list(test_dict.keys())[i]+'_spinal_canal_stenosis_l1_l2'] = np.mean(y_proba[i], axis=0)
    
submission = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/sample_submission.csv')
submission['row_id'] = 'samples'

conditions = ['spinal_canal_stenosis', 'neural_foraminal_narrowing', 'subarticular_stenosis']
sides = ['left', 'right']
vertebrae_levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']
severity_levels = ['normal_mild', 'moderate', 'severe']

for c in conditions:
    for v in vertebrae_levels:
        if c != 'spinal_canal_stenosis':
            for s in sides:
                for st in test_dict.keys():
                    rows[st+'_'+s+'_'+c+'_'+v] = np.array([0.333333, 0.333333, 0.333333])
        else:
            for st in test_dict.keys():
                if c+'_'+v == 'spinal_canal_stenosis_l1_l2':
                    pass
                else:
                    rows[st+'_'+c+'_'+v] = np.array([0.333333, 0.333333, 0.333333])
                    
for row_id, feature in tqdm(rows.items()):
    feature_set_reshaped = feature.reshape(1, -1)
    predictions = np.ascontiguousarray(feature_set_reshaped)
    df = pd.DataFrame(predictions, columns=severity_levels)
    df.insert(loc=0, column='row_id', value=row_id)
    submission = pd.concat([submission,df]).reset_index(drop=True)

i = submission[(submission.row_id == 'samples')].index
submission = submission.drop(i).reset_index(drop=True)

100%|██████████| 2500/2500 [00:01<00:00, 1268.36it/s]


In [58]:
solutions_pd = solutions.to_pandas()
print(submission.shape)
print(solutions_pd.shape)

(2500, 4)
(2500, 5)


In [48]:
def get_condition(full_location: str) -> str:
    # Given an input like spinal_canal_stenosis_l1_l2 extracts 'spinal'
    for injury_condition in ['spinal', 'foraminal', 'subarticular']:
        if injury_condition in full_location:
            return injury_condition
    raise ValueError(f'condition not found in {full_location}')

In [62]:
target_levels = ['normal_mild', 'moderate', 'severe']

if not pd.api.types.is_numeric_dtype(submission[target_levels].values):
        raise ParticipantVisibleError('All submission values must be numeric')

if not np.isfinite(submission[target_levels].values).all():
    raise ParticipantVisibleError('All submission values must be finite')

if solutions_pd[target_levels].min().min() < 0:
    raise ParticipantVisibleError('All labels must be at least zero')
if submission[target_levels].min().min() < 0:
    raise ParticipantVisibleError('All predictions must be at least zero')

In [63]:
solutions_pd['study_id'] = solutions_pd['row_id'].apply(lambda x: x.split('_')[0])
solutions_pd['location'] = solutions_pd['row_id'].apply(lambda x: '_'.join(x.split('_')[1:]))
solutions_pd['condition'] = solutions_pd['row_id'].apply(get_condition)

In [64]:
row_id_column_name = 'row_id'

del solutions_pd[row_id_column_name]
del submission[row_id_column_name]
assert sorted(submission.columns) == sorted(target_levels)

submission['study_id'] = solutions_pd['study_id']
submission['location'] = solutions_pd['location']
submission['condition'] = solutions_pd['condition']

In [79]:
from sklearn.metrics import log_loss

condition_losses = []
condition_weights = []
for condition in ['spinal', 'foraminal', 'subarticular']:
    condition_indices = solutions_pd.loc[solutions_pd['condition'] == condition].index.values
    condition_loss = log_loss(
        y_true=solutions_pd.loc[condition_indices, target_levels].values,
        y_pred=submission.loc[condition_indices, target_levels].values,
        sample_weight=solutions_pd.loc[condition_indices, 'sample_weight'].values
    )
    condition_losses.append(condition_loss)
    condition_weights.append(1)

In [80]:
condition_losses

[1.3672705151313351, 1.0986122886681098, 1.0986122886681098]

In [81]:
any_severe_spinal_labels = pd.Series(solutions_pd.loc[solutions_pd['condition'] == 'spinal'].groupby('study_id')['severe'].max())
any_severe_spinal_weights = pd.Series(solutions_pd.loc[solutions_pd['condition'] == 'spinal'].groupby('study_id')['sample_weight'].max())
any_severe_spinal_predictions = pd.Series(submission.loc[submission['condition'] == 'spinal'].groupby('study_id')['severe'].max())

In [82]:
any_severe_scalar = 1.0

any_severe_spinal_loss = log_loss(
    y_true=any_severe_spinal_labels,
    y_pred=any_severe_spinal_predictions,
    sample_weight=any_severe_spinal_weights
)
condition_losses.append(any_severe_spinal_loss)
condition_weights.append(any_severe_scalar)

#print("final score during training : ", np.average(condition_losses, weights=condition_weights))

In [83]:
condition_losses

[1.3672705151313351,
 1.0986122886681098,
 1.0986122886681098,
 0.6480666463044015]

In [84]:
condition_weights

[1, 1, 1, 1.0]

In [85]:
print("final score during training : ", np.average(condition_losses, weights=condition_weights))

final score during training :  1.053140434692989


### Training

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers

with strategy.scope():
    
    rsna_input = layers.Input(shape=(128,128,3), name="rsna_input")
    
    conv_base = EfficientNetB0(include_top=False, weights="imagenet", input_tensor=rsna_input)
    conv_base.trainable = False
    
    x = layers.GlobalAveragePooling2D(name="avg_pool")(conv_base.output)
    x = layers.BatchNormalization()(x)
    
    hidden_layer1 = layers.Dense(200, activation="relu", kernel_initializer=keras.initializers.LecunNormal(seed=None))(x)
    hidden_layer2 = layers.Dense(100, activation="selu")(hidden_layer1)
    hidden_layer3 = layers.Dense(50, activation="selu")(hidden_layer2)
    rsna_output = layers.Dense(3, activation="softmax")(hidden_layer3)
    #model = tf.keras.models.Model(inputs=rsna_input, outputs=rsna_output)
    model = tf.keras.Model(rsna_input, rsna_output)
    
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("keras_effnet_spinal_canal_stenosis_l1_l2.keras")
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

    model.compile(loss="sparse_categorical_crossentropy", optimizer="adamax", metrics=["accuracy"])

In [None]:
config = model.get_config()
print(config["layers"][0]["config"])

In [None]:
model.summary()

In [None]:
history = model.fit(train_ds, epochs=5, validation_data=valid_ds, callbacks=[checkpoint_cb, early_stopping_cb])

In [None]:
pd.DataFrame(history.history).plot(figsize=(10,6))
plt.grid(True)
plt.gca().set_ylim(0.60,1)
plt.show()

In [None]:
model.save("keras_base_spinal_canal_stenosis_l1_l2.h5")

In [None]:
cropped_image = tf.image.resize_with_crop_or_pad(image, 100, 100)

In [None]:
fig, axes = plt.subplots(1,1, figsize=(5,5))
axes.imshow(np.squeeze(cropped_image.numpy()), cmap='gray')
axes.set_title('image')

In [None]:
model = keras.models.\
load_model("/kaggle/input/keras_base_scs_l1_l2/tensorflow2/default/1/keras_base_spinal_canal_stenosis_l1_l2.h5")

### Remove the squeeze from image file preprocessing

In [None]:
def read_and_parse_dicom_files_for_inf(full_file_path):
    tf.config.run_functions_eagerly(True)
    raw_image = tf.io.read_file(full_file_path)
    sp = tf.strings.split(tf.gather(tf.strings.split(full_file_path, 'images/'), 1), '/')
    N = tf.size(sp)
    LEN = tf.strings.length(tf.gather(sp, 0))+tf.strings.length(tf.gather(sp, 2))
    
    # Add missing file metadata to avoid warnnigs flooding
    if   LEN==12: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==13: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==14: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==15: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==16: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==17: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==18: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x98\x00\x00\x00\x02\x00\x01\x00')
    
    #image_bytes = tf.io.read_file(full_file_path)
    #image = tfio.image.decode_dicom_image(image_bytes, scale='auto', dtype=tf.float32)
    image = tfio.image.decode_dicom_image(raw_image, scale='auto', dtype=tf.float32)
    m, M=tf.math.reduce_min(image), tf.math.reduce_max(image)
    image = (tf.image.grayscale_to_rgb(image)-m)/(M-m)
    image = tf.image.resize(image, (128,128))
    return image

In [None]:
test_image = \
read_and_parse_dicom_files_for_inf('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/44036939/3481971518/1.dcm')

In [None]:
test_image.shape

In [None]:
y_proba = model.predict(test_image)

In [None]:
y_proba[0]

In [None]:
np.array([0.333333, 0.333333, 0.333333])

In [None]:
model.evaluate(test_ds)

In [None]:
severity_levels = ['normal_mild', 'moderate', 'severe']
rows = {'44036939_spinal_canal_stenosis_l1_l2': y_proba[0]}

In [None]:
for key, value in rows.items():
    value_reshaped = value.reshape(1, -1)
    predictions = np.ascontiguousarray(value_reshaped)
    df = pd.DataFrame(predictions, columns=severity_levels)
    df.insert(loc=0, column='row_id', value=key)

In [None]:
df