In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import os
import polars as pl
# import duckdb as dd
from tqdm import tqdm
from itertools import product
"""import matplotlib.pyplot as plt
import cv2
from pydicom import dcmread
import warnings
from sklearn.preprocessing import LabelEncoder
import pickle
import gc
import ctypes"""
# from sklearn.model_selection import train_test_split
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import tensorflow_io as tfio
from tensorflow import keras
from tensorflow.python.keras import backend as K
from joblib import Parallel, delayed

In [None]:
"""try: # detect TPUs
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError: # detect GPUs
    strategy = tf.distribute.MirroredStrategy() # for GPU or multi-GPU machines
    
print("Number of accelerators: ", strategy.num_replicas_in_sync)"""

In [2]:
def label_encoder(label):
    if label == 'Normal/Mild':
        return 2
    elif label == 'Severe':
        return 3
    else:
        return 1
    
def attach_weights(label):
    if label == 'Normal/Mild':
        return 1
    elif label == 'Severe':
        return 4
    else:
        return 2
    
def get_condition(full_location: str) -> str:
    # Given an input like spinal_canal_stenosis_l1_l2 extracts 'spinal'
    for injury_condition in ['spinal', 'foraminal', 'subarticular']:
        if injury_condition in full_location:
            return injury_condition
    raise ValueError(f'condition not found in {full_location}')

In [3]:
Test = True
config = {}

if Test:
    config['root_file_path'] = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images'
    config['start'] = 10
    config['end'] = 30
    
    train_studies_metadata_file_path = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train.csv'
    train_studies_metadata_df = pl.read_csv(train_studies_metadata_file_path, low_memory=True)
    print("before dropping nulls :", train_studies_metadata_df.shape)
    train_studies_metadata_df = train_studies_metadata_df.drop_nulls()
    print("after dropping nulls :", train_studies_metadata_df.shape)

    studies_full = train_studies_metadata_df.select(pl.col('study_id')).unique().to_series().to_list()
    print("total number of studies : ", len(studies_full))
    
    studies = studies_full[config['start']:config['end']]
    #studies = os.listdir(config['root_file_path'])
    test_dict = {}
else:
    config['root_file_path'] = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/'
    studies = os.listdir(config['root_file_path'])
    test_dict = {}
    
for study in studies:
    image_files = []
    for dirname, _, filenames in os.walk(config['root_file_path']+'/'+str(study)):
        for filename in filenames:
            test_dict[os.path.join(dirname, filename).split('/')[-3]] = image_files
            image_files.append(os.path.join(dirname, filename))
            
print(len(test_dict))

before dropping nulls : (1975, 26)
after dropping nulls : (1790, 26)
total number of studies :  1790
20


In [4]:
def create_solution_df(run_config, run_test_dict):

    print("total number of run_test_dict items : ",len(run_test_dict))
    
    train_studies_metadata_df_up = train_studies_metadata_df.unpivot(index="study_id")
    train_studies_metadata_df_up.columns = ['study_id', 'condition', 'severity']

    train_studies_metadata_df_up = train_studies_metadata_df_up.with_columns([
        pl.col("severity").map_elements(label_encoder, return_dtype=pl.Int32).alias("encoded_severity"),
        pl.col("severity").map_elements(attach_weights, return_dtype=pl.Int32).alias("sample_weight"),
        (pl.col("study_id").cast(pl.String)+'_'+pl.col("condition")).alias("row_id")
    ])

    print("train_studies_metadata_df_up shape : ",train_studies_metadata_df_up.shape)
    
    temp = train_studies_metadata_df_up.select([pl.col('study_id'), pl.col('row_id'), pl.col('encoded_severity'), pl.col('severity'), pl.col('sample_weight')])
    train_studies_metadata_df_final = temp.pivot("severity", index=["study_id","row_id"], values="encoded_severity")
    train_studies_metadata_df_final.columns = ['study_id', 'row_id', 'normal_mild', 'moderate', 'severe']
    
    train_studies_metadata_df_final_2 = train_studies_metadata_df_final.join(temp, on=["study_id","row_id"], how="inner")
    train_studies_metadata_df_final_2 = train_studies_metadata_df_final_2.drop(['encoded_severity', 'severity'])
    train_studies_metadata_df_final_2 = train_studies_metadata_df_final_2.with_columns([
        pl.when(pl.col('normal_mild').is_not_null()).then(1).otherwise(0).alias('true_normal_mild'),
        pl.when(pl.col('moderate').is_not_null()).then(1).otherwise(0).alias('true_moderate'),
        pl.when(pl.col('severe').is_not_null()).then(1).otherwise(0).alias('true_severe'),
    ])
    
    train_studies_metadata_df_final_2 = train_studies_metadata_df_final_2.drop(['normal_mild', 'moderate', 'severe'])
    train_studies_metadata_df_final_2.columns = ['study_id', 'row_id', 'sample_weight', 'normal_mild', 'moderate', 'severe']
    
    solutions = train_studies_metadata_df_final_2.filter(pl.col('study_id').is_in(studies))
    solutions = solutions.drop(['study_id'])
    print("shape of solutions dataframe : ", solutions.shape)
    
    return solutions.to_pandas()

In [5]:
from sklearn.metrics import log_loss

def calculate_final_score(solution_df, submission_df):
    
    target_levels = ['normal_mild', 'moderate', 'severe']

    if not pd.api.types.is_numeric_dtype(submission_df[target_levels].values):
            raise ParticipantVisibleError('All submission_df values must be numeric')

    if not np.isfinite(submission_df[target_levels].values).all():
        raise ParticipantVisibleError('All submission_df values must be finite')

    if solution_df[target_levels].min().min() < 0:
        raise ParticipantVisibleError('All labels must be at least zero')
    if submission_df[target_levels].min().min() < 0:
        raise ParticipantVisibleError('All predictions must be at least zero')
        
    solution_df['study_id'] = solution_df['row_id'].apply(lambda x: x.split('_')[0])
    solution_df['location'] = solution_df['row_id'].apply(lambda x: '_'.join(x.split('_')[1:]))
    solution_df['condition'] = solution_df['row_id'].apply(get_condition)
    
    row_id_column_name = 'row_id'

    del solution_df[row_id_column_name]
    del submission_df[row_id_column_name]
    assert sorted(submission_df.columns) == sorted(target_levels)

    submission_df['study_id'] = solution_df['study_id']
    submission_df['location'] = solution_df['location']
    submission_df['condition'] = solution_df['condition']
    
    condition_losses = []
    condition_weights = []
    
    for condition in ['spinal', 'foraminal', 'subarticular']:
        condition_indices = solution_df.loc[solution_df['condition'] == condition].index.values
        condition_loss = log_loss(
            y_true=solution_df.loc[condition_indices, target_levels].values,
            y_pred=submission_df.loc[condition_indices, target_levels].values,
            sample_weight=solution_df.loc[condition_indices, 'sample_weight'].values
        )
        condition_losses.append(condition_loss)
        condition_weights.append(1)
        
    any_severe_spinal_labels = pd.Series(solution_df.loc[solution_df['condition'] == 'spinal'].groupby('study_id')['severe'].max())
    any_severe_spinal_weights = pd.Series(solution_df.loc[solution_df['condition'] == 'spinal'].groupby('study_id')['sample_weight'].max())
    any_severe_spinal_predictions = pd.Series(submission_df.loc[submission_df['condition'] == 'spinal'].groupby('study_id')['severe'].max())
    
    any_severe_scalar = 1.0

    any_severe_spinal_loss = log_loss(
        y_true=any_severe_spinal_labels,
        y_pred=any_severe_spinal_predictions,
        sample_weight=any_severe_spinal_weights
    )
    condition_losses.append(any_severe_spinal_loss)
    condition_weights.append(any_severe_scalar)

    print("final score during training : ", np.average(condition_losses, weights=condition_weights))
    
    return np.average(condition_losses, weights=condition_weights)

In [6]:
if Test:
    solution_data = create_solution_df(config, test_dict)

total number of run_test_dict items :  20
train_studies_metadata_df_up shape :  (44750, 6)
shape of solutions dataframe :  (500, 5)


In [7]:
from multiprocessing import cpu_count
n_cores = cpu_count()
print(f'Number of Logical CPU cores: {n_cores}')

Number of Logical CPU cores: 4


In [8]:
model_dict = {}

"""model_dict['right_neural_foraminal_narrowing_l1_l2'] = keras.models.\
load_model("/kaggle/input/keras_base_right_neural_foraminal_narrowing_l1_l2/tensorflow2/default/1/keras_base_right_neural_foraminal_narrowing_l1_l2.h5")

model_dict['right_neural_foraminal_narrowing_l2_l3'] = keras.models.\
load_model("/kaggle/input/keras_base_right_neural_foraminal_narrowing_l2_l3/tensorflow2/default/1/keras_base_right_neural_foraminal_narrowing_l2_l3.h5")

model_dict['right_neural_foraminal_narrowing_l3_l4'] = keras.models.\
load_model("/kaggle/input/keras_base_right_neural_foraminal_narrowing_l3_l4/tensorflow2/default/1/keras_base_right_neural_foraminal_narrowing_l3_l4.h5")

model_dict['right_neural_foraminal_narrowing_l4_l5'] = keras.models.\
load_model("/kaggle/input/keras_base_right_neural_foraminal_narrowing_l4_l5/tensorflow2/default/1/keras_base_right_neural_foraminal_narrowing_l4_l5.h5")

model_dict['right_neural_foraminal_narrowing_l5_s1'] = keras.models.\
load_model("/kaggle/input/keras_base_right_neural_foraminal_narrowing_l5_s1/tensorflow2/default/1/keras_base_right_neural_foraminal_narrowing_l5_s1.h5")

model_dict['spinal_canal_stenosis_l1_l2'] = keras.models.\
load_model("/kaggle/input/keras_base_scs_l1_l2/tensorflow2/default/2/keras_base_spinal_canal_stenosis_l1_l2.h5")

model_dict['spinal_canal_stenosis_l2_l3'] = keras.models.\
load_model("/kaggle/input/keras_base_spinal_canal_stenosis_l2_l3/tensorflow2/default/1/keras_base_spinal_canal_stenosis_l2_l3.h5")
"""

model_dict['left_neural_foraminal_narrowing_l1_l2'] = keras.models.\
load_model("/kaggle/input/keras_base_left_neural_foraminal_narrowing_l1_l2/tensorflow2/default/2/keras_base_left_neural_foraminal_narrowing_l1_l2.h5")

"""model_dict['spinal_canal_stenosis_l3_l4'] = keras.models.\
load_model("/kaggle/input/keras_base_spinal_canal_stenosis_l3_l4/tensorflow2/default/2/keras_base_spinal_canal_stenosis_l3_l4.h5")"""

model_dict['spinal_canal_stenosis_l4_l5'] = keras.models.\
load_model("/kaggle/input/keras_base_spinal_canal_stenosis_l4_l5/tensorflow2/default/2/keras_base_spinal_canal_stenosis_l4_l5.h5")

model_dict['spinal_canal_stenosis_l5_s1'] = keras.models.\
load_model("/kaggle/input/keras_base_spinal_canal_stenosis_l5_s1/tensorflow2/default/2/keras_base_spinal_canal_stenosis_l5_s1.h5")


In [9]:
def read_and_parse_dicom_files_for_inf(full_file_path):
    tf.config.run_functions_eagerly(True)
    raw_image = tf.io.read_file(full_file_path)
    sp = tf.strings.split(tf.gather(tf.strings.split(full_file_path, 'images/'), 1), '/')
    N = tf.size(sp)
    LEN = tf.strings.length(tf.gather(sp, 0))+tf.strings.length(tf.gather(sp, 2))
    
    # Add missing file metadata to avoid warnnigs flooding
    if   LEN==12: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==13: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==14: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==15: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==16: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==17: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==18: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x98\x00\x00\x00\x02\x00\x01\x00')
    
    #image_bytes = tf.io.read_file(full_file_path)
    #image = tfio.image.decode_dicom_image(image_bytes, scale='auto', dtype=tf.float32)
    image = tfio.image.decode_dicom_image(raw_image, scale='auto', dtype=tf.float32)
    m, M=tf.math.reduce_min(image), tf.math.reduce_max(image)
    image = (tf.image.grayscale_to_rgb(image)-m)/(M-m)
    image = tf.image.resize(image, (128,128))
    return tf.squeeze(image)

In [10]:
vfunc = np.vectorize(read_and_parse_dicom_files_for_inf, otypes=[object])

def get_predictions(key, model_to_use):
    final_feature_list = vfunc(test_dict[key]).tolist()
    final = np.array(final_feature_list)
    return model_to_use.predict(final)

## Inference With GPU Support

In [None]:
"""rows = {}
with strategy.scope():
    if Test:
        for key, value in model_dict.items():
            print("running for key :", key)
            y_proba = [get_predictions(st, model_dict[key]) for st in tqdm(test_dict.keys())] ## 27 min with 2 GPUs; not under strategy
            for i in range(len(y_proba)):
                rows[list(test_dict.keys())[i]+'_'+key] = np.mean(y_proba[i], axis=0)
    else:
        #y_proba = [get_predictions(st, model) for st in test_dict.keys()]
        for key, value in model_dict.items():
            y_proba = [get_predictions(st, model_dict[key]) for st in test_dict.keys()] ## 27 min with 2 GPUs; not under strategy
            for i in range(len(y_proba)):
                rows[list(test_dict.keys())[i]+'_'+key] = np.mean(y_proba[i], axis=0)"""

## Inference w/o GPU support using parallel processing

In [11]:
key_combo = product(model_dict.keys(), test_dict.keys())

rows = {}

if Test:
    y_proba = (Parallel(n_jobs=4)(delayed(get_predictions)(tpl[1], model_dict[tpl[0]]) for tpl in tqdm(key_combo)))
    for key, value in model_dict.items():
        for i in range(len(y_proba)):
                rows[list(test_dict.keys())[i%len(test_dict)]+'_'+key] = np.mean(y_proba[i], axis=0)
else:
    y_proba = (Parallel(n_jobs=4)(delayed(get_predictions)(tpl[1], model_dict[tpl[0]]) for tpl in key_combo))
    for key, value in model_dict.items():
        for i in range(len(y_proba)):
                rows[list(test_dict.keys())[i%len(test_dict)]+'_'+key] = np.mean(y_proba[i], axis=0)

  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))
  outputs = ufunc(*inputs)
  outputs = ufunc(*inputs)
4it [00:19, 28.70it/s]

[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m2s[0m 3s/step

  outputs = ufunc(*inputs)
  outputs = ufunc(*inputs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step


8it [00:23,  3.46s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2s/step
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m1s[0m 2s/step

12it [00:37,  3.42s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m1s[0m 1s/step

16it [00:54,  3.77s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m4/7[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m5s[0m 2s/step

20it [01:16,  4.37s/it]

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step
[1m3/4[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m2s[0m 2s/step

24it [01:34,  4.41s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step


  saveable.load_own_variables(weights_store.get(inner_path))


[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m1s[0m 2s/step

  saveable.load_own_variables(weights_store.get(inner_path))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m2s[0m 2s/step

28it [01:48,  4.19s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step


  saveable.load_own_variables(weights_store.get(inner_path))


[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m2s[0m 2s/step

  saveable.load_own_variables(weights_store.get(inner_path))


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step
[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m2s[0m 1s/step

32it [02:05,  4.18s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step


36it [02:22,  4.18s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m5/7[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m3s[0m 2s/step

40it [02:46,  4.74s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2s/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step


44it [02:59,  4.31s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step


48it [03:15,  4.22s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m3/4[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m2s[0m 2s/step

52it [03:30,  4.10s/it]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m1s[0m 2s/step

56it [03:47,  4.10s/it]

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2s/step
[1m4/7[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m6s[0m 2s/step

60it [04:08,  4.14s/it]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step





[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 981ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 758ms/step


## Using different parallelism

In [None]:
"""rows = {}
y_proba = {}

if Test:
    for key, value in model_dict.items():
        y_proba[key] = (Parallel(n_jobs=4)(delayed(get_predictions)(st, model_dict[key]) for st in tqdm(test_dict.keys())))
        for i in range(len(y_proba[key])):
                rows[list(test_dict.keys())[i%len(test_dict)]+'_'+key] = np.mean(y_proba[key][i], axis=0)
else:
    for key, value in model_dict.items():
        y_proba[key] = (Parallel(n_jobs=4)(delayed(get_predictions)(st, model_dict[key]) for st in test_dict.keys()))
        for i in range(len(y_proba[key])):
                rows[list(test_dict.keys())[i%len(test_dict)]+'_'+key] = np.mean(y_proba[key][i], axis=0)"""

In [26]:
submission = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/sample_submission.csv')
submission['row_id'] = 'samples'

### Create a dictionary based on all combinations and whether models are available for them or not

In [27]:
conditions = ['spinal_canal_stenosis', 'neural_foraminal_narrowing', 'subarticular_stenosis']
sides = ['left', 'right']
vertebrae_levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']
# severity_levels = ['normal_mild', 'moderate', 'severe']
severity_levels = ['moderate', 'normal_mild', 'severe']

condn_sides_vrtlvl_combos = product(conditions, sides, vertebrae_levels)

In [28]:
combinations = {}

for i in condn_sides_vrtlvl_combos:
    #print(i)
    if i[0] == 'spinal_canal_stenosis':
        if (i[2] == 'l4_l5') or (i[2] == 'l5_s1'):
            combinations[i[0]+'_'+i[2]] = 'Y'
        else:
            combinations[i[0]+'_'+i[2]] = 'N'
    else:
        if (i[1] == 'left') and (i[0] == 'neural_foraminal_narrowing'):
            if i[2] == 'l1_l2':
                combinations[i[1]+'_'+i[0]+'_'+i[2]] = 'Y'
            else:
                combinations[i[1]+'_'+i[0]+'_'+i[2]] = 'N'
        else:
            combinations[i[1]+'_'+i[0]+'_'+i[2]] = 'N'
            
#print(combinations)

In [29]:
combinations

{'spinal_canal_stenosis_l1_l2': 'N',
 'spinal_canal_stenosis_l2_l3': 'N',
 'spinal_canal_stenosis_l3_l4': 'N',
 'spinal_canal_stenosis_l4_l5': 'Y',
 'spinal_canal_stenosis_l5_s1': 'Y',
 'left_neural_foraminal_narrowing_l1_l2': 'Y',
 'left_neural_foraminal_narrowing_l2_l3': 'N',
 'left_neural_foraminal_narrowing_l3_l4': 'N',
 'left_neural_foraminal_narrowing_l4_l5': 'N',
 'left_neural_foraminal_narrowing_l5_s1': 'N',
 'right_neural_foraminal_narrowing_l1_l2': 'N',
 'right_neural_foraminal_narrowing_l2_l3': 'N',
 'right_neural_foraminal_narrowing_l3_l4': 'N',
 'right_neural_foraminal_narrowing_l4_l5': 'N',
 'right_neural_foraminal_narrowing_l5_s1': 'N',
 'left_subarticular_stenosis_l1_l2': 'N',
 'left_subarticular_stenosis_l2_l3': 'N',
 'left_subarticular_stenosis_l3_l4': 'N',
 'left_subarticular_stenosis_l4_l5': 'N',
 'left_subarticular_stenosis_l5_s1': 'N',
 'right_subarticular_stenosis_l1_l2': 'N',
 'right_subarticular_stenosis_l2_l3': 'N',
 'right_subarticular_stenosis_l3_l4': 'N',
 

In [30]:
for st in test_dict.keys():
    for key, value in combinations.items():
        if value == 'Y':
            pass
        else:
            rows[st+'_'+key] = np.array([0.333333, 0.333333, 0.333333])

In [15]:
rows

{'4290709089_left_neural_foraminal_narrowing_l1_l2': array([0.11795083, 0.8639113 , 0.01813781], dtype=float32),
 '1641631752_left_neural_foraminal_narrowing_l1_l2': array([0.11788788, 0.8637801 , 0.01833196], dtype=float32),
 '3220085946_left_neural_foraminal_narrowing_l1_l2': array([0.1176634 , 0.8638491 , 0.01848754], dtype=float32),
 '2361533111_left_neural_foraminal_narrowing_l1_l2': array([0.11848991, 0.8628298 , 0.01868039], dtype=float32),
 '481397395_left_neural_foraminal_narrowing_l1_l2': array([0.11762601, 0.86404085, 0.01833296], dtype=float32),
 '2434132259_left_neural_foraminal_narrowing_l1_l2': array([0.11831606, 0.86297053, 0.01871359], dtype=float32),
 '3581755700_left_neural_foraminal_narrowing_l1_l2': array([0.11850445, 0.8628451 , 0.0186505 ], dtype=float32),
 '987719637_left_neural_foraminal_narrowing_l1_l2': array([0.11819403, 0.86313504, 0.0186708 ], dtype=float32),
 '2030054462_left_neural_foraminal_narrowing_l1_l2': array([0.11772036, 0.86391985, 0.01835985], d

In [None]:
# weight_dict = {'normal_mild':1, 'moderate':2, 'severe':4}
"""conditions = ['spinal_canal_stenosis', 'neural_foraminal_narrowing', 'subarticular_stenosis']
sides = ['left', 'right']
vertebrae_levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']
severity_levels = ['normal_mild', 'moderate', 'severe']

for c in conditions:
    for v in vertebrae_levels:
        if c != 'spinal_canal_stenosis':
            for s in sides:
                if s+'_'+c != 'right_neural_foraminal_narrowing':
                    for st in test_dict.keys():
                        rows[st+'_'+s+'_'+c+'_'+v] = np.array([0.333333, 0.333333, 0.333333])
                else:
                    pass
        else:
            pass"""

In [31]:
if Test:
    for row_id, feature in tqdm(rows.items()):
        feature_set_reshaped = feature.reshape(1, -1)
        predictions = np.ascontiguousarray(feature_set_reshaped)
        df = pd.DataFrame(predictions, columns=severity_levels)
        df.insert(loc=0, column='row_id', value=row_id)
        submission = pd.concat([submission,df]).reset_index(drop=True)

    i = submission[(submission.row_id == 'samples')].index
    submission = submission.drop(i).reset_index(drop=True)
else:
    for row_id, feature in rows.items():
        feature_set_reshaped = feature.reshape(1, -1)
        predictions = np.ascontiguousarray(feature_set_reshaped)
        df = pd.DataFrame(predictions, columns=severity_levels)
        df.insert(loc=0, column='row_id', value=row_id)
        submission = pd.concat([submission,df]).reset_index(drop=True)

    i = submission[(submission.row_id == 'samples')].index
    submission = submission.drop(i).reset_index(drop=True)

100%|██████████| 500/500 [00:00<00:00, 574.15it/s]


In [19]:
submission.to_csv('submission.csv', index=False)

In [33]:
if Test:
    calculate_final_score(solution_data, submission)
    print(set(solution_data['location'] == submission['location']))

final score during training :  0.9065361555318086
{True}
