In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import os
import polars as pl
# import duckdb as dd
from tqdm import tqdm
import matplotlib.pyplot as plt
import cv2
from pydicom import dcmread
import warnings
from sklearn.preprocessing import LabelEncoder
import pickle
import gc
import ctypes
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow import keras
from tensorflow.python.keras import backend as K
from joblib import Parallel, delayed

In [4]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

In [25]:
for gpu in gpus:
    print(gpu)

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')


In [28]:
import tensorflow as tf
try: # detect TPUs
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError: # detect GPUs
    strategy = tf.distribute.MirroredStrategy(["GPU:0", "GPU:1"]) # for GPU or multi-GPU machines
    #strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
    #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() # for clusters of multi-GPU machines

print("Number of accelerators: ", strategy.num_replicas_in_sync)

Number of accelerators:  2


In [2]:
model = keras.models.\
load_model("/kaggle/input/keras_base_lnfn_l1_l2/keras/default/1/keras_base_left_neural_foraminal_narrowing_l1_l2.h5")

In [3]:
submission = pd.read_csv('/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/sample_submission.csv')
submission['row_id'] = 'samples'

In [5]:
Test = True
config = {}

if Test:
    config['root_file_path'] = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_images'
    config['start'] = 100
    config['end'] = 110
    studies = os.listdir(config['root_file_path'])[config['start']:config['end']]
    test_dict = {}
    image_files = []

    for study in studies:
        for dirname, _, filenames in os.walk(config['root_file_path']+'/'+study):
            for filename in filenames:
                test_dict[os.path.join(dirname, filename).split('/')[-3]] = image_files
                image_files.append(os.path.join(dirname, filename))
else:
    config['root_file_path'] = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/'
    test_dict = {}
    image_files = []

    for dirname, _, filenames in os.walk(config['root_file_path']):
        for filename in filenames:
            test_dict[os.path.join(dirname, filename).split('/')[-3]] = image_files
            image_files.append(os.path.join(dirname, filename))

In [None]:
image_files_list = list(test_dict.keys())

dataset_to_predict_for = tf.data.Dataset.from_tensor_slices(image_files_list)

In [None]:
type(dataset_to_predict_for)

In [None]:
def read_and_parse_dicom_files(full_file_path):
    tf.config.run_functions_eagerly(True)
    raw_image = tf.io.read_file(full_file_path)
    sp = tf.strings.split(tf.gather(tf.strings.split(full_file_path, 'images/'), 1), '/')
    N = tf.size(sp)
    LEN = tf.strings.length(tf.gather(sp, 0))+tf.strings.length(tf.gather(sp, 2))
    
    # Add missing file metadata to avoid warnnigs flooding
    if   LEN==12: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==13: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==14: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==15: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==16: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==17: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==18: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x98\x00\x00\x00\x02\x00\x01\x00')
    
    #image_bytes = tf.io.read_file(full_file_path)
    #image = tfio.image.decode_dicom_image(image_bytes, scale='auto', dtype=tf.float32)
    image = tfio.image.decode_dicom_image(raw_image, scale='auto', dtype=tf.float32)
    #m, M=tf.math.reduce_min(image), tf.math.reduce_max(image)
    #image = (tf.image.grayscale_to_rgb(image)-m)/(M-m)
    image = tf.image.resize(image, (128,128))
    return tf.squeeze(image)

In [None]:
dataset_to_predict_for = dataset_to_predict_for.map(read_and_parse_dicom_files, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
dataset_to_predict_for = dataset_to_predict_for.batch(batch_size=32)
dataset_to_predict_for = dataset_to_predict_for.prefetch(tf.data.AUTOTUNE)

In [None]:
# dataset_to_predict_for.take(1)
elem = next(iter(dataset_to_predict_for))
elem

In [5]:
from multiprocessing import cpu_count
n_cores = cpu_count()
print(f'Number of Logical CPU cores: {n_cores}')

Number of Logical CPU cores: 4


In [6]:
def get_feature_array(img_file_list):
    #pbar.update(1)
    dicom_ds = dcmread(img_file_list)
    img_array = dicom_ds.pixel_array
    return np.resize(np.mean(img_array.T, axis=0),(128,))

vfunc = np.vectorize(get_feature_array, otypes=[object])

def get_predictions(key, model_to_use):
    final_feature_list = vfunc(test_dict[key]).tolist()
    final = np.array(final_feature_list)
    return model_to_use.predict(final)

"""parallel = Parallel(n_jobs=4, return_as="generator")
output_generator = parallel(delayed(vfunc)(test_dict[st]) for st in tqdm(test_dict.keys()))
final_feature_list = list(output_generator)"""

y_proba = (Parallel(n_jobs=4)(delayed(get_predictions)(st, model) for st in tqdm(test_dict.keys())))

  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))
100%|██████████| 10/10 [00:29<00:00,  2.92s/it]

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  





[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [7]:
rows = {}

for i in range(len(y_proba)):
    rows[list(test_dict.keys())[i]] = np.mean(y_proba[i], axis=0)

In [14]:
y_proba[2]

array([[3.5238117e-02, 9.6381611e-01, 9.4579818e-04],
       [3.5238117e-02, 9.6381611e-01, 9.4579818e-04],
       [3.5238117e-02, 9.6381611e-01, 9.4579818e-04],
       ...,
       [3.5238117e-02, 9.6381611e-01, 9.4579818e-04],
       [3.5238117e-02, 9.6381611e-01, 9.4579818e-04],
       [3.5238117e-02, 9.6381611e-01, 9.4579818e-04]], dtype=float32)

In [8]:
rows

{'2379426952': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '1805845915': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '228290246': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3503499724': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '934686772': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3448721674': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3748910433': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '413910863': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3542358517': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3635664607': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32)}

In [17]:
rows_v2

{'2379426952': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '1805845915': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '228290246': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3503499724': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '934686772': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3448721674': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3748910433': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '413910863': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3542358517': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32),
 '3635664607': array([3.2771137e-02, 9.6635056e-01, 8.7302667e-04], dtype=float32)}

In [22]:
print(y_proba_v2[0])
print(y_proba_v2[100])
print(y_proba_v2[700])

[3.5238117e-02 9.6381611e-01 9.4579818e-04]
[9.1692789e-05 9.9990827e-01 0.0000000e+00]
[3.5238117e-02 9.6381611e-01 9.4579818e-04]


In [23]:
np.max(y_proba_v2, axis=0)

array([0.03523812, 0.99999994, 0.00157638], dtype=float32)

In [24]:
def get_feature_array_v2(img_file_list):
    pbar.update(1)
    dicom_ds = dcmread(img_file_list)
    img_array = dicom_ds.pixel_array
    return np.resize(np.mean(img_array.T, axis=0),(128,))

vfunc_v2 = np.vectorize(get_feature_array_v2, otypes=[object])

rows_v2 = {}
global pbar 
pbar = 0
for st in test_dict.keys():
    with tqdm(total=len(test_dict[st])) as pbar:
        final_feature_list = vfunc_v2(test_dict[st]).tolist()
    final_v2 = np.array(final_feature_list)
    y_proba_v2 = model.predict(final_v2)
    print(y_proba_v2[100])
    rows_v2[st] = np.mean(y_proba_v2, axis=0)

100%|██████████| 768/768 [00:11<00:00, 65.64it/s] 

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 





[9.1692789e-05 9.9990827e-01 0.0000000e+00]


100%|██████████| 768/768 [00:11<00:00, 69.66it/s] 

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 





[9.1692789e-05 9.9990827e-01 0.0000000e+00]


100%|██████████| 768/768 [00:10<00:00, 69.88it/s] 

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 





[9.1692789e-05 9.9990827e-01 0.0000000e+00]


100%|██████████| 768/768 [00:10<00:00, 69.93it/s] 

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 





[9.1692789e-05 9.9990827e-01 0.0000000e+00]


 76%|███████▌  | 582/768 [00:09<00:02, 64.20it/s] 


KeyboardInterrupt: 

In [16]:
final_v2.shape ## this is the needed shape for inference

(768, 128)

In [None]:
BATCH_SIZE_PER_REPLICA = 64
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
test_dict_dataset = tf.data.Dataset.from_tensor_slices(test_dict).batch(GLOBAL_BATCH_SIZE)
test_dict_dataset_dist = strategy.experimental_distribute_dataset(test_dict_dataset)

In [34]:
type(test_dict_dataset_dist)

tensorflow.python.distribute.input_lib.DistributedDataset

In [33]:
#import cupy as cp


with strategy.scope():
#with tf.device("/job:localhost/replica:0/task:0/device:GPU:0"): 

    def get_feature_array(img_file_list):
        pbar.update(1)
        dicom_ds = dcmread(img_file_list)
        img_array = dicom_ds.pixel_array
        return np.resize(np.mean(img_array.T, axis=0),(128,))

    vfunc = np.vectorize(get_feature_array, otypes=[object])

    rows = {}
    global pbar 
    pbar = 0
    for st in test_dict.keys():
        with tqdm(total=len(test_dict[st])) as pbar:
            final_feature_list = vfunc(test_dict[st]).tolist()
        final = np.array(final_feature_list)
        y_proba = model.predict(final)
        rows[st] = np.mean(y_proba, axis=0)

 40%|████      | 1472/3671 [00:17<00:25, 85.39it/s] 


KeyboardInterrupt: 

In [27]:
#with strategy.scope():
with tf.device("/job:localhost/replica:0/task:0/device:GPU:0"):
    
    def get_feature_array(img_file_list):
        dicom_ds = dcmread(img_file_list)
        img_array = dicom_ds.pixel_array
        return np.resize(np.mean(img_array.T, axis=0),(128,))

    vfunc = np.vectorize(get_feature_array, otypes=[object])
    
    def get_predictions(key, model_to_use):
        if Test:
            pbar.update(1)
        final_feature_list = vfunc(test_dict[key]).tolist()
        final = np.array(final_feature_list)
        return model_to_use.predict(final)

    vfunc2 = np.vectorize(get_predictions, otypes=[object])
    
    if Test:
        global pbar 
        pbar = 0

        with tqdm(total=len(test_dict.keys())) as pbar:
            y_proba = vfunc2(list(test_dict.keys()), model)
    else:
        y_proba = vfunc2(list(test_dict.keys()), model)

  0%|          | 0/50 [00:00<?, ?it/s]

[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


  4%|▍         | 2/50 [00:44<17:53, 22.37s/it]

[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


  6%|▌         | 3/50 [01:29<24:50, 31.72s/it]

[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


  8%|▊         | 4/50 [02:14<28:08, 36.71s/it]

[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


 10%|█         | 5/50 [03:40<33:05, 44.13s/it]


KeyboardInterrupt: 

In [95]:
len(rows)

250

In [9]:
#rows = {}
# weight_dict = {'normal_mild':1, 'moderate':2, 'severe':4}
conditions = ['spinal_canal_stenosis', 'neural_foraminal_narrowing', 'subarticular_stenosis']
sides = ['left', 'right']
vertebrae_levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']
severity_levels = ['normal_mild', 'moderate', 'severe']

for c in conditions:
    for v in vertebrae_levels:
        if c != 'spinal_canal_stenosis':
            for s in sides:
                for st in test_dict.keys():
                    if s+'_'+c+'_'+v == 'left_neural_foraminal_narrowing_l1_l2':
                        #print("going for model ", st+'_'+s+'_'+c+'_'+v)
                        pass
                    else:
                        #print(st+'_'+s+'_'+c+'_'+v)
                        rows[st+'_'+s+'_'+c+'_'+v] = np.array([0.333333, 0.333333, 0.333333])
        else:
            for st in test_dict.keys():
                #print(st+'_'+c+'_'+v)
                rows[st+'_'+c+'_'+v] = np.array([0.333333, 0.333333, 0.333333])

In [None]:
test_dict_key_list = list(test_dict.keys())

for st in range(len(test_dict_key_list)):
    # print(test_dict_key_list[st])
    rows[test_dict_key_list[st]+'_left_neural_foraminal_narrowing_l1_l2'] = np.mean(y_proba[st], axis=0)

In [10]:
for row_id, feature in tqdm(rows.items()):
    feature_set_reshaped = feature.reshape(1, -1)
    predictions = np.ascontiguousarray(feature_set_reshaped)
    df = pd.DataFrame(predictions, columns=severity_levels)
    df.insert(loc=0, column='row_id', value=row_id)
    submission = pd.concat([submission,df]).reset_index(drop=True)
    
i = submission[(submission.row_id == 'samples')].index
submission = submission.drop(i).reset_index(drop=True)

100%|██████████| 250/250 [00:00<00:00, 1040.02it/s]


In [None]:
submission.to_csv('submission.csv', index=False)

In [11]:
submission

Unnamed: 0,row_id,normal_mild,moderate,severe
0,2379426952,0.032771,0.966351,0.000873
1,1805845915,0.032771,0.966351,0.000873
2,228290246,0.032771,0.966351,0.000873
3,3503499724,0.032771,0.966351,0.000873
4,934686772,0.032771,0.966351,0.000873
...,...,...,...,...
245,3448721674_right_subarticular_stenosis_l5_s1,0.333333,0.333333,0.333333
246,3748910433_right_subarticular_stenosis_l5_s1,0.333333,0.333333,0.333333
247,413910863_right_subarticular_stenosis_l5_s1,0.333333,0.333333,0.333333
248,3542358517_right_subarticular_stenosis_l5_s1,0.333333,0.333333,0.333333
