In [1]:
import numpy as np
import pandas as pd
import os, random
from fastai.vision.all import *
import shutil
import pydicom
import cv2
import glob

from tqdm.notebook import tqdm
from joblib import Parallel, delayed



In [2]:
# Restore the exported fastai Learner used for inference.
# cpu=False asks fastai not to force the model onto the CPU.
# NOTE: load_learner unpickles the file, so only point it at trusted exports.
new_learn = load_learner(
    '/kaggle/input/atd-baseline1-model/model.pt',
    cpu=False,
)

In [3]:
# Switch to the real competition test set by using this path instead:
# TEST_PATH = '/kaggle/input/rsna-2023-abdominal-trauma-detection/test_images/'
TEST_PATH = '/kaggle/working/test_images/'
SAVE_FOLDER = '/kaggle/working/test_png/'
SIZE = 512
EXTENSION = 'png'

# A couple of training patients are staged under TEST_PATH as a stand-in
# test set so the pipeline can be exercised end to end.
TRAIN_IMAGES_PATH = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'
SAMPLE_PATIENTS = ('10004', '10005')

os.makedirs(SAVE_FOLDER, exist_ok = True)
os.makedirs(TEST_PATH, exist_ok = True)

# The destination is derived from TEST_PATH, so changing the constant above
# keeps the staged copies in sync. dirs_exist_ok makes the cell re-runnable.
for sample_patient in SAMPLE_PATIENTS:
    shutil.copytree(
        os.path.join(TRAIN_IMAGES_PATH, sample_patient),
        os.path.join(TEST_PATH, sample_patient),
        dirs_exist_ok = True,
    )

print('Number of test patients:', len(os.listdir(TEST_PATH)))

Number of test patients: 2


In [4]:
def process(patient, size = SIZE, save_folder = "", data_path = ""):
    """Convert every DICOM study of one patient into resized 8-bit PNG slices.

    Studies (the sub-directories of ``data_path + patient``) are processed in
    parallel by two joblib workers.

    Args:
        patient: Name of the patient directory inside ``data_path``.
        size: Side length in pixels of the square output images.
        save_folder: Output prefix; files are written to
            ``save_folder + "<patient>_<study>_<i>.png"``, so it should end
            with a path separator.
        data_path: Input prefix that contains the patient directories; it is
            concatenated directly with ``patient``, so it should end with a
            path separator.

    Returns:
        None. The PNG files are the only result.
    """

    def sub_process(patient, study, size = SIZE, save_folder = "", data_path = ""):
        """Write one study's slices as PNGs, numbered in ascending z order."""
        slices_by_z = {}
        for dcm_path in sorted(glob.glob(data_path + f"{patient}/{study}/*.dcm")):

            dicom = pydicom.dcmread(dcm_path)
            # Tag (0020,0032) is Image Position (Patient); its last component
            # is used as the slice's z coordinate for ordering.
            pos_z = dicom[(0x20, 0x32)].value[-1]

            # Min-max scale each slice independently into [0, 1); the epsilon
            # avoids a division by zero on constant images.
            img = dicom.pixel_array
            img = (img - img.min())/(img.max() - img.min() + 1e-6)

            # Slices sharing the same z value overwrite each other here.
            slices_by_z[pos_z] = img

        for i, k in enumerate(sorted(slices_by_z)):
            img = cv2.resize(slices_by_z[k], (size, size))
            cv2.imwrite(save_folder + f"{patient}_{study}_{i}.png", (img * 255).astype(np.uint8))

    # Forward this function's own arguments. Previously the workers always got
    # the notebook-level constants, so `size`, `save_folder` and `data_path`
    # were ignored for conversion while `data_path` was still used for listing.
    _ = Parallel(n_jobs = 2)(
        delayed(sub_process)(patient, study, size=size, save_folder=save_folder, data_path=data_path)
        for study in tqdm(sorted(os.listdir(data_path + patient)))
        )

In [5]:
import multiprocessing

# Report the number of CPU cores visible to this kernel.
core_count = multiprocessing.cpu_count()
print('Number of cores: ', core_count)

Number of cores:  2


In [6]:
patients = os.listdir(TEST_PATH)

preds_list = []
fnames_list = []

# Convert, score and clean up one patient at a time so that SAVE_FOLDER only
# ever holds a single patient's PNG slices.
for patient in tqdm(patients):
    process(patient, size=512, save_folder=SAVE_FOLDER, data_path=TEST_PATH)
    files = get_image_files(SAVE_FOLDER)

    # The batch size is passed when the loader is built. Assigning
    # `test_dl.batch_size` afterwards only sets an unused attribute, because
    # fastai's loader takes its batch size from the constructor argument `bs`.
    # NOTE(review): if the learner previously ran with a smaller default batch,
    # 256 images of 512x512 may need more GPU memory - lower `bs` on OOM.
    test_dl = new_learn.dls.test_dl(files, with_labels = False, bs = 256)

    preds_list.append(new_learn.get_preds(dl = test_dl)[0].numpy())
    fnames_list.append(files)

    # Remove this patient's slices before the next patient is converted.
    for file in files:
        os.remove(file)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
#test_dl = new_learn.dls.test_dl(files, with_labels = False)
#test_dl.batch_size = 256

#preds = new_learn.get_preds(dl = test_dl)[0].numpy()

In [8]:
# Stack the per-patient prediction arrays along the first (slice) axis in one
# call instead of re-concatenating the growing array on every iteration.
# An empty `preds_list` now fails here with a ValueError rather than leaving
# `preds` undefined for the later cells.
preds = np.concatenate(preds_list)

In [9]:
from itertools import chain

# Flatten the per-patient file lists into a single flat list of file paths,
# kept in the same order as the rows of the stacked predictions.
fnames_list = [*chain(*fnames_list)]

In [10]:
# One row per PNG slice: the file name plus the per-class probabilities derived
# from the model outputs in `preds` (columns 0-4 are read as bowel,
# extravasation, kidney, liver and spleen).
slice_probs = {'fname': pd.Series(fnames_list, dtype = 'string')}

# Binary targets: "injury" is the model output, "healthy" its complement.
for col, target in enumerate(('bowel', 'extravasation')):
    slice_probs[f'{target}_healthy'] = 1 - preds[:, col]
    slice_probs[f'{target}_injury'] = preds[:, col]

# Three-level organs: the single model output is split evenly between the
# "low" and "high" grades.
for col, organ in enumerate(('kidney', 'liver', 'spleen'), start = 2):
    slice_probs[f'{organ}_healthy'] = 1 - preds[:, col]
    slice_probs[f'{organ}_low'] = preds[:, col]/2
    slice_probs[f'{organ}_high'] = preds[:, col]/2

# Building the frame in one step raises if the number of file names and the
# number of prediction rows differ, instead of silently padding with NaN.
test_files_probs = pd.DataFrame(slice_probs)

In [11]:
# Slice files are written as "<patient>_<study>_<index>.png", so the patient id
# is the part of the base name before the first underscore.
patient_id_list = [
    fname.rsplit('/', 1)[-1].split('_', 1)[0]
    for fname in test_files_probs['fname']
]

test_files_probs['patient_id'] = pd.Series(patient_id_list, dtype = 'string')

In [12]:
# The file name is no longer needed once the patient id has been extracted.
test_files_probs = test_files_probs.drop(columns = ['fname'])

In [13]:
# Collapse slice-level rows into one row per patient by taking the column-wise
# maximum, then turn the group key back into a leading `patient_id` column.
# NOTE(review): the max is also taken over the *_healthy columns, so the
# probabilities of one organ need not sum to 1 after aggregation - confirm that
# this is intended for the scoring metric.
x = (
    test_files_probs
    .groupby('patient_id')
    .max()
    .reset_index()
)
x

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,10004,0.990216,0.844152,0.984456,0.85561,0.94699,0.48662,0.48662,0.966347,0.432327,0.432327,0.712732,0.496667,0.496667
1,10005,0.699357,0.892937,0.648911,0.805173,0.931711,0.286092,0.286092,0.873215,0.258282,0.258282,0.530343,0.481476,0.481476


In [14]:
# Write the per-patient table as the submission file: header row included
# (the pandas default), no index column.
x.to_csv('submission.csv', index = False)