In [None]:
# Offline install of the GDCM package: copy the archive shipped as a Kaggle
# input dataset into the working directory, unpack it, and hand the local
# package file to conda with --offline so no network access is attempted.
# NOTE(review): presumably GDCM is needed so pydicom can decode compressed
# pixel data in the test DICOMs — confirm.
!cp ../input/gdcm-conda-install/gdcm.tar .
!tar -xvzf gdcm.tar
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2

In [None]:
# Remove the copied archive and the extracted folder from the working
# directory once they are no longer needed.
!rm gdcm.tar
!rm -rf gdcm

In [None]:
import fastai

In [None]:
from fastai.vision.all import *
from fastai.basics import *
from fastai.vision.utils import *

In [None]:
# Folder that is searched for the test-set DICOM files.
test_path = Path("/kaggle/input/siim-covid19-detection/test/")

In [None]:
# Collect the .dcm files found under test_path.
test_files = get_files(test_path, extensions=".dcm")

In [None]:
# Sanity check: how many DICOM files were found.
len(test_files)

In [None]:
# Image id = file name without extension; study id = name of the directory
# two levels above the file. Both lists follow the order of test_files.
img_ids = [dcm.stem for dcm in test_files]
study_ids = [dcm.parent.parent.name for dcm in test_files]

In [None]:
# Forward lookup: image id -> study id.
img2study = dict(zip(img_ids, study_ids))

# Reverse lookup: study id -> list of the image ids belonging to it.
study2imgs = defaultdict(list)
for image_id, owner_study in zip(img_ids, study_ids):
    study2imgs[owner_study].append(image_id)

In [None]:
# Sanity check: number of distinct studies.
len(study2imgs)

In [None]:
# Working-directory folder that receives the converted PNGs.
path_png = Path("./test_converted_png")
# exist_ok=True: re-running this cell does not fail if the folder is there.
path_png.mkdir(exist_ok=True)

In [None]:
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

def convert_xray(path, voi_lut = True, fix_monochrome = True, targ_sz = 800):
    """Convert one DICOM X-ray into an 8-bit PNG stored in ``path_png``.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Location of the DICOM file (``str`` or ``Path``). The file stem
            becomes the PNG file name.
        voi_lut: When true, pass the raw pixels through ``apply_voi_lut`` so
            the VOI LUT stored by the DICOM device (if any) is applied.
        fix_monochrome: When true, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``, which would
            otherwise look inverted.
        targ_sz: Target size forwarded to ``resize_to`` (with
            ``use_min=True``). Defaults to 800, the previously hard-coded value.

    Returns:
        None. The result is written to ``path_png/<stem>.png``.
    """
    path = Path(path)  # accept plain strings as well as Path objects
    dicom = pydicom.dcmread(path)  # dcmread is the non-deprecated spelling of read_file

    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # Use getattr so a file without the tag is simply left un-inverted
    # instead of raising AttributeError.
    if fix_monochrome and getattr(dicom, "PhotometricInterpretation", None) == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to [0, 1]. A constant image has a peak of 0 after the
    # shift; skip the division there so it becomes all-black instead of NaN.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    img = PILImage.create(data)
    new_size = resize_to(img, targ_sz=targ_sz, use_min=True)
    img = img.resize(new_size)
    img.save(path_png/f"{path.stem}.png")


In [None]:
# Timestamp taken before the conversion cell, used to report elapsed time.
start = time.time()

In [None]:
# Run convert_xray over every test DICOM on a thread pool sized by
# defaults.cpus. Wrapping the map in list() consumes all of its results
# before the pool is closed; the resulting list itself is discarded.
with ThreadPoolExecutor(defaults.cpus) as pool:
    list(pool.map(convert_xray, test_files))

In [None]:
# Timestamp taken after the conversion cell.
end=time.time()

In [None]:
# Elapsed seconds between the two timestamps.
print(end - start)

In [None]:
# Load the exported fastai learner from the attached model dataset.
# cpu=False asks fastai not to force the model onto the CPU.
# NOTE(review): fastai documents load_learner as pickle-based, so only load
# exports from a trusted source.
learn = load_learner("../input/cov19models/softmax-100-epoch-xres50", cpu=False)

In [None]:
# List the image files present in the converted-PNG folder.
png_files = get_image_files(path_png)

In [None]:
# Build a test dataloader over the PNGs from the learner's own DataLoaders.
test_dl = learn.dls.test_dl(png_files)

In [None]:
preds = learn.get_preds(dl=test_dl)

In [None]:
preds = preds[0]

In [None]:
# Image ids (file stems) read back from the dataloader's own item list.
# NOTE(review): presumably this order matches the rows of `preds`, which is
# what the zip in the next cell relies on — confirm.
test_ids = [x.stem for x in test_dl.dataset.items]

In [None]:
# Pair each image id with its prediction, position by position.
img2pred = dict(zip(test_ids, preds))

In [None]:
# Gather, per study, the predictions of all images that belong to it.
# The inner list is named `study_img_ids` rather than `img_ids` so the loop
# does not overwrite the notebook-level `img_ids` list built earlier, which
# would corrupt any later re-run of the cells that read it.
study2preds = defaultdict(list)
for study_id, study_img_ids in study2imgs.items():
    for img_id in study_img_ids:
        study2preds[study_id].append(img2pred[img_id])

In [None]:
def my_round(x):
    """Clamp tiny values to 0, otherwise round to five decimal places.

    Any value below 0.0001 (negatives included) is returned as the integer 0;
    everything else is returned as ``round(x, 5)``.
    """
    return 0 if x < 0.0001 else round(x, 5)

In [None]:
study2preds = {k:list(map(my_round, torch.stack(v, dim=0).mean(dim=0).tolist())) for k,v in study2preds.items()}

In [None]:
study_ids_col = []
study_preds_col = []
for study, study_preds in study2preds.items():
    study_ids_col.append(study + "_study")
    pred_string = " ".join([f"{learn.dls.vocab[i]} {study_preds[i]} 0 0 1 1" for i in range(len(learn.dls.vocab))])
    study_preds_col.append(pred_string)

In [None]:
# Sanity check: number of study-level rows.
len(study_ids_col)

In [None]:
# Image-level rows: one "<stem>_image" id per test DICOM, each given the
# fixed prediction string "none 1 0 0 1 1".
# Reuse `test_files` instead of walking the test directory a second time
# through a relative path, which depended on the working directory and was
# spelled differently from `test_path`.
images = [f"{dcm.stem}_image" for dcm in test_files]
images_preds = ["none 1 0 0 1 1"] * len(images)

In [None]:
# Study-level rows come first, followed by the image-level rows.
submission = pd.DataFrame(
    {
        "id": [*study_ids_col, *images],
        "PredictionString": [*study_preds_col, *images_preds],
    }
)

In [None]:
# Display the assembled frame for a visual check before writing it out.
submission

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
submission.to_csv("submission.csv", index=False)

In [None]:
# Delete the intermediate PNG folder from the working directory.
!rm -rf ./test_converted_png