In [1]:
import os

from PIL import Image
import pandas as pd
from tqdm import tqdm
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [2]:


def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM X-ray and return its pixels as an 8-bit array.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Parameters
    ----------
    path : str or path-like
        Location of the DICOM file to read.
    voi_lut : bool, default True
        When True, pass the raw pixels through ``apply_voi_lut`` so the
        VOI LUT stored by the DICOM device (if any) is used to produce the
        "human-friendly" view.
    fix_monochrome : bool, default True
        When True and the file's ``PhotometricInterpretation`` is
        ``"MONOCHROME1"``, invert the intensities so the X-ray does not
        look inverted.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array, min-max scaled to the 0..255 range. A constant
        image (no intensity range to stretch) comes back as all zeros.
    """
    # dcmread is the supported reader; pydicom.read_file was a deprecated
    # alias for it and no longer exists in pydicom 3.x.
    dicom = pydicom.dcmread(path)

    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to [0, 1]. After the shift the minimum is 0, so a zero
    # peak means every pixel is identical; skip the division in that case
    # instead of producing 0/0 = NaN.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

In [3]:
def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image bounded by a ``size`` x ``size`` box.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    Parameters
    ----------
    array : array-like
        Pixel data handed to ``Image.fromarray``.
    size : int
        Edge length of the square target box, in pixels.
    keep_ratio : bool, default False
        False: call ``Image.resize`` with ``(size, size)``.
        True: call ``Image.thumbnail`` with ``(size, size)`` instead, which
        modifies the image in place.
    resample : int, default ``Image.LANCZOS``
        Resampling filter forwarded to PIL.

    Returns
    -------
    PIL.Image.Image
        The resized image.
    """
    target_box = (size, size)
    image = Image.fromarray(array)

    if not keep_ratio:
        return image.resize(target_box, resample)

    # thumbnail() works in place and returns None, so hand back the image itself.
    image.thumbnail(target_box, resample)
    return image

In [4]:
import pathlib
# Root folder of the SIIM COVID-19 detection dataset on the local machine.
# Raw string: in a normal literal "\D" and "\s" are invalid escape sequences,
# which Python reports with a warning and plans to reject in a future version.
data_dir = pathlib.Path(r'D:\Datasets\siim_covid19_detection')

In [5]:
# Load train_image_level.csv from the dataset root into a DataFrame.
train_csv_path = os.path.join(data_dir, 'train_image_level.csv')
train = pd.read_csv(train_csv_path)

In [7]:
# Smoke-test read_xray on a single training file.
# Note: despite its name, `dicom` receives read_xray's return value (the
# converted uint8 pixel array), not a pydicom dataset.
sample_rel_path = r'train\ae3e63d94c13\288554eb6182\e00f9fe0cce5.dcm'
path = os.path.join(data_dir, sample_rel_path)
dicom = read_xray(path)

In [14]:

# Convert every DICOM file under <data_dir>/<split> to an RGB PNG at its
# original resolution. Each image is saved twice: once under image/ named after
# the DICOM file, and once under study/ named after the study directory.
split = 'train'
# os.path.join rather than hard-coded '\\' so the paths follow the same
# separator (os.sep) that the study-id lookup below splits on.
save_dir = os.path.join(data_dir, 'orig_size', split)
save_dir_img = os.path.join(save_dir, 'image')
save_dir_study = os.path.join(save_dir, 'study')

os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir_img, exist_ok=True)
os.makedirs(save_dir_study, exist_ok=True)

for dirname, _, filenames in os.walk(os.path.join(data_dir, split)):
    for file in filenames:
        # Only DICOM files can be decoded; skip anything else found in the
        # tree instead of letting read_xray fail and abort the whole run.
        if not file.lower().endswith('.dcm'):
            continue

        xray = read_xray(os.path.join(dirname, file))
        im = Image.fromarray(xray).convert('RGB')

        image_name = os.path.splitext(file)[0] + '_image.png'
        im.save(os.path.join(save_dir_img, image_name))

        # Assumes the layout <split>/<study>/<series>/<image>.dcm, so the study
        # id is the second-to-last component of dirname. If a study holds
        # several images, each save overwrites the previous one and only the
        # last image visited remains.
        study = dirname.split(os.sep)[-2] + '_study.png'
        im.save(os.path.join(save_dir_study, study))



In [8]:
# save_dir = f'/kaggle/tmp/{split}/study/'
# os.makedirs(save_dir, exist_ok=True)

# for dirname, _, filenames in tqdm(os.walk(f'../input/siim-covid19-detection/{split}')):
#     for file in filenames:
#         # set keep_ratio=True to have original aspect ratio
#         xray = read_xray(os.path.join(dirname, file))
#         im = resize(xray, size=600)  
#         study = dirname.split('/')[-2] + '_study.png'
#         im.save(os.path.join(save_dir, study))


0it [00:00, ?it/s]

In [9]:
%%time
# Pack the image/ and study/ PNG folders into gzip-compressed tarballs
# (image.tar.gz, study.tar.gz) in the current working directory.
# -C changes into the folder first, so "." archives its contents with paths
# relative to that folder.
# NOTE(review): these are Kaggle paths (/kaggle/tmp/train/...), whereas the
# conversion cell in this notebook writes under data_dir/orig_size/<split> -
# confirm which location is meant to be archived.
!tar -zcf image.tar.gz -C "/kaggle/tmp/train/image/" .
!tar -zcf study.tar.gz -C "/kaggle/tmp/train/study/" .

CPU times: user 1.16 s, sys: 182 ms, total: 1.34 s
Wall time: 1min 7s
