In [None]:
# Install the gdcm package from the conda-forge channel; -y answers the confirmation prompt.
# NOTE(review): presumably required so pydicom can decode compressed pixel data — confirm.
!conda install gdcm -c conda-forge -y

In [None]:
import os

from PIL import Image
import pandas as pd
from tqdm.auto import tqdm

In [None]:
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixel data scaled to 8 bits.

    Args:
        path: Location of the DICOM file to read.
        voi_lut: When true, the decoded pixels are passed through
            ``apply_voi_lut`` before scaling; otherwise the raw
            ``pixel_array`` is used as-is.
        fix_monochrome: When true, images whose ``PhotometricInterpretation``
            is ``"MONOCHROME1"`` are inverted before scaling.

    Returns:
        ``np.uint8`` array with the same shape as the decoded pixel data,
        min-max scaled to 0-255. An image whose pixels are all equal is
        returned as all zeros.
    """
    # dcmread is the supported reader; read_file was only a deprecated alias
    # for it and is no longer available in recent pydicom releases.
    dicom = pydicom.dcmread(path)

    pixels = dicom.pixel_array
    if voi_lut:
        pixels = apply_voi_lut(pixels, dicom)

    # Work in float64 so the subtractions below cannot wrap around when the
    # stored integer type is too narrow for (max - min).
    data = np.asarray(pixels, dtype=np.float64)

    # Possible PhotometricInterpretation values include:
    # MONOCHROME1, MONOCHROME2, RGB, PALETTE COLOR
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        # Flip intensities so MONOCHROME1 images match the MONOCHROME2 convention.
        data = np.max(data) - data

    # Shift so the minimum becomes 0, divide by the maximum to map the values
    # into 0-1, then multiply by 255.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        # Skipped for a constant image, where peak is 0 and the division
        # would produce NaNs; the data is already all zeros in that case.
        data = data / peak

    return (data * 255).astype(np.uint8)

In [None]:
def resize(array, size, keep_ratio = False, resample=Image.LANCZOS):
    """Convert an image array to a PIL image scaled against a square target.

    Args:
        array: Image data accepted by ``Image.fromarray``.
        size: Edge length, in pixels, of the square target box.
        keep_ratio: When false, the image is resized to exactly
            ``size x size``. When true, ``Image.thumbnail`` is used instead,
            which keeps the aspect ratio and fits the image inside the
            ``size x size`` box.
        resample: Resampling filter handed to PIL.

    Returns:
        The resulting ``PIL.Image.Image``.
    """
    target_box = (size, size)
    picture = Image.fromarray(array)

    if not keep_ratio:
        return picture.resize(target_box, resample)

    # thumbnail() modifies the image in place and returns None.
    picture.thumbnail(target_box, resample)
    return picture
           

In [None]:
# Read train_image_level.csv from the competition input directory into a DataFrame.
train = pd.read_csv("../input/siim-covid19-detection/train_image_level.csv")
# Bare expression: shown as the cell's output.
train

In [None]:
# Sanity check: open one sample DICOM file and print the shape of its decoded pixel array.
path = "../input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm"
# dcmread is the supported reader; read_file was only a deprecated alias for it.
dicom = pydicom.dcmread(path)
print(dicom.pixel_array.shape)


In [None]:
# Convert every DICOM under the chosen split to a 512x512 PNG and record the
# original array dimensions of each image.
split = 'train'

# os.makedirs also creates the intermediate /kaggle/tmp/{split}/ directory,
# so a single call is enough.
save_dir = f'/kaggle/tmp/{split}/image/'
os.makedirs(save_dir, exist_ok=True)

# Parallel lists filled in lockstep: file stem, original rows, original columns.
Id = []
dim0 = []
dim1 = []

for dirname, _, filenames in tqdm(os.walk(f'../input/siim-covid19-detection/{split}')):

    for file in filenames:
        stem, ext = os.path.splitext(file)
        # Skip anything that is not a DICOM file so a stray file in the tree
        # cannot abort the whole export inside read_xray.
        if ext.lower() != '.dcm':
            continue

        xray = read_xray(os.path.join(dirname, file))

        # Dimensions are taken before resizing, i.e. from the original image.
        dim0.append(xray.shape[0])
        dim1.append(xray.shape[1])
        # The same stem is used for the id and for the output file name so
        # the metadata can always be matched to the saved PNG.
        Id.append(stem)

        # pass keep_ratio=True to resize() to preserve the original aspect ratio
        im = resize(xray, size=512)

        im.save(os.path.join(save_dir, stem + '_image.png'))

In [None]:
# Pack the contents of /kaggle/tmp/train/image/ into a gzip-compressed archive,
# image.tar.gz, written to the current working directory.
!tar -zcf image.tar.gz -C "/kaggle/tmp/train/image/" .

<a href="./image.tar.gz"> Download File_image </a>

In [None]:
# One row per exported image: file stem plus the original (pre-resize) array dimensions.
# The split column is taken from the `split` variable used for the export,
# rather than a hard-coded "train", so it stays correct if the split is changed.
df_meta = pd.DataFrame({"id": Id, "dim0": dim0, "dim1": dim1}).assign(split=split)
df_meta

In [None]:
# Write the metadata table to metadf.csv in the working directory, without the index column.
df_meta.to_csv('metadf.csv',index=False)

<a href="./metadf.csv"> Download File_meta </a>

In [None]:
# Convert the DICOMs of the chosen split to 600x600 PNGs named after the
# directory two levels above each file.
save_dir = f"/kaggle/tmp/{split}/study/"
os.makedirs(save_dir, exist_ok = True)

for dirname, _, filenames in tqdm(os.walk(f'../input/siim-covid19-detection/{split}')):
    # The output name depends only on the directory being walked, so build it
    # once per directory. It is the name of this directory's parent, obtained
    # with os.path instead of splitting on '/'.
    study = os.path.basename(os.path.dirname(dirname)) + "_study.png"

    for file in filenames:
        # Skip anything that is not a DICOM file so a stray file in the tree
        # cannot abort the whole export inside read_xray.
        if not file.lower().endswith('.dcm'):
            continue

        xray = read_xray(os.path.join(dirname, file))
        im = resize(xray, size = 600)
        # NOTE(review): every image that shares the same parent directory name
        # is saved to this one path, so the last one processed overwrites the
        # others — confirm that one image per study is intended.
        im.save(os.path.join(save_dir, study))

In [None]:
# Pack the contents of /kaggle/tmp/train/study/ into a gzip-compressed archive,
# study.tar.gz, written to the current working directory.
!tar -zcf study.tar.gz -C "/kaggle/tmp/train/study/" .


<a href="./study.tar.gz"> Download File_study </a>