In [1]:
import os
import glob
import shutil
import skimage.io
import pydicom 
import numpy as np
from tqdm import tqdm

In [2]:
def dcm_to_png(dcm_file, png_file=None, fix_monochrome=True, apply_windowing=True, long_edge=512):
    """ Function to convert from a DICOM image to png
        @param dcm_file: An opened file like object to read te dicom data
        @param png_file: An opened file like object to write the png data
    """

    # Extracting data from the mri file
    dicom = pydicom.dcmread(dcm_file)
    image = dicom.pixel_array
    # shape = image.shape

    
    # if apply_windowing:
    # image = pydicom.pixel_data_handlers.util.apply_modality_lut(image, dicom)
    image = pydicom.pixel_data_handlers.util.apply_voi_lut(image, dicom)
    # image = pydicom.pixel_data_handlers.util.apply_windowing(image, dicom)

    if fix_monochrome and dicom.PhotometricInterpretation != "MONOCHROME1":
        image = np.amax(image) - image

    if long_edge is not None:
        # Resize long edge
        factor = float(long_edge) / max(image.shape[:2])
        shape = (int(image.shape[0] * factor), int(image.shape[1] * factor))
        image = skimage.transform.resize(image, shape)
        
    if np.max(image) != 0:
        image = image / np.max(image)
    image = (image * 255).astype(np.uint8)
    
    if png_file is None:
        png_file = dcm_file.replace("dcm", "png")
    skimage.io.imsave(png_file, image)

In [3]:
def glob_files(folders: str = None, extension: str = '*.nii.gz'):  # type: ignore
    assert folders is not None
    # paths = [glob.glob(os.path.join(folder, extension), recursive=True) for folder in folders]
    paths = glob.glob(os.path.join(folders, extension), recursive=True) 
    files = sorted([item for item in paths if os.path.isfile(item)])
    print(len(files))
    print(files[:1])
    return files

In [4]:
filenames = glob_files(folders="data/RSNA2023", extension="**/*.dcm")

54710
['data/RSNA2023/test_images/10008/1591370361.dcm']


In [5]:
from joblib import Parallel, delayed  
from tqdm import tqdm 

Parallel(n_jobs=16)(delayed(dcm_to_png)(filename)
    for filename in tqdm(filenames)
)

100%|██████████| 54710/54710 [1:22:15<00:00, 11.08it/s]


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,