# DICOM to PNG converter

Converts all DICOM images in the given dataset to .png images and uploads them to the default workspace blobstore under:


`/fhl/datasets/<dataset_name>/image/`


Note:

    - This notebook runs on Compute Instance (Linux OS)
    - This notebook can take a long time to run (depending on the VM size)

Make sure pydicom and OpenCV are installed: `pip install pydicom opencv-python`

In [None]:
import os
import pydicom
import cv2

import azureml.core
from azureml.core import Workspace, Dataset

In [None]:
# Name of the registered DICOM source dataset, and the dataset name used in
# the upload target path for the generated PNG files.
dicom_dataset_name = 'RSNA_training'
dataset_name = 'NoWindow'

# Workspace handle and the blob datastore that receives the PNG files.
ws = Workspace.from_config()
default_ds = ws.datastores['workspaceblobstore']

In [None]:
# Reuse the 'datasets' datastore if the workspace already has one;
# otherwise register it against the blob container.
from azureml.core.datastore import Datastore

if 'datasets' not in ws.datastores:
    # The storage account key is read from the ACCOUNT_KEY environment variable.
    account_key = os.getenv("ACCOUNT_KEY")
    ds = Datastore.register_azure_blob_container(
        ws,
        datastore_name='datasets',
        container_name='imagecontainer',
        account_name='dicommodel2290602728',
        account_key=account_key,
        resource_group='dicom-model-rg',
    )
else:
    ds = ws.datastores['datasets']

In [None]:
# Look up the registered DICOM source dataset; stop the notebook if it is missing.
dicom_dataset = None

if dicom_dataset_name not in ws.datasets:
    raise RuntimeError('Dataset {} not found.'.format(dicom_dataset_name))

dicom_dataset = ws.datasets.get(dicom_dataset_name)
print('Found the dataset', dicom_dataset_name)

In [None]:
# Convert a single DICOM file to an 8-bit PNG image.
def convert_to_png(dicom_input_file: str, png_output_file: str, window_center: int = 4000, window_width: int = 10000):
    """Convert one DICOM file to an 8-bit grayscale PNG.

    The raw pixel values are windowed before being written: the window
    ``[window_center - window_width / 2, window_center + window_width / 2]``
    is narrowed to the value range actually present in the image, pixel
    values are clipped to that range, and the result is stretched linearly
    onto 0..255.

    Args:
        dicom_input_file: Path to an existing ``.dcm`` / ``.dicm`` file.
        png_output_file: Destination path of the PNG; missing parent folders
            are created.
        window_center: Center of the intensity window, in raw pixel units
            (no rescale slope/intercept is applied, see TODO below).
        window_width: Width of the intensity window, in raw pixel units.

    Raises:
        RuntimeError: If the input file does not have a DICOM extension, does
            not exist, or the PNG could not be written.
    """
    if not dicom_input_file.lower().endswith(('.dcm', '.dicm')):
        raise RuntimeError("{} is not a DICOM file.".format(dicom_input_file))

    if not os.path.exists(dicom_input_file):
        raise RuntimeError("{} not fould.".format(dicom_input_file))

    # dcmread is the supported reader; read_file is a deprecated alias.
    dicom_img = pydicom.dcmread(dicom_input_file)

    #  TODO: 0028,1052 (Rescale Intercept) and 0028,1053 (Rescale Slope)
    #  These are the parameters which define the linear rescaling from raw
    #  pixel values in the DICOM field to Hounsfield Units.

    # Work in floating point so the offset/scale arithmetic cannot wrap around
    # in the (usually 16-bit integer) pixel dtype.
    img = dicom_img.pixel_array.astype('float64')
    lower_limit = max(float(img.min()), window_center - window_width / 2)
    upper_limit = min(float(img.max()), window_center + window_width / 2)
    span = upper_limit - lower_limit

    if span > 0:
        # Clip to the window, shift so lower_limit maps to 0, then stretch so
        # upper_limit maps to 255.
        windowed = img.clip(lower_limit, upper_limit) - lower_limit
        scaled_img = (windowed * (255.0 / span)).round().astype('uint8')
    else:
        # Degenerate window (constant image, or window entirely outside the
        # pixel range): emit a black image instead of dividing by zero.
        scaled_img = (img * 0).astype('uint8')

    output_folder = os.path.dirname(png_output_file)
    # dirname is '' for a bare file name; there is nothing to create then.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # cv2.imwrite reports failure through its return value rather than raising.
    if not cv2.imwrite(png_output_file, scaled_img):
        raise RuntimeError("Failed to write {}.".format(png_output_file))

In [None]:
import tempfile

# Mount the DICOM dataset, list every DICOM file in it and convert each one to
# a PNG under a local staging folder that mirrors the dataset's folder layout.
dicom_files = []
dicom_mount_point = None
# Stage the PNG files in a per-dataset folder under the system temp directory.
png_temp_folder = os.path.join(tempfile.gettempdir(), dicom_dataset_name)

dicom_extensions = ('.dcm', '.dicm')

with dicom_dataset.mount() as mount_context:
    # The files are read from the mount point, so both the listing and the
    # conversion stay inside the `with` block.
    dicom_mount_point = mount_context.mount_point
    for root_dir, _, files in os.walk(dicom_mount_point):
        for file_name in files:
            if file_name.lower().endswith(dicom_extensions):
                dicom_files.append(os.path.join(root_dir, file_name))

    for dicom in dicom_files:
        # Output paths are the lower-cased path relative to the mount point.
        relative_path = os.path.relpath(dicom, dicom_mount_point).lower()
        # Swap only the file extension; a plain str.replace would also rewrite
        # '.dcm' / '.dicm' occurring inside directory names.
        png_relative_path = os.path.splitext(relative_path)[0] + '.png'

        output_file = os.path.join(png_temp_folder, png_relative_path)
        print('converting {} to {}'.format(dicom, output_file ))
        convert_to_png(dicom, output_file)

        if not os.path.exists(output_file):
            raise RuntimeError("{} not fould.".format(output_file))

In [None]:
# Count the PNG files under the staging folder (recursively). A generator
# expression is used so no loop variable (previously `dir`, which shadowed
# the builtin) is left behind in the notebook namespace.
count = sum(
    1
    for _, _, file_names in os.walk(png_temp_folder)
    for file_name in file_names
    if file_name.endswith('.png')
)

print('Total {} PNG files generated under {}.'.format(count, png_temp_folder))

In [None]:
# Upload the staged PNG folder to the workspace blobstore (overwrite=True).
png_target_path = '/fhl/datasets/' + dataset_name + '/image/'

default_ds.upload(src_dir=png_temp_folder, target_path=png_target_path, overwrite=True)


In [None]:
# Signal that the notebook has reached its last cell.
print('Done')