In [None]:
import os
import pydicom
import cv2

import azureml.core
from azureml.core import Workspace, Dataset

In [None]:
# Dataset names: the registered DICOM source, and the name reused for the
# watermarked output folder and upload path further down.
dicom_dataset_name = 'RSNA_training'
watermarked_dataset_name = 'WaterMarked'

# Workspace handle plus its 'workspaceblobstore' datastore.
ws = Workspace.from_config()
default_ds = ws.datastores['workspaceblobstore']

In [None]:
# Resolve the DICOM source dataset by name; stop immediately if the workspace
# has no dataset registered under that name.
dicom_dataset = None

if dicom_dataset_name not in ws.datasets:
    raise RuntimeError('Dataset {} not found.'.format(dicom_dataset_name))

dicom_dataset = ws.datasets.get(dicom_dataset_name)
print('Found the dataset', dicom_dataset_name)

In [None]:
# Build the watermark text for every DICOM file in the source dataset.
# `watermarks` maps a file's base name (extension removed) to the string
# "<PatientSex> <ViewPosition>" taken from that file's header.
watermarks = {}
dicom_mount_point = None
watermarked_imge_folder = os.path.join(os.getenv("TEMP") if os.name=="nt" else "/tmp", watermarked_dataset_name)

with dicom_dataset.mount() as mount_context:
    # Mount the dataset and walk every file below the mount point.
    dicom_mount_point = mount_context.mount_point
    for root_dir, _, files in os.walk(dicom_mount_point):
        for file_name in files:
            # splitext strips the complete extension, so a '.dicm' file does not
            # end up with a trailing '.' in its key (a fixed [:-4] slice did).
            patient_id, extension = os.path.splitext(file_name)
            if extension.lower() not in ('.dcm', '.dicm'):
                continue
            dicom_full_path = os.path.join(root_dir, file_name)
            # dcmread replaces the deprecated read_file alias. Only two header
            # tags are used, so the pixel data is not loaded.
            dicom = pydicom.dcmread(dicom_full_path, stop_before_pixels=True)
            watermark_text = "{} {}".format(dicom['PatientSex'].value, dicom['ViewPosition'].value)
            watermarks[patient_id] = watermark_text

In [None]:
# Download the 'NoWindow' dataset into a folder of the same name under the
# platform temp directory (%TEMP% on Windows, /tmp elsewhere).
if os.name == "nt":
    png_temp_folder = os.path.join(os.getenv("TEMP"), 'NoWindow')
else:
    png_temp_folder = os.path.join("/tmp", 'NoWindow')

dataset = Dataset.get_by_name(ws, name='NoWindow')
dataset.download(target_path=png_temp_folder, overwrite=True)


In [None]:
# Stamp every downloaded PNG with its watermark text and write the result,
# under the same file name, into fusion_base_path.
fusion_base_path = os.path.join(os.getenv("TEMP") if os.name=="nt" else "/tmp", watermarked_dataset_name)
os.makedirs(fusion_base_path, exist_ok=True)

count = 0  # number of watermarked PNG files actually written
opacity = 50/100  # blend weight given to the layer that carries the text
for png_dir, _, files in os.walk(png_temp_folder):
    for f in files:
        if not f.endswith('.png'):
            continue
        patient_id = os.path.splitext(f)[0]
        # The text comes from the DICOM headers collected in `watermarks`;
        # a PNG without a matching entry is skipped rather than aborting the run.
        text = watermarks.get(patient_id)
        if text is None:
            print('No watermark text found for {}, skipping.'.format(f))
            continue
        # cv2.imread returns None (it does not raise) when a file cannot be decoded.
        img = cv2.imread(os.path.join(png_dir, f))
        if img is None:
            print('Could not read {}, skipping.'.format(f))
            continue
        # img.shape is (rows, cols, ...) while putText expects the origin as (x, y).
        height, width = img.shape[:2]
        overlay = img.copy()
        output = img.copy()
        cv2.putText(overlay, text, (int(width / 5), int(height - 20)), cv2.FONT_HERSHEY_SIMPLEX, 5.0, (255,255,255), 10)
        # Blend the text layer over the untouched copy, in place.
        cv2.addWeighted(overlay, opacity, output, 1 - opacity, 0, output)
        new_png_file = os.path.join(fusion_base_path, f)
        print('Creating {}'.format(new_png_file))
        # Count only files that were really written so the total below is accurate.
        if cv2.imwrite(new_png_file, output):
            count += 1
        else:
            print('Failed to write {}'.format(new_png_file))

print('Total {} PNG files generated under {}.'.format(count, watermarked_dataset_name))

In [None]:
# Show the output folder and whether it exists. Only a cell's last expression
# is displayed, so both values are combined into a single tuple.
fusion_base_path, os.path.isdir(fusion_base_path)

In [None]:
# Upload the watermarked PNG folder to the datastore, replacing any files
# already present at the target path.
default_ds.upload(
    src_dir=fusion_base_path,
    target_path='/fhl/datasets/' + watermarked_dataset_name + '/image/',
    overwrite=True,
)