In [None]:
import os
from glob import glob
from shutil import copyfile
import pydicom as dicom
import numpy as np
import pandas as pd

# Root working directory; every stage of the pipeline reads from / writes to
# a sub-directory of this path.
WORK_BASE_PATH = "/home/huray/data/test"

# Sub-directory names under WORK_BASE_PATH, in pipeline order:
#   original   -> source DICOM files that are globbed and copied
#   temp       -> working copies the anonymization step reads
#   anonymized -> anonymized output files (and the mapping CSV)
_ORIGINAL_DIR, _TEMPORARY_DIR, _ANONYMIZED_DIR = "original", "temp", "anonymized"

In [None]:
# Read the list of original DICOM files (recursive search below the
# "original" directory). The glob pattern is echoed so the searched location
# is visible in the cell output.
orig_glob_pattern = os.path.join(WORK_BASE_PATH, _ORIGINAL_DIR, '**/*.dcm')
print(orig_glob_pattern)

orig_dicoms = glob(orig_glob_pattern, recursive=True)
num_orig_dicom_file = len(orig_dicoms)

In [None]:
# Copy every original DICOM file into the temporary directory, mirroring the
# directory layout found under the "original" directory.
#
# renamed_file_list[i] receives the temp-directory path that orig_dicoms[i]
# was copied to; it is used later to build the original-vs-renamed mapping
# table. Slots stay '' for files that were not reached (e.g. if the loop is
# interrupted).
renamed_file_list = ['' for i in range(num_orig_dicom_file)]

orig_root = os.path.join(WORK_BASE_PATH, _ORIGINAL_DIR)
temp_root = os.path.join(WORK_BASE_PATH, _TEMPORARY_DIR)

# The original file names must not be reused as-is (per regulation, according
# to the original author), so the files are first copied to a temporary
# folder where they can be renamed, and anonymization then runs on the copies.
# NOTE(review): the actual renaming step is currently disabled, so files are
# copied with their original names unchanged -- confirm this is acceptable.
print('copy original dicom file to temporary folder.')
for index, dcm_original in enumerate(orig_dicoms):
    # Re-root the path by its position relative to the source directory.
    # A plain str.replace(_ORIGINAL_DIR, _TEMPORARY_DIR) would also rewrite
    # any sub-folder or file name that happens to contain "original".
    dcm = os.path.join(temp_root, os.path.relpath(dcm_original, orig_root))

    os.makedirs(os.path.dirname(dcm), exist_ok=True)  # create target folder if missing
    copyfile(dcm_original, dcm)

    # Remember where this file ended up.
    renamed_file_list[index] = dcm

    print('{}/{} completed.\r'.format(index+1, num_orig_dicom_file), end='')

In [None]:
# Read the list of DICOM files that were copied into the temporary directory;
# these copies are the input of the anonymization step.
temp_glob_pattern = os.path.join(WORK_BASE_PATH, _TEMPORARY_DIR, '**/*.dcm')
print(temp_glob_pattern)

temp_dicoms = glob(temp_glob_pattern, recursive=True)
num_dest_dicom_file = len(temp_dicoms)

# Show a small sample only, not the whole list.
print(temp_dicoms[:5])

In [None]:
# Anonymize the sensitive attributes of every DICOM file in the temporary
# directory and save the result under the separate "anonymized" directory,
# mirroring the temp directory layout.
print('***********************************')
print('*      Data Anonymizing           *')
print('***********************************')
print('{} dicom files.'.format(num_dest_dicom_file))
print("start.")

# (group, element) tags whose value is overwritten with the placeholder.
# NOTE(review): "UNKNOWN" is not a well-formed value for date (DA) or age (AS)
# attributes under the DICOM value-representation rules; pydicom may warn
# about it. The placeholder is kept unchanged -- confirm downstream tools
# accept it.
anonymize_tags = (
    (0x0008, 0x0020),  # study date
    (0x0008, 0x0023),  # content date
    (0x0010, 0x0010),  # patient's name
    (0x0010, 0x0020),  # patient's ID
    (0x0010, 0x0030),  # patient's birth date
    (0x0010, 0x0040),  # patient's sex
    (0x0010, 0x1010),  # patient's age
    (0x0038, 0x0300),  # current patient location
    (0x0008, 0x1070),  # operator's name
    (0x0008, 0x0021),  # series date
    (0x0008, 0x0022),  # acquisition date
    (0x0008, 0x0080),  # institution name
    (0x0008, 0x0081),  # institution address
)

temp_root = os.path.join(WORK_BASE_PATH, _TEMPORARY_DIR)
anonymized_root = os.path.join(WORK_BASE_PATH, _ANONYMIZED_DIR)

for index, dcm in enumerate(temp_dicoms):
    # dcmread is the current name of the deprecated read_file alias.
    df = dicom.dcmread(dcm)

    # Each tag is handled independently: a tag that is absent from this file
    # is skipped (there is nothing to anonymize), and its absence must not
    # prevent the remaining tags from being overwritten.
    for tag in anonymize_tags:
        if tag in df:
            df[tag].value = "UNKNOWN"

    # Re-root the path relative to the temp directory. A plain
    # str.replace(_TEMPORARY_DIR, _ANONYMIZED_DIR) would also rewrite any
    # folder or file name that merely contains the substring "temp".
    dcm = os.path.join(anonymized_root, os.path.relpath(dcm, temp_root))

    os.makedirs(os.path.dirname(dcm), exist_ok=True)  # create target folder if missing
    df.save_as(dcm)
    print('{}/{} completed.\r'.format(index+1, num_dest_dicom_file), end='')

In [None]:
# CREATE MAPPING TABLE
# Writes a header-less, index-less two-column CSV pairing each original DICOM
# path (orig_dicoms) with the path it was copied to (renamed_file_list, i.e.
# the path inside the temporary directory).
# NOTE(review): the CSV is written into the anonymized output directory and
# contains the original file paths -- confirm it is not shipped together with
# the anonymized data.
print("start making map info table.")

# The anonymized directory is otherwise only created while saving anonymized
# files, so make sure it exists even when no file was processed.
mapping_dir = os.path.join(WORK_BASE_PATH, _ANONYMIZED_DIR)
os.makedirs(mapping_dir, exist_ok=True)

# `columns=` pins the column order explicitly.
map_dicom_file_name_df = pd.DataFrame(
    {'orig_file_list': orig_dicoms, 'renamed_file_list': renamed_file_list},
    columns=['orig_file_list', 'renamed_file_list'],
)
map_dicom_file_name_df.to_csv(
    os.path.join(mapping_dir, 'dicom_data_mapping_table.csv'),
    header=False,
    index=False,
)
print("end.")

In [None]:
# DICOM FILE ERROR CHECKER (input listing)
# Gather every anonymized DICOM file (recursive search) so the next cell can
# verify that each one can still be read.
anonymized_glob_pattern = os.path.join(WORK_BASE_PATH, _ANONYMIZED_DIR, '**/*.dcm')
print(anonymized_glob_pattern)

anonymized_dicoms = glob(anonymized_glob_pattern, recursive=True)
num_anonymized_dicom_file = len(anonymized_dicoms)

# Show a small sample only, not the whole list.
print(anonymized_dicoms[:5])

In [None]:
# Verify that every anonymized DICOM file can be read and its pixel data
# decoded.
#   counter      -- number of files whose maximum pixel value is <= 256
#   errored_file -- paths of files that failed to load or decode
# NOTE(review): the 256 threshold presumably targets 8-bit images (whose
# maximum would be 255) -- confirm the intended bound.
counter = 0
errored_file = []
print('***********************************')
print('*      Check dicom file error     *')
print('***********************************')
print('{} dicom files.'.format(num_anonymized_dicom_file))
print("start.")
for index, dcm_path in enumerate(anonymized_dicoms):
    try:
        # Reading happens inside the try block so that an unreadable file is
        # recorded as an error instead of aborting the whole check.
        # dcmread is the current name of the deprecated read_file alias.
        dcm = dicom.dcmread(dcm_path)
        if np.amax(dcm.pixel_array) <= 256:
            counter += 1
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still
        # stops the loop.
        errored_file.append(dcm_path)

    # Progress is reported for failed files too, so the counter reaches the
    # total.
    print('{}/{} completed.\r'.format(index+1, num_anonymized_dicom_file), end='')
        

In [None]:
# Summary of the check: files with max pixel value <= 256, then the number of
# files that could not be read/decoded.
num_errored_file = len(errored_file)
print(counter, num_errored_file)