In [327]:
import re
import subprocess
from pathlib import Path

import dicom2nifti
import pydicom
import shutil
import tempfile
import nilearn.image
import numpy as npy
import pandas as pd
import nibabel as nib
from p_tqdm import p_map
from hashlib import sha3_256

In [328]:
# parameters
project = 'TUE1014TSHOD'
in_dir = Path(f'/media/nora/import_move/')/project
out_dir = Path(f'/media/dataheppt1/raheppt1/dcm_out')/project
out_dir.mkdir(exist_ok=True)
ano_dir = out_dir/'anonymization'/project
ano_dir.mkdir(exist_ok=True, parents=True)
nora_dir = Path('/media/nora/imgdata')/project
num_cpus = 7

### DICOM to nifti

#### Hashing

In [329]:
def name_to_hash(patient, study):
    hash_name_long = sha3_256(patient.encode("utf-8")).hexdigest()
    hash_seq_long = sha3_256(study.encode("utf-8")).hexdigest()
    hash_name = hash_name_long[:7]
    seq_name = hash_seq_long[:3]
    complete_hash = hash_name + seq_name
    return complete_hash

#### PET SUV conversion

In [330]:
def conv_time(time_str):
    return (float(time_str[:2]) * 3600 + float(time_str[2:4]) * 60 + float(time_str[4:13]))

def calculate_suv_factor(dcm_path):
    ds = pydicom.dcmread(str(dcm_path))
    total_dose = ds.RadiopharmaceuticalInformationSequence[0].RadionuclideTotalDose
    start_time = ds.RadiopharmaceuticalInformationSequence[0].RadiopharmaceuticalStartTime
    half_life = ds.RadiopharmaceuticalInformationSequence[0].RadionuclideHalfLife
    acq_time = ds.AcquisitionTime
    weight = ds.PatientWeight
    time_diff = conv_time(acq_time) - conv_time(start_time)
    act_dose = total_dose * 0.5 ** (time_diff / half_life)
    suv_factor = 1000 * weight / act_dose
    return suv_factor

def convert_pet(pet, suv_factor=1.0):
    affine = pet.affine
    pet_data = pet.get_fdata()
    pet_suv_data = (pet_data*suv_factor).astype(np.float32)
    pet_suv = nib.Nifti1Image(pet_suv_data, affine)
    return pet_suv 

#### Dicom Preprocessing

In [335]:
def process_directory(dcm_dir):
    info = {}

    # parsing
    pet_dcm_header = next(dcm_dir.glob('**/P*.dcm'))
    ct_dcm_header = next(dcm_dir.glob('**/C*.dcm'))
    ds = pydicom.dcmread(str(pet_dcm_header))

    # create directories
    subject_hash = name_to_hash(ds.PatientName, ds.StudyID)
    subject_dir = out_dir/subject_hash
    subject_dir.mkdir(exist_ok=True)
    shutil.copy(pet_dcm_header, ano_dir/f'{subject_hash}_pet.dcm')
    shutil.copy(ct_dcm_header, ano_dir/f'{subject_hash}_ct.dcm')
    temp = tempfile.TemporaryDirectory()
    temp_path = Path(temp.name)

    # get subject info
    suv_factor = calculate_suv_factor(pet_dcm_header)
    info['Project'] = [project]
    info['Hash'] = [subject_hash]
    info['Name'] = [str(ds.PatientName)]
    info['PatientID'] = [str(ds.PatientID)]
    info['StudyID']= [str(ds.StudyID)]
    info['BirthDate'] = [str(ds.PatientBirthDate)]
    info['StudyDate'] = [str(ds.StudyDate)]
    info['AccessionNumber'] = [str(ds.AccessionNumber)]
    info['Age'] = [int(ds.PatientAge[:-1])]
    info['SUVfactor'] = [suv_factor]

    # convert dicom to nifti
    dicom2nifti.convert_directory(dcm_dir, temp_path, compression=True, reorient=True)

    # store niftis
    ct_path = next(temp_path.glob('*gk*.nii.gz'))
    pet_path = next(temp_path.glob('*pet*.nii.gz'))
    pet = nib.load(pet_path)
    ct = nib.load(ct_path)

    pet_suv = convert_pet(pet, suv_factor)
    nib.save(pet, subject_dir/f'{subject_hash}_petcorr.nii.gz')
    nib.save(pet_suv, subject_dir/f'{subject_hash}_petsuv.nii.gz')
    nib.save(ct, subject_dir/f'{subject_hash}_ct.nii.gz')

    if nora_dir:
        name = info['Name'][0].split('^')
        nora_subject_dir = nora_dir/f'{name[0]}_{name[1]}_{info["PatientID"][0]}/{info["StudyID"][0]}_{info["StudyDate"][0]}'
        nora_subject_dir.mkdir(exist_ok=True, parents=True)
        nib.save(pet_suv, nora_subject_dir/f'{subject_hash}_petsuv.nii.gz')
        nib.save(ct, nora_subject_dir/f'{subject_hash}_ct.nii.gz')
        subprocess.run(['nora', '-p', project, '--add', str(nora_subject_dir)])

    temp.cleanup()
    return info

In [336]:
# multiprocessing
res = p_map(process_directory, list(in_dir.glob('*')), num_cpus=num_cpus)

HBox(children=(FloatProgress(value=0.0, max=97.0), HTML(value='')))




In [337]:
res

[{'Project': ['TUE1014TSHOD'],
  'Hash': ['467e9b4ef9'],
  'Name': ['ATASEVEN^ZIYA'],
  'PatientID': ['0005494023'],
  'StudyID': ['0042855736'],
  'BirthDate': ['19680615'],
  'StudyDate': ['20141023'],
  'AccessionNumber': ['0042855736'],
  'Age': [46],
  'SUVfactor': [0.0004068024105894509]},
 {'Project': ['TUE1014TSHOD'],
  'Hash': ['8b15c3176f'],
  'Name': ['BAIER^CHRISTA'],
  'PatientID': ['0005538427'],
  'StudyID': ['0048501112'],
  'BirthDate': ['19410307'],
  'StudyDate': ['20160616'],
  'AccessionNumber': ['0048501112'],
  'Age': [75],
  'SUVfactor': [0.00036641044752852033]},
 {'Project': ['TUE1014TSHOD'],
  'Hash': ['c80a3ab799'],
  'Name': ['BÖHM^ELENA'],
  'PatientID': ['0005165791'],
  'StudyID': ['0053504524'],
  'BirthDate': ['19720827'],
  'StudyDate': ['20171122'],
  'AccessionNumber': ['0053504524'],
  'Age': [45],
  'SUVfactor': [0.0003349213373034523]},
 {'Project': ['TUE1014TSHOD'],
  'Hash': ['e9aa6fda8c'],
  'Name': ['BOUGIOUKLIS^ATHANASIOS'],
  'PatientID': [

In [338]:
df = pd.concat([pd.DataFrame.from_dict(x) for x in res])
df.to_csv(ano_dir.parent/f'{project}.csv')
df

Unnamed: 0,Project,Hash,Name,PatientID,StudyID,BirthDate,StudyDate,AccessionNumber,Age,SUVfactor
0,TUE1014TSHOD,467e9b4ef9,ATASEVEN^ZIYA,0005494023,0042855736,19680615,20141023,0042855736,46,0.000407
0,TUE1014TSHOD,8b15c3176f,BAIER^CHRISTA,0005538427,0048501112,19410307,20160616,0048501112,75,0.000366
0,TUE1014TSHOD,c80a3ab799,BÖHM^ELENA,0005165791,0053504524,19720827,20171122,0053504524,45,0.000335
0,TUE1014TSHOD,e9aa6fda8c,BOUGIOUKLIS^ATHANASIOS,0005794587,0055112567,19920416,20180514,0055112567,26,0.000369
0,TUE1014TSHOD,a04d3f6013,BRIOL^JUTTA,0005017119,0060432302,19620421,20191125,0060432302,57,0.000259
...,...,...,...,...,...,...,...,...,...,...
0,TUE1014TSHOD,a0d2a92f92,YARANERI^HÜSEYIN,0005606545,0047149032,19550820,20160122,0047149032,60,0.000354
0,TUE1014TSHOD,58ad4c69c6,YASAR^NECAT,0005910026,0058884175,19640804,20190524,0058884175,54,0.000379
0,TUE1014TSHOD,a92e725753,ZIEGER^PATRICK,0005786772,0054196541,19730412,20180119,0054196541,44,0.000368
0,TUE1014TSHOD,e7dafecb95,ZIMMERMANN^JÖRG,0005945161,0060172210,19410822,20191017,0060172210,78,0.000370
