In [1]:
import SimpleITK as sitk
import os, glob
from os.path import dirname, join
from pprint import pprint

import pydicom
from pydicom.data import get_testdata_files
from pydicom.filereader import read_dicomdir

In [2]:
def add_dot(s):
    """Return ``s`` with a period appended to its first space-delimited token.

    Only the text before the first space is affected; everything after it
    (including the separating space) is kept as-is. A string without any
    space simply gets a trailing period.
    """
    first_token, separator, remainder = s.partition(" ")
    return first_token + "." + separator + remainder

In [31]:
# Keyword table consumed by the match_rule_* functions, keyed by sequence name.
# For each sequence:
#   planes   - keywords looked up to report the acquisition plane
#   neg      - keywords whose presence rejects the series description
#   ops      - modifier keywords checked after the sequence name is found
#   contrast - keywords that switch the label to the CE / FLAIR variant
kw = dict(
    t1=dict(
        planes=['ax', 'tra'],
        neg=['sag', 'cor', 'mprage'],
        ops=['pre', 'post', 'pg'],
        contrast=['gad', 'gd'],
    ),
    t2=dict(
        planes=['ax', 'tra'],
        neg=['sag', 'cor', 'mprage'],
        ops=['blade', 'flair'],
        contrast=['dark fluid', 'dark', 'fluid', 'fs', 'dark-fluid'],
    ),
)

In [4]:
def create_perms(ls, addDot=False):
    """Expand keywords into their lower-case, upper-case and title-case variants.

    The result lists every keyword lower-cased, then every keyword upper-cased,
    then every keyword title-cased. When ``addDot`` is true the same three
    groups are appended again for the dotted form produced by ``add_dot``.
    Duplicates are not removed (e.g. ``'T1'`` appears twice for ``'t1'``).
    """
    case_methods = ("lower", "upper", "title")

    variants = [getattr(word, method)() for method in case_methods for word in ls]
    if addDot:
        variants += [
            getattr(add_dot(word), method)()
            for method in case_methods
            for word in ls
        ]
    return variants

In [5]:
# Quick check of the case variants generated for a single keyword; upper() and
# title() coincide for 't1', so 'T1' is listed twice.
create_perms(['t1'])

['t1', 'T1', 'T1']

In [38]:
def match_rule_t1(ser_desc, kw, reader=None, debug=False):
    """Classify a series description as ``'T1'`` or ``'T1CE'`` using keyword rules.

    Every keyword is matched as a case-insensitive substring of ``ser_desc``,
    so mixed-case spellings such as ``'MPRage'`` are recognised as well as the
    lower/upper/title-case forms.

    Parameters
    ----------
    ser_desc : str
        Series description text to classify.
    kw : dict
        Rule table; ``kw['t1']`` must provide the keyword lists ``'planes'``,
        ``'neg'``, ``'ops'`` and ``'contrast'``.
    reader : object, optional
        Object exposing ``GetMetaData(slice_index, key)``. When supplied and
        the description carries a ``'post'``/``'pg'`` op keyword, tag
        ``"0018|0010"`` must be present and non-empty.
    debug : bool
        When true, print every keyword hit.

    Returns
    -------
    str or dict
        ``'T1'`` or ``'T1CE'`` on success. ``{-1: <message>}`` when a negative
        keyword is present or the sequence name is missing.

    Raises
    ------
    AssertionError
        If ``reader`` is given, a ``'post'``/``'pg'`` keyword is found, and tag
        ``"0018|0010"`` is absent or empty.
    """
    import re  # local import keeps this cell independent of the imports cell

    def _first_hit(keywords):
        """Return (keyword, matched text) for the first keyword present, else None."""
        for keyword in keywords:
            found = re.search(re.escape(keyword), ser_desc, re.IGNORECASE)
            if found:
                return keyword, found.group(0)
        return None

    seq = 't1'
    rules = kw[seq]

    # Any negative keyword disqualifies the description outright.
    hit = _first_hit(rules['neg'])
    if hit is not None:
        if debug:
            print('Found negative keyword {} in {}'.format(hit[1], ser_desc))
        return {-1: 'invalid keyword'}

    # The plane is only reported in debug output; it does not affect the label.
    hit = _first_hit(rules['planes'])
    if hit is not None and debug:
        print('Found plane {} in {}'.format(hit[1], ser_desc))

    # The sequence name itself is mandatory.
    hit = _first_hit([seq])
    if hit is None:
        return {-1: 'Cannot find sequence name, cannot assume sequence safely'}
    if debug:
        print('Found seq {} in {}'.format(hit[1], ser_desc))
    main_seq_key_name = seq.upper()

    # Ops are tried in the order listed in the rule table; the first one
    # present decides. 'post' / 'pg' mark the series as contrast enhanced.
    f_op = False
    hit = _first_hit(rules['ops'])
    if hit is not None:
        if debug:
            print('Found op {} in {}'.format(hit[1], ser_desc))
        f_op = hit[0].lower()
        if f_op in ('post', 'pg'):
            if reader is not None:
                # GetMetaData raises RuntimeError for a missing tag; treat that
                # the same as an empty value so the intended message is raised.
                try:
                    agent = reader.GetMetaData(0, "0018|0010")
                except RuntimeError:
                    agent = ""
                if agent == "":
                    raise AssertionError(
                        "Something is wrong, no contrast agent defined in DICOM!"
                    )
            main_seq_key_name += "CE"

    # A contrast keyword without an explicit 'pre' also implies contrast enhancement.
    hit = _first_hit(rules['contrast'])
    if hit is not None:
        if debug:
            print('Found contrast {} in {}'.format(hit[1], ser_desc))
        if 'CE' not in main_seq_key_name and f_op != 'pre':
            main_seq_key_name += "CE"

    return main_seq_key_name

In [39]:
def match_rule_t2(ser_desc, kw, reader=None, debug=False):
    """Classify a series description as ``'T2'`` or ``'T2FLAIR'`` using keyword rules.

    Every keyword is matched as a case-insensitive substring of ``ser_desc``.
    The ``'flair'`` op is honoured wherever it appears, even when another op
    keyword (e.g. ``'blade'``) is also present in the description.

    Parameters
    ----------
    ser_desc : str
        Series description text to classify.
    kw : dict
        Rule table; ``kw['t2']`` must provide the keyword lists ``'planes'``,
        ``'neg'``, ``'ops'`` and ``'contrast'``.
    reader : object, optional
        Unused; accepted so the signature parallels ``match_rule_t1``.
    debug : bool
        When true, print every keyword hit.

    Returns
    -------
    str or dict
        ``'T2'`` or ``'T2FLAIR'`` on success. ``{-1: <message>}`` when a
        negative keyword is present or the sequence name is missing.
    """
    import re  # local import keeps this cell independent of the imports cell

    def _matched_text(keyword):
        """Return the text in ser_desc matching keyword (ignoring case), else None."""
        found = re.search(re.escape(keyword), ser_desc, re.IGNORECASE)
        return found.group(0) if found else None

    def _first_hit(keywords):
        """Return the matched text of the first keyword present, else None."""
        for keyword in keywords:
            text = _matched_text(keyword)
            if text is not None:
                return text
        return None

    seq = 't2'
    rules = kw[seq]

    # Any negative keyword disqualifies the description outright.
    text = _first_hit(rules['neg'])
    if text is not None:
        if debug:
            print('Found negative keyword {} in {}'.format(text, ser_desc))
        return {-1: 'invalid keyword'}

    # The plane is only reported in debug output; it does not affect the label.
    text = _first_hit(rules['planes'])
    if text is not None and debug:
        print('Found plane {} in {}'.format(text, ser_desc))

    # The sequence name itself is mandatory.
    text = _first_hit([seq])
    if text is None:
        return {-1: 'Cannot find sequence name, cannot assume sequence safely'}
    if debug:
        print('Found seq {} in {}'.format(text, ser_desc))
    main_seq_key_name = seq.upper()

    # Inspect every op keyword rather than stopping at the first hit, so that
    # a description containing both 'blade' and 'flair' is still labelled FLAIR.
    found_ops = set()
    for op_kw in rules['ops']:
        text = _matched_text(op_kw)
        if text is not None:
            if debug:
                print('Found op {} in {}'.format(text, ser_desc))
            found_ops.add(op_kw.lower())
    if 'flair' in found_ops:
        main_seq_key_name += "FLAIR"

    # A contrast keyword (e.g. 'dark fluid') also selects the FLAIR label.
    text = _first_hit(rules['contrast'])
    if text is not None:
        if debug:
            print('Found contrast {} in {}'.format(text, ser_desc))
        if 'FLAIR' not in main_seq_key_name:
            main_seq_key_name += "FLAIR"

    return main_seq_key_name

In [27]:
# Sample series descriptions used to exercise the matching rules below.
test_str = [
    "Ax. T1 SE Pre gad",
    "Ax. T1 SE Post gad",
    "Ax T1 Pre",
    "Ax T1",
    "ax t1",
    "AX. T1 SE PG",
    "Ax. T1 GAD",
    "SAG. T1 TURBO DARK FLUID",
    "Cor T1",
    "Ax pre gad",
    "ax T1 MPRAGE PG",
    "Ax T2",
    "Ax T2 DARK FS",
]
# Placeholder for negative examples; currently empty.
neg_test_str = list()

In [28]:
# Apply the T1 rule to each sample description and print the resulting label
# (or error dict), followed by a blank line.
for tstr in test_str:
    t1_result = match_rule_t1(tstr, kw, debug=False)
    print(tstr, " - ", t1_result, end="\n\n")

Ax. T1 SE Pre gad  -  T1FLAIR

Ax. T1 SE Post gad  -  T1FLAIR

Ax T1 Pre  -  T1

Ax T1  -  T1

ax t1  -  T1

AX. T1 SE PG  -  T1

Ax. T1 GAD  -  T1FLAIR

SAG. T1 TURBO DARK FLUID  -  {-1: 'invalid keyword'}

Cor T1  -  {-1: 'invalid keyword'}

Ax pre gad  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

ax T1 MPRAGE PG  -  {-1: 'invalid keyword'}

Ax T2  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

Ax T2 DARK FS  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}



In [35]:
# Apply the T2 rule to each sample description with debug tracing enabled and
# print the resulting label (or error dict), followed by a blank line.
for tstr in test_str:
    t2_result = match_rule_t2(tstr, kw, debug=True)
    print(tstr, " - ", t2_result, end="\n\n")

Found plane Ax in Ax. T1 SE Pre gad
Ax. T1 SE Pre gad  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

Found plane Ax in Ax. T1 SE Post gad
Ax. T1 SE Post gad  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

Found plane Ax in Ax T1 Pre
Ax T1 Pre  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

Found plane Ax in Ax T1
Ax T1  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

Found plane ax in ax t1
ax t1  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

Found plane AX in AX. T1 SE PG
AX. T1 SE PG  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

Found plane Ax in Ax. T1 GAD
Ax. T1 GAD  -  {-1: 'Cannot find sequence name, cannot assume sequence safely'}

Found negative keyword SAG in SAG. T1 TURBO DARK FLUID
SAG. T1 TURBO DARK FLUID  -  {-1: 'invalid keyword'}

Found negative keyword Cor in Cor T1
Cor T1  -  {-1: 'invalid keyword'}

Found plane Ax in Ax pre gad
Ax pre 

In [36]:
# Run the T2 rule with debug tracing on a T1 description; the cell displays the
# returned value.
match_rule_t2("AX. T1 SE POST GAD", kw, debug=True)

Found plane AX in AX. T1 SE POST GAD


{-1: 'Cannot find sequence name, cannot assume sequence safely'}

In [None]:
# Draft configuration for an alternative export directory. The following cell
# reassigns both `path` and `tags`, so these values only matter if that cell
# is skipped. The stray bare `pat` expression that ended this cell was removed:
# it raised NameError when the notebook was run top to bottom on a fresh kernel.
path = "/local-scratch2/VGH_Data/IMediaExport/DICOM"
# Human-readable label -> DICOM tag key in "group|element" form.
tags = {"Patient Name": "0010|0010",
        "Patient ID": "0010|0020",
        "Series Description": "0008|103e",
        "Modality": "0008|0060",
        "Contrast Agent": "0018|0010"
       }

In [75]:
# Walk the export tree (patient / study / series), classify every readable
# series by its description, resample it onto a fixed grid and keep it in
# `pat_data[patient_id][label]`.
#
# NOTE: this cell prints patient names and IDs; clear the cell output before
# sharing or committing the notebook.
path = "/local-scratch2/VGH_Data/Export_Anmol_May_21_10/IMediaExport/DICOM"
# Human-readable label -> DICOM tag key in "group|element" form.
tags = {"Patient Name": "0010|0010",
        "Patient ID": "0010|0020",
        "Series Description": "0008|103e",
        "Modality": "0008|0060",
        "Contrast Agent": "0018|0010"
       }
pat_data = {}
patients = glob.glob(os.path.join(path, "*"))
for pat in patients:
    for curr_std in glob.glob(os.path.join(pat, "*")):
        for curr_series in glob.glob(os.path.join(curr_std, "*")):
            series_entries = glob.glob(os.path.join(curr_series, "*"))
            if not series_entries:
                # Empty series folder: nothing to read (previously an IndexError).
                continue
            curr_series_obj = series_entries[0]

            reader = sitk.ImageSeriesReader()
            dicom_names = reader.GetGDCMSeriesFileNames(curr_series_obj)
            if not dicom_names:
                # No DICOM series found at this location.
                continue
            reader.SetFileNames(dicom_names)
            reader.MetaDataDictionaryArrayUpdateOn()
            reader.LoadPrivateTagsOn()
            try:
                image = reader.Execute()
            except RuntimeError as e:
                print(e)
                continue

            ser_desc = reader.GetMetaData(0, "0008|103e")
            pat_id = reader.GetMetaData(0, tags['Patient ID'])
            if pat_id not in pat_data:
                pat_data[pat_id] = {
                    'T1': None,
                    'T2': None,
                    'T1CE': None,
                    'T2FLAIR': None
                }

            # Try the T1 rule first and fall back to the T2 rule; a dict result
            # means the rule rejected the description.
            match_str = match_rule_t1(ser_desc, kw, reader)
            matched_by_t1 = not isinstance(match_str, dict)
            if not matched_by_t1:
                match_str = match_rule_t2(ser_desc, kw)

            pat_name = reader.GetMetaData(0, tags['Patient Name'])
            if isinstance(match_str, dict):
                print('Patient {} Cannot match = {}'.format(pat_name, ser_desc))
                continue
            if matched_by_t1:
                print("="*50)
            print("Matched Patient {}, SeriesDescription {} to {}".format(pat_name, ser_desc, match_str))

            for ct, t in tags.items():
                try:
                    print(ct, ": ", reader.GetMetaData(0, t))
                except RuntimeError:
                    # Raised when the tag is missing from the first slice.
                    print('Key {} not present'.format(t))

            # Resample onto a 240 x 240 x 155 grid with unit spacing, keeping the
            # original origin, direction and pixel type (B-spline interpolation,
            # default pixel value 0.0).
            image = sitk.Resample(image, [240, 240, 155],
                                  sitk.Transform(),
                                  sitk.sitkBSpline,
                                  image.GetOrigin(),
                                  [1,1,1],
                                  image.GetDirection(),
                                  0.0,
                                  image.GetPixelID())
            size = image.GetSize()
            # A later series with the same label replaces the earlier one.
            pat_data[pat_id][match_str] = image

            print( "Image size:", size[0], size[1], size[2] )
            print("-"*50)
            print("")

Patient KALIM^MOHAMMED^^^  Cannot match = localizer 
Patient KALIM^MOHAMMED^^^  Cannot match = SAG. T1 TURBO DARK FLUID
Patient KALIM^MOHAMMED^^^  Cannot match = AX. DWI_TRACEW
Patient KALIM^MOHAMMED^^^  Cannot match = AX. DWI_ADC 
Patient KALIM^MOHAMMED^^^  Cannot match = mIP_Images(SW)
Patient KALIM^MOHAMMED^^^  Cannot match = Mag_Images
Patient KALIM^MOHAMMED^^^  Cannot match = Pha_Images
Patient KALIM^MOHAMMED^^^  Cannot match = SWI_Images
Matched Patient KALIM^MOHAMMED^^^ , SeriesDescription TRA T2 BLADE to T2
Patient ID :  04191134
Series Description :  TRA T2 BLADE
Key 0018|0010 not present
Modality :  MR
Patient Name :  KALIM^MOHAMMED^^^ 
Image size: 240 240 155
--------------------------------------------------

Matched Patient KALIM^MOHAMMED^^^ , SeriesDescription Ax. T1 SE Pre gad  to T1
Patient ID :  04191134
Series Description :  Ax. T1 SE Pre gad 
Key 0018|0010 not present
Modality :  MR
Patient Name :  KALIM^MOHAMMED^^^ 
Image size: 240 240 155
--------------------------

Patient JAFAR^Mohammad^S^^ Cannot match = AX. DTI 20 DIRECTIONS_ADC 
Patient JAFAR^Mohammad^S^^ Cannot match = AX. DTI 20 DIRECTIONS_TRACEW
Patient JAFAR^Mohammad^S^^ Cannot match = AX. DTI 20 DIRECTIONS_FA
Patient JAFAR^Mohammad^S^^ Cannot match = AX. DTI 20 DIRECTIONS_ColFA 
Patient JAFAR^Mohammad^S^^ Cannot match = SAG. T1 TURBO DARK FLUID_FIL_1
Patient JAFAR^Mohammad^S^^ Cannot match = B1000 
Patient HWANG^Leona^S^^  Cannot match = LOC 
Patient HWANG^Leona^S^^  Cannot match = SAG T1 FLAIR
Patient HWANG^Leona^S^^  Cannot match = AX DWI_TRACEW 
Patient HWANG^Leona^S^^  Cannot match = AX DWI_ADC
Patient HWANG^Leona^S^^  Cannot match = mIP_Images(SW)
Patient HWANG^Leona^S^^  Cannot match = Mag_Images
Patient HWANG^Leona^S^^  Cannot match = Pha_Images
Patient HWANG^Leona^S^^  Cannot match = SWI_Images
Matched Patient HWANG^Leona^S^^ , SeriesDescription AX T1 PRE  to T1
Patient ID :  03206504
Series Description :  AX T1 PRE 
Key 0018|0010 not present
Modality :  MR
Patient Name :  HWANG^

Image size: 240 240 155
--------------------------------------------------

Patient HASHIMOTO^Masayuki^^^  Cannot match = mIP_Images(SW)
Patient HASHIMOTO^Masayuki^^^  Cannot match = Mag_Images
Patient HASHIMOTO^Masayuki^^^  Cannot match = Pha_Images
Patient HASHIMOTO^Masayuki^^^  Cannot match = SWI_Images
Patient HASHIMOTO^Masayuki^^^  Cannot match = AX. DTI 30 DIRECTIONS 
Patient HASHIMOTO^Masayuki^^^  Cannot match = AX. DTI 30 DIRECTIONS_ADC 
Patient HASHIMOTO^Masayuki^^^  Cannot match = AX. DTI 30 DIRECTIONS_TRACEW
Patient HASHIMOTO^Masayuki^^^  Cannot match = AX. DTI 30 DIRECTIONS_FA
Patient HASHIMOTO^Masayuki^^^  Cannot match = AX. DTI 30 DIRECTIONS_ColFA 
Matched Patient HASHIMOTO^Masayuki^^^ , SeriesDescription AX T1 SE pre gad to T1
Patient ID :  03614103
Series Description :  AX T1 SE pre gad
Key 0018|0010 not present
Modality :  MR
Patient Name :  HASHIMOTO^Masayuki^^^ 
Image size: 240 240 155
--------------------------------------------------

Patient HASHIMOTO^Masayuki^^^ 

In [76]:
# Print the size of every image that was collected; labels without an image
# are still None and are skipped explicitly (instead of via a bare `except`,
# which would also hide unrelated errors).
for k in pat_data.keys():
    for s in pat_data[k].keys():
        if pat_data[k][s] is None:
            continue
        print(k, ":", "{}: ".format(s), pat_data[k][s].GetSize())

04269360 : T2:  (240, 240, 155)
04269360 : T1:  (240, 240, 155)
04269360 : T1CE:  (240, 240, 155)
04269360 : T2FLAIR:  (240, 240, 155)
04191134 : T2:  (240, 240, 155)
04191134 : T1:  (240, 240, 155)
04191134 : T1CE:  (240, 240, 155)
04191134 : T2FLAIR:  (240, 240, 155)
03239421 : T2:  (240, 240, 155)
03239421 : T1:  (240, 240, 155)
03239421 : T1CE:  (240, 240, 155)
03239421 : T2FLAIR:  (240, 240, 155)
04065424 : T2:  (240, 240, 155)
04065424 : T1:  (240, 240, 155)
04065424 : T1CE:  (240, 240, 155)
04065424 : T2FLAIR:  (240, 240, 155)
9094519153 : T2:  (240, 240, 155)
9094519153 : T1:  (240, 240, 155)
9094519153 : T1CE:  (240, 240, 155)
03614103 : T2:  (240, 240, 155)
03614103 : T1:  (240, 240, 155)
03614103 : T1CE:  (240, 240, 155)
03614103 : T2FLAIR:  (240, 240, 155)
04264171 : T2:  (240, 240, 155)
04264171 : T1:  (240, 240, 155)
04264171 : T1CE:  (240, 240, 155)
04264171 : T2FLAIR:  (240, 240, 155)
04141753 : T2:  (240, 240, 155)
04141753 : T1:  (240, 240, 155)
04141753 : T1CE:  (240

In [78]:
# Output folder "Cleaned_Data" placed next to the DICOM directory, i.e. inside
# the parent of `path`. os.path.dirname also keeps the result absolute when
# `path` sits directly under the filesystem root.
save_path = os.path.join(os.path.dirname(path), "Cleaned_Data")

In [83]:
# Write every collected image to <save_path>/<patient id>/<label>/Image_<label>.nii.gz.
for k in pat_data.keys():
    pat_path = os.path.join(save_path, k)
    for s in pat_data[k].keys():
        if pat_data[k][s] is None:
            # No series was matched to this label for this patient.
            continue
        seq_path = os.path.join(pat_path, s)
        seq_name = os.path.join(seq_path, "Image_{}.nii.gz".format(s))
        # exist_ok avoids the separate existence check and its race window.
        os.makedirs(seq_path, exist_ok=True)

        sitk.WriteImage(pat_data[k][s], seq_name)