In [1]:
import os 
import glob
from pathlib import Path
import pandas as pd 
import numpy as np 
import json 

This code is using an older version of pydicom, which is no longer 
maintained as of Jan 2017.  You can access the new pydicom features and API 
by installing `pydicom` from PyPI.
See 'Transitioning to pydicom 1.x' section at pydicom.readthedocs.org 
for more information.



In this notebook, I want to find DTI/DWI sequences or processed ADC for the use in my analysis distinguishing the 3 WHO subtypes. 

In [2]:
PATH = Path('/working/lupolab/julia/tcia_analysis/datasets/')

## Finding diffusion in GBM 

First I'll create a dictionary of cohorts, patients + their sequences: 

In [3]:
GBM_exam_dict = {}

In [4]:
for i in os.listdir(PATH/'TCGA-GBM-nifti'): 
    GBM_exam_dict[i]= os.listdir(PATH/'TCGA-GBM-nifti'/i)

In [5]:
GBM_exam_dict

{'TCGA-06-0210': ['08-17-1995-19584'],
 'TCGA-08-0350': ['12-15-1998-MSTEALTH-91569'],
 'TCGA-14-0865': ['06-04-1995-MRI_BRAIN_W_WOUT_CONTRAST-31488',
  '09-29-1995-CT_HEAD_WO_CON-80840',
  '11-23-1995-MRI_BRAIN_W_WOUT_CONTRAST-06737',
  '09-23-1995-MRI_BRAIN_W_WOUT_CONTRAST-18668',
  '08-22-1995-CT_HEAD_WOUT_CO-80769',
  '08-20-1995-MRI_BRAIN_WITH_CONTRAST-48768',
  '03-20-1995-MRI_BRAIN_W_WOUT_CONTRAST-54713',
  '07-28-1995-MRI_BRAIN_W_WOUT_CONTRAST-19998',
  '02-01-1996-MRI_BRAIN_W_WOUT_CONTRAST-11648',
  '08-21-1995-MRI_BRAIN_W_WOUT_CONTRAST-38462',
  '10-02-1995-CT_HEAD_WITHOUT_CONTRA-91249',
  '08-21-1995-CT_HEAD_WITHOUT-34241',
  '08-26-1995-CT_HEAD_WAND_WOUT_CO-11371'],
 'TCGA-14-0812': ['03-17-1996-MRI_BRAIN_WITH_CONTRAST-97835',
  '03-19-1996-MRI_BRAIN_W_WOUT_CONTRAST-64984'],
 'TCGA-12-1099': ['04-03-2001-MRI_BRAIN_WENHANCEMENT-17608'],
 'TCGA-12-1093': ['09-20-1999-MRI_BRAIN_WWO_C-34026',
  '09-23-1999-BRAIN-80462',
  '09-26-1999-MRI_BRAIN_WENHANCEMENT-16868'],
 'TCGA-02-00

Now I'm looking at each exam and listing all the series in each so that I can note all those that have keywords adc, dti or dwi 

In [6]:
GBM_exam_series_dict = {}

In [7]:
for key, value in GBM_exam_dict.items(): 
    for exam in value: 
        new_key = key+'/'+exam
        GBM_exam_series_dict[new_key] = os.listdir(PATH/'TCGA-GBM-nifti'/key/exam)

In [8]:
GBM_exam_series_dict

{'TCGA-06-0210/08-17-1995-19584': ['1-SAG_LOCAL-29333.nii.gz',
  '3-AXIAL_T1-88865.nii.gz',
  '2-AXIAL_T2-83081.nii.gz',
  '4-AXIAL_T1_POST_GD-64939.nii.gz'],
 'TCGA-08-0350/12-15-1998-MSTEALTH-91569': ['15-3DFSE_AXIAL-04462.nii.gz',
  '1-TI_SAG_MR_BRAIN_WHOLE_HEAD-52088.nii.gz',
  '5-3DFSE_AXIAL-96333.nii.gz',
  '3-FMPSPGR_SAG-00857.nii.gz',
  '6-FLAIR_AXIAL-64078.nii.gz',
  '4-3DSPGR_AXIAL-74692.nii.gz',
  '14-3DSPGR_AXIAL-02601.nii.gz',
  '2-FSE_AX_MR_BRAIN-11136.nii.gz'],
 'TCGA-14-0865/06-04-1995-MRI_BRAIN_W_WOUT_CONTRAST-31488': ['401-BRAIN_WWO_T2_AX_GRASE-74294a_n.nii.gz',
  '101-BRAIN_WWO_mst_scout-71529.nii.gz',
  '701-BRAIN_WWO_T1_COR_POST-08207.nii.gz',
  '301-BRAIN_WWO_T1_AX_PRE-13193.nii.gz',
  '601-BRAIN_WWO_T1_AX_POST-95894.nii.gz',
  '401-BRAIN_WWO_T2_AX_GRASE-74294a_n_skull.nii.gz',
  '301-BRAIN_WWO_T1_AX_PRE-13193a_n_skull.nii.gz',
  '701-BRAIN_WWO_T1_COR_POST-08207a_n_mask.nii.gz',
  '201-BRAIN_WWO_T1_SAG_23SL-32502.nii.gz',
  '501-BRAIN_WWO_FLAIR_AX-67274a_n.nii.gz'

Now I'm going to iterate over the prior dictionary, check for DTI/DWI/ADC keywords in each exam, and return a dictionary of the exam + its corresponding DTI series name: 

In [12]:
GBM_has_DTI_dict = {}

In [13]:
for key, value in GBM_exam_series_dict.items(): 
    dti_search = [x for x in value if 'dti' in x.lower() or 'apparent' in x.lower() or 'dwi' in x.lower() or 'adc' in x.lower()]
    if len(dti_search) >= 1: 
        GBM_has_DTI_dict[key] = dti_search

In [14]:
GBM_has_DTI_dict

{'TCGA-14-0865/02-01-1996-MRI_BRAIN_W_WOUT_CONTRAST-11648': ['302-brain_with_SS_DWI-89989.nii.gz',
  '301-brain_with_SS_DWI-45511.nii.gz'],
 'TCGA-12-1093/09-20-1999-MRI_BRAIN_WWO_C-34026': ['3-AX_DWI-58185.nii.gz'],
 'TCGA-02-0070/07-10-2000-MRI_BRAIN_WWO_CONTRAST-48290': ['2-AX_DWI-74272.nii.gz',
  '800-DTI_N27-26260.nii.gz',
  '802-DTI_N27-19564.nii.gz',
  '8-DTI_N27-04978.nii.gz',
  '801-DTI_N27-40119.nii.gz'],
 'TCGA-06-0119/12-26-2003-90543': ['369-Apparent_Diffusion_Coefficien-89403.nii.gz'],
 'TCGA-06-0164/02-18-2003-93162': ['701-DTI_ASSET-03800.nii.gz',
  '702-DTI_ASSET-61521.nii.gz',
  '700-DTI_ASSET-46080.nii.gz'],
 'TCGA-06-0121/03-25-2003-38993': ['7-DTI_ASSET-00930.nii.gz'],
 'TCGA-06-0121/03-25-2003-95804': ['701-DTI_ASSET-07155.nii.gz',
  '702-DTI_ASSET-69796.nii.gz',
  '700-DTI_ASSET-17883.nii.gz'],
 'TCGA-14-3477/12-07-2002-MRI_Brain_w_wo_Contrast-16253': ['601-Apparent_Diffusion_Coefficient_mms-85380.nii.gz',
  '6-AX_DWI-68615.nii.gz'],
 'TCGA-14-3477/08-15-2002-MRI

In [15]:
len(GBM_has_DTI_dict.keys())

210

## Finding diffusion in LGG 


In [25]:
LGG_exam_dict = {}

In [26]:
for i in os.listdir(PATH/'TCGA-LGG-nifti'): 
    LGG_exam_dict[i]= os.listdir(PATH/'TCGA-LGG-nifti'/i)

In [27]:
LGG_exam_dict

{'TCGA-CS-5396': ['03-02-2001-MRI_BRAIN_CONTRAST_MRA_BRAIN-47605'],
 'TCGA-CS-6669': ['01-02-2002-MRI_BRAIN_COMBO-45633'],
 'TCGA-HT-7480': ['08-04-1999-MRI_BRAIN_FOR_STEREOTACTIC_WWO_CONTR-78527'],
 'TCGA-DU-5853': ['08-23-1995-MRI_BRAIN_WWO_CONTRAST-78436'],
 'TCGA-CS-6186': ['06-01-2000-MRI_BRAIN_per_R-97608',
  '06-01-2000-CHEST_AP_IP-85405'],
 'TCGA-FG-A6J1': ['04-23-2004-NR_MRI_BRAIN_WWO-08141'],
 'TCGA-HT-7684': ['08-16-1995-MRI_BRAIN_WWO_CONTRAST-29908'],
 'TCGA-HT-8010': ['07-28-1998-MRI_BRAIN_FOR_STEREOTACTIC_WWO_CONTR-05777'],
 'TCGA-DU-6404': ['06-29-1985-TUMOR_VOLUME-62392'],
 'TCGA-DU-5872': ['12-03-1995-MRI_BRAIN_WWO_CONTRAST-92374',
  '02-23-1995-MRI_BRAIN_WWO_CONTRAST-88714'],
 'TCGA-HT-A4DV': ['03-20-1999-MRI_BRAIN_FOR_STEREOTACTIC_WWO_CONTR-63394'],
 'TCGA-CS-6670': ['01-02-2002-MRI_BRAIN_COMBO_MRA-97978'],
 'TCGA-DU-7015': ['06-18-1989-MRI_BRAIN_WWO_CONTRAS-16160'],
 'TCGA-HT-7477': ['06-20-1998-MRI_BRAIN_FOR_STEREOTACTIC_WO_CONTRAST-55454'],
 'TCGA-FG-A4MT': ['02-1

Now I'm looking at each exam and listing all the series in each so that I can note all those that have keywords adc, dti or dwi 

In [28]:
LGG_exam_series_dict = {}

In [29]:
for key, value in LGG_exam_dict.items(): 
    for exam in value: 
        new_key = key+'/'+exam
        LGG_exam_series_dict[new_key] = os.listdir(PATH/'TCGA-LGG-nifti'/key/exam)

In [30]:
LGG_exam_series_dict

{'TCGA-CS-5396/03-02-2001-MRI_BRAIN_CONTRAST_MRA_BRAIN-47605': ['305-RT__COW-39894.nii.gz',
  '1101-T1_AX_SE-54608.nii.gz',
  '404-eB1000i-30080.nii.gz',
  '301-3D_MOTSA_sense-80049.nii.gz',
  '403-eBo-48146.nii.gz',
  '402-dADC_MAP-53538.nii.gz',
  '304-LT_COW-25699.nii.gz',
  '501-T1W__SE-58849.nii.gz',
  '701-T2WTSE-70754.nii.gz',
  '901-FFE_FOR_BLEED-67362.nii.gz',
  '306-POSTERIOR-47371.nii.gz',
  '601-T2_AX_FLAIR-98890.nii.gz',
  '1201-T1_COR_SE-91770.nii.gz',
  '302-3D_MOTSA_sense-85827.nii.gz',
  '1301-T1_SAG_SE-45175.nii.gz',
  '303-COW-38870.nii.gz',
  '1001-T1_AX__SE-75237.nii.gz',
  '801-PDWTSE-64364.nii.gz'],
 'TCGA-CS-6669/01-02-2002-MRI_BRAIN_COMBO-45633': ['401-T1W__SE-32144.nii.gz',
  '801-T1_AX__SE-67850.nii.gz',
  '301-DIFFUSION-92165.nii.gz',
  '601-FFE_FOR_BLEED-76102.nii.gz',
  '303-eBo-29641.nii.gz',
  '1101-T1_SAG_SE-52125.nii.gz',
  '501-T2_AX_FLAIR-40193.nii.gz',
  '304-eB1000i-11058.nii.gz',
  '901-T1_AX_SE-98053.nii.gz',
  '101-Survey-50946.nii.gz',
  '701-T

Now I'm going to iterate over the prior dictionary, check for DTI/DWI/ADC keywords in each exam, and return a dictionary of the exam + its corresponding DTI series name: 

In [31]:
LGG_has_DTI_dict = {}

In [32]:
for key, value in LGG_exam_series_dict.items(): 
    dti_search = [x for x in value if 'dti' in x.lower() or 'apparent' in x.lower() or 'dwi' in x.lower() or 'adc' in x.lower()]
    if len(dti_search) >= 1: 
        LGG_has_DTI_dict[key] = dti_search

In [33]:
LGG_has_DTI_dict

{'TCGA-CS-5396/03-02-2001-MRI_BRAIN_CONTRAST_MRA_BRAIN-47605': ['402-dADC_MAP-53538.nii.gz'],
 'TCGA-CS-6669/01-02-2002-MRI_BRAIN_COMBO-45633': ['302-dADC_MAP-67395.nii.gz'],
 'TCGA-DU-5853/08-23-1995-MRI_BRAIN_WWO_CONTRAST-78436': ['400-DTI_2.6mm_WH-19253.nii.gz',
  '4-DTI_2.6mm_WH-81596.nii.gz',
  '402-DTI_2.6mm_WH-69647.nii.gz',
  '401-DTI_2.6mm_WH-97906.nii.gz'],
 'TCGA-CS-6186/06-01-2000-MRI_BRAIN_per_R-97608': ['205-dadc-52244.nii.gz'],
 'TCGA-FG-A6J1/04-23-2004-NR_MRI_BRAIN_WWO-08141': ['12-ep2ddiffmddw20p2_wipADC-35015.nii.gz',
  '10-DiffusionADC-68763.nii.gz'],
 'TCGA-HT-7684/08-16-1995-MRI_BRAIN_WWO_CONTRAST-29908': ['901-PROP_eADC-45332.nii.gz',
  '900-PROP_ADC-52403.nii.gz',
  '500-Exponential_Apparent_Diffusion_Coefficient-02392.nii.gz',
  '9-Prop_DWI-88677.nii.gz',
  '501-Apparent_Diffusion_Coefficient_mms-66679.nii.gz'],
 'TCGA-DU-5872/12-03-1995-MRI_BRAIN_WWO_CONTRAST-92374': ['5-AXIAL_DTI-93315.nii.gz'],
 'TCGA-DU-5872/02-23-1995-MRI_BRAIN_WWO_CONTRAST-88714': ['6-AXIA

In [34]:
len(LGG_has_DTI_dict.keys())

116

In [35]:
total_DTI_dict = {'TCGA-LGG-nifti': LGG_has_DTI_dict, 
                 'TCGA-GBM-nifti': GBM_has_DTI_dict}

In [36]:
total_DTI_dict

{'TCGA-LGG-nifti': {'TCGA-CS-5396/03-02-2001-MRI_BRAIN_CONTRAST_MRA_BRAIN-47605': ['402-dADC_MAP-53538.nii.gz'],
  'TCGA-CS-6669/01-02-2002-MRI_BRAIN_COMBO-45633': ['302-dADC_MAP-67395.nii.gz'],
  'TCGA-DU-5853/08-23-1995-MRI_BRAIN_WWO_CONTRAST-78436': ['400-DTI_2.6mm_WH-19253.nii.gz',
   '4-DTI_2.6mm_WH-81596.nii.gz',
   '402-DTI_2.6mm_WH-69647.nii.gz',
   '401-DTI_2.6mm_WH-97906.nii.gz'],
  'TCGA-CS-6186/06-01-2000-MRI_BRAIN_per_R-97608': ['205-dadc-52244.nii.gz'],
  'TCGA-FG-A6J1/04-23-2004-NR_MRI_BRAIN_WWO-08141': ['12-ep2ddiffmddw20p2_wipADC-35015.nii.gz',
   '10-DiffusionADC-68763.nii.gz'],
  'TCGA-HT-7684/08-16-1995-MRI_BRAIN_WWO_CONTRAST-29908': ['901-PROP_eADC-45332.nii.gz',
   '900-PROP_ADC-52403.nii.gz',
   '500-Exponential_Apparent_Diffusion_Coefficient-02392.nii.gz',
   '9-Prop_DWI-88677.nii.gz',
   '501-Apparent_Diffusion_Coefficient_mms-66679.nii.gz'],
  'TCGA-DU-5872/12-03-1995-MRI_BRAIN_WWO_CONTRAST-92374': ['5-AXIAL_DTI-93315.nii.gz'],
  'TCGA-DU-5872/02-23-1995-MRI_B

#### We now have the list of patients & exams that contain DTI series - let's write that out & create JSON file that we can then read & find overlap b/w segmentation outputs & DTI series! 

In [38]:
os.getcwd()

'/working/lupolab/julia/tcia_analysis/code'

In [39]:
with open('dti_dict.json', 'w') as fp:
    json.dump(total_DTI_dict, fp)