In [1]:
import os 
import glob
from pathlib import Path
import pandas as pd 
import numpy as np 
import json 

This notebook maps the exams that have both a segmentation and DTI (diffusion) series, so that we can process only the DTI that is relevant and then continue with the test set.

In [2]:
# Load the DTI lookup produced elsewhere; the file is read relative to the
# notebook's current working directory.
dti_json_path = Path('dti_dict.json')
with dti_json_path.open('r') as dti_fh:
    dti_dict = json.load(dti_fh)

In [3]:
# Inspect the loaded lookup: cohort -> '<patient_id>/<exam>' -> list of series file names.
dti_dict

{'TCGA-LGG-nifti': {'TCGA-CS-5396/03-02-2001-MRI_BRAIN_CONTRAST_MRA_BRAIN-47605': ['402-dADC_MAP-53538.nii.gz'],
  'TCGA-CS-6669/01-02-2002-MRI_BRAIN_COMBO-45633': ['302-dADC_MAP-67395.nii.gz'],
  'TCGA-DU-5853/08-23-1995-MRI_BRAIN_WWO_CONTRAST-78436': ['400-DTI_2.6mm_WH-19253.nii.gz',
   '4-DTI_2.6mm_WH-81596.nii.gz',
   '402-DTI_2.6mm_WH-69647.nii.gz',
   '401-DTI_2.6mm_WH-97906.nii.gz'],
  'TCGA-CS-6186/06-01-2000-MRI_BRAIN_per_R-97608': ['205-dadc-52244.nii.gz'],
  'TCGA-FG-A6J1/04-23-2004-NR_MRI_BRAIN_WWO-08141': ['12-ep2ddiffmddw20p2_wipADC-35015.nii.gz',
   '10-DiffusionADC-68763.nii.gz'],
  'TCGA-HT-7684/08-16-1995-MRI_BRAIN_WWO_CONTRAST-29908': ['901-PROP_eADC-45332.nii.gz',
   '900-PROP_ADC-52403.nii.gz',
   '500-Exponential_Apparent_Diffusion_Coefficient-02392.nii.gz',
   '9-Prop_DWI-88677.nii.gz',
   '501-Apparent_Diffusion_Coefficient_mms-66679.nii.gz'],
  'TCGA-DU-5872/12-03-1995-MRI_BRAIN_WWO_CONTRAST-92374': ['5-AXIAL_DTI-93315.nii.gz'],
  'TCGA-DU-5872/02-23-1995-MRI_B

In [4]:
# Load the segmentation lookup; the file is read relative to the notebook's
# current working directory.
seg_json_path = Path('segmentation_mapping.json')
with seg_json_path.open('r') as seg_fh:
    seg_map = json.load(seg_fh)

In [5]:
# Swap keys and values of seg_map so it can be looked up by its former values.
# If two keys of seg_map share a value, the later entry wins.
seg_map_reverse = dict(zip(seg_map.values(), seg_map.keys()))

In [6]:
# Inspect the inverted lookup (patient id -> segmentation name).
seg_map_reverse

{'TCGA-06-0241': '10-AX_T1_POST_GD_FLAIR-95256a_n',
 'TCGA-06-0171': '9-AX_T1_POST_GD_FLAIR-32350a_n',
 'TCGA-02-0087': '15-Ax_SE_T1_Post-42502a_n',
 'TCGA-02-0116': '6-AX_T1_POST-52648a_n',
 'TCGA-06-2570': '11-AX_T1_POST_GD_FLAIR-15818a_n',
 'TCGA-02-0037': '6-AX_T1_POST-64479a_n',
 'TCGA-DU-5851': '8-Ax_T1_2',
 'TCGA-06-0127': '8-AXIAL_T1_GD-71120a_n',
 'TCGA-06-0881': '10-AX_T1_POST_GD_FLAIR-80095a_n',
 'TCGA-06-0182': '13-AxT1-thin_for_surgery-12089a_n',
 'TCGA-08-0521': '6-3D_SPGR_AX-79240a_n',
 'TCGA-06-0138': '12-AX_T1_POST_GD_FLAIR-37089a_n',
 'TCGA-08-0389': '10-3d_T1_FSPGR__CONT-32567a_n',
 'TCGA-14-1395': '801-brain_with_AX_T1_POST-26488a_n',
 'TCGA-14-0813': '901-BRAIN_WWO_T1_POST-50983a_n',
 'TCGA-06-0644': '10-AX_T1_POST_GD_FLAIR-65253a_n',
 'TCGA-06-1802': '12-AX_T1_POST_GD_FLAIR-61610a_n',
 'TCGA-14-1459': '2-Ax_T1_SE-18545a_n',
 'TCGA-12-1093': '2-ax_t1_c-85616a_n',
 'TCGA-06-0240': '9-AxT1-thin_for_surgery-27598a_n',
 'TCGA-06-0176': '9-AXIAL_T1_GD-34814a_n',
 'TCGA-

In [7]:
# Base directory of the datasets; the mapping loop joins cohort, patient and exam names onto it.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
root_dir = Path('/working/lupolab/julia/tcia_analysis/datasets')

In [8]:
# Cohort directory names.
# NOTE(review): not referenced by the cells visible here (the mapping loop
# iterates dti_dict.keys() instead) — confirm whether this is still needed.
dir_options = ['TCGA-GBM-nifti', 'TCGA-LGG-nifti']

In [9]:
# Top-level keys of the DTI lookup; these are the cohort names the mapping loop iterates over.
dti_dict.keys()

dict_keys(['TCGA-LGG-nifti', 'TCGA-GBM-nifti'])

In [10]:
# Build one row for every exam that contains both diffusion series and the
# patient's segmentation volume.
#
# Rows are collected in a plain list and converted to a DataFrame once at the
# end: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside a loop re-copies the whole frame on every iteration.
map_dti_rows = []

for dir_option, exams in dti_dict.items():  # dir_option is 'TCGA-LGG-nifti' or 'TCGA-GBM-nifti'
    # The cohort directory listing is the same for every exam, so read it once.
    # Raises FileNotFoundError if the cohort directory is missing.
    patients_on_disk = set(os.listdir(root_dir / dir_option))

    for key, value in exams.items():  # key: '<patient_id>/<exam>', value: list of series file names
        key_parts = key.split('/')
        patient_id = key_parts[0]
        exam_to_look = key_parts[1]

        # Skip patients that have no directory on disk or no known segmentation.
        if patient_id not in patients_on_disk:
            continue
        if patient_id not in seg_map_reverse:
            continue

        # Keep the exam only if the segmentation file sits in this exam's directory.
        segmentation_name = seg_map_reverse[patient_id]
        exam_files = os.listdir(root_dir / dir_option / patient_id / exam_to_look)
        if segmentation_name + '.nii.gz' not in exam_files:
            continue

        map_dti_line = {'patient_id': patient_id,
                        'exam': exam_to_look,
                        'segmentation_name': segmentation_name,
                        'dti': value,
                        'cohort': dir_option}
        print(map_dti_line)
        map_dti_rows.append(map_dti_line)

# Explicit column order keeps the layout of the previously displayed/saved
# table and gives an empty result the expected columns as well.
map_dti = pd.DataFrame(
    map_dti_rows,
    columns=['cohort', 'dti', 'exam', 'patient_id', 'segmentation_name'],
)



{'patient_id': 'TCGA-DU-5853', 'exam': '08-23-1995-MRI_BRAIN_WWO_CONTRAST-78436', 'segmentation_name': '11-AX_T1_POST_GD_FLAIR-81300a_n', 'dti': ['400-DTI_2.6mm_WH-19253.nii.gz', '4-DTI_2.6mm_WH-81596.nii.gz', '402-DTI_2.6mm_WH-69647.nii.gz', '401-DTI_2.6mm_WH-97906.nii.gz'], 'cohort': 'TCGA-LGG-nifti'}
{'patient_id': 'TCGA-CS-6186', 'exam': '06-01-2000-MRI_BRAIN_per_R-97608', 'segmentation_name': '1001-T1_SE_POST-94212a_n', 'dti': ['205-dadc-52244.nii.gz'], 'cohort': 'TCGA-LGG-nifti'}
{'patient_id': 'TCGA-FG-A6J1', 'exam': '04-23-2004-NR_MRI_BRAIN_WWO-08141', 'segmentation_name': '20-T1fl2dAx__Gd-43129a_n', 'dti': ['12-ep2ddiffmddw20p2_wipADC-35015.nii.gz', '10-DiffusionADC-68763.nii.gz'], 'cohort': 'TCGA-LGG-nifti'}
{'patient_id': 'TCGA-DU-7015', 'exam': '06-18-1989-MRI_BRAIN_WWO_CONTRAS-16160', 'segmentation_name': '10-AX_T1_POST_GD_FLAIR-19430a_n', 'dti': ['601-DTI_ASSET-84919.nii.gz', '600-DTI_ASSET-07696.nii.gz', '6-DTI_ASSET-60836.nii.gz', '602-DTI_ASSET-87691.nii.gz'], 'cohort'

{'patient_id': 'TCGA-DU-7301', 'exam': '11-12-1991-MRI_BRAIN_WWO_CONTRAST-22050', 'segmentation_name': '10-AX_T1_POST_GD_FLAIR-56964a_n', 'dti': ['700-DTI_ASSET-93395.nii.gz', '701-DTI_ASSET-86571.nii.gz', '702-DTI_ASSET-89416.nii.gz', '7-DTI_ASSET-15834.nii.gz'], 'cohort': 'TCGA-LGG-nifti'}
{'patient_id': 'TCGA-CS-6667', 'exam': '11-05-2001-MRI_BRAIN_WITH-96919', 'segmentation_name': '9-Ax_T1_FS_BRAIN_POST-73660a_n', 'dti': ['400-Apparent_Diffusion_Coefficient_mms-64318.nii.gz'], 'cohort': 'TCGA-LGG-nifti'}
{'patient_id': 'TCGA-HT-7688', 'exam': '06-07-1996-MRI_BRAIN_WWO_CONTRAST-09340', 'segmentation_name': '12-AX_T1C-63038a_n', 'dti': ['401-Apparent_Diffusion_Coefficient_mms-43198.nii.gz', '400-Exponential_Apparent_Diffusion_Coefficient-94908.nii.gz'], 'cohort': 'TCGA-LGG-nifti'}
{'patient_id': 'TCGA-HT-8111', 'exam': '03-30-1998-MRI_BRAIN_WWO_CONTRAST-68651', 'segmentation_name': '11-AX_3D_SPGRC-47886a_n', 'dti': ['301-Apparent_Diffusion_Coefficient_mms-78619.nii.gz', '300-Exponent

{'patient_id': 'TCGA-HT-8106', 'exam': '07-27-1997-MRI_BRAIN_FOR_STEREOTACTIC_WWO_CONTR-67522', 'segmentation_name': '12-CAX_3D_SPGR-46928a_n', 'dti': ['301-Apparent_Diffusion_Coefficient_mms-99520.nii.gz', '901-PROP_eADC-05942.nii.gz', '900-PROP_ADC-67864.nii.gz', '9-8hrbrainAx2DDWIProp-25222.nii.gz', '300-Exponential_Apparent_Diffusion_Coefficient-67437.nii.gz'], 'cohort': 'TCGA-LGG-nifti'}
{'patient_id': 'TCGA-HT-7475', 'exam': '09-18-1997-MRI_BRAIN_FOR_STEREOTACTIC_WWO_CONTR-31460', 'segmentation_name': '11-AX_3D_SPGRC-33028a_n', 'dti': ['800-PROP_ADC-69961.nii.gz', '401-Apparent_Diffusion_Coefficient_mms-85317.nii.gz', '400-Exponential_Apparent_Diffusion_Coefficient-00063.nii.gz', '8-8hrbrainOb2DDWIProp-63835.nii.gz', '801-PROP_eADC-56548.nii.gz'], 'cohort': 'TCGA-LGG-nifti'}
{'patient_id': 'TCGA-DU-8166', 'exam': '03-22-1997-MRI_BRAIN_WWO_CONTRAST-94974', 'segmentation_name': '12-AX_T1_POST_GD_FLAIR-47718a_n', 'dti': ['4-DTI_2.6mm_WH-31825.nii.gz', '401-DTI_2.6mm_WH-24627.nii.gz'

{'patient_id': 'TCGA-06-0143', 'exam': '01-15-2006-43320', 'segmentation_name': '8-GD_AX_T1-63316a_n', 'dti': ['363-DTI_1000_24_FOV-03737.nii.gz', '333-DTI_1000_24_FOV-91561.nii.gz', '3-DTI_1000_24_FOV-56310.nii.gz', '303-DTI_1000_24_FOV-32080.nii.gz'], 'cohort': 'TCGA-GBM-nifti'}
{'patient_id': 'TCGA-06-0240', 'exam': '07-02-2005-04728', 'segmentation_name': '9-AxT1-thin_for_surgery-27598a_n', 'dti': ['474-Apparent_Diffusion_Coefficien-97986.nii.gz'], 'cohort': 'TCGA-GBM-nifti'}
{'patient_id': 'TCGA-14-1401', 'exam': '04-18-1997-MRI_BRAIN_W_WOUT_CONTRAST-27628', 'segmentation_name': '1201-BRAIN_WWO_T1_POST-04167a_n', 'dti': ['1003-BRAIN_WWO_DTImedium-60450.nii.gz', '901-BRAIN_WWO_SS_DWI-11037.nii.gz', '902-BRAIN_WWO_SS_DWI-67966.nii.gz'], 'cohort': 'TCGA-GBM-nifti'}
{'patient_id': 'TCGA-12-0829', 'exam': '06-02-1999-BRAIN-09460', 'segmentation_name': '18-T1_AX_POST-73832a_n', 'dti': ['7-ep2ddiff3scantraceADC-46169.nii.gz'], 'cohort': 'TCGA-GBM-nifti'}
{'patient_id': 'TCGA-06-0648', 'e

{'patient_id': 'TCGA-06-1801', 'exam': '10-22-2007-23597', 'segmentation_name': '8-AX_T1_FLAIR_C-45978a_n', 'dti': ['400-Apparent_Diffusion_Coefficient_mms-23032.nii.gz', '4-Ax_DWI-98057.nii.gz'], 'cohort': 'TCGA-GBM-nifti'}
{'patient_id': 'TCGA-06-2570', 'exam': '07-26-2007-MRI_BRAIN_WWO_CONTRAST-42847', 'segmentation_name': '11-AX_T1_POST_GD_FLAIR-15818a_n', 'dti': ['501-DTI_2.6mm_WH__COVERED_CUT_AIR-87548.nii.gz', '502-DTI_2.6mm_WH__COVERED_CUT_AIR-65637.nii.gz', '500-DTI_2.6mm_WH__COVERED_CUT_AIR-34767.nii.gz'], 'cohort': 'TCGA-GBM-nifti'}
{'patient_id': 'TCGA-02-0059', 'exam': '08-29-1999-MRI_BRAIN_WWO_CONTR-16610', 'segmentation_name': '14-Ax_SE_T1_Post-47437a_n', 'dti': ['10-DWI-96881.nii.gz'], 'cohort': 'TCGA-GBM-nifti'}
{'patient_id': 'TCGA-06-0188', 'exam': '08-08-2005-27435', 'segmentation_name': '8-AXIAL_T1_GD-58362a_n', 'dti': ['5-AXIAL_DTI-89099.nii.gz'], 'cohort': 'TCGA-GBM-nifti'}
{'patient_id': 'TCGA-14-1453', 'exam': '04-10-1998-MRI_BRAIN_WWO_CONT-55756', 'segmentatio

In [11]:
# (rows, columns) of the mapping: one row per matched exam.
map_dti.shape

(138, 5)

In [12]:
# Display the resulting exam-to-segmentation/DTI mapping table.
map_dti

Unnamed: 0,cohort,dti,exam,patient_id,segmentation_name
0,TCGA-LGG-nifti,"[400-DTI_2.6mm_WH-19253.nii.gz, 4-DTI_2.6mm_WH...",08-23-1995-MRI_BRAIN_WWO_CONTRAST-78436,TCGA-DU-5853,11-AX_T1_POST_GD_FLAIR-81300a_n
1,TCGA-LGG-nifti,[205-dadc-52244.nii.gz],06-01-2000-MRI_BRAIN_per_R-97608,TCGA-CS-6186,1001-T1_SE_POST-94212a_n
2,TCGA-LGG-nifti,"[12-ep2ddiffmddw20p2_wipADC-35015.nii.gz, 10-D...",04-23-2004-NR_MRI_BRAIN_WWO-08141,TCGA-FG-A6J1,20-T1fl2dAx__Gd-43129a_n
3,TCGA-LGG-nifti,"[601-DTI_ASSET-84919.nii.gz, 600-DTI_ASSET-076...",06-18-1989-MRI_BRAIN_WWO_CONTRAS-16160,TCGA-DU-7015,10-AX_T1_POST_GD_FLAIR-19430a_n
4,TCGA-LGG-nifti,"[501-DTI_2.6mm_WH-21034.nii.gz, 5-DTI_2.6mm_WH...",01-11-1997-MRI_BRAIN_WWO_CONTRAST-98366,TCGA-DU-8164,10-AX_T1_POST_GD_FLAIR-02819a_n
5,TCGA-LGG-nifti,"[802-DTI_2.6mm_WH-35614.nii.gz, 600-Exponentia...",07-26-1997-MRI_BRAIN_WWO_CONTRAST-17402,TCGA-DU-A5TR,10-AX_T1_POST_GD_FLAIR-23755a_n
6,TCGA-LGG-nifti,"[900-PROP_ADC-74032.nii.gz, 401-Apparent_Diffu...",10-28-1995-MRI_BRAIN_FOR_STEREOTACTIC_WWO_CONT...,TCGA-HT-7604,12-CAX_3D_SPGR-37353a_n
7,TCGA-LGG-nifti,"[11-Axial_DWI-11211.nii.gz, 13-Axial_DWI-76635...",03-12-1998-MRI_BRAIN_W_AND_WO_CONTR-77342,TCGA-DU-A5TU,15-Axial_T1_FSE_Post_Gad-17816a_n
8,TCGA-LGG-nifti,"[6-DTI_2.6mm_WH-60904.nii.gz, 601-DTI_2.6mm_WH...",11-19-1996-MRI_BRAIN_WWO_CONTRAST-27038,TCGA-DU-8163,12-AX_T1_POST_GD_FLAIR-07772a_n
9,TCGA-LGG-nifti,"[1100-DTI_ASSET-99068.nii.gz, 1101-DTI_ASSET-2...",12-20-1991-MRI_BRAIN_WWO_CONTRAST-98852,TCGA-DU-7018,13-AX_T1_POST_GD_FLAIR-47091a_n


In [16]:
# Persist the mapping as CSV. `index` is left at its default, so the row index
# is written as the first, unnamed column.
# NOTE(review): the execution count jumps from 12 to 16 — confirm that no
# removed cell modified map_dti before this export.
map_dti.to_csv('/working/lupolab/julia/tcia_analysis/code/dti_seg_map.csv')