# Create JSON data list 

In this notebook, I create a JSON datalist that follows the structure of the example config datalist loaded below:

In [1]:
import os
import glob
import json
from pathlib import Path
import pandas as pd
import subprocess as sub 
from subprocess import Popen, PIPE, STDOUT

# Example file: 

In [2]:
# Load the existing example datalist so its structure can be inspected and mimicked.
example_config_path = Path('/working/lupolab/julia/tcia_analysis/nvidia/config/config_fla-t2all_datalist.json')
with example_config_path.open() as config_file:
    example_list = json.loads(config_file.read())

In [3]:
# Show the loaded example datalist to inspect its structure.
example_list

{'training': [],
 'validation': [{'image': 'nifti_march_2020/images/b4819/t13526/t13526_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4746/t13284/t13284_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4789/t13431/t13431_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4005/t13285/t13285_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4135/t11807/t11807_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4741/t13277/t13277_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4791/t13436/t13436_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4787/t13426/t13426_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4829/t13572/t13572_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4740/t13275/t13275_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4794/t13444/t13444_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4826/t13566/t13566_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4837/t13602/t13602_fla.nii.gz'},
  {'image': 'nifti_march_2020/images/b4590/t13

In [4]:
## Need to mimic the structure of this 'validation' list 
# example_list['validation']

In [5]:
## It is a list of dictionaries that have a 'label' key and an 'image' key; the 'label' key will be left blank.
## The 'image' key will be a list of strings describing the NIfTI inputs.
## They must be in the following order: 1) t1c 2) t1 3) t2 4) t2flair

## Use the results from labeling (which we did during registration).

In [6]:
# Root directory of the datasets; the label CSV and the per-cohort NIfTI
# folders used below are resolved relative to it.
PATH = Path('/working/lupolab/julia/tcia_analysis/datasets/')

In [7]:
# Read the label table (first CSV column is the stored index), then replace
# that index with a fresh 0..n-1 range.
full_label_df = (
    pd.read_csv(PATH / 'one_exam_per_patient_with_labels.csv', index_col=0)
    .reset_index(drop=True)
)

In [8]:
# Dimensions (rows, columns) of the full label table.
full_label_df.shape

(3630, 8)

In [9]:
# Project root directory as a plain string.
# NOTE(review): not referenced by any other cell visible here -- confirm it is still needed.
root_dir = '/working/lupolab/julia/tcia_analysis'

In [10]:
# A single patient identifier.
# NOTE(review): not referenced by any other cell visible here -- confirm it is still needed.
patient_id = "TCGA-06-1801"

In [11]:
# First value of the `image` column (label 0 is the first row because the
# index was reset when the table was loaded).
full_label_df.image[0]

'9-AxT1-thin_for_surgery-27598'

In [12]:
# Preview the first rows of the label table.
full_label_df.head()

Unnamed: 0,patient_id,cohort,patient_exam,image,contrast,contrast_label,plane,exam_eligible
0,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,9-AxT1-thin_for_surgery-27598,1,T1C,ax,1
1,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,21-nordicICE_HFH_-_rBF_map_-Leakage_corrected-...,1,rBF,cor,1
2,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,1-3_PLANE_LOC-22369,0,LOCALIZER,,1
3,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,5-AXIAL_DIFFUSION-62807,0,DWI,ax,1
4,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,8-AXIAL_PERFUSION-44568,1,PERFUSION,ax,1


In [13]:
# Count how many rows carry each contrast label.
full_label_df.contrast_label.value_counts()

T1C          669
T1           562
T2           424
DWI          353
T2_FLAIR     298
DTI          282
OTHER        251
LOCALIZER    189
ADC          185
PERFUSION    130
SCOUT        126
MTT           42
rBF           42
rBV           42
FA            30
MRSI           5
Name: contrast_label, dtype: int64

In [14]:
# Keep only the rows labelled as T2 FLAIR.
is_flair = full_label_df['contrast_label'].eq("T2_FLAIR")
flair_df = full_label_df[is_flair]

In [15]:
# Dimensions of the FLAIR-only subset.
flair_df.shape

(298, 8)

In [16]:
# Keep one FLAIR row per patient: the first occurrence of each patient_id wins.
flair_df_nodup = flair_df.drop_duplicates(subset='patient_id', keep='first')

In [18]:
# Dimensions after dropping duplicate patient_id rows.
flair_df_nodup.shape

(274, 8)

In [19]:
# Preview the de-duplicated FLAIR rows.
flair_df_nodup.head()

Unnamed: 0,patient_id,cohort,patient_exam,image,contrast,contrast_label,plane,exam_eligible
5,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,4-AxFLAIR-thin_for_surgery-47036,0,T2_FLAIR,ax,1
29,TCGA-06-0143,TCGA-GBM,02-11-2005-83875,6-AXIAL_FLAIR-72192,0,T2_FLAIR,ax,1
42,TCGA-06-0149,TCGA-GBM,03-25-2003-87536,5-AXIAL_T2_FLAIR-94682,0,T2_FLAIR,ax,1
56,TCGA-19-0955,TCGA-GBM,04-07-2001-NR_MRI_BRAIN_WWO-83151,3-MRHG_FLAIR_AX-38966,0,T2_FLAIR,ax,1
74,TCGA-06-0648,TCGA-GBM,01-20-2006-51419,6-AXIAL_FLAIR-80030,0,T2_FLAIR,ax,1


In [20]:
# Build the 'validation' entries: one {'image': <path relative to PATH>} dict
# for every de-duplicated FLAIR row whose NIfTI file exists on disk.
NIFTI_SUFFIX = 'a.nii.gz'  # appended to the `image` value to form the NIfTI file name

valid_list = []
missing_exam_dirs = []  # exam directories that do not exist under PATH
missing_images = []     # exam directory exists, but the expected NIfTI file does not

for _, row in flair_df_nodup.iterrows():
    # Keep the location as a '/'-joined string: it is stored verbatim in the JSON datalist.
    flair_image_loc = "/".join([
        row['cohort'] + "-nifti",
        row['patient_id'],
        row['patient_exam'],
        row['image'] + NIFTI_SUFFIX,
    ])
    image_path = Path(PATH, flair_image_loc)
    exam_dir = image_path.parent

    if not exam_dir.is_dir():
        # Same message as before: the path of the missing exam directory.
        missing_exam_dirs.append(exam_dir)
        print(exam_dir)
        continue

    # Check the file directly instead of scanning the whole directory listing.
    if image_path.is_file():
        valid_list.append({'image': flair_image_loc})
    else:
        # These rows used to be dropped without any message; report them so the
        # gap between the input rows and len(valid_list) can be accounted for.
        missing_images.append(image_path)
        print(f"missing image file: {image_path}")

print(
    f"{len(valid_list)} images kept, "
    f"{len(missing_exam_dirs)} exam directories missing, "
    f"{len(missing_images)} image files missing"
)
    

/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-12-0775/09-02-1997-MRI_BRAIN_WWO_ENHANCE-60393
/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-14-1794/04-18-1998-MRI_BRAIN_WWO_CONT-48802
/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-14-1823/05-27-2000-MRI_BRAIN_WWO_CONTRAST-65740
/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-06-0174/11-09-2001-MRI_BRAIN_WWO_CONTRAS-84848
/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-06-0190/12-10-2004-MRI_BRAIN_WWO_CONTRAS-69767
/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-14-1829/01-12-2002-MRI_Brain_w_wo_Contrast-20440
/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-14-1043/03-16-1997-MRI_BRAIN_W_WOUT_CONTRAST-67734
/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-14-1034/10-25-1991-BRAIN-90181
/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/TCGA-06-0238/04-12-2005-MRI_BRAIN_WWO_CONTRAST-

In [21]:
# Number of datalist entries collected in valid_list.
len(valid_list)

234

In [22]:
# Assemble the datalist: an empty 'training' list plus the collected FLAIR entries
# under 'validation'.
validation_datalist = dict(training=[], validation=valid_list)

In [23]:
# Display the assembled datalist before writing it out.
validation_datalist

{'training': [],
 'validation': [{'image': 'TCGA-GBM-nifti/TCGA-06-0240/07-02-2005-04728/4-AxFLAIR-thin_for_surgery-47036a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-06-0143/02-11-2005-83875/6-AXIAL_FLAIR-72192a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-06-0149/03-25-2003-87536/5-AXIAL_T2_FLAIR-94682a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-19-0955/04-07-2001-NR_MRI_BRAIN_WWO-83151/3-MRHG_FLAIR_AX-38966a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-06-0648/01-20-2006-51419/6-AXIAL_FLAIR-80030a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-12-0829/06-02-1999-BRAIN-09460/4-FLAIR-82367a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-06-0171/08-04-2001-68007/7-AXIAL_FLAIR_RF2__150-15878a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-14-1401/04-18-1997-MRI_BRAIN_W_WOUT_CONTRAST-27628/601-BRAIN_WWO_FLAIR-20508a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-02-0033/05-26-1997-MRI_BRAIN_WWO_CONTR-89744/4-AX_FLAIR-20679a.nii.gz'},
  {'image': 'TCGA-GBM-nifti/TCGA-06-0127/03-07-2000-83889/4-AXIAL_FLAIR-569

In [24]:
# Serialize the datalist to the FLAIR config file (overwrites any existing file).
flair_datalist_path = Path('/working/lupolab/julia/tcia_analysis/nvidia/config/config_datalist_flair.json')
with flair_datalist_path.open('w') as out_file:
    out_file.write(json.dumps(validation_datalist))