# Registering the niftis 

For each exam, we want to register everything to the following (in order): 
1. T2 FLAIR Ax 
2. T1 pre-contrast

We want to use the following command: 

` BRAINSFit --fixedVolume $fixed_vol.nii.gz --movingVolume $moving_vol.nii.gz --outputVolume $output_vol.nii.gz --outputTransform $transform_output.tfm  --transformType Rigid`

But first we need to figure out which are the fixed volumes; this will require labeling. 
Let's use code derived from `http://localhost:8890/notebooks/data/svcf/labels/TCGA-GBM/GenerateGroundTruthUsingDCMdump.ipynb` 

In [1]:
import glob
import subprocess as sub 
from subprocess import * 
import os 
from pathlib import Path
from functools import reduce
import pandas as pd

## First we want to have a data frame with all the patients, all of their exams, and how many images they have in each exam. 

Ideally, we want to narrow down the exam that we're going to use from each patient quite easily through looking at the name of the exam; many have "WWO" meaning w/ and w/o contrast -- this could be helpful 

In [2]:
PATH = Path('/working/lupolab/julia/tcia_analysis/datasets/')

In [8]:
patients_gbm = os.listdir(Path(PATH, 'TCGA-GBM-nifti'))
cohort_gbm = ['TCGA-GBM-nifti' for x in patients_gbm]

In [9]:
patients_lgg = os.listdir(Path(PATH, 'TCGA-LGG-nifti'))
cohort_lgg = ['TCGA-LGG-nifti' for x in patients_lgg]

In [10]:
patient_df = pd.DataFrame({'patient_id': patients_gbm+patients_lgg, 'cohort':cohort_gbm+cohort_lgg})

In [11]:
patient_df.shape

(395, 2)

In [14]:
df = pd.DataFrame()
for idx, row in patient_df.iterrows(): 
    patient_exam_df = pd.DataFrame()
    patient_id = row['patient_id']
    cohort = row['cohort']
    patient_exams = os.listdir(Path(PATH, cohort, patient_id))
    if patient_id in os.listdir(Path(PATH, cohort+'')): 
        for exam in patient_exams:
            exam_images = os.listdir(Path(PATH, cohort, patient_id, exam))
            exam_images_df = pd.DataFrame({'patient_id': [patient_id for x in exam_images], 
                                   'cohort': [cohort for x in exam_images],
                                   'patient_exam': [exam for x in exam_images], 
                                   'image': exam_images})

            df = df.append(exam_images_df, ignore_index = True)
    else: 
        print(f"patient {patient_id} is not in nifti folder")
    
    
    
#     patient_df = pd.DataFrame({'patient_id': [patient_id for x in patient_exams], 
#                                'cohort': [cohort for x in patient_exams],
#                                'patient_exam': patient_exams})
#     exam_df = exam_df.append(patient_df, ignore_index = True)

In [15]:
df.shape

(9007, 4)

In [16]:
df.head()

Unnamed: 0,patient_id,cohort,patient_exam,image
0,TCGA-06-0210,TCGA-GBM-nifti,08-17-1995-19584,1-SAG_LOCAL-29333.nii.gz
1,TCGA-06-0210,TCGA-GBM-nifti,08-17-1995-19584,3-AXIAL_T1-88865.nii.gz
2,TCGA-06-0210,TCGA-GBM-nifti,08-17-1995-19584,2-AXIAL_T2-83081.nii.gz
3,TCGA-06-0210,TCGA-GBM-nifti,08-17-1995-19584,4-AXIAL_T1_POST_GD-64939.nii.gz
4,TCGA-08-0350,TCGA-GBM-nifti,12-15-1998-MSTEALTH-91569,15-3DFSE_AXIAL-04462.nii.gz


## Next we want to go through the images and figure out whether they have contrast or not. 

In [17]:
df['contrast'] = ['' for x in df.patient_id]

In [18]:
# Needed to remove the logfiles breaking things: 
# ! rm /working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM/TCGA-06-0143/01-15-2006-43320/Logfile
# ! rm /working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM/TCGA-06-0149/03-25-2003-87536/Logfile

*** Note: do not run the below cell unless necessary - takes > 1 hour.

In [119]:
for idx, row in df.iterrows(): 
    os.chdir(Path(PATH, row['cohort'], row['patient_id'], row['patient_exam'], row['image']))
    commands = ["dcmdump 000000.dcm | grep -i '(0018,0010)' ",
    "dcmdump 000000.dcm | grep -i '(0018,0012)' ",
    "dcmdump 000000.dcm | grep -i '(0018,1040)' ",
    "dcmdump 000000.dcm | grep -i '(0018,0014)' "]
    for command in commands: 
        result = sub.Popen(command, stdout=sub.PIPE, stderr = sub.PIPE, shell = True)
        out, err = result.communicate()
        out = out.decode('utf-8')
        if out: 
            df.at[idx, 'contrast'] = 1
            break
        else: 
            df.at[idx, 'contrast'] = 0

KeyboardInterrupt: 

In [None]:
idx = 253
row = df.iloc[idx]

In [104]:
# df.loc[df.image.str.contains('t1c')]

In [105]:
os.chdir(Path(PATH, row['cohort'], row['patient_id'], row['patient_exam'], row['image']))

In [106]:
commands = ["dcmdump 000000.dcm | grep -i '(0018,0010)' ",
    "dcmdump 000000.dcm | grep -i '(0018,0012)' ",
    "dcmdump 000000.dcm | grep -i '(0018,1040)' ",
    "dcmdump 000000.dcm | grep -i '(0018,0014)' "]

In [107]:
command = "dcmdump 000000.dcm | grep -i '(0018,0010)' "

In [114]:
for command in commands: 
    result = sub.Popen(command, stdout=sub.PIPE, stderr = sub.PIPE, shell = True)
    out, err = result.communicate()
    out = out.decode('utf-8')
    if out: 
        df.at[idx, 'contrast'] = 1
        print(out)
        break
    else: 
        print('ok')
        df.at[idx, 'contrast'] = 0

(0018,0010) LO [Magnevist]                              #  10, 1 ContrastBolusAgent



In [108]:
result = sub.Popen(command, stdout=sub.PIPE, stderr = sub.PIPE, shell = True)

In [109]:
out, err = result.communicate()

In [110]:
out

b'(0018,0010) LO [Magnevist]                              #  10, 1 ContrastBolusAgent\n'

In [121]:
df.loc[df.contrast == 1]

Unnamed: 0,patient_id,cohort,patient_exam,image,contrast
3,TCGA-06-0237,TCGA-GBM,02-01-2005-48252,21-nordicICE_HFH_-_rBF_map_-Leakage_corrected-...,1
4,TCGA-06-0237,TCGA-GBM,02-01-2005-48252,7-AXIAL_PERFUSION-60045,1
5,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,9-AxT1-thin_for_surgery-27598,1
6,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,21-nordicICE_HFH_-_rBF_map_-Leakage_corrected-...,1
9,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,8-AXIAL_PERFUSION-44568,1
11,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,10-COR_T1-52396,1
14,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,22-nordicICE_HFH_-_MTT_map_-Leakage_corrected-...,1
16,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,7-AXIAL_PERFUSION-20070,1
18,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,20-nordicICE_HFH_-_rBV_map_-Leakage_corrected-...,1
37,TCGA-06-0143,TCGA-GBM,11-03-2005-49045,12-AX_T1_POST_GD_FLAIR-54984,1


In [122]:
df.to_csv(Path(PATH, 'images_with_dicom_contrast.csv'), index = False)

## Next we want to label the series descriptions by their contrast: 

In [19]:
df = pd.read_csv(Path(PATH, 'images_with_dicom_contrast.csv'))

In [20]:
df['contrast_label'] = ['' for x in df.patient_id]

In [21]:
for idx, row in df.iterrows(): 
    image_description = row['image'].lower()
    dicom_contrast = row['contrast']
    if 'apparent' in image_description or 'coeff' in image_description or 'adc' in image_description or 'average' in image_description or 'avg' in image_description:
        df.at[idx, 'contrast_label'] = 'ADC'
    elif 't1' in image_description or 'spgr' in image_description: 
        if (dicom_contrast == 1 or 'post' in image_description) and 'pre' not in image_description: 
            df.at[idx, 'contrast_label'] = 'T1C'
        else: 
            df.at[idx, 'contrast_label'] = 'T1'
    elif dicom_contrast == 1 and ('mpr' in image_description or 'rage' in image_description): 
        df.at[idx, 'contrast_label'] = 'T1C'        
    elif dicom_contrast == 0 and ('mpr' in image_description or 'rage' in image_description): 
        df.at[idx, 'contrast_label'] = 'T1'        
    elif 'flair' in image_description and 't1' not in image_description:
        df.at[idx, 'contrast_label'] = 'T2_FLAIR'
    elif ('t2' in image_description or 'fse' in image_description) and 'flair' not in image_description and 'dw' not in image_description: 
        df.at[idx, 'contrast_label'] = "T2"
    
    elif 'dwi' in image_description or 'diff' in image_description or 'dw' in image_description: 
        df.at[idx, 'contrast_label'] = 'DWI'
    elif 'dti' in image_description: 
        df.at[idx, 'contrast_label'] = 'DTI'
    elif 'fa' in image_description or 'fractional' in image_description: 
        df.at[idx, 'contrast_label'] = 'FA'
    elif 'perf' in image_description : 
        df.at[idx, 'contrast_label'] = 'PERFUSION'
    elif 'rbv' in image_description : 
        df.at[idx, 'contrast_label'] = 'rBV'
    elif 'rbf' in image_description : 
        df.at[idx, 'contrast_label'] = 'rBF'
    elif 'mtt' in image_description : 
        df.at[idx, 'contrast_label'] = 'MTT'
    elif 'scout' in image_description or 'survey' in image_description or 'cal' in image_description or 'loc' in image_description: 
        df.at[idx, 'contrast_label'] = 'SCOUT'
    elif 'mrsi' in image_description: 
        df.at[idx, 'contrast_label'] = 'MRSI'
    else: 
        df.at[idx, 'contrast_label'] = 'OTHER'
        
        

In [22]:
df.contrast_label.value_counts()

T1C          1428
T1           1265
T2            891
OTHER         782
SCOUT         630
DWI           625
T2_FLAIR      595
DTI           540
ADC           331
PERFUSION     243
FA             53
rBF            52
rBV            51
MTT            51
MRSI            7
Name: contrast_label, dtype: int64

In [23]:
df.to_csv(Path(PATH, 'contrast_labels.csv'), index = False)

## Need to add plane if we can: 

In [24]:
df = pd.read_csv(Path(PATH, 'contrast_labels.csv'))

In [25]:
df['plane'] = ['' for x in df.patient_id]
for idx, row in df.iterrows(): 
    image_description = row['image'].lower()
    if 'ax' in image_description: 
        df.at[idx, 'plane'] = 'ax'
    elif 'cor' in image_description: 
        df.at[idx, 'plane'] = 'cor'
    elif 'sag' in image_description:
        df.at[idx, 'plane'] = 'sag'
    elif 'obl' in image_description:
        df.at[idx, 'plane'] = 'obl'
    

In [26]:
df.plane.value_counts()

       3051
ax     2899
sag     848
cor     746
Name: plane, dtype: int64

## Next we need to figure out which exam from which patients we can use

In [27]:
df['exam_eligible'] = [0 for x in df.patient_exam]
for exam in df.patient_exam.unique(): 
    exam_contrasts = list(df.loc[df.patient_exam == exam, 'contrast_label'])
    if 'T1' in exam_contrasts and 'T1C' in exam_contrasts and "T2_FLAIR" in exam_contrasts: 
        df.at[df.loc[df.patient_exam == exam].index, 'exam_eligible'] = 1


In [28]:
df.to_csv(Path(PATH, 'contrast_labels_with_flair_eligibility.csv'), index = False)

## Then subset down to eligible exams, then to a single exam per patient

In [29]:
eligible_df = df.loc[df.exam_eligible == 1]

In [30]:
eligible_df.head()

Unnamed: 0,patient_id,cohort,patient_exam,image,contrast,contrast_label,plane,exam_eligible
5,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,9-AxT1-thin_for_surgery-27598,1,T1C,ax,1
6,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,21-nordicICE_HFH_-_rBF_map_-Leakage_corrected-...,1,rBF,cor,1
7,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,1-3_PLANE_LOC-22369,0,SCOUT,,1
8,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,5-AXIAL_DIFFUSION-62807,0,DWI,ax,1
9,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,8-AXIAL_PERFUSION-44568,1,PERFUSION,ax,1


In [31]:
eligible_df.shape

(5690, 8)

In [32]:
f'There are {len(eligible_df.patient_id.unique())} patients that we can use and {len(eligible_df.patient_exam.unique())} unique exams.'

'There are 274 patients that we can use and 461 unique exams.'

Previous approach: when I needed all four modalities, aligned to the T1c, etc. 

In [35]:
# one_exam_per_pt_df = pd.DataFrame()
# for patient in eligible_df.patient_id.unique(): 
#     exams = list(eligible_df.loc[eligible_df.patient_id == patient, 'patient_exam'].unique())
#     if len(exams) == 1: 
#         one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_id == patient])
#         print("one_exam_anyway")
#     else: 
#         ## create a new list that has the exam name, the contrast types that it contains, the planes, and the length of unique contrast types
#         new_list = [(exam, 
#                      [x+'_'+y for x, y in zip(list(eligible_df.loc[eligible_df.patient_exam == exam].contrast_label), list(eligible_df.loc[eligible_df.patient_exam == exam].plane))], 
#                      len([x+'_'+y for x, y in zip(list(eligible_df.loc[eligible_df.patient_exam == exam].contrast_label), list(eligible_df.loc[eligible_df.patient_exam == exam].plane))])) 
#                      for exam in exams]
        
#         t2_flair_ax_filter = list(filter(lambda x: 'T2_FLAIR_ax' in x[1] , new_list))
        
#         if len(t2_flair_ax_filter) == 1 : 
#             print('t2_flair_ax_filter worked, adding this exam to the df')
#             exam_to_add = t2_flair_ax_filter[0][0]
#             one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_exam == exam_to_add])
        
#         elif len(t2_flair_ax_filter) > 1: 
#             result = filter(lambda x: 'ADC' in x[1], t2_flair_ax_filter)
#             adc_filter = list(result)
#             if len(adc_filter) == 1: 
#                 print('adc filter worked, adding this exam to the df')
#                 exam_to_add = adc_filter[0][0]
#                 one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_exam == exam_to_add])
#             else: 
#                 print('Taking the exam with the most unique kinds of contrast. That also has T1ax')
#                 exam_to_add = reduce(lambda a, b : a if a[2] > b[2] else b, t2_flair_ax_filter)[0]
#                 one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_exam == exam_to_add])
#         else: 
#             print('Taking exam with most unique kinds of contrast.')
#             exam_to_add = reduce(lambda a, b : a if a[2] > b[2] else b, new_list)[0]
#             one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_exam == exam_to_add])
        
        

In [36]:
# one_exam_per_pt_df.shape

From here, I think what might make the most sense is to take the exam with an axial t1, then adc, then just whichever has the most unique kinds of images from each patient.

In [37]:
one_exam_per_pt_df = pd.DataFrame()
for patient in eligible_df.patient_id.unique(): 
    exams = list(eligible_df.loc[eligible_df.patient_id == patient, 'patient_exam'].unique())
    if len(exams) == 1: 
        one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_id == patient])
        print("one_exam_anyway")
    else: 
        ## create a new list that has the exam name, the contrast types that it contains, the planes, and the length of unique contrast types
        new_list = [(exam, 
                     [x+'_'+y for x, y in zip(list(eligible_df.loc[eligible_df.patient_exam == exam].contrast_label), list(eligible_df.loc[eligible_df.patient_exam == exam].plane))], 
                     len([x+'_'+y for x, y in zip(list(eligible_df.loc[eligible_df.patient_exam == exam].contrast_label), list(eligible_df.loc[eligible_df.patient_exam == exam].plane))])) 
                     for exam in exams]
        
        t1_ax_filter = list(filter(lambda x: 'T1_ax' in x[1] , new_list))
        
        if len(t1_ax_filter) == 1 : 
            print('t1_ax_filter worked, adding this exam to the df')
            exam_to_add = t1_ax_filter[0][0]
            one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_exam == exam_to_add])
        
        elif len(t1_ax_filter) > 1: 
            result = filter(lambda x: 'ADC' in x[1], t1_ax_filter)
            adc_filter = list(result)
            if len(adc_filter) == 1: 
                print('adc filter worked, adding this exam to the df')
                exam_to_add = adc_filter[0][0]
                one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_exam == exam_to_add])
            else: 
                print('Taking the exam with the most unique kinds of contrast. That also has T1ax')
                exam_to_add = reduce(lambda a, b : a if a[2] > b[2] else b, t1_ax_filter)[0]
                one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_exam == exam_to_add])
        else: 
            print('Taking exam with most unique kinds of contrast.')
            exam_to_add = reduce(lambda a, b : a if a[2] > b[2] else b, new_list)[0]
            one_exam_per_pt_df = one_exam_per_pt_df.append(eligible_df.loc[eligible_df.patient_exam == exam_to_add])
        
        

one_exam_anyway
Taking the exam with the most unique kinds of contrast. That also has T1ax
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
Taking the exam with the most unique kinds of contrast. That also has T1ax
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
Taking the exam with the most unique kinds of contrast. That also has T1ax
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
Taking the exam with the most unique kinds of contrast. That also has T1ax
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
one_exam_anyway
Taking the exam with the most unique kinds of contrast. That also has T1ax
Taking the exam with the most unique kinds of contrast. That also has T1ax
one_exam_anyway
Taking the exam with the most unique kinds of contrast. That also has T1ax
Taking the exam with the most unique kinds of contrast. Tha

In [38]:
one_exam_per_pt_df.shape

(3630, 8)

In [39]:
f'There are {len(one_exam_per_pt_df.patient_id.unique())} patients that we can use and {len(one_exam_per_pt_df.patient_exam.unique())} unique exams.'

'There are 274 patients that we can use and 274 unique exams.'

In [40]:
one_exam_per_pt_df.to_csv(Path(PATH, 'one_exam_per_patient_with_labels.csv'))

## Now we want to align all of the images that we can: 

First we will figure out what the fixed volume is to align to: 

In [41]:
one_exam_per_pt_df = pd.read_csv(Path(PATH, 'one_exam_per_patient_with_labels.csv'), index_col = 0)

In [42]:
one_exam_per_pt_df

Unnamed: 0,patient_id,cohort,patient_exam,image,contrast,contrast_label,plane,exam_eligible
5,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,9-AxT1-thin_for_surgery-27598,1,T1C,ax,1
6,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,21-nordicICE_HFH_-_rBF_map_-Leakage_corrected-...,1,rBF,cor,1
7,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,1-3_PLANE_LOC-22369,0,SCOUT,,1
8,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,5-AXIAL_DIFFUSION-62807,0,DWI,ax,1
9,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,8-AXIAL_PERFUSION-44568,1,PERFUSION,ax,1
10,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,4-AxFLAIR-thin_for_surgery-47036,0,T2_FLAIR,ax,1
11,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,10-COR_T1-52396,1,T1C,cor,1
12,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,6-AXIAL_T1-20532,0,T1,ax,1
13,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,2-SAG_T1-00985,0,T1,sag,1
14,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,22-nordicICE_HFH_-_MTT_map_-Leakage_corrected-...,1,MTT,cor,1


In [43]:
# one_exam_per_pt_df.loc[one_exam_per_pt_df.patient_id == 'TCGA-06-0162']

In [44]:
one_exam_per_pt_df['fixed_vol'] = ['' for x in one_exam_per_pt_df.patient_id]

In [45]:
# for idx, row in one_exam_per_pt_df.iterrows():
#     print(idx)
#     patient = row['patient_id']
#     patient_t1_df = one_exam_per_pt_df.loc[(one_exam_per_pt_df.patient_id == patient) & (one_exam_per_pt_df.contrast_label == 'T1')]
#     patient_t1c_df = one_exam_per_pt_df.loc[(one_exam_per_pt_df.patient_id == patient) & (one_exam_per_pt_df.contrast_label == 'T1C')]
#     if (patient_t1_df.shape[0] > 1) & ('ax' in list(patient_t1_df.plane)):
#         fixed_vol = patient_t1_df.loc[patient_t1_df.plane == 'ax'].reset_index().image[0]
#     elif (patient_t1c_df.shape[0] > 1) & ('ax' in list(patient_t1c_df.plane)): 
#         fixed_vol = patient_t1c_df.loc[patient_t1c_df.plane == 'ax'].reset_index().image[0]
#     else: 
#         fixed_vol = patient_t1_df.reset_index().image[0]
#     one_exam_per_pt_df.at[one_exam_per_pt_df.loc[one_exam_per_pt_df.patient_id == patient].index, 'fixed_vol'] = fixed_vol
    
    

Had to redo this with FLAIR b/c the segmentations are FLAIR based

In [54]:
for idx, row in one_exam_per_pt_df.iterrows():
    print(idx)
    patient = row['patient_id']
    patient_flair_df = one_exam_per_pt_df.loc[(one_exam_per_pt_df.patient_id == patient) & (one_exam_per_pt_df.contrast_label == 'T2_FLAIR')]
    if (patient_flair_df.shape[0] > 1) & ('ax' in list(patient_flair_df.plane)):
        fixed_vol = patient_flair_df.loc[patient_flair_df.plane == 'ax'].reset_index().image[0]
    else: 
        fixed_vol = patient_flair_df.reset_index().image[0]
    one_exam_per_pt_df.at[one_exam_per_pt_df.loc[one_exam_per_pt_df.patient_id == patient].index, 'fixed_vol'] = fixed_vol
    
    

5
6
7
8
9
10
11
12
13
14
15
16
17
18
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
241
242
243
244
245
246
247
248
249
250
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
301
302
303
304
305
306
307
308
309
310
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
461
462
463
464
465
466
467
468
469
470


4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4700
4701
4702
4703
4704
4705
4706
4707
4708
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852


7240
7241
7242
7243
7244
7245
7246
7247
7248
7249
7250
7251
7252
7253
7260
7261
7262
7263
7264
7265
7266
7267
7268
7269
7270
7271
7272
7273
7274
7275
7276
7277
7278
7279
7280
7281
7282
7283
7284
7285
7286
7287
7288
7289
7290
7291
7292
7293
7294
7295
7296
7297
7298
7299
7300
7301
7302
7303
7304
7305
7306
7307
7308
7309
7310
7311
7312
7313
7314
7315
7316
7317
7318
7319
7320
7321
7322
7323
7324
7325
7326
7327
7328
7329
7330
7331
7332
7333
7334
7335
7336
7337
7338
7339
7340
7341
7342
7343
7344
7362
7363
7364
7365
7366
7367
7368
7369
7370
7371
7372
7373
7374
7375
7376
7377
7378
7379
7380
7381
7382
7383
7384
7385
7386
7387
7388
7389
7390
7391
7392
7393
7394
7395
7396
7397
7398
7399
7407
7408
7409
7410
7411
7412
7413
7414
7415
7416
7417
7418
7419
7420
7421
7422
7423
7424
7425
7426
7427
7428
7429
7430
7529
7530
7531
7532
7533
7534
7535
7536
7537
7538
7539
7540
7541
7542
7543


In [55]:
one_exam_per_pt_df.head()

Unnamed: 0,patient_id,cohort,patient_exam,image,contrast,contrast_label,plane,exam_eligible,fixed_vol
5,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,9-AxT1-thin_for_surgery-27598,1,T1C,ax,1,4-AxFLAIR-thin_for_surgery-47036
6,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,21-nordicICE_HFH_-_rBF_map_-Leakage_corrected-...,1,rBF,cor,1,4-AxFLAIR-thin_for_surgery-47036
7,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,1-3_PLANE_LOC-22369,0,SCOUT,,1,4-AxFLAIR-thin_for_surgery-47036
8,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,5-AXIAL_DIFFUSION-62807,0,DWI,ax,1,4-AxFLAIR-thin_for_surgery-47036
9,TCGA-06-0240,TCGA-GBM,07-02-2005-04728,8-AXIAL_PERFUSION-44568,1,PERFUSION,ax,1,4-AxFLAIR-thin_for_surgery-47036


In [56]:
one_exam_per_pt_df.to_csv(Path(PATH, 'one_exam_per_patient_with_fixed_vol.csv'), index = False)

Now that we finally have the fixed volume we want to align to for each image in each patient that we care about, let's go ahead and create the commands for brainsfit in brackets so that we can send them to the grid: 

In [57]:
PATH

PosixPath('/working/lupolab/julia/tcia_analysis/datasets')

In [58]:
row = one_exam_per_pt_df.iloc[0]

In [59]:
Path(PATH, row['cohort']+'-nifti', row['patient_exam'], row['fixed_vol']+'.nii.gz')

PosixPath('/working/lupolab/julia/tcia_analysis/datasets/TCGA-GBM-nifti/07-02-2005-04728/4-AxFLAIR-thin_for_surgery-47036.nii.gz')

In [60]:
commands = []
for idx, row in one_exam_per_pt_df.iterrows(): 
    fixed_vol = str(Path(PATH, row['cohort']+'-nifti', row['patient_id'], row['patient_exam'], row['fixed_vol']+'.nii.gz'))
    moving_vol = str(Path(PATH, row['cohort']+'-nifti', row['patient_id'], row['patient_exam'], row['image']+'.nii.gz'))
    output_vol = moving_vol.split('.n')[0]+'a'+'.nii.gz'
    command_align = 'BRAINSFit --fixedVolume '+fixed_vol+' --movingVolume '+moving_vol+' --outputVolume '+output_vol+' --transformType Rigid'
    commands.append(command_align)

In [61]:
len(commands)

3630

In [62]:
os.chdir('/working/lupolab/julia/tcia_analysis/code/')

In [63]:
x = open('register_niftis.txt', 'w')

In [64]:
x.write('{')
x.write('\n')
for command in commands: 
    x.write(command)
    x.write('\n')
x.write('}')
x.close()

In [65]:
!grid_pipeline --scriptonly register_niftis.txt


In [66]:
command

'BRAINSFit --fixedVolume /working/lupolab/julia/tcia_analysis/datasets/TCGA-LGG-nifti/TCGA-DU-7309/08-31-1996-MRI_BRAIN_WWO_CONTRAST-83234/6-AXIAL_FLAIR-44187.nii.gz --movingVolume /working/lupolab/julia/tcia_analysis/datasets/TCGA-LGG-nifti/TCGA-DU-7309/08-31-1996-MRI_BRAIN_WWO_CONTRAST-83234/8-AX_T1_pre_gd-90729.nii.gz --outputVolume /working/lupolab/julia/tcia_analysis/datasets/TCGA-LGG-nifti/TCGA-DU-7309/08-31-1996-MRI_BRAIN_WWO_CONTRAST-83234/8-AX_T1_pre_gd-90729a.nii.gz --transformType Rigid'

In [67]:
PATH

PosixPath('/working/lupolab/julia/tcia_analysis/datasets')