In [66]:
import os
import glob
import pandas as pd 
from pathlib import Path
import numpy as np 
from scipy.io import loadmat
from torch.utils.data import Dataset
import json
import nibabel as nib

In [67]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Read in the outcome files: 

In [68]:
os.getcwd()

'/working/lupolab/julia/tcia_analysis/code_updated'

In [69]:
PATH = Path('/working/lupolab/julia/')

In [70]:
outcome1 = pd.read_csv(Path(PATH, 'cnn_analysis/who_subtype/data/raw_csvs/tcga_subtype_data.csv'))
outcome2 = pd.read_table(Path(PATH, 'cnn_analysis/who_subtype/code/09_createSeparateMasterList_TCIAWHO/gbm_tcga_pub2013_clinical_data.tsv'))
outcome1.shape, outcome2.shape

((795, 7), (65, 22))

In [71]:
outcome1 = outcome1.drop(0)
outcome1.head()

Unnamed: 0,track_name,IDH-1P10Q Subtype,IDH/codel subtype,MGMT promoter status,Overall Survival (Months),IDH1,IDH1.1
1,TCGA-P5-A5ET,Codel,IDHmut-codel,Methylated,0.0,,Missense Mutation (putative driver)
2,TCGA-QH-A6CU,Codel,IDHmut-codel,Methylated,0.0,,Missense Mutation (putative driver)
3,TCGA-HT-7467,Codel,IDHmut-codel,Methylated,0.1,,Missense Mutation (putative driver)
4,TCGA-HT-7471,Codel,IDHmut-codel,Methylated,0.1,,Missense Mutation (putative driver)
5,TCGA-HT-7687,Codel,IDHmut-codel,Methylated,0.1,,Missense Mutation (putative driver)


In [72]:
outcome1['IDH/codel subtype'].value_counts()

IDHwt               362
IDHmut-non-codel    263
IDHmut-codel        169
Name: IDH/codel subtype, dtype: int64

In [73]:
169/(169+263+362)

0.2128463476070529

In [74]:
outcome2.head()

Unnamed: 0,Study ID,Patient ID,Sample ID,Diagnosis Age,Cancer Type,Cancer Type Detailed,Disease Free (Months),Disease Free Status,Gene Expression Subtype,Fraction Genome Altered,G-CIMP Methylation,IDH1 Mutation,Methylation Status,MGMT Status,Mutation Count,Oncotree Code,Overall Survival (Months),Overall Survival Status,Number of Samples Per Patient,Sample Type,Sex,therapy
0,gbm_tcga_pub2013,TCGA-02-0006,TCGA-02-0006-01,56.2,Glioma,Glioblastoma Multiforme,9.9,Recurred/Progressed,Mesenchymal,0.2391,non-G-CIMP,WT,CL_2,UNMETHYLATED,,GBM,18.3,DECEASED,1,Primary,Female,"Standard Radiation, TMZ Chemo"
1,gbm_tcga_pub2013,TCGA-02-0011,TCGA-02-0011-01,19.0,Glioma,Glioblastoma Multiforme,4.7,Recurred/Progressed,Proneural,0.2939,non-G-CIMP,WT,CL_6,METHYLATED,,GBM,20.7,DECEASED,1,Primary,Female,"TMZ Chemoradiation, TMZ Chemo"
2,gbm_tcga_pub2013,TCGA-02-0027,TCGA-02-0027-01,33.9,Glioma,Glioblastoma Multiforme,8.5,Recurred/Progressed,Classical,0.1618,non-G-CIMP,WT,CL_1,METHYLATED,,GBM,12.2,DECEASED,1,Primary,Female,"TMZ Chemoradiation, TMZ Chemo"
3,gbm_tcga_pub2013,TCGA-02-0034,TCGA-02-0034-01,60.7,Glioma,Glioblastoma Multiforme,12.7,Recurred/Progressed,Mesenchymal,0.2146,non-G-CIMP,WT,CL_1,UNMETHYLATED,,GBM,14.1,DECEASED,1,Primary,Male,"Standard Radiation, TMZ Chemo"
4,gbm_tcga_pub2013,TCGA-02-0037,TCGA-02-0037-01,74.1,Glioma,Glioblastoma Multiforme,1.2,Recurred/Progressed,Classical,0.1885,non-G-CIMP,WT,CL_3,UNMETHYLATED,,GBM,3.6,DECEASED,1,Primary,Female,Standard Radiation


In [75]:
outcome1.columns =['patient_id', '1p19q', 'idh_codel_subtype', 'mgmt_status', 'OS', 'IDH1_drop', 'idh1_mut']

In [76]:
cols_to_drop = [x for x in list(outcome2.columns) if x not in ['Patient ID', 'IDH1 Mutation', 'MGMT Status', 'Overall Survival (Months)']]

In [77]:
outcome2 = outcome2.drop(cols_to_drop, axis = 1)

In [78]:
outcome2.columns = ['patient_id', 'idh_mut', 'mgmg_status', 'OS']

# Create the final outcome

In [79]:
outcome1.columns

Index(['patient_id', '1p19q', 'idh_codel_subtype', 'mgmt_status', 'OS', 'IDH1_drop', 'idh1_mut'], dtype='object')

In [80]:
outcome1['outcome'], outcome1['cohort'] = ['' for x in outcome1.patient_id], ['' for x in outcome1.patient_id]
outcome2['outcome'], outcome2['cohort'] = ['' for x in outcome2.patient_id], ['TCGA-GBM' for x in outcome2.patient_id]

In [81]:
outcome1 = outcome1.sort_values(by = 'patient_id').reset_index(drop = True)

In [82]:
outcome1.at[:284, 'cohort'] = 'TCGA-GBM'
outcome1.at[284:, 'cohort'] = 'TCGA-LGG'

In [83]:
outcome1['1p19q'].value_counts()

Non-codel    625
Codel        169
Name: 1p19q, dtype: int64

In [84]:
outcome1['idh_codel_subtype'].value_counts()

IDHwt               362
IDHmut-non-codel    263
IDHmut-codel        169
Name: idh_codel_subtype, dtype: int64

In [85]:
for idx, row in outcome1.iterrows():
    if row['cohort'] == 'TCGA-GBM' and row['idh_codel_subtype'] == 'IDHwt': 
        outcome1.at[idx, 'outcome'] = 'gbm_idh_wt'
    elif row['cohort'] == 'TCGA-GBM' and row['idh_codel_subtype'] != 'IDHwt': 
        outcome1.at[idx, 'outcome'] = 'as_idh_mut'
    elif row['cohort'] == 'TCGA-LGG' and row['idh_codel_subtype'] == 'IDHwt': 
        outcome1.at[idx, 'outcome'] = 'gbm_idh_wt'
    elif row['cohort'] == 'TCGA-LGG' and row['idh_codel_subtype'] == 'IDHmut-non-codel': 
        outcome1.at[idx, 'outcome'] = 'as_idh_mut'
    elif row['cohort'] == 'TCGA-LGG' and row['idh_codel_subtype'] == 'IDHmut-codel': 
        outcome1.at[idx, 'outcome'] = 'od_1p19codel'

In [86]:
outcome1.outcome.value_counts()

gbm_idh_wt      362
as_idh_mut      263
od_1p19codel    169
Name: outcome, dtype: int64

In [87]:
outcome1.outcome.isnull().value_counts()

False    794
Name: outcome, dtype: int64

In [88]:
outcome1.outcome.value_counts()

gbm_idh_wt      362
as_idh_mut      263
od_1p19codel    169
Name: outcome, dtype: int64

In [89]:
outcome2.idh_mut.value_counts()

WT       39
R132H     2
Name: idh_mut, dtype: int64

In [90]:
for idx, row in outcome2.iterrows():
    if row['cohort'] == 'TCGA-GBM' and row['idh_mut'] == 'WT': 
        outcome2.at[idx, 'outcome'] = 'gbm_idh_wt'
    elif row['cohort'] == 'TCGA-GBM' and row['idh_mut'] != 'R132H': 
        outcome2.at[idx, 'outcome'] = 'as_idh_mut'
    else: 
        outcome2.at[idx, 'outcome'] = 'gbm_nos'

In [91]:
outcome2.outcome.value_counts()

gbm_idh_wt    39
as_idh_mut    24
gbm_nos        2
Name: outcome, dtype: int64

In [92]:
outcome1.columns

Index(['patient_id', '1p19q', 'idh_codel_subtype', 'mgmt_status', 'OS', 'IDH1_drop', 'idh1_mut', 'outcome', 'cohort'], dtype='object')

In [93]:
outcome2.columns

Index(['patient_id', 'idh_mut', 'mgmg_status', 'OS', 'outcome', 'cohort'], dtype='object')

In [94]:
outcome2['idh_codel_subtype'] = ['IDHwt'  if x == 'WT' else 'IDHmut-non-codel' if x == 'R132H' else 'gbm_nos' for x in list(outcome2.idh_mut)]

In [95]:
outcomes = outcome1[['patient_id', 'outcome', 'cohort', 'idh_codel_subtype']].append(outcome2[['patient_id', 'outcome', 'cohort', 'idh_codel_subtype']])

In [96]:
outcomes.outcome.value_counts()

gbm_idh_wt      401
as_idh_mut      287
od_1p19codel    169
gbm_nos           2
Name: outcome, dtype: int64

In [97]:
outcomes.head()

Unnamed: 0,patient_id,outcome,cohort,idh_codel_subtype
0,TCGA-02-0003,gbm_idh_wt,TCGA-GBM,IDHwt
1,TCGA-02-0033,gbm_idh_wt,TCGA-GBM,IDHwt
2,TCGA-02-0047,gbm_idh_wt,TCGA-GBM,IDHwt
3,TCGA-02-0055,gbm_idh_wt,TCGA-GBM,IDHwt
4,TCGA-02-2470,gbm_idh_wt,TCGA-GBM,IDHwt


In [98]:
outcomes.shape

(859, 4)

# Merge the outcomes with the segmentation map: 

In [99]:
seg_t1c_df = pd.read_csv(Path(PATH, 'tcia_analysis/datasets/segmentation_t1c_df.csv'))
seg_fla_df = pd.read_csv(Path(PATH, 'tcia_analysis/datasets/segmentation_fla_df.csv'))

In [100]:
seg_t1c_df.shape

(226, 5)

In [101]:
seg_fla_df.shape

(224, 5)

In [102]:
seg_df = pd.merge(seg_t1c_df, seg_fla_df, how = 'outer', on = ['patient_id', 'cohort', 'patient_exam'], suffixes =['_t1ca', '_fla'])

In [103]:
seg_w_outcome = pd.merge(seg_df, outcomes, on = ['patient_id'], how = 'left')

In [104]:
seg_w_outcome.shape

(229, 10)

In [105]:
seg_w_outcome.head()

Unnamed: 0,patient_id,seg_file_t1ca,seg_loc_t1ca,cohort_x,patient_exam,seg_file_fla,seg_loc_fla,outcome,cohort_y,idh_codel_subtype
0,TCGA-06-0240,9-AxT1-thin_for_surgery-27598a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,07-02-2005-04728,4-AxFLAIR-thin_for_surgery-47036_TC_shifted.ni...,/working/lupolab/julia/tcia_analysis/nvidia/ev...,,,
1,TCGA-06-0143,11-COR__T1_POST_GD_FLAIR-28783a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,02-11-2005-83875,6-AXIAL_FLAIR-72192_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,gbm_idh_wt,TCGA-GBM,IDHwt
2,TCGA-06-0149,10-COR_T1_POST_GD_FLAIR-88922a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,03-25-2003-87536,5-AXIAL_T2_FLAIR-94682_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,as_idh_mut,TCGA-GBM,gbm_nos
3,TCGA-19-0955,8-MRHG_T1_AX_POST_GAD-01973a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,04-07-2001-NR_MRI_BRAIN_WWO-83151,3-MRHG_FLAIR_AX-38966_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,as_idh_mut,TCGA-GBM,gbm_nos
4,TCGA-06-0648,11-AX_T1_POST_GD_FLAIR-02197a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,01-20-2006-51419,6-AXIAL_FLAIR-80030_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,gbm_idh_wt,TCGA-GBM,IDHwt


In [106]:
seg_w_outcome.outcome.value_counts()

gbm_idh_wt      107
as_idh_mut       81
od_1p19codel     27
gbm_nos           2
Name: outcome, dtype: int64

In [107]:
seg_w_outcome.loc[seg_w_outcome.outcome.isnull()].patient_id

0       TCGA-06-0240
14      TCGA-06-0162
16      TCGA-06-0165
72      TCGA-76-4932
74      TCGA-06-0147
96      TCGA-14-0865
97      TCGA-06-0156
102     TCGA-12-1601
136     TCGA-HT-A61A
147    TCGA-EZ-7264A
164    TCGA-EZ-7265A
203     TCGA-DU-7014
Name: patient_id, dtype: object

## Adding in missing patients: 

In [108]:
missing_patients = pd.read_table(Path(PATH, 'cnn_analysis/who_subtype/data/raw_csvs/missing_patients.tsv'))

In [109]:
missing_patients.head()

Unnamed: 0,Study ID,Patient ID,Sample ID,Absolute Extract Ploidy,Diagnosis Age,ATRX status,BCR Status,BRAF-KIAA1549 fusion,BRAF V600E status,Cancer Type,Cancer Type Detailed,Chr 19/20 co-gain,Chr 7 gain/Chr 10 loss,DAXX status,ESTIMATE combined score,ESTIMATE immune score,ESTIMATE stromal score,Neoplasm Histologic Grade,Neoplasm Histologic Type Name,HM27,HM450,IDH-1P10Q Subtype,IDH/codel subtype,IDH-specific DNA Methylation Cluster,IDH-specific RNA Expression Cluster,IDH status,Karnofsky Performance Score,MGMT promoter status,Mutation Count,Oncotree Code,Original Subtype,Overall Survival (Months),Overall Survival Status,Pan-Glioma DNA Methylation Cluster,Pan-Glioma RNA Expression Cluster,Percent aneuploidy,Absolute Purity,Random Forest Sturm Cluster,RNA-SEQ Data,RPPA,RPPA Cluster,Number of Samples Per Patient,Sex,SNP6,Study,Supervised DNA Methylation Cluster,Telomere length estimate in blood normal (Kb),Telomere length estimate in tumor (Kb),Telomere Maintenance,TERT expression (log2),TERT expression status,TERT promoter status,Tissue Source Site,Transcriptome Subtype,U133a,Whole Exome Sequencing,Whole Genome Sequencing
0,lgggbm_tcga_pub,TCGA-06-0147,TCGA-06-0147-01,,51.0,,IGC,,,Glioma,Diffuse Glioma,No chr 19/20 gain,No combined CNA,,,,,G4,glioblastoma,Yes,No,Non-codel,IDHwt,IDHwt-K2,,WT,,Methylated,,DIFG,Mesenchymal,17.8,DECEASED,LGm5,LGr4,0.144902,,,No,No,,1,Female,Yes,Glioblastoma multiforme,Mesenchymal-like,,,,,,,Henry Ford Hospital,ME,Yes,No,No
1,lgggbm_tcga_pub,TCGA-06-0156,TCGA-06-0156-01,,57.0,,IGC,WT,,Glioma,Diffuse Glioma,,,,3897.975,2334.814,1563.161,G4,glioblastoma,No,No,,,,IDHmut-R3,Mutant,,,,DIFG,Proneural,5.8,DECEASED,,LGr4,,,,Yes,Yes,K1,1,Male,No,Glioblastoma multiforme,,,,,4.906891,Expressed,,Henry Ford Hospital,ME,Yes,Yes,No
2,lgggbm_tcga_pub,TCGA-06-0162,TCGA-06-0162-01,1.85,47.0,,IGC,,,Glioma,Diffuse Glioma,No chr 19/20 gain,No combined CNA,,,,,G4,glioblastoma,No,No,Non-codel,,,,,60.0,,,DIFG,Neural,3.4,DECEASED,,LGr4,0.386795,0.67,,No,No,,1,Female,Yes,Glioblastoma multiforme,,,,,,,,Henry Ford Hospital,NE,Yes,No,No
3,lgggbm_tcga_pub,TCGA-06-0165,TCGA-06-0165-01,,52.0,WT,IGC,,WT,Glioma,Diffuse Glioma,No chr 19/20 gain,No combined CNA,WT,,,,G4,glioblastoma,No,No,Non-codel,IDHwt,,,WT,90.0,,,DIFG,,10.6,DECEASED,,,,,,No,No,,1,Male,Yes,Glioblastoma multiforme,,,,,,,,Henry Ford Hospital,,No,Yes,No
4,lgggbm_tcga_pub,TCGA-06-0240,TCGA-06-0240-01,,57.0,WT,IGC,,WT,Glioma,Diffuse Glioma,No chr 19/20 gain,No combined CNA,WT,,,,G4,glioblastoma,No,No,Non-codel,IDHwt,,,WT,90.0,,,DIFG,,20.4,DECEASED,,LGr2,,,,No,No,,1,Male,Yes,Glioblastoma multiforme,,,,,,,,Henry Ford Hospital,NE,Yes,Yes,No


In [110]:
cols_to_drop = [x for x in list(missing_patients.columns) if x not in ['Patient ID', 'IDH1 Mutation', 'IDH-1P10Q Subtype', 'IDH/codel subtype','IDH status', 'MGMT Status', 'Overall Survival (Months)']]

In [111]:
missing_patients = missing_patients.drop(cols_to_drop, axis = 1)

In [112]:
missing_patients

Unnamed: 0,Patient ID,IDH-1P10Q Subtype,IDH/codel subtype,IDH status,Overall Survival (Months)
0,TCGA-06-0147,Non-codel,IDHwt,WT,17.8
1,TCGA-06-0156,,,Mutant,5.8
2,TCGA-06-0162,Non-codel,,,3.4
3,TCGA-06-0165,Non-codel,IDHwt,WT,10.6
4,TCGA-06-0240,Non-codel,IDHwt,WT,20.4
5,TCGA-12-1601,,,WT,
6,TCGA-14-0865,Non-codel,IDHwt,WT,16.5
7,TCGA-76-4932,,,WT,47.9
8,TCGA-DU-7014,Non-codel,,,117.3
9,TCGA-EZ-7264,Codel,IDHmut-codel,Mutant,14.3


In [113]:
missing_patients.columns =['patient_id', '1p19q', 'idh_codel_subtype', 'idh_status', 'OS']

In [114]:
missing_patients['outcome'],  missing_patients['cohort'] = ['' for x in missing_patients.patient_id], ['' for x in missing_patients.patient_id]
missing_patients.at[:8, 'cohort'] = 'TCGA-GBM'
missing_patients.at[8:, 'cohort'] = 'TCGA-LGG'

In [115]:
missing_patients

Unnamed: 0,patient_id,1p19q,idh_codel_subtype,idh_status,OS,outcome,cohort
0,TCGA-06-0147,Non-codel,IDHwt,WT,17.8,,TCGA-GBM
1,TCGA-06-0156,,,Mutant,5.8,,TCGA-GBM
2,TCGA-06-0162,Non-codel,,,3.4,,TCGA-GBM
3,TCGA-06-0165,Non-codel,IDHwt,WT,10.6,,TCGA-GBM
4,TCGA-06-0240,Non-codel,IDHwt,WT,20.4,,TCGA-GBM
5,TCGA-12-1601,,,WT,,,TCGA-GBM
6,TCGA-14-0865,Non-codel,IDHwt,WT,16.5,,TCGA-GBM
7,TCGA-76-4932,,,WT,47.9,,TCGA-GBM
8,TCGA-DU-7014,Non-codel,,,117.3,,TCGA-LGG
9,TCGA-EZ-7264,Codel,IDHmut-codel,Mutant,14.3,,TCGA-LGG


In [116]:
for idx, row in missing_patients.iterrows():
    if row['cohort'] == 'TCGA-GBM' and row['idh_codel_subtype'] == 'IDHwt': 
        missing_patients.at[idx, 'outcome'] = 'gbm_idh_wt'
    elif row['cohort'] == 'TCGA-GBM' and row['idh_codel_subtype'] != 'IDHwt': 
        missing_patients.at[idx, 'outcome'] = 'gbm_idh_mut'
    elif row['cohort'] == 'TCGA-LGG' and row['idh_codel_subtype'] == 'IDHwt': 
        missing_patients.at[idx, 'outcome'] = 'as_idh_wt'
    elif row['cohort'] == 'TCGA-LGG' and row['idh_codel_subtype'] == 'IDHmut-non-codel': 
        missing_patients.at[idx, 'outcome'] = 'as_idh_mut'
    elif row['cohort'] == 'TCGA-LGG' and row['idh_codel_subtype'] == 'IDHmut-codel': 
        missing_patients.at[idx, 'outcome'] = 'od_1p19codel'

In [117]:
missing_patients

Unnamed: 0,patient_id,1p19q,idh_codel_subtype,idh_status,OS,outcome,cohort
0,TCGA-06-0147,Non-codel,IDHwt,WT,17.8,gbm_idh_wt,TCGA-GBM
1,TCGA-06-0156,,,Mutant,5.8,gbm_idh_mut,TCGA-GBM
2,TCGA-06-0162,Non-codel,,,3.4,gbm_idh_mut,TCGA-GBM
3,TCGA-06-0165,Non-codel,IDHwt,WT,10.6,gbm_idh_wt,TCGA-GBM
4,TCGA-06-0240,Non-codel,IDHwt,WT,20.4,gbm_idh_wt,TCGA-GBM
5,TCGA-12-1601,,,WT,,gbm_idh_mut,TCGA-GBM
6,TCGA-14-0865,Non-codel,IDHwt,WT,16.5,gbm_idh_wt,TCGA-GBM
7,TCGA-76-4932,,,WT,47.9,gbm_idh_mut,TCGA-GBM
8,TCGA-DU-7014,Non-codel,,,117.3,,TCGA-LGG
9,TCGA-EZ-7264,Codel,IDHmut-codel,Mutant,14.3,od_1p19codel,TCGA-LGG


In [118]:
outcomes.columns

Index(['patient_id', 'outcome', 'cohort', 'idh_codel_subtype'], dtype='object')

In [119]:
outcomes = outcomes.append(missing_patients[['patient_id', 'outcome', 'cohort', 'idh_codel_subtype']])

In [120]:
seg_w_outcome = pd.merge(seg_df, outcomes, on = ['patient_id', 'cohort'], how = 'left')

In [121]:
seg_w_outcome.head()

Unnamed: 0,patient_id,seg_file_t1ca,seg_loc_t1ca,cohort,patient_exam,seg_file_fla,seg_loc_fla,outcome,idh_codel_subtype
0,TCGA-06-0240,9-AxT1-thin_for_surgery-27598a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,07-02-2005-04728,4-AxFLAIR-thin_for_surgery-47036_TC_shifted.ni...,/working/lupolab/julia/tcia_analysis/nvidia/ev...,gbm_idh_wt,IDHwt
1,TCGA-06-0143,11-COR__T1_POST_GD_FLAIR-28783a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,02-11-2005-83875,6-AXIAL_FLAIR-72192_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,gbm_idh_wt,IDHwt
2,TCGA-06-0149,10-COR_T1_POST_GD_FLAIR-88922a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,03-25-2003-87536,5-AXIAL_T2_FLAIR-94682_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,as_idh_mut,gbm_nos
3,TCGA-19-0955,8-MRHG_T1_AX_POST_GAD-01973a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,04-07-2001-NR_MRI_BRAIN_WWO-83151,3-MRHG_FLAIR_AX-38966_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,as_idh_mut,gbm_nos
4,TCGA-06-0648,11-AX_T1_POST_GD_FLAIR-02197a_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,TCGA-GBM,01-20-2006-51419,6-AXIAL_FLAIR-80030_TC_shifted.nii.gz,/working/lupolab/julia/tcia_analysis/nvidia/ev...,gbm_idh_wt,IDHwt


In [122]:
seg_w_outcome.outcome.isnull().value_counts()

False    227
True       2
Name: outcome, dtype: int64

In [123]:
seg_w_outcome.outcome.value_counts()

gbm_idh_wt      111
as_idh_mut       82
od_1p19codel     27
gbm_idh_mut       4
gbm_nos           2
                  1
Name: outcome, dtype: int64

In [124]:
seg_df.cohort.value_counts()

TCGA-GBM    122
TCGA-LGG    107
Name: cohort, dtype: int64

In [125]:
seg_w_outcome.loc[seg_w_outcome.cohort == 'TCGA-LGG'].idh_codel_subtype.value_counts()

IDHmut-non-codel    57
IDHmut-codel        27
IDHwt               20
Name: idh_codel_subtype, dtype: int64

In [128]:
seg_w_outcome.seg_loc_t1ca[0]

'/working/lupolab/julia/tcia_analysis/nvidia/eval_t1c/9-AxT1-thin_for_surgery-27598a_t1ca'

In [129]:
seg_w_outcome.to_csv(Path(PATH, 'cnn_analysis/who_subtype/data/processed_csvs/seg_map_with_outcome.csv'), index = False)