In [2]:
import sys
sys.path.append('../')

from survival.evaluate import evaluate_survival_per_subtype

import lifelines

import pandas as pd
import numpy as np

In [8]:
def run_experiment(data_df, metadata_df):
    """Fit a penalized Cox model on the training split and score the validation split per subtype.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Feature table whose index holds the same identifiers as
        ``metadata_df['tciaID']``. Every column is passed to the Cox model as
        a covariate.
    metadata_df : pandas.DataFrame
        Must contain the columns ``'tciaID'``, ``'idh'``, ``'1p19q'``,
        ``'phase'`` (rows marked ``'train'`` / ``'val'`` are used), ``'OS'``
        (duration) and ``'OS_EVENT'`` (event indicator). This frame is NOT
        modified by the call.

    Returns
    -------
    The value returned by ``evaluate_survival_per_subtype`` for the validation
    rows. In this notebook's recorded output that is a dict mapping each
    subtype name (plus ``'all'``) to ``{'c-index': ..., 'n': ...}``.
    """
    # Derive the subtype labels on a new frame instead of assigning a column
    # on ``metadata_df``: the in-place assignment leaked a 'subtype' column
    # into the caller's DataFrame and changed it for every later cell.
    # NOTE(review): the label is looked up from the *sum* of the two flags, so
    # this assumes the flags are encoded such that the sum identifies the
    # subtype; sums outside {-1, 0, 1, 2} (e.g. -2) are left as raw numbers by
    # ``replace`` -- TODO confirm the encoding against the metadata file.
    subtype_codes = metadata_df['idh'] + metadata_df['1p19q']
    subtype_names = subtype_codes.replace({-1: 'Unknown', 0: 'IDH-Wildtype',
                                           1: 'IDH-Mutant, 1p/19q Intact', 2: '1p/19q Co-deleted'})
    metadata = metadata_df.assign(subtype=subtype_names)

    # subtype label per case, indexed by tciaID
    subtype_info = metadata.set_index('tciaID')['subtype']

    # split training / validation identifiers
    train_idxs = metadata.loc[metadata['phase'] == 'train', 'tciaID'].tolist()
    val_idxs = metadata.loc[metadata['phase'] == 'val', 'tciaID'].tolist()

    # survival labels for every case that belongs to either split
    survival_df = metadata.loc[metadata['tciaID'].isin(train_idxs + val_idxs)].set_index('tciaID')
    survival_df = survival_df[['OS', 'OS_EVENT']]

    # join features + survival labels on the shared identifier index
    df = data_df.join(survival_df)

    # divide data by training splits
    train = df[df.index.isin(train_idxs)]
    valid = df[df.index.isin(val_idxs)]

    # train linear cox proportional hazard model
    cph = lifelines.CoxPHFitter(penalizer=0.1)
    cph.fit(train, duration_col='OS', event_col='OS_EVENT')

    # get C-Index scores
    # The partial hazard is negated before scoring -- presumably because the
    # evaluator expects larger scores for longer survival; confirm against
    # evaluate_survival_per_subtype.
    valid_performance = evaluate_survival_per_subtype(valid["OS"],
                                                      - cph.predict_partial_hazard(valid),
                                                      valid["OS_EVENT"],
                                                      valid.join(subtype_info)["subtype"])

    return valid_performance

In [9]:
# Embedding exported from the MTL model that was trained on 1-channel FLAIR MR input together with SCNA input
embedding_csvfile = '../data/mtl_embedding_from_flair_scna.csv'
embedding_df = pd.read_csv(embedding_csvfile, index_col=0)

# Metadata table that includes the training/val split and the survival labels (and events)
metadata_csvfile = '../data/glioma_metadata.csv'
glioma_metadata_df = pd.read_csv(metadata_csvfile, index_col=0)

# Survival C-Index computed over each IDH and 1p/19q defined subtype
run_experiment(embedding_df, glioma_metadata_df)

{'1p/19q Co-deleted': {'c-index': 0.84, 'n': 14},
 'IDH-Mutant, 1p/19q Intact': {'c-index': 0.7868852459016393, 'n': 18},
 'IDH-Wildtype': {'c-index': 0.6814516129032258, 'n': 27},
 'Unknown': {'c-index': 0.5416666666666666, 'n': 16},
 'all': {'c-index': 0.7289088863892014, 'n': 75}}