In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from engines.datasets.base_datasets import SurvivalGEDataset
from engines.hp_dict.base import HP_dict
from engines.models import cox_models
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from datetime import datetime
import re 
# Timestamp taken once, when this cell runs; it identifies the current run.
stamp = datetime.now()
# Integer run identifier used to name output files. It is built from every
# digit of the timestamp (year down to microseconds) so that two runs get
# different identifiers; summing the digits instead would map many different
# timestamps onto the same small number and let output files overwrite each other.
hash1 = int(stamp.strftime("%Y%m%d%H%M%S%f"))
from engines.models import functions
plt.rcParams["svg.fonttype"] = "none"


In [2]:
### import some basic general arguments
from experiments.parsers import parse_arguments


## Data
The data used to train the network is described here. We use:
* **mutation profiles**: described by the presence/absence of the NPM1 mutation, the FLT3-ITD mutation and the IDH1-R132 mutation.
* **transcriptomic profile**: described by the gene expression of the protein-coding genes, with selection/projection-based dimensionality reduction (up to 17 components).
* **cytogenetic profile**: described by multiple (13, listed in the next cell) cytogenetic abnormalities or groups.

* **age** (boolean: is the patient older than 60 years?) and **sex** (boolean: is the patient female?).

In [3]:
# Names of the clinical feature columns, grouped by kind.

# Presence/absence of recurrent mutations.
mutations = [
    "NPM1 mutation",
    "FLT3-ITD mutation",
    "IDH1-R132 mutation",
]

# Sex and age indicator columns.
age_sex = [
    "Sex_F",
    "Age_gt_60",
]

# Cytogenetic abnormalities / groups (one column each).
cytogenetics = [
    'MLL translocations (+MLL FISH positive) (Irrespective of additional cytogenetic abnormalities)',
    'Intermediate abnormal karyotype (except isolated trisomy/tetrasomy 8)',
    'Normal karyotype',
    'Complex (3 and more chromosomal abnormalities)',
    'Trisomy/tetrasomy 8 (isolated)',
    'Monosomy 5/ 5q-/Monosomy 7/ 7q- (less than 3 chromosomal abnormalities)',
    'NUP98-NSD1(normal karyotype)',
    't(8;21)(q22;q22)/RUNX1-RUNX1T1 (Irrespective of additional cytogenetic abnormalities)',
    'inv(16)(p13.1q22)/t(16;16)(p13.1;q22)/CBFB-MYH11 (Irrespective of additional cytogenetic abnormalities)',
    'EVI1 rearrangements (+EVI1 FISH positive) (Irrespective of additional cytogenetic abnormalities)',
    't(6;9)(p23;q34) (Irrespective of additional cytogenetic abnormalities)',
    'Monosomy17/del17p (less than 3 chromosomal abnormalities)',
    'Hyperdiploid numerical abnormalities only',
]

# Full clinical feature set, in the order: mutations, cytogenetics, age/sex.
clinical_features = np.concatenate((mutations, cytogenetics, age_sex))

In [4]:
# Load the "lgn_pronostic" cohort; every dataset below is derived from it.
SGE = SurvivalGEDataset()
SGE.get_data("lgn_pronostic")

# Each SGE.new(...) call takes the clinical feature columns to include (or None)
# and the name of the gene-expression representation to attach.
# Author's note on the resulting objects: data{x: input_data, y: target}.

# Clinical factors, alone ("None") or combined with one representation.
clin_factors, clin_factors_lsc17, clin_factors_pca = (
    SGE.new(clinical_features, gene_expressions=ge_kind)
    for ge_kind in ("None", "LSC17", "PCA")
)

# Gene-expression representations without any clinical factor.
pca_only, lsc17_only, lsc17_pca = (
    SGE.new(None, gene_expressions=ge_kind)
    for ge_kind in ("PCA", "LSC17", "LSC17+PCA")
)

# Clinical factors together with both representations.
clin_factors_lsc17_pca = SGE.new(clinical_features, gene_expressions="LSC17+PCA")

Loading and assembling Gene Repertoire...
Loading ClinF lgn_pronostic file ...
removed 414 genes with null expression across samples 
Now datataset hase shape (300, 19183)


In [5]:
# Join the "CF_bin" table with the LSC17 input matrix on their row index.
# NOTE(review): `data` is reassigned in the example section further down before
# being read again in this notebook — confirm whether this merge is still needed.
cf_bin = SGE.data["CF_bin"]
lsc17_x = SGE.data["LSC17"].x
data = cf_bin.merge(lsc17_x, left_index=True, right_index=True)

In [6]:
# Clone the clinical-factor dataset and check that the copy holds the same values.
clin_factors_copy = clin_factors.clone()
same_values = np.all(clin_factors.x == clin_factors_copy.x)
print( "Is the cloned copy equal in values ? : ", same_values)

# Split the original first, then the clone, each with the argument 5.
for dataset in (clin_factors, clin_factors_copy):
    dataset.split_train_test(5)


Is the cloned copy equal in values ? :  True


In [7]:
# Compare the first training fold of the original and of its clone: count how
# many sample ids of the original's fold also appear in the clone's fold.
# (The author's observation: splitting separate clones shuffles the data internally.)
train_ids = clin_factors.folds[0].train.x.index
train_ids_copy = clin_factors_copy.folds[0].train.x.index
n = train_ids.isin(train_ids_copy).sum()

print (f"The number of common samples between two shuffles: {n}")

The number of common samples between two shuffles: 192


## Example
### Parameters, preprocessing, splitting


In [8]:
# General hyper-parameters for the example run
HyperParams = HP_dict(wd = 1e-3, nepochs = 200,  bootstr_n = 1000, nfolds = 5)

# Work on a clone so the shared clinical-factor dataset is left untouched
data = clin_factors.clone()

# Preprocessing: keep only the columns whose variance exceeds 0.01
informative_columns = data.x.columns[np.where(data.x.var(0) > 0.01)]
data.x = data.x[informative_columns]

# Train/test splitting with the configured number of folds
data.split_train_test(HyperParams.nfolds)

# Default model parameters for the "cphdnn_2l" model type on this dataset
params = HyperParams.generate_default(model_type = "cphdnn_2l", data = data)



### Launch training

In [9]:
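# Training is disabled in this example cell; uncomment the call below to run a single evaluation.
# NOTE(review): the calls in the "Data generation" section pass `pca_params`, not `pca_n` — confirm the keyword before re-enabling.
# c_index_metrics, c_scores, surv_tbl, params= cox_models.evaluate(data, params, pca_n = None)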
        

## Figures 
#### Data generation

In [10]:
# Set general parameters
HyperParams = HP_dict(wd = 1e-3, nepochs = 200,  bootstr_n = 1000, nfolds = 5)
# NOTE(review): this calls get_data again for the same cohort that was loaded
# when SGE was created — confirm whether the reload is required.
CDS = SGE.get_data("lgn_pronostic")["CDS"]
cyt = pd.DataFrame(SGE.data["CF"]["Cytogenetic risk"])

# Integer risk level assigned to each "Cytogenetic risk" label
# (favorable-type labels -> 0, intermediate-type labels -> 1, adverse-type labels -> 2).
# Built once here instead of once per row.
CYTO_RISK_LEVELS = {
    "intermediate cytogenetics": 1,
    "Intermediate/Normal": 1,
    "adverse cytogenetics": 2,
    "favorable cytogenetics": 0,
    "Favorable": 0,
    "Standard": 1,
    "Low": 0,
    "Poor": 2,
}
# Level given to samples without a cytogenetic risk label.
MISSING_CYTO_RISK_LEVEL = 1


def _cyto_risk_level(label):
    """Return the integer risk level for one "Cytogenetic risk" label.

    Missing labels (None, NaN, pd.NA) get ``MISSING_CYTO_RISK_LEVEL``; a plain
    ``None`` dictionary key would not match NaN and would raise ``KeyError``.

    Raises:
        KeyError: if ``label`` is present but not a known risk label, so that
            an unexpected category is noticed instead of silently encoded.
    """
    if pd.isna(label):
        return MISSING_CYTO_RISK_LEVEL
    return CYTO_RISK_LEVELS[label]


cyt_levels = [_cyto_risk_level(level) for level in cyt["Cytogenetic risk"]]
cyt["pred_risk"] = cyt_levels
# Concordance index of the cytogenetic risk level against the CDS targets,
# with HyperParams.bootstr_n passed as `n`.
cyt_c_scores, cyt_metrics = functions.compute_cyto_risk_c_index(cyt["pred_risk"], CDS.y, gamma = 0.001, n = HyperParams.bootstr_n)
print("C index method 1: ", cyt_metrics)

# First row of the results table: the cytogenetic-risk baseline.
results = [(1, "c_index", "cytogenetics", cyt_metrics[0], cyt_metrics[1], cyt_metrics[2] )]


Loading ClinF lgn_pronostic file ...
removed 414 genes with null expression across samples 
Now datataset hase shape (300, 19183)


100%|██████████| 1000/1000 [00:00<00:00, 1152.60it/s]

C index method 1:  (0.6307777804624641, 0.614709933070771, 0.6474255200908497)





In [11]:

# Columns whose variance is not above this value are dropped before training.
MIN_VARIANCE = 0.01


def _keep_columns_above(data, threshold):
    """Keep only the columns of ``data.x`` whose variance exceeds a cut-off.

    Args:
        data: dataset object exposing its input matrix as ``data.x``;
            ``data.x`` is replaced by the filtered matrix.
        threshold: either a number, or a callable that receives the
            per-column variances and returns the cut-off (e.g. ``np.median``).
    """
    col_var = data.x.var(0)
    cutoff = threshold(col_var) if callable(threshold) else threshold
    data.x = data.x[data.x.columns[np.where(col_var > cutoff)]]


def _evaluate_model(source, model_type, var_threshold = None, pca_spec = None):
    """Train and evaluate one model type on a fresh clone of ``source``.

    Steps, in order: clone the dataset, optionally filter low-variance
    columns, split into ``HyperParams.nfolds`` folds, generate the default
    parameters for ``model_type``, then call ``cox_models.evaluate``.

    Args:
        source: dataset to clone; it is not modified.
        model_type: model type name passed to ``HyperParams.generate_default``.
        var_threshold: variance cut-off (number or callable, see
            ``_keep_columns_above``); ``None`` skips the filtering.
        pca_spec: ``(min_col, pca_n)`` pair used to build the ``pca_params``
            dictionary, whose ``max_col`` is the number of columns left after
            filtering; ``None`` passes ``pca_params = None``.

    Returns:
        Tuple ``(model name, dataset name, c0, c1, c2)`` where the model name
        is ``params["modeltype"]`` as returned by ``cox_models.evaluate`` and
        ``c0..c2`` are the first three entries of its c-index metrics.
    """
    data = source.clone()
    # preprocess data (remove low variance columns)
    if var_threshold is not None:
        _keep_columns_above(data, var_threshold)
    # splitting
    data.split_train_test(HyperParams.nfolds)
    # generate model parameters
    params = HyperParams.generate_default(model_type = model_type, data = data)
    pca_params = None
    if pca_spec is not None:
        min_col, pca_n = pca_spec
        pca_params = {"min_col": min_col, "max_col": data.x.shape[1], "pca_n": pca_n }
    # train and evaluate model
    c_index_metrics, c_scores, surv_tbl, params = cox_models.evaluate(data, params, pca_params = pca_params)
    return (params["modeltype"], data.name, c_index_metrics[0], c_index_metrics[1], c_index_metrics[2])


# One entry per input configuration:
# (dataset, model types to run, variance cut-off, (min_col, pca_n) or None)
EXPERIMENTS = [
    (clin_factors,           ("ridge_cph_lifelines_CF", "cphdnn_2l"),       MIN_VARIANCE, None),
    (clin_factors_lsc17,     ("ridge_cph_lifelines_CF_LSC17", "cphdnn_5l"), MIN_VARIANCE, None),
    (clin_factors_pca,       ("ridge_cph_lifelines_CF_PCA", "cphdnn_1l"),   MIN_VARIANCE, (16, 17)),
    (clin_factors_lsc17_pca, ("ridge_cph_lifelines_CF_PCA", "cphdnn_1l"),   MIN_VARIANCE, (33, 17)),
    (lsc17_pca,              ("ridge_cph_lifelines_CF_PCA", "cphdnn_1l"),   MIN_VARIANCE, (17, 17)),
    # PCA-only input: keep the columns whose variance is above the median variance.
    (pca_only,               ("ridge_cph_lifelines_CF_PCA", "cphdnn_2l"),   np.median,    (0, 30)),
    # LSC17-only input: no variance filtering.
    (lsc17_only,             ("ridge_cph_lifelines_LSC17", "cphdnn_1l"),    None,         None),
]

# Three repetitions; every repetition re-clones and re-splits each dataset.
for repn in range(1, 4):
    for source, model_types, var_threshold, pca_spec in EXPERIMENTS:
        for model_type in model_types:
            row = _evaluate_model(source, model_type, var_threshold = var_threshold, pca_spec = pca_spec)
            # append to results as soon as a model finishes, so that completed
            # rows are kept if a later model fails
            results.append((repn, *row))
    

clin. factors + None; ridge_cph_lifelines, INsize: 15: 100%|██████████| 5/5 [00:00<00:00,  9.79it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 926.00it/s]


training c indices:  [0.68 0.69 0.69 0.68 0.69]
valid c indices (aggregated):  (0.6769292841235619, 0.6459959026770711, 0.7062959102038837)
Setting up stack... saving to GPU


clin. factors + None; CPHDNN, INsize: 15: 100%|██████████| 5/5 [00:31<00:00,  6.26s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 973.76it/s]


training c indices:  [0.69 0.74 0.76 0.75 0.76]
valid c indices (aggregated):  (0.7032435367957353, 0.6741527446300716, 0.7290453524053446)


clin. factors + LSC17; ridge_cph_lifelines, INsize: 31: 100%|██████████| 5/5 [00:00<00:00,  7.42it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 916.58it/s]


training c indices:  [0.75 0.75 0.76 0.76 0.73]
valid c indices (aggregated):  (0.6911130294796051, 0.6651446336191194, 0.7163660912334844)
Setting up stack... saving to GPU


clin. factors + LSC17; CPHDNN, INsize: 31: 100%|██████████| 5/5 [00:31<00:00,  6.28s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 962.11it/s]


training c indices:  [0.88 0.95 0.99 0.99 1.  ]
valid c indices (aggregated):  (0.8150142461792246, 0.7957047776925551, 0.8331527372090832)


clin. factors + PCA; ridge_cph_lifelines, INsize: 32: 100%|██████████| 5/5 [00:04<00:00,  1.05it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 927.89it/s]


training c indices:  [0.76 0.75 0.76 0.76 0.74]
valid c indices (aggregated):  (0.6902431416035333, 0.6569504636749149, 0.7194807092751364)
Setting up stack... saving to GPU


clin. factors + PCA; CPHDNN, INsize: 32: 100%|██████████| 5/5 [00:34<00:00,  6.87s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 941.98it/s]


training c indices:  [0.69 0.76 0.74 0.77 0.8 ]
valid c indices (aggregated):  (0.6736273935275863, 0.6441701360991405, 0.7033571957541348)


clin. factors + LSC17+PCA; ridge_cph_lifelines, INsize: 49: 100%|██████████| 5/5 [00:05<00:00,  1.18s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 898.92it/s]


training c indices:  [0.79 0.77 0.77 0.77 0.78]
valid c indices (aggregated):  (0.6805750778052175, 0.6511038114100768, 0.7111320435717219)
Setting up stack... saving to GPU


clin. factors + LSC17+PCA; CPHDNN, INsize: 49: 100%|██████████| 5/5 [00:34<00:00,  6.88s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 955.78it/s]


training c indices:  [0.69 0.73 0.78 0.8  0.81]
valid c indices (aggregated):  (0.6741603069184915, 0.6438343372087594, 0.7063003896390325)


LSC17+PCA; ridge_cph_lifelines, INsize: 33: 100%|██████████| 5/5 [00:03<00:00,  1.26it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 907.98it/s]


training c indices:  [0.75 0.74 0.73 0.73 0.74]
valid c indices (aggregated):  (0.6743000401891106, 0.6438694638694639, 0.7046983136757968)
Setting up stack... saving to GPU


LSC17+PCA; CPHDNN, INsize: 33: 100%|██████████| 5/5 [00:33<00:00,  6.72s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 941.45it/s]


training c indices:  [0.66 0.71 0.75 0.76 0.78]
valid c indices (aggregated):  (0.6464639008756747, 0.6167180001432562, 0.6792541600633915)


PCA; ridge_cph_lifelines, INsize: 30: 100%|██████████| 5/5 [00:03<00:00,  1.56it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 921.28it/s]


training c indices:  [0.73 0.73 0.75 0.73 0.74]
valid c indices (aggregated):  (0.6891004325172403, 0.6600601747934476, 0.7187363517251419)
Setting up stack... saving to GPU


PCA; CPHDNN, INsize: 30: 100%|██████████| 5/5 [00:33<00:00,  6.71s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 970.19it/s]


training c indices:  [0.87 0.86 0.88 0.88 0.88]
valid c indices (aggregated):  (0.4869574428212221, 0.45424138912610307, 0.520169696969697)


LSC17; ridge_cph_lifelines, INsize: 17: 100%|██████████| 5/5 [00:00<00:00, 11.31it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 930.03it/s]


training c indices:  [0.71 0.7  0.7  0.7  0.68]
valid c indices (aggregated):  (0.6648717846865357, 0.6334223949477775, 0.694830502242812)
Setting up stack... saving to GPU


LSC17; CPHDNN, INsize: 17: 100%|██████████| 5/5 [00:30<00:00,  6.12s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 971.14it/s]


training c indices:  [0.68 0.71 0.7  0.71 0.71]
valid c indices (aggregated):  (0.6635571425865289, 0.6346454085253347, 0.6928317888367439)


clin. factors + None; ridge_cph_lifelines, INsize: 15: 100%|██████████| 5/5 [00:00<00:00, 11.30it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 934.01it/s]


training c indices:  [0.68 0.68 0.69 0.68 0.69]
valid c indices (aggregated):  (0.6858592647656019, 0.6556680591942222, 0.7136348376576663)
Setting up stack... saving to GPU


clin. factors + None; CPHDNN, INsize: 15: 100%|██████████| 5/5 [00:30<00:00,  6.17s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 981.36it/s]


training c indices:  [0.71 0.74 0.75 0.75 0.75]
valid c indices (aggregated):  (0.7013456557992084, 0.6731736526946108, 0.7278375325376946)


clin. factors + LSC17; ridge_cph_lifelines, INsize: 31: 100%|██████████| 5/5 [00:00<00:00,  7.04it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 933.07it/s]


training c indices:  [0.76 0.73 0.75 0.76 0.74]
valid c indices (aggregated):  (0.6840753782567348, 0.6531213629497561, 0.711270702037227)
Setting up stack... saving to GPU


clin. factors + LSC17; CPHDNN, INsize: 31: 100%|██████████| 5/5 [00:31<00:00,  6.33s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 972.76it/s]


training c indices:  [0.86 0.96 0.96 0.98 0.99]
valid c indices (aggregated):  (0.8043061501393544, 0.7823457394711067, 0.8250805249747608)


clin. factors + PCA; ridge_cph_lifelines, INsize: 32: 100%|██████████| 5/5 [00:05<00:00,  1.06s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 901.95it/s]


training c indices:  [0.76 0.76 0.76 0.74 0.74]
valid c indices (aggregated):  (0.6894781574136258, 0.6600200592224663, 0.7195720017833259)
Setting up stack... saving to GPU


clin. factors + PCA; CPHDNN, INsize: 32: 100%|██████████| 5/5 [00:34<00:00,  6.89s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 963.11it/s]


training c indices:  [0.72 0.73 0.74 0.77 0.79]
valid c indices (aggregated):  (0.6719920712293583, 0.6441770205287183, 0.6993309340354098)


clin. factors + LSC17+PCA; ridge_cph_lifelines, INsize: 49: 100%|██████████| 5/5 [00:06<00:00,  1.20s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 919.95it/s]


training c indices:  [0.77 0.79 0.77 0.78 0.76]
valid c indices (aggregated):  (0.6820204493870783, 0.6535525543159131, 0.71242185376461)
Setting up stack... saving to GPU


clin. factors + LSC17+PCA; CPHDNN, INsize: 49: 100%|██████████| 5/5 [00:33<00:00,  6.74s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 947.78it/s]


training c indices:  [0.76 0.79 0.8  0.81 0.82]
valid c indices (aggregated):  (0.6827095399304566, 0.6496758273659764, 0.7124652399951639)


LSC17+PCA; ridge_cph_lifelines, INsize: 33: 100%|██████████| 5/5 [00:04<00:00,  1.20it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 895.66it/s]


training c indices:  [0.74 0.73 0.74 0.73 0.73]
valid c indices (aggregated):  (0.6767888370247255, 0.6462201988738469, 0.7061311311311311)
Setting up stack... saving to GPU


LSC17+PCA; CPHDNN, INsize: 33: 100%|██████████| 5/5 [00:34<00:00,  6.92s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 952.64it/s]


training c indices:  [0.7  0.74 0.78 0.81 0.79]
valid c indices (aggregated):  (0.6729366724044475, 0.6400765022030698, 0.7024422377827116)


PCA; ridge_cph_lifelines, INsize: 30: 100%|██████████| 5/5 [00:02<00:00,  1.68it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 935.61it/s]


training c indices:  [0.74 0.73 0.74 0.73 0.73]
valid c indices (aggregated):  (0.690996320676861, 0.6613202321604894, 0.718564209274673)
Setting up stack... saving to GPU


PCA; CPHDNN, INsize: 30: 100%|██████████| 5/5 [00:32<00:00,  6.56s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 957.84it/s]


training c indices:  [0.87 0.89 0.89 0.88 0.86]
valid c indices (aggregated):  (0.48614332468596355, 0.4561399276236429, 0.52118287552586)


LSC17; ridge_cph_lifelines, INsize: 17: 100%|██████████| 5/5 [00:00<00:00, 11.58it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 945.37it/s]


training c indices:  [0.7  0.69 0.72 0.69 0.69]
valid c indices (aggregated):  (0.6707894306018893, 0.6415023030589347, 0.700075559997075)
Setting up stack... saving to GPU


LSC17; CPHDNN, INsize: 17: 100%|██████████| 5/5 [00:30<00:00,  6.06s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 980.92it/s]


training c indices:  [0.67 0.71 0.7  0.72 0.73]
valid c indices (aggregated):  (0.6451274097262777, 0.615023023791251, 0.6764003058688587)


clin. factors + None; ridge_cph_lifelines, INsize: 15: 100%|██████████| 5/5 [00:00<00:00, 11.11it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 948.67it/s]


training c indices:  [0.67 0.7  0.68 0.7  0.69]
valid c indices (aggregated):  (0.6754585036082381, 0.6430436847103513, 0.7062085331008765)
Setting up stack... saving to GPU


clin. factors + None; CPHDNN, INsize: 15: 100%|██████████| 5/5 [00:30<00:00,  6.19s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 989.04it/s]


training c indices:  [0.7  0.73 0.73 0.75 0.77]
valid c indices (aggregated):  (0.6949017849102501, 0.6631941170924861, 0.7241119404799533)


clin. factors + LSC17; ridge_cph_lifelines, INsize: 31: 100%|██████████| 5/5 [00:00<00:00,  7.92it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 939.88it/s]


training c indices:  [0.75 0.75 0.74 0.74 0.75]
valid c indices (aggregated):  (0.6961817305830291, 0.6678931951874895, 0.7242721774687202)
Setting up stack... saving to GPU


clin. factors + LSC17; CPHDNN, INsize: 31: 100%|██████████| 5/5 [00:30<00:00,  6.13s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 985.13it/s]


training c indices:  [0.86 0.96 0.96 0.98 0.99]
valid c indices (aggregated):  (0.8052806719830596, 0.7821492049767017, 0.8278680279635869)


clin. factors + PCA; ridge_cph_lifelines, INsize: 32: 100%|██████████| 5/5 [00:03<00:00,  1.31it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 910.08it/s]


training c indices:  [0.75 0.76 0.76 0.73 0.75]
valid c indices (aggregated):  (0.7003287926023958, 0.6710954513200714, 0.7281292704562345)
Setting up stack... saving to GPU


clin. factors + PCA; CPHDNN, INsize: 32: 100%|██████████| 5/5 [00:33<00:00,  6.64s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 954.70it/s]


training c indices:  [0.65 0.65 0.74 0.72 0.77]
valid c indices (aggregated):  (0.6159812494442587, 0.5839863228380111, 0.6470092873297724)


clin. factors + LSC17+PCA; ridge_cph_lifelines, INsize: 49: 100%|██████████| 5/5 [00:04<00:00,  1.03it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 926.04it/s]


training c indices:  [0.77 0.79 0.78 0.76 0.78]
valid c indices (aggregated):  (0.671938263504827, 0.6414055786515549, 0.7004952306603889)
Setting up stack... saving to GPU


clin. factors + LSC17+PCA; CPHDNN, INsize: 49: 100%|██████████| 5/5 [00:33<00:00,  6.77s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 952.59it/s]


training c indices:  [0.76 0.79 0.82 0.8  0.82]
valid c indices (aggregated):  (0.685370785800238, 0.65457101331872, 0.7158520766538584)


LSC17+PCA; ridge_cph_lifelines, INsize: 33: 100%|██████████| 5/5 [00:04<00:00,  1.18it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 895.59it/s]


training c indices:  [0.76 0.73 0.74 0.72 0.74]
valid c indices (aggregated):  (0.657610722126829, 0.6250461493022226, 0.6897264554588721)
Setting up stack... saving to GPU


LSC17+PCA; CPHDNN, INsize: 33: 100%|██████████| 5/5 [00:33<00:00,  6.69s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 946.90it/s]


training c indices:  [0.75 0.74 0.77 0.79 0.8 ]
valid c indices (aggregated):  (0.6757720501656775, 0.6416852637516247, 0.7048455987039128)


PCA; ridge_cph_lifelines, INsize: 30: 100%|██████████| 5/5 [00:02<00:00,  1.68it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 913.40it/s]


training c indices:  [0.74 0.74 0.72 0.74 0.72]
valid c indices (aggregated):  (0.6812983610508117, 0.6485053389438864, 0.7105095466238701)
Setting up stack... saving to GPU


PCA; CPHDNN, INsize: 30: 100%|██████████| 5/5 [00:32<00:00,  6.58s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 964.00it/s]


training c indices:  [0.88 0.89 0.9  0.85 0.85]
valid c indices (aggregated):  (0.5127008712349164, 0.4767671602240766, 0.5476110765698982)


LSC17; ridge_cph_lifelines, INsize: 17: 100%|██████████| 5/5 [00:00<00:00, 11.48it/s]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 938.34it/s]


training c indices:  [0.71 0.69 0.69 0.71 0.7 ]
valid c indices (aggregated):  (0.6635659463113068, 0.6319704352329167, 0.6910536114956796)
Setting up stack... saving to GPU


LSC17; CPHDNN, INsize: 17: 100%|██████████| 5/5 [00:30<00:00,  6.10s/it]
bootstraping 1000...: 100%|██████████| 1000/1000 [00:01<00:00, 981.24it/s]

training c indices:  [0.68 0.69 0.7  0.72 0.72]
valid c indices (aggregated):  (0.6447679813577205, 0.6111137485757691, 0.6791609838686921)





In [12]:

# Column labels of the results table, in the order of each tuple in `results`.
colnames = (
    "repn",
    "model t.",
    "input t.",
    "c.ind med",
    "c.ind 5%",
    "c.ind 95%",
)
# One row per evaluated (repetition, model, input) combination.
resdf = pd.DataFrame(data = results, columns = colnames)

resdf



Unnamed: 0,repn,model t.,input t.,c.ind med,c.ind 5%,c.ind 95%
0,1,c_index,cytogenetics,0.630778,0.61471,0.647426
1,1,ridge_cph_lifelines,clin. factors + None,0.676929,0.645996,0.706296
2,1,CPHDNN,clin. factors + None,0.703244,0.674153,0.729045
3,1,ridge_cph_lifelines,clin. factors + LSC17,0.691113,0.665145,0.716366
4,1,CPHDNN,clin. factors + LSC17,0.815014,0.795705,0.833153
5,1,ridge_cph_lifelines,clin. factors + PCA,0.690243,0.65695,0.719481
6,1,CPHDNN,clin. factors + PCA,0.673627,0.64417,0.703357
7,1,ridge_cph_lifelines,clin. factors + LSC17+PCA,0.680575,0.651104,0.711132
8,1,CPHDNN,clin. factors + LSC17+PCA,0.67416,0.643834,0.7063
9,1,ridge_cph_lifelines,LSC17+PCA,0.6743,0.643869,0.704698


### Plotting... 

In [13]:
import os  # only needed in this cell, to create the output folder before saving


def _plot_c_index_panel(ax, panel_df, xlabel, title, spacer = 0.2):
    """Draw the concordance-index results of one model family on ``ax``.

    Each input type gets one group on the x axis; within a group, each
    repetition is drawn at ``group position + spacer * repn`` as a vertical
    bar from the "c.ind 5%" to the "c.ind 95%" value with a marker and a text
    label at the "c.ind med" value.

    Args:
        ax: matplotlib axes to draw on.
        panel_df: rows of the results table for one model type; reads the
            columns "input t.", "repn", "c.ind med", "c.ind 5%", "c.ind 95%".
        xlabel: x-axis label of the panel.
        title: title of the panel.
        spacer: horizontal distance between repetitions of the same input type.
    """
    input_types = np.unique(panel_df["input t."])
    for (xmark, input_type) in enumerate(input_types):
        rows = panel_df[(panel_df["input t."] == input_type)]
        xaxis = np.array(xmark + (spacer * rows["repn"].values.astype(int)))
        # Only the markers carry a label: labelling the bars as well would
        # list every input type twice in the legend.
        ax.vlines(x = xaxis, ymin = rows["c.ind 5%"], ymax = rows["c.ind 95%"], linewidth = 4, color = "k")
        ax.scatter(x = xaxis, y = rows["c.ind med"], linewidth = 5, label = input_type)
        for (i, value) in zip(xaxis, rows["c.ind med"]):
            ax.text(i, value, str(round(value, 3)), fontsize = 20)
    # Axis decoration does not depend on the input type, so it is done once.
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("concordance index")
    ax.grid(visible = True, linestyle = "--")
    # NOTE(review): some logged validation c-indices (PCA-only CPHDNN, ~0.49)
    # lie below 0.5 and fall outside this range — confirm the limits.
    ax.set_ylim((0.5,0.9))
    # Put each tick at the offset of repetition 2, i.e. the middle of a
    # group of three repetitions.
    ax.set_xticks(np.arange(len(input_types)) + spacer * 2)
    ax.set_xticklabels(input_types, fontsize = 14)
    if len(input_types) > 0:
        ax.legend()


fig, axes = plt.subplots(ncols = 2, nrows = 1, figsize = (22,10))
cph_ax = axes[0]
cphdnn_ax = axes[1]
title = "CPHDNN, CPH, Cyto risk with leucegene from varying input factors"
spacer = 0.2

# Left panel: ridge CPH (lifelines); right panel: CPHDNN.
cph_df = resdf[(resdf["model t."] == "ridge_cph_lifelines")]
cphdnn_df = resdf[(resdf["model t."] == "CPHDNN")]
_plot_c_index_panel(cph_ax, cph_df, "CPH (lifelines)", title, spacer)
_plot_c_index_panel(cphdnn_ax, cphdnn_df, "CPHDNN", title, spacer)
plt.tight_layout()

# Save under a run-specific name; create the folder first so that saving does
# not fail when RES/V2 does not exist yet.
fig_path = f"RES/V2/fig_{str(hash1 * 9)}.png"
os.makedirs(os.path.dirname(fig_path), exist_ok = True)
plt.savefig(fig_path)


#### **Figure**: concordance index of CPH and CPHDNN models trained on Leucegene clinical factors (with and without gene-expression features), compared to the cytogenetic-risk-only benchmark