In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import ShuffleSplit, GridSearchCV,train_test_split
from sklearn.model_selection import cross_validate

from sksurv.datasets import load_veterans_lung_cancer
from sksurv.column import encode_categorical
from sksurv.metrics import concordance_index_censored
from sksurv.svm import FastSurvivalSVM,FastKernelSurvivalSVM
from sksurv.kernels import clinical_kernel
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

##### In this notebook, we create the different features that we want to put in our evaluation model with a dictionary. After that,
##### we use a gridsearch on SVM survival to find a good set of parameters and we train our model 25 times with the best parameters found
##### using the training and validation set. Finally, we evaluate 25 times our model on a separate testing set.

In [3]:
def score_survival_model(model, X, y):
    prediction = model.predict(X)
    result = concordance_index_censored(y['Status'], y['Survival_in_days'], prediction)
    return result[0]

In [23]:
df_final.iloc[:,eln_comp_gen_cyto_clin_demo ].columns

Index(['eln_2017_ratio', 'final_component_additions',
       'final_component_BAGE3_KMT2C', 'final_component_CEBPA_bi',
       'final_component_chr_splicing_1',
       'final_component_chr_splicing_multiple',
       'final_component_DNMT3A_IDH1_2', 'final_component_inv_3',
       'final_component_KIT_NRAS', 'final_component_not_assigned',
       ...
       'inv_3', 'ahd', 'perf_status', 'bm_blasts', 'secondary', 'wbc', 'hb',
       'plt', 'gender', 'age'],
      dtype='object', length=129)

In [4]:
df_final = pd.read_table("data_frame_final_prognosis.tsv",sep=" ")

eln = [0]
comp =list(range(163,178)) 
age = [162]
all_gen = list(range(1,85))

tmp = df_final.iloc[:,all_gen][df_final.iloc[:,all_gen] >0].count()

gen = [df_final.columns.get_loc(c) for c in tmp[tmp>df_final.shape[0]*0.02].keys() if c in df_final]

cyto = list(range(85,154))
clin =list(range(154,161))
demo = [161,162]
demo_without_age = [161]

eln_comp = eln + comp
eln_age = eln + age
eln_gen = eln + gen
eln_cyto = eln + cyto
eln_clin = eln + clin
eln_demo = eln + demo
eln_demo_without_age = eln + demo_without_age

# USEFUL FOR ELN COMPARISON
# with comp
eln_comp_age = eln_comp + age
eln_comp_gen = eln_comp + gen
eln_comp_cyto = eln_comp + cyto
eln_comp_clin = eln_comp + clin
eln_comp_demo = eln_comp + demo
eln_comp_demo_without_age = eln_comp + demo_without_age

eln_comp_age_gen = eln_comp_age + gen
eln_comp_age_cyto = eln_comp_age + cyto
eln_comp_age_clin = eln_comp_age + clin

eln_comp_gen_cyto = eln_comp_gen + cyto
eln_comp_gen_clin = eln_comp_gen + clin
eln_comp_gen_demo = eln_comp_gen + demo
eln_comp_gen_demo_without_age = eln_comp_gen + demo_without_age

eln_comp_cyto_clin = eln_comp_cyto + clin
eln_comp_cyto_demo = eln_comp_cyto + demo
eln_comp_cyto_demo_without_age = eln_comp_cyto + demo_without_age

eln_comp_clin_demo = eln_comp_clin + demo
eln_comp_clin_demo_without_age = eln_comp_clin + demo_without_age

eln_comp_age_gen_cyto = eln_comp_age_gen + cyto
eln_comp_age_gen_clin = eln_comp_age_gen + clin
eln_comp_age_gen_demo = eln_comp_age_gen + demo


eln_comp_gen_cyto_clin_demo = eln_comp_gen_cyto + clin + demo

# without comp

eln_age_gen = eln_age + gen
eln_age_cyto = eln_age + cyto
eln_age_clin = eln_age + clin

eln_gen_cyto = eln_gen + cyto
eln_gen_clin = eln_gen + clin
eln_gen_demo = eln_gen + demo
eln_gen_demo_without_age = eln_gen + demo_without_age

eln_cyto_clin = eln_cyto + clin
eln_cyto_demo = eln_cyto + demo
eln_cyto_demo_without_age = eln_cyto + demo_without_age

eln_clin_demo = eln_clin + demo
eln_clin_demo_without_age = eln_clin + demo_without_age

eln_age_gen_cyto = eln_age_gen + cyto
eln_age_gen_clin = eln_age_gen + clin
eln_age_gen_demo = eln_age_gen + demo


eln_gen_cyto_clin_demo = eln_gen_cyto + clin + demo

# USEFUL FOR COMP
comp_age = comp + age
comp_gen = comp + gen
comp_cyto = comp + cyto
comp_clin = comp + clin
comp_demo = comp + demo
comp_demo_without_age = comp + demo_without_age
comp_gen_cyto = comp_gen + cyto
comp_clin_demo = comp_clin + demo
comp_gen_cyto_clin_demo = comp_gen_cyto + clin + demo

#USEFUL FOR GEN
gen_age = gen + age
gen_cyto = gen + cyto
gen_clin = gen + clin
gen_demo = gen + demo
gen_demo_without_age = gen + demo_without_age
gen_clin_demo = gen_clin + demo
gen_cyto_clin_demo = gen_cyto + clin + demo

#USEFUL FOR CYTO 
cyto_age = cyto + age
cyto_clin = cyto + clin
cyto_demo = cyto + demo
gen_demo_without_age = gen + demo_without_age
cyto_clin_demo = cyto_clin + demo
cyto_gen_demo = gen_cyto + demo


clin_age  = clin + age


In [6]:
dict_features_type_final_comp = dict(zip(("eln","comp","gen","cyto","clin","demo","eln_comp","eln_age","eln_gen","eln_cyto","eln_clin","eln_demo","eln_demo_without_age","eln_comp_age",
"eln_comp_gen","eln_comp_cyto","eln_comp_clin","eln_comp_demo","eln_comp_demo_without_age","eln_comp_age_gen",
"eln_comp_age_cyto","eln_comp_age_clin","eln_comp_gen_cyto","eln_comp_gen_clin","eln_comp_gen_demo",
"eln_comp_gen_demo_without_age","eln_comp_cyto_clin","eln_comp_cyto_demo","eln_comp_cyto_demo_without_age",
"eln_comp_clin_demo","eln_comp_clin_demo_without_age","eln_comp_age_gen_cyto","eln_comp_age_gen_clin",
"eln_comp_age_gen_demo","eln_comp_gen_cyto_clin_demo","eln_age_gen","eln_age_cyto","eln_age_clin","eln_gen_cyto","eln_gen_clin","eln_gen_demo","eln_gen_demo_without_age",
"eln_cyto_clin","eln_cyto_demo","eln_cyto_demo_without_age","eln_clin_demo","eln_clin_demo_without_age",
"eln_age_gen_cyto","eln_age_gen_clin","eln_age_gen_demo","eln_gen_cyto_clin_demo","comp_age","comp_gen","comp_cyto","comp_clin","comp_demo","comp_demo_without_age","comp_gen_cyto","comp_clin_demo",
 "comp_gen_cyto_clin_demo","gen_age","gen_cyto","gen_clin","gen_demo","gen_demo_without_age","gen_clin_demo","gen_cyto_clin_demo",
 "cyto_age","cyto_clin","cyto_demo","gen_demo_without_age","cyto_clin_demo","cyto_gen_demo","clin_age"),(eln,comp,gen,cyto,clin,demo,eln_comp,eln_age,eln_gen,eln_cyto,eln_clin,eln_demo,eln_demo_without_age,eln_comp_age,
eln_comp_gen,eln_comp_cyto,eln_comp_clin,eln_comp_demo,eln_comp_demo_without_age,eln_comp_age_gen,
eln_comp_age_cyto,eln_comp_age_clin,eln_comp_gen_cyto,eln_comp_gen_clin,eln_comp_gen_demo,
eln_comp_gen_demo_without_age,eln_comp_cyto_clin,eln_comp_cyto_demo,eln_comp_cyto_demo_without_age,
eln_comp_clin_demo,eln_comp_clin_demo_without_age,eln_comp_age_gen_cyto,eln_comp_age_gen_clin,
eln_comp_age_gen_demo,eln_comp_gen_cyto_clin_demo,eln_age_gen,eln_age_cyto,eln_age_clin,eln_gen_cyto,eln_gen_clin,eln_gen_demo,eln_gen_demo_without_age,
eln_cyto_clin,eln_cyto_demo,eln_cyto_demo_without_age,eln_clin_demo,eln_clin_demo_without_age,
eln_age_gen_cyto,eln_age_gen_clin,eln_age_gen_demo,eln_gen_cyto_clin_demo,comp_age,comp_gen,comp_cyto,comp_clin,comp_demo,comp_demo_without_age,comp_gen_cyto,comp_clin_demo,
 comp_gen_cyto_clin_demo,gen_age,gen_cyto,gen_clin,gen_demo,gen_demo_without_age,gen_clin_demo,gen_cyto_clin_demo,
 cyto_age,cyto_clin,cyto_demo,gen_demo_without_age,cyto_clin_demo,cyto_gen_demo,clin_age)))


In [None]:
dict_features_type_final_comp = dict(zip(("eln","comp","gen","cyto","clin","demo","eln_comp","eln_age","eln_gen","eln_cyto","eln_clin","eln_demo","eln_demo_without_age","eln_comp_age",
"eln_comp_gen","eln_comp_cyto","eln_comp_clin","eln_comp_demo","eln_comp_demo_without_age","eln_comp_age_gen",
"eln_comp_age_cyto","eln_comp_age_clin","eln_comp_gen_cyto","eln_comp_gen_clin","eln_comp_gen_demo",
"eln_comp_gen_demo_without_age","eln_comp_cyto_clin","eln_comp_cyto_demo","eln_comp_cyto_demo_without_age",
"eln_comp_clin_demo","eln_comp_clin_demo_without_age","eln_comp_age_gen_cyto","eln_comp_age_gen_clin",
"eln_comp_age_gen_demo","eln_comp_gen_cyto_clin_demo","eln_age_gen","eln_age_cyto","eln_age_clin","eln_gen_cyto","eln_gen_clin","eln_gen_demo","eln_gen_demo_without_age",
"eln_cyto_clin","eln_cyto_demo","eln_cyto_demo_without_age","eln_clin_demo","eln_clin_demo_without_age"),(eln,comp,gen,cyto,clin,demo,eln_comp,eln_age,eln_gen,eln_cyto,eln_clin,eln_demo,eln_demo_without_age,eln_comp_age,
eln_comp_gen,eln_comp_cyto,eln_comp_clin,eln_comp_demo,eln_comp_demo_without_age,eln_comp_age_gen,
eln_comp_age_cyto,eln_comp_age_clin,eln_comp_gen_cyto,eln_comp_gen_clin,eln_comp_gen_demo,
eln_comp_gen_demo_without_age,eln_comp_cyto_clin,eln_comp_cyto_demo,eln_comp_cyto_demo_without_age,
eln_comp_clin_demo,eln_comp_clin_demo_without_age,eln_comp_age_gen_cyto,eln_comp_age_gen_clin,
eln_comp_age_gen_demo,eln_comp_gen_cyto_clin_demo,eln_age_gen,eln_age_cyto,eln_age_clin,eln_gen_cyto,eln_gen_clin,eln_gen_demo,eln_gen_demo_without_age,
eln_cyto_clin,eln_cyto_demo,eln_cyto_demo_without_age,eln_clin_demo,eln_clin_demo_without_age)))
estimator = FastSurvivalSVM(max_iter=1000, tol=1e-6, random_state=17)
param_grid = {'alpha': 10. ** np.array([-6,-5,-2.5,-1,0]),'optimizer':["avltree"]}
cv = ShuffleSplit(n_splits=5,random_state=17)
gcv = GridSearchCV(estimator, param_grid, scoring=score_survival_model,
                   n_jobs=50, iid=False, refit=True,
                   cv=cv)
df=pd.DataFrame(columns=dict_features_type_final_comp.keys())
for key,item in dict_features_type_final_comp.items():
    x = df_final.iloc[:,item]
    y = np.array(list(zip(df_final.os_status, df_final.os)),dtype=[('Status', '?'), ('Survival_in_days', '<f8')])
    ci=[]
    for i in range(25):
        X_train, X_test, y_train, y_test = train_test_split(pd.DataFrame(x), y, test_size=0.2, random_state=i)
        gcv = gcv.fit(X_train,y_train)
        print(gcv.best_params_)
        ci.append(concordance_index_censored(y_test['Status'], y_test['Survival_in_days'], gcv.predict(X_test))[0])
        print(ci)
    df[key] = ci
    
df.to_csv("SVM.csv")

In [None]:
dict_features_type_final_comp = dict(zip(("eln_age_gen_cyto","eln_age_gen_clin","eln_age_gen_demo","eln_gen_cyto_clin_demo","comp_age","comp_gen","comp_cyto","comp_clin","comp_demo","comp_demo_without_age","comp_gen_cyto","comp_clin_demo",
 "comp_gen_cyto_clin_demo","gen_age","gen_cyto","gen_clin","gen_demo","gen_demo_without_age","gen_clin_demo","gen_cyto_clin_demo",
 "cyto_age","cyto_clin","cyto_demo","gen_demo_without_age","cyto_clin_demo","cyto_gen_demo","clin_age"),(eln_age_gen_cyto,eln_age_gen_clin,eln_age_gen_demo,eln_gen_cyto_clin_demo,comp_age,comp_gen,comp_cyto,comp_clin,comp_demo,comp_demo_without_age,comp_gen_cyto,comp_clin_demo,
 comp_gen_cyto_clin_demo,gen_age,gen_cyto,gen_clin,gen_demo,gen_demo_without_age,gen_clin_demo,gen_cyto_clin_demo,
 cyto_age,cyto_clin,cyto_demo,gen_demo_without_age,cyto_clin_demo,cyto_gen_demo,clin_age)))

estimator = FastSurvivalSVM(max_iter=1000, tol=1e-6, random_state=17)
param_grid = {'alpha': 10. ** np.array([-6,-5,-2.5,-1,0]),'optimizer':["avltree"]}
cv = ShuffleSplit(n_splits=5,random_state=17)
gcv = GridSearchCV(estimator, param_grid, scoring=score_survival_model,
                   n_jobs=50, iid=False, refit=True,
                   cv=cv)

df1=pd.DataFrame(columns=dict_features_type_final_comp.keys())

for key,item in dict_features_type_final_comp.items():
    x = df_final.iloc[:,item]
    y = np.array(list(zip(df_final.os_status, df_final.os)),dtype=[('Status', '?'), ('Survival_in_days', '<f8')])
    ci=[]
    for i in range(25):
        X_train, X_test, y_train, y_test = train_test_split(pd.DataFrame(x), y, test_size=0.2, random_state=i)
        gcv = gcv.fit(X_train,y_train)
        print(gcv.best_params_)
        ci.append(concordance_index_censored(y_test['Status'], y_test['Survival_in_days'], gcv.predict(X_test))[0])
        print(ci)
    df1[key] = ci
    
df1.to_csv("SVM_add.csv")




In [19]:
df.join(df1).to_csv("SVM.csv")

In [17]:
df.head()

Unnamed: 0,eln,comp,gen,cyto,clin,demo,eln_comp,eln_age,eln_gen,eln_cyto,...,eln_age_clin,eln_gen_cyto,eln_gen_clin,eln_gen_demo,eln_gen_demo_without_age,eln_cyto_clin,eln_cyto_demo,eln_cyto_demo_without_age,eln_clin_demo,eln_clin_demo_without_age
0,0.608907,0.629983,0.637071,0.575557,0.588574,0.690158,0.636938,0.704671,0.645507,0.615548,...,0.710428,0.650363,0.656621,0.710819,0.64405,0.661711,0.695218,0.620679,0.710992,0.652423
1,0.603909,0.624365,0.632087,0.576941,0.621896,0.681443,0.633654,0.694363,0.650553,0.623082,...,0.714859,0.654995,0.68228,0.694233,0.650033,0.678593,0.694395,0.629919,0.714518,0.673006
2,0.600245,0.653404,0.64423,0.597049,0.619835,0.639417,0.642074,0.65531,0.656123,0.627907,...,0.679393,0.656014,0.681972,0.680206,0.652701,0.674376,0.669782,0.623687,0.679706,0.65556
3,0.632842,0.650995,0.653389,0.580297,0.611555,0.702098,0.668029,0.728355,0.66128,0.638741,...,0.735371,0.661083,0.682071,0.729638,0.658941,0.679157,0.720786,0.635338,0.734552,0.681378
4,0.611693,0.651032,0.646689,0.59468,0.582525,0.672501,0.652518,0.688429,0.655785,0.628521,...,0.697823,0.659246,0.667764,0.704615,0.655874,0.648239,0.690501,0.628312,0.697935,0.640082


In [16]:
df1.shape

(25, 26)