In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import ShuffleSplit, GridSearchCV,train_test_split
from sklearn.model_selection import cross_validate

from sksurv.datasets import load_veterans_lung_cancer
from sksurv.column import encode_categorical
from sksurv.metrics import concordance_index_censored
from sksurv.svm import FastSurvivalSVM,FastKernelSurvivalSVM
from sksurv.kernels import clinical_kernel
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
def score_survival_model(model, X, y):
    """Scoring callable returning the concordance index of ``model`` on ``(X, y)``.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X : feature matrix passed straight to ``model.predict``.
    y : structured array with the fields ``'Status'`` (event indicator) and
        ``'Survival_in_days'`` (observed time).

    Returns
    -------
    The first element of the tuple returned by ``concordance_index_censored``.
    """
    risk_scores = model.predict(X)
    # Only the leading element of the returned tuple is needed; the rest is discarded.
    c_index, *_ = concordance_index_censored(
        y['Status'], y['Survival_in_days'], risk_scores
    )
    return c_index

In [67]:
# Positional column indices (for DataFrame.iloc) of each feature-set combination.
# Each name is the underscore-joined list of the feature groups it contains.
all_features = [*range(180)]
clin_demo_comp = [*range(154, 180)]
clin_demo_cyto_gen_comp = [*range(1, 180)]
comp = [*range(163, 180)]
cyto_comp = [*range(85, 154), *range(163, 180)]
cyto_gen_comp = [*range(1, 154), *range(163, 180)]
eln_clin_demo_comp = [0, *range(154, 180)]
eln_cyto_comp = [0, *range(85, 154), *range(163, 180)]
eln_cyto_gen_comp = [*range(154), *range(163, 180)]
eln_gen_comp = [*range(85), *range(163, 180)]
gen_comp = [*range(1, 85), *range(163, 180)]
clin_comp = [*range(154, 161), *range(163, 180)]
clin_cyto_comp = [*range(85, 161), *range(163, 180)]
clin_gen_comp = [*range(1, 85), *range(154, 161), *range(163, 180)]
eln_clin_comp = [0, *range(154, 161), *range(163, 180)]

# Same combinations with column 162 left out ("without age").
all_features_without_age = [*range(162), *range(163, 180)]
clin_demo_comp_without_age = [*range(154, 162), *range(163, 180)]
clin_demo_cyto_gen_comp_without_age = [*range(1, 162), *range(163, 180)]
eln_clin_demo_comp_without_age = [0, *range(154, 162), *range(163, 180)]

# Combinations that do not include the 163-179 ("comp") columns.
eln_clin_gen = [*range(85), *range(154, 161)]
eln_demo_gen = [*range(85), 161, 162]
eln_clin_demo_cyto_gen = [*range(163)]
eln_clin_demo_cyto = [0, *range(85, 163)]

eln_clin_demo_gen = [*range(85), *range(154, 163)]
eln_clin_demo = [0, *range(154, 163)]
eln_clin = [0, *range(154, 161)]
eln_cyto_gen = [*range(154)]
clin_demo_cyto_gen = [*range(1, 163)]
clin_demo_cyto = [*range(85, 163)]
clin_demo_gen = [*range(1, 85), *range(154, 163)]
clin_demo = [*range(154, 163)]
cyto_gen = [*range(1, 154)]
cyto = [*range(85, 154)]
gen = [*range(1, 85)]
clin_gen = [*range(1, 85), *range(154, 161)]
clin_cyto = [*range(85, 161)]
demo_gen = [*range(1, 85), 161, 162]
demo_cyto = [*range(85, 154), 161, 162]

# Without age (column 162 dropped), plus a few small combinations.
eln_demo_gen_without_age = [*range(85), 161]
eln_clin_demo_cyto_gen_without_age = [*range(162)]
eln_clin_demo_cyto_without_age = [0, *range(85, 162)]
eln_clin_demo_gen_without_age = [*range(85), *range(154, 162)]
eln_clin_demo_without_age = [0, *range(154, 162)]
clin_demo_cyto_gen_without_age = [*range(1, 162)]
clin_demo_cyto_without_age = [*range(85, 162)]
clin_demo_gen_without_age = [*range(1, 85), *range(154, 162)]
clin_demo_without_age = [*range(154, 162)]
demo_gen_without_age = [*range(1, 85), 161]
demo_cyto_without_age = [*range(85, 154), 161]
gen_age = [*range(1, 85), 162]
eln_comp = [0, *range(163, 180)]
eln_age = [0, 162]
eln_gen = [0, *range(1, 85)]
eln_cyto = [0, *range(85, 154)]

# Name -> index-list mapping for the subset of combinations starting at
# "clin_demo_cyto_gen_comp_without_age". Insertion order matters to any
# consumer that iterates over it.
dicts = {
    "clin_demo_cyto_gen_comp_without_age": clin_demo_cyto_gen_comp_without_age,
    "eln_clin_demo_comp_without_age": eln_clin_demo_comp_without_age,
    "eln_clin_gen": eln_clin_gen,
    "eln_demo_gen": eln_demo_gen,
    "eln_clin_demo_cyto_gen": eln_clin_demo_cyto_gen,
    "eln_clin_demo_cyto": eln_clin_demo_cyto,
    "eln_clin_demo_gen": eln_clin_demo_gen,
    "eln_clin_demo": eln_clin_demo,
    "eln_clin": eln_clin,
    "eln_cyto_gen": eln_cyto_gen,
    "clin_demo_cyto_gen": clin_demo_cyto_gen,
    "clin_demo_cyto": clin_demo_cyto,
    "clin_demo_gen": clin_demo_gen,
    "clin_demo": clin_demo,
    "cyto_gen": cyto_gen,
    "cyto": cyto,
    "gen": gen,
    "clin_gen": clin_gen,
    "clin_cyto": clin_cyto,
    "demo_gen": demo_gen,
    "demo_cyto": demo_cyto,
    "eln_demo_gen_without_age": eln_demo_gen_without_age,
    "eln_clin_demo_cyto_gen_without_age": eln_clin_demo_cyto_gen_without_age,
    "eln_clin_demo_cyto_without_age": eln_clin_demo_cyto_without_age,
    "eln_clin_demo_gen_without_age": eln_clin_demo_gen_without_age,
    "eln_clin_demo_without_age": eln_clin_demo_without_age,
    "clin_demo_cyto_gen_without_age": clin_demo_cyto_gen_without_age,
    "clin_demo_cyto_without_age": clin_demo_cyto_without_age,
    "clin_demo_gen_without_age": clin_demo_gen_without_age,
    "clin_demo_without_age": clin_demo_without_age,
    "demo_gen_without_age": demo_gen_without_age,
    "demo_cyto_without_age": demo_cyto_without_age,
    "gen_age": gen_age,
    "eln_comp": eln_comp,
    "eln_age": eln_age,
    "eln_gen": eln_gen,
    "eln_cyto": eln_cyto,
}

# Full mapping: the 17 combinations not present in `dicts`, followed by every
# entry of `dicts` in the same order (the values are the very same list objects).
dict_features_type_final_comp = {
    "all_features": all_features,
    "clin_demo_comp": clin_demo_comp,
    "clin_demo_cyto_gen_comp": clin_demo_cyto_gen_comp,
    "comp": comp,
    "cyto_comp": cyto_comp,
    "cyto_gen_comp": cyto_gen_comp,
    "eln_clin_demo_comp": eln_clin_demo_comp,
    "eln_cyto_comp": eln_cyto_comp,
    "eln_cyto_gen_comp": eln_cyto_gen_comp,
    "eln_gen_comp": eln_gen_comp,
    "gen_comp": gen_comp,
    "clin_comp": clin_comp,
    "clin_cyto_comp": clin_cyto_comp,
    "clin_gen_comp": clin_gen_comp,
    "eln_clin_comp": eln_clin_comp,
    "all_features_without_age": all_features_without_age,
    "clin_demo_comp_without_age": clin_demo_comp_without_age,
    **dicts,
}

In [68]:
# Permutation-importance experiment for a grid-searched linear survival SVM.
#
# For every feature set in `dicts` and every seed j in 0..24:
#   1. hold out 20% of the rows (seeded by j),
#   2. grid-search `alpha` on the remaining 80% with a 5-split ShuffleSplit,
#   3. record the concordance index on the untouched hold-out set (ref_CI),
#   4. for each of 4 permutation seeds and each feature, shuffle that single
#      column of the hold-out set and record the resulting index (permuted_CI).
# The rows are collected into `df` with the columns
# feature / ref_CI / permuted_CI / algo / model.
df_final = pd.read_table("prognosis_comp_final.tsv")
ci=[]

# The structured survival target is the same for every feature set and seed,
# so it is built once: 'Status' is the boolean event flag, 'Survival_in_days'
# the observed time.
y = np.array(list(zip(df_final.os_status, df_final.os)),dtype=[('Status', '?'), ('Survival_in_days', '<f8')])
param_grid = {'alpha': 10. ** np.array([-6,-5.5,-5,-4.5,-2.5,-1,0]),'optimizer':["avltree"]}

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: DataFrame.append copies the whole frame on every call and no longer
# exists in pandas >= 2.0.
records = []
for key,item in dicts.items():
    # The column slice depends only on the feature set, not on the seed.
    x = df_final.iloc[:,item]
    features = x.columns
    for j in range(25):
        estimator = FastSurvivalSVM(max_iter=1000, tol=1e-6, random_state=j)
        cv = ShuffleSplit(n_splits=5,random_state=j)
        # `iid` is intentionally not passed: the argument was removed in
        # scikit-learn 0.24, and with equally sized ShuffleSplit test folds the
        # weighted and unweighted fold means coincide anyway.
        gcv = GridSearchCV(estimator, param_grid, scoring=score_survival_model,
                           n_jobs=50, refit=True,
                           cv=cv)
        X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=j)
        gcv = gcv.fit(X_train,y_train)
        ref_ci = concordance_index_censored(y_test['Status'], y_test['Survival_in_days'], gcv.predict(X_test))[0]
        print(ref_ci)
        for i in range(4):
            for feature in features:
                # Work on a copy so that exactly one column is shuffled per
                # measurement. Assigning into X_test itself would leave every
                # previously shuffled column shuffled for all later features.
                X_test_permuted = X_test.copy()
                X_test_permuted[feature]=np.random.RandomState(seed=i).permutation(X_test_permuted[feature])
                permuted_ci = concordance_index_censored(y_test['Status'], y_test['Survival_in_days'], gcv.predict(X_test_permuted))[0]
                records.append({'feature': feature, 'ref_CI': ref_ci, 'permuted_CI': permuted_ci, 'algo':'SVM_optimized', 'model': key})

# Created here so the cell also works on a fresh kernel.
df = pd.DataFrame(records, columns=["feature","ref_CI","permuted_CI","algo","model"])

0.655822515631447
0.6606394213844529
0.6891866566716641
0.6662502602540079
0.673002224361741
0.6715026934135299
0.6829015202674881
0.6838308612738377
0.6708084085561826
0.6798827351239997
0.6866432280747025
0.689247311827957
0.6745050858580335
0.6957106838466179
0.6854385826148857
0.6579076504202744
0.6500396498375134
0.6817692863450269
0.6718466099263651
0.6676144578313253
0.6770406291374882
0.6929550811726038
0.6849713253201352
0.6520336913298056
0.6606179580659529
0.6458644110832513
0.664655806957468
0.6910763368315842
0.6757154983263666
0.6696656817502739
0.6732542384084074
0.6774825819840891


  self.best_estimator_.fit(X, y, **fit_params)


0.700626324317587
0.6655184609943257
0.6717058870137073
0.6913201948198562
0.6851359898798229
0.6685208043624319
0.6865226066915188
0.6870521237048169
0.6670729583841198
0.6561192915895697
0.6874849482218833
0.6616620445634503
0.6716626506024096
0.6717605959578236
0.6856844062282129
0.6744598947285726
0.6499514669505589
0.6460185750342302
0.6528231821499985
0.6578551540947931
0.6929503998001
0.6520283796985858
0.6786295275721258
0.6710730691695033
0.6823744502824766
0.6907952137604387
0.6632117978132833
0.6789794786466999
0.694981130858304
0.6883776091081594
0.6657864720863736
0.6890439140589955
0.6860238867357431
0.6514333655200798
0.662680950974142
0.6820903909448502
0.6662522712058908
0.6707951807228916
0.6717286924340793
0.688456558547193
0.6810904234425328
0.6625544039828413
0.6741285735495471
0.699051004538674
0.6874579959676129
0.722576211894053
0.7108057464084947
0.7105673782410943
0.6992795531907862
0.7305354701628975
0.7193537330175744
0.7003183195189838
0.703177244275414
0.7

  self.best_estimator_.fit(X, y, **fit_params)


0.6786762868565717
0.6548791620621727
0.651405995816872
0.6688258038930566
0.6708447943603512
0.684282687274087
0.6602438911870089
0.6616591395293558
0.6764022836499693
0.6695762175838077
0.6479117513788847


  self.best_estimator_.fit(X, y, **fit_params)


0.674689477022073
0.673985604682433
0.6591740142149336
0.6513302131761852
0.6857028176928635
0.6454528067323324
0.6582971887550201
0.6610410119797732
0.6731018226486505
0.6478277947992772
0.6474621911889031
0.6686847358089048
0.6398102009077348
0.6489663007648734
0.6788636931534233
0.6629190089527379
0.6626191029514292
0.6474106877292707
0.6566221402335579
0.6536286301882089
0.6492334189361669
0.655518580144204
0.6785714285714286
0.6693390259329538
0.6597865658349088
0.6769555600241303
0.6644309103851934
0.6318205562502968
0.636667547774167
0.6677932086377137
0.6229638838417647
0.6591807228915663
0.6455917306066455
0.6612911257926364
0.6604367978631471
0.6390941541159157
0.6520810306999456
0.7077474846859428
0.6945626780170896
0.7329304097951025
0.717804577267413
0.7250091298429667
0.7125648567368387
0.7453428425543129
0.7241680169512651
0.7117286133878731
0.7245067744235798
0.7417991807244461
0.7130139152435168
0.7183325260542804
0.7366084548871599
0.72547654828759
0.7019771104744116


  self.best_estimator_.fit(X, y, **fit_params)


0.7294946276861569
0.7131280129406301
0.7247103349822384
0.7033609835090386
0.7325284535437221
0.7217842452947775
0.70174845069123
0.7125108945408446
0.7301067638615618
0.710910815939279
0.70677020671552
0.7324629924670142
0.70673099738986
0.6974656894559385
0.6941831356024443
0.7116480693585936
0.7252239329316885
0.6800642570281125
0.7095184163090813
0.7194980246339763
0.6946814360908162
0.6914393963114882
0.7224632540952507
0.7089218268956105
0.6854578039491791
0.7226855322338831
0.7128717628405323
0.7206434049334351
0.7097227271225091
0.7433333882364568
0.7275021812289667
0.705070045671931
0.7197686395689723
0.7360416733864464
0.7036843769765971
0.7124576178497211
0.7268789927145045
0.7258562050146325
0.6956294619536827
0.6987700775892899
0.7098338283695914
0.714401836090657
0.7002088353413655
0.709789596260907
0.7164270774542678
0.7155471757404352
0.7003788709020885
0.7272472327158152
0.7003364331735805
0.6747207731942266
0.689124187906047
0.6978170694597927
0.706898841339929
0.686

  self.best_estimator_.fit(X, y, **fit_params)


0.6920254971614488
0.6801319820881452
0.6816858189560697
0.703525297349016
0.6424366013901672
0.6433097577367427
0.6743112818590704
0.6580902961290219
0.6559377178712527
0.650219769324829
0.6635152273812858
0.6427692259753209
0.6536391456119578
0.6567149988115046
0.6791278263393865
0.6708807716635041
0.664333369791097
0.6804281582081703
0.662936012022463
0.6235258733952797
0.6293750874628768
0.658489202857831
0.6280641356666985
0.6579116465863454
0.6469715580085821
0.6642707745426779
0.6699583627936209
0.6358455709678429
0.6530378264240585
0.5629542641317803
0.5751352129804461
0.6297866691654173
0.5881660500648633
0.5672703429501013
0.615411943553984
0.5848500321183272
0.5571404088246292
0.6034230881606668
0.6029078519927106
0.6188675289488114
0.5869149272612271
0.5817799721879345
0.5873718077619143
0.5853594874634185
0.5807702657781014
0.5823628193366815
0.5993979288753312
0.5959245800261388
0.581582329317269
0.5908054044569223
0.5795956309551475
0.5883023018304658
0.5736528164824498


  self.best_estimator_.fit(X, y, **fit_params)


0.6258335410694253
0.6342708637684725
0.6397195150938911
0.6577831177627972
0.6561669829222011
0.6469977031608881
0.6679995050194125
0.6563315668749505
0.6090339227201494
0.6168115311056862
0.6354419202055069
0.6129785470657614
0.6498232931726907
0.6275024326436855
0.6544852428538229
0.6555267499410794
0.6243620252371858
0.6437420610699616
0.6525851398102009
0.6371491663199668
0.6754591454272864
0.6384791556559202
0.6696822814647588
0.6619187679698602
0.6762472617067184
0.677629938925589
0.6556613203340048
0.6742571903969574
0.6763055188207593
0.6740037950664136
0.6580053436665052
0.6810932883725966
0.6841097840702365
0.6428695803586976
0.643244756114627
0.6606245484466565
0.6620764400242262
0.6601767068273092
0.6600200992199589
0.6839580359217822
0.6761882315971404
0.6559946143970943
0.6610468664940036
0.6242581013742978
0.6357250296028418
0.6795039980009995
0.6419065007447269
0.6609176322167258
0.6460557189596484
0.6501408265116202
0.6763679421662719
0.6401451660028603
0.668504872830

  self.best_estimator_.fit(X, y, **fit_params)


0.7235751378898816
0.6928842504743833
0.7055358510023281
0.7168865721047503
0.7205093727754489
0.687762176879363
0.690544679924743
0.691627197559605
0.691960728060948


  self.best_estimator_.fit(X, y, **fit_params)


0.697285140562249
0.6922426582015984
0.6982171906643205
0.7068112184774923
0.6858659235369634
0.7183061416387602
0.6978131843717269
0.6850737670816398
0.7129716391804098
0.7088278159483656
0.7100361873775771
0.6949833107505206
0.7300907548630442
0.7010781503178363
0.6937597072075535
0.689438237857539
0.7237122213979292
0.6947106261859582
0.6939266573959781
0.7155795139909357
0.6999762714545599
0.6843034207652003
0.6847371449007199
0.6983543389259051
0.6991967103375729
0.6883855421686746
0.6937261720557035
0.701105540984695
0.6860161835179511
0.6780067006919873
0.7133077088041703
0.6348113117719871
0.6417336064262169
0.6778876186906547
0.6414500552539278
0.6506839082367784
0.6407680359562444
0.6538550228122478
0.649951701358594
0.6447969367512956
0.6485223040963474
0.6749750024191208
0.6671410499683744
0.6509742035280699
0.6697242030039134
0.6647077434153287
0.623351748373514
0.6357268359429664
0.6612025367263386
0.618628669790571
0.6581044176706827
0.6402399144985563
0.6588758673350819

  self.best_estimator_.fit(X, y, **fit_params)


0.5871810887426128
0.6318847315826168
0.611900591559536
0.6105117520135721


  self.best_estimator_.fit(X, y, **fit_params)


0.6409073912501558
0.5962109212812745
0.6273829332065605
0.6170693158726575
0.6089974699557242
0.6043655567881752
0.6335287475444322
0.6254211816815629
0.6159910088170579
0.6129242921337832
0.6187525086296861
0.6249880462847853
0.6022650602409638
0.6093892070379173
0.6176421765545632
0.5990258464922618
0.6035476093559194
0.6122997739982514
0.6374377122544197
0.6193154542836112
0.652845452273863
0.6242732907317542
0.6392633046711597
0.6295317095740111
0.6570668555334113
0.627048797208027
0.6334942871641883
0.6376515331590207
0.6587830209979679
0.6565860215053764
0.6447321135607256
0.6649522807777383
0.6551372300877956
0.6093821727636807
0.6151633417815994
0.6346150758609617
0.612866979057091
0.6494056224899598
0.6264017610745107
0.6540370505627303
0.6507266870924661
0.6226398847731471
0.6443029413220277
0.5839972069698797
0.5847281339008544
0.6416323088455772
0.6158250452441583
0.5899621526509744
0.620104762219505
0.6204601979806631
0.5535258008226349
0.6148256931522859
0.62254971872276

  self.best_estimator_.fit(X, y, **fit_params)


0.7148144677661169
0.7032623840868688
0.7147173068623219
0.6977923923460788
0.7349002684762737
0.7146173501184095
0.6985344999923111
0.6992789794786467
0.7232687159307164
0.6926154332700822
0.7063327135513507
0.7157728657829201
0.721585066835403
0.6875722223101641
0.6904980330570802
0.6930079473388456
0.6928692104172643
0.697140562248996
0.6928009698671218
0.6973706052255901
0.7086181161128132
0.6863669098537746
0.7165080255365479
0.6173866124988098
0.6316926424936794
0.6666041979010495
0.6407053284005189
0.6232943793366754
0.6438910737301299
0.6395829558743596
0.6451296273214508
0.6341401528548801
0.630568100784407
0.6618553043253879
0.6572975964579381
0.6311385759597506
0.647805843864561
0.6469667009412323
0.633688442847419
0.6223236359678448
0.6628160873404512
0.6054716139109368
0.6391807228915662
0.6251495477675509
0.632308024301982


  self.best_estimator_.fit(X, y, **fit_params)


0.6398067405137874
0.6218022982747283
0.6181230307329385
0.6943377662106833
0.6884020866003137
0.7047335707146427


  self.best_estimator_.fit(X, y, **fit_params)


0.708427425166963
0.6967232163606786
0.6901417760005287
0.7269859832325861
0.7177567618097969
0.6958895262113826
0.683258061960225
0.7160516724187982
0.6856973434535104


  self.best_estimator_.fit(X, y, **fit_params)


0.6906220215309136
0.7068477470649198


  self.best_estimator_.fit(X, y, **fit_params)


0.7098789844182551
0.6848891140202301
0.6813863449069395
0.6968772577667175
0.6815849032545982
0.6833574297188755
0.6848330648120085
0.6975283025131968
0.6864875481184697
0.6771456304599681
0.7111961596199211
0.6343114228584125
0.6417656094985118
0.6771067591204398
0.6414580630695559
0.6571743966003785
0.6412472322284279
0.653550310477163
0.6523666334288919
0.6447969367512956
0.6512796133428413
0.6761119891623392
0.6679237824161923
0.6524351182012781
0.669878884437501
0.6649766669303172
0.6235654472638628
0.6367375180756613
0.6608412940515372
0.6196566892990342
0.6592048192771084
0.6401442039273237
0.6593821586268717
0.6572236625029461
0.6385070607759026
0.6496395519556575
0.5987478972926651
0.6276922584568119
0.6641132558720639
0.6257787600698281
0.6071760565718269
0.635050728708814
0.6378946848286199


  self.best_estimator_.fit(X, y, **fit_params)


0.6310451202791973
0.6380614802626521
0.6263291339830441
0.6363980905073703
0.6416587602783048
0.6151232011999812
0.6391746198703769
0.6342165625247173
0.6206448957624302
0.6166171691570911
0.6459099301597495
0.5979567116126359
0.634441767068273
0.6192154923511302
0.6298927658444274
0.6177233089794957
0.6132777029777374
0.6303139279763771


In [66]:
# Write the collected results to disk in CSV format.
# NOTE(review): the ".cs" extension looks like it may have been meant as ".csv";
# confirm before anything downstream relies on this filename.
df.to_csv(path_or_buf="SVM_tmp1.cs")