In [385]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas
import seaborn as sns
from sklearn.model_selection import ShuffleSplit, GridSearchCV

from sksurv.datasets import load_veterans_lung_cancer
from sksurv.column import encode_categorical
from sksurv.metrics import concordance_index_censored
from sksurv.svm import FastSurvivalSVM,FastKernelSurvivalSVM
from sksurv.kernels import clinical_kernel

In [386]:
# Load the prepared prognosis feature table (tab-separated file).
# The import cell binds the library as `pandas` (there is no `pd` alias), so
# the full module name is used here; `pd.read_table` raises NameError on a
# fresh kernel.
df_final = pandas.read_table("df_prognosis_features_ready.tsv")

# Feature matrix: the columns at positions 1..176 of the table.
# NOTE(review): the 1:177 bounds are hard-coded; confirm they still match the
# column layout of the TSV file.
x = df_final.iloc[:, 1:177]

# Structured outcome array with a boolean 'Status' field and a float64
# 'Survival_in_days' field, one record per row of the table.
# presumably `os_status` is the event indicator and `os` the follow-up time
# in days -- verify against the data dictionary.
y = np.array(
    list(zip(df_final.os_status, df_final.os)),
    dtype=[('Status', '?'), ('Survival_in_days', '<f8')],
)

In [387]:
# Candidate regularisation strengths: 10**e for every integer e in [-7, 1].
# (help(FastSurvivalSVM) lists the estimator's other hyper-parameters.)
ram_grid = dict(alpha=np.power(10., np.arange(-7, 2)))
ram_grid

{'alpha': array([1.e-07, 1.e-06, 1.e-05, 1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00,
        1.e+01])}

In [388]:
# Linear survival SVM, kept under its own name so it is not silently
# discarded: previously it was assigned to `estimator` and overwritten on the
# very next line without ever being used.
linear_estimator = FastSurvivalSVM(optimizer="rbtree", max_iter=1000, tol=1e-6, random_state=17)

# Kernel survival SVM that consumes a precomputed kernel matrix. This is the
# model bound to `estimator`, the name the rest of the notebook refers to.
estimator = FastKernelSurvivalSVM(optimizer="rbtree", kernel="precomputed", random_state=0)

In [389]:
def score_survival_model(model, X, y):
    """Score a fitted survival model with the censored concordance index.

    Parameters
    ----------
    model : object exposing ``predict(X)``
        Fitted estimator whose predictions are compared against the outcomes.
    X : array-like
        Inputs forwarded unchanged to ``model.predict``.
    y : structured array
        Must provide the fields ``'Status'`` and ``'Survival_in_days'``.

    Returns
    -------
    The first element of the result of ``concordance_index_censored``
    (the concordance index itself).
    """
    risk_scores = model.predict(X)
    # Only the leading entry of the returned sequence is needed; the
    # remaining entries are discarded.
    cindex, *_ = concordance_index_censored(
        y['Status'], y['Survival_in_days'], risk_scores
    )
    return cindex

In [390]:
# Search grid for the regularisation strength `alpha`: 10**e with e running
# from -10 towards -3 in steps of 0.2.
param_grid = {'alpha': 10. ** np.arange(-10, -3, 0.2)}

# Ten random train/test resamples with a fixed seed; the split proportions
# are left at the ShuffleSplit defaults.
cv = ShuffleSplit(n_splits=10, random_state=17)

# Grid search scored by `score_survival_model`, run on 4 workers, without
# refitting a final model on the full data.
# The former `iid=False` argument is no longer passed: scikit-learn deprecated
# `iid` in 0.22 and removed it in 0.24, where supplying it raises TypeError.
# Every ShuffleSplit test fold has the same size, so the size-weighted and
# unweighted fold averages coincide and the reported scores are unaffected.
gcv = GridSearchCV(estimator, param_grid, scoring=score_survival_model,
                   n_jobs=4, refit=False,
                   cv=cv)

In [391]:
import warnings

# Silence every UserWarning from this point on so the fit output stays readable.
warnings.simplefilter("ignore", UserWarning)

# `estimator` was created with kernel="precomputed", so the search is fitted
# on the clinical kernel matrix computed from the features rather than on the
# raw feature table (for a non-kernel estimator one would pass `x` directly).
gcv = gcv.fit(clinical_kernel(x), y)

In [392]:
# Display the best score recorded by the grid search; scores come from
# `score_survival_model`, i.e. the censored concordance index.
gcv.best_score_

0.7072491659202412

In [369]:
# Display the parameter setting (here only `alpha`) that achieved the best score.
gcv.best_params_

{'alpha': 9.999999999999591e-06}