In [1]:
# pip install fastsurvivalsvm

In [2]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from sklearn.model_selection import train_test_split,ShuffleSplit, GridSearchCV
from sksurv.svm import FastSurvivalSVM
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
from sksurv.util import Surv
from sksurv.metrics import concordance_index_censored
import numpy as np

In [3]:
dataset_name = 'TCGA-OV'
data = pd.read_csv("../data/OV/"+dataset_name+"/"+dataset_name+"_data.csv")
data

Unnamed: 0,case id,PROM1,PROM2,age,OS(d),final_state,stage
0,a2319490-b85d-4219-a1b0-fa1ec432d5c8,1.1163,7.0028,75,2621,1,3
1,a2319490-b85d-4219-a1b0-fa1ec432d5c8,1.1163,13.9069,75,2621,1,3
2,a2319490-b85d-4219-a1b0-fa1ec432d5c8,0.8823,7.0028,75,2621,1,3
3,a2319490-b85d-4219-a1b0-fa1ec432d5c8,0.8823,13.9069,75,2621,1,3
4,42ebd30b-175e-4ece-a806-e55cb7e40e96,0.7281,82.6072,62,949,1,3
...,...,...,...,...,...,...,...
266,c5355491-e1e8-46a4-a05e-bafcaf2e7459,0.7960,14.5609,76,2648,1,3
267,d1976840-35f7-4423-8458-12fb32a52b33,0.2056,31.0689,73,84,1,4
268,d8d13aa4-45d5-4e1a-a6cf-895bdf05e7b2,0.1940,15.0706,63,351,1,4
269,d77ef9cf-f8e6-4ee9-8d4f-1106885f6b06,1.9786,33.7066,67,787,1,3


In [4]:
X = data[['PROM1', 'PROM2', 'stage', 'age']]
y = data[['OS(d)', 'final_state']]

# Normalization 
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
y_train_surv = np.array(list(zip(y_train['final_state'], y_train['OS(d)'])), dtype=[('event', 'bool'), ('time', 'f4')])
y_test_surv = np.array(list(zip(y_test['final_state'], y_test['OS(d)'])), dtype=[('event', 'bool'), ('time', 'f4')])

In [6]:
# -----------------------------
# 1. Boosting
# -----------------------------
boosting = GradientBoostingSurvivalAnalysis(n_estimators=300, learning_rate=0.1,random_state=42)
boosting.fit(X_train, y_train_surv)
boosting_cindex = boosting.score(X_test, y_test_surv)
print(f"Boosting C-index: {boosting_cindex:.4f}")

Boosting C-index: 0.5324


In [7]:
# -----------------------------
# 2. Elastic Net
# -----------------------------
for i in range(1,10):
    elastic_net = ElasticNetCV(alphas=[i/10], random_state=42)
    # 对于l1_ratio =0，惩罚是L2惩罚。对于l1_ratio =1，这是l1惩罚。对于0< l1_ratio <1，惩罚是l1和L2的组合。
    # 乘以惩罚条件的常数。默认为1.0。alpha=0相当于一个普通的最小二乘，由LinearRegression对象求解。
    elastic_net.fit(X_train, y_train['final_state'])
    elastic_net_cindex = concordance_index(y_test['OS(d)'], elastic_net.predict(X_test), y_test['final_state'])
    print(f"Elastic Net C-index: {elastic_net_cindex:.4f}")  

Elastic Net C-index: 0.5000
Elastic Net C-index: 0.5000
Elastic Net C-index: 0.5000
Elastic Net C-index: 0.5000
Elastic Net C-index: 0.5000
Elastic Net C-index: 0.5000
Elastic Net C-index: 0.5000
Elastic Net C-index: 0.5000
Elastic Net C-index: 0.5000


  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model

In [8]:
# -----------------------------
# 3. survival SVM
# -----------------------------
for i in range(1,10):
    model = FastSurvivalSVM(alpha=i/10)
    model.fit(X_train, y_train_surv)
    y_pred = model.predict(X_test)
    c_index = concordance_index_censored(y_test_surv['event'], y_test_surv['time'], y_pred)[0]
    print(f"C-index: {c_index:.4f}")

C-index: 0.5849
C-index: 0.5849
C-index: 0.5846
C-index: 0.5852
C-index: 0.5849
C-index: 0.5858
C-index: 0.5855
C-index: 0.5846
C-index: 0.5858
