In [1]:
import os
import pandas as pd
import numpy as np
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
from sksurv.preprocessing import OneHotEncoder
from sksurv.util import Surv

# ========= CONFIG =========
# Directory that holds the per-split CSV files.
data_path = "D:/Final_Year_Project/splits_final"  # change if needed
# File-name stems; the split number and ".csv" are appended when loading.
train_prefix, test_prefix = "train_split_", "test_split_"
# Splits are numbered 1..n_splits.
n_splits = 100

# Predictors cast to pandas "category" dtype before one-hot encoding.
factor_cols = [
    "Race",
    "sex",
    "Mobility",
    "diabetes.y",
    "Asthma",
    "Arthritis",
    "heart_failure",
    "coronary_heart_disease",
    "angina",
    "stroke",
    "thyroid",
    "bronchitis",
    "cancer",
]

# Columns removed from the feature matrix: the two outcome columns
# ("mortstat", "time_mort"), "act_mean", and dyn_feat_0 .. dyn_feat_9.
drop_cols = ["mortstat", "time_mort", "act_mean"] + [
    f"dyn_feat_{k}" for k in range(10)
]

# ========= RESULT STORAGE =========
# One entry per split, appended in split order; NaN marks a split that raised.
c_index_list = []

# Hyperparameters shared by every split; a fresh model is built per iteration.
gbsa_params = {
    "n_estimators": 1000,
    "learning_rate": 0.05,
    "max_depth": 1,
    "random_state": 42,
}

# ========= LOOP THROUGH SPLITS =========
for i in range(1, n_splits + 1):
    # Paired train/test CSV paths for split i.
    train_file, test_file = (
        os.path.join(data_path, f"{prefix}{i}.csv")
        for prefix in (train_prefix, test_prefix)
    )

    try:
        # Load data
        df_train, df_test = pd.read_csv(train_file), pd.read_csv(test_file)

        # Categoricals to category dtype (train first, then test, per column)
        for col in factor_cols:
            for frame in (df_train, df_test):
                frame[col] = frame[col].astype("category")

        # Survival outcome: event indicator "mortstat", time "time_mort"
        y_train, y_test = (
            Surv.from_dataframe("mortstat", "time_mort", frame)
            for frame in (df_train, df_test)
        )

        # Drop unnecessary columns (names absent from a frame are skipped)
        X_train, X_test = (
            frame.drop(columns=drop_cols, errors="ignore")
            for frame in (df_train, df_test)
        )

        # One-hot encode: fit on the training features, reuse on the test features
        encoder = OneHotEncoder()
        X_train_enc = encoder.fit_transform(X_train)
        X_test_enc = encoder.transform(X_test)

        # Model
        model = GradientBoostingSurvivalAnalysis(**gbsa_params)
        model.fit(X_train_enc, y_train)

        # Score on the held-out split and record it
        c_index = model.score(X_test_enc, y_test)
        c_index_list.append(c_index)
        print(f"Split {i}: C-index = {c_index:.4f}")

    except Exception as exc:
        # Best-effort: log the failure and keep the list aligned with split numbers.
        print(f"Error on split {i}: {exc}")
        c_index_list.append(np.nan)

# ========= SAVE RESULTS =========
# One row per split; a split that failed carries NaN in "c_index".
# (Writing df_result to CSV is done in a separate cell.)
df_result = pd.DataFrame({
    "split": list(range(1, n_splits + 1)),
    "c_index": c_index_list
})

# np.nanmean skips NaN entries, so report how many splits failed; otherwise
# the average alone hides how many splits actually contributed to it.
n_failed = int(df_result["c_index"].isna().sum())
if n_failed:
    print(f"\nWarning: {n_failed} of {n_splits} splits failed and are excluded from the average.")

if n_failed < n_splits:
    print(f"\n✅ Done! Average C-index: {np.nanmean(c_index_list):.4f}")
else:
    # Nothing to average (no splits, or every split failed): np.nanmean would
    # return NaN with a RuntimeWarning, so state the situation explicitly.
    print("\nNo split produced a C-index; nothing to average.")

Split 1: C-index = 0.7904
Split 2: C-index = 0.8046
Split 3: C-index = 0.7980
Split 4: C-index = 0.7749
Split 5: C-index = 0.7581
Split 6: C-index = 0.7788
Split 7: C-index = 0.7513
Split 8: C-index = 0.7773
Split 9: C-index = 0.7616
Split 10: C-index = 0.7911
Split 11: C-index = 0.7816
Split 12: C-index = 0.8076
Split 13: C-index = 0.7763
Split 14: C-index = 0.7865
Split 15: C-index = 0.7838
Split 16: C-index = 0.7532
Split 17: C-index = 0.7622
Split 18: C-index = 0.7722
Split 19: C-index = 0.7679
Split 20: C-index = 0.8073
Split 21: C-index = 0.7867
Split 22: C-index = 0.7515
Split 23: C-index = 0.7913
Split 24: C-index = 0.7971
Split 25: C-index = 0.7708
Split 26: C-index = 0.7963
Split 27: C-index = 0.7689
Split 28: C-index = 0.7735
Split 29: C-index = 0.8020
Split 30: C-index = 0.7430
Split 31: C-index = 0.8072
Split 32: C-index = 0.8043
Split 33: C-index = 0.7706
Split 34: C-index = 0.8033
Split 35: C-index = 0.7894
Split 36: C-index = 0.7751
Split 37: C-index = 0.7812
Split 38: 

In [2]:
# Write the per-split C-index table to a CSV in the current working
# directory, without the DataFrame index column.
output_csv = "gbsm_cindex_from_100_splits_fpc.csv"
df_result.to_csv(output_csv, index=False)