In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sksurv.util import Surv
from sksurv.metrics import cumulative_dynamic_auc
from lifelines import WeibullFitter, ExponentialFitter, LogNormalFitter, LogLogisticFitter, LogNormalAFTFitter

In [2]:
# Read the input dataset from a CSV file resolved relative to the current
# working directory; every later cell derives its frames from `data`.
data = pd.read_csv("data_ready_45.csv")

In [3]:
# Interaction terms: each new column is the element-wise product of two
# existing columns. Insertion order of the mapping fixes the column order.
interaction_terms = {
    "AGE_CMV": ("AGE", "CMV_STATUS"),
    "VIR_CO_INF": ("CMV_STATUS", "EBV_SEROSTATUS"),
    "AGE_BMI_DON": ("AGE_DON", "BMI_DON_CALC"),
    "AGE_BMI": ("AGE", "BMI_CALC"),
    "DIAB_BMI": ("YRS_DIAB", "BMI_CALC"),
}
for new_col, (first_col, second_col) in interaction_terms.items():
    data[new_col] = data[first_col] * data[second_col]

In [4]:
# Shift the duration by one unit, then divide by 12.
# NOTE(review): looks like a months-to-years conversion -- confirm the unit of
# the raw column.
# WARNING: this overwrites `time_frame` in place, so executing the cell more
# than once applies the shift/scale again; re-run the load cell first.
data["time_frame"] = (data["time_frame"] + 1) / 12

In [5]:
# 80/20 hold-out split, stratified on the event indicator so both parts keep
# the same event rate; the fixed seed makes the split reproducible.
train, test = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
    stratify=data["GRF_STAT_PA"],
)

In [6]:
# Training targets: duration and event flag, kept both as a DataFrame and as
# the scikit-survival representation built by Surv.from_dataframe.
y_train = train.loc[:, ["time_frame", "GRF_STAT_PA"]]
y_real_train = Surv.from_dataframe(event="GRF_STAT_PA", time="time_frame", data=y_train)

# Every remaining column is used as a covariate.
x_train = train.drop(columns=["time_frame", "GRF_STAT_PA"])

In [7]:
# Held-out targets (duration + event flag) and covariates, mirroring the
# training-side split. `columns=` already names the axis, so no `axis`
# argument is needed.
y = test.loc[:, ["time_frame", "GRF_STAT_PA"]]
x_test = test.drop(columns=["time_frame", "GRF_STAT_PA"])

In [8]:
from sklearn.linear_model import ElasticNetCV

# Screen covariates with a cross-validated elastic net regressed on the
# observed duration.
# NOTE(review): only `time_frame` is passed as the target here; the event
# indicator GRF_STAT_PA is not used in this step -- confirm that treating
# censored and uncensored durations alike is intended.
l1_ratio_grid = [0.1, 0.5, 0.7, 0.9, 0.95, 0.99]
elastic_net = ElasticNetCV(l1_ratio=l1_ratio_grid, cv=50)
elastic_net.fit(x_train, y_train['time_frame'])

# Retain the covariates whose fitted coefficient is not exactly zero.
nonzero_mask = elastic_net.coef_ != 0
selected_features = x_train.columns[nonzero_mask]
print("Selected features:", selected_features)

# Log-normal AFT model on the retained covariates; the duration and event
# columns are appended because lifelines reads them from the same frame.
features = [*selected_features, 'time_frame', 'GRF_STAT_PA']
aft = LogNormalAFTFitter()
aft.fit(train[features], duration_col='time_frame', event_col='GRF_STAT_PA')

Selected features: Index(['CREAT_TRR', 'BUN_DON', 'SGOT_DON', 'SGPT_DON', 'CLIN_INFECT_DON',
       'HGT_CM_DON_CALC', 'WGT_KG_DON_CALC', 'PA_PRESERV_TM', 'DIAG_PA_is5001',
       'DAYSWAIT_CHRON_PA', 'ORGAN_isKP', 'CMV_IGG', 'MED_COND_TRR_is3',
       'HGT_CM_CALC', 'WGT_KG_CALC', 'LIPASE', 'AMYLASE', 'RESUSCIT_DUR',
       'INOTROP_SUPPORT_DON', 'YRS_DIAB', 'AGE_CMV', 'AGE_BMI_DON', 'AGE_BMI',
       'DIAB_BMI'],
      dtype='object')


<lifelines.LogNormalAFTFitter: fitted with 16908 total observations, 12979 right-censored observations>

In [9]:
# scikit-survival representation of the held-out targets.
y_real = Surv.from_dataframe(event="GRF_STAT_PA", time="time_frame", data=y)

# Evaluation horizons 12, 24, ..., 108.
# NOTE(review): `time_frame` was rescaled by (x + 1) / 12 in an earlier cell,
# while these horizons step by 12 -- confirm both are in the same unit.
time_points = 12 * np.arange(1, 10)

# Cumulative hazard at each horizon serves as the risk score; lifelines
# returns one row per horizon, so transpose to one row per subject.
preds = aft.predict_cumulative_hazard(df=x_test, times=time_points)
risk_by_subject = preds.T

auc, mean_auc = cumulative_dynamic_auc(
    survival_train=y_real_train,
    survival_test=y_real,
    estimate=risk_by_subject,
    times=time_points,
)

In [10]:
# Report the time-dependent AUC at each evaluation horizon and its mean.
print("Dynamic AUC values at different time points:")
# The loop variable gets its own name: reusing `auc` would rebind the
# per-horizon array to a scalar, so re-running this cell (or any later use of
# `auc`) would fail.
for t, auc_t in zip(time_points, auc):
    print(f"Time {t:.2f}: AUC = {auc_t:.3f}")

print(f"\nMean Dynamic AUC: {mean_auc:.3f}")

Dynamic AUC values at different time points:
Time 12.00: AUC = 0.760
Time 24.00: AUC = 0.750
Time 36.00: AUC = 0.729
Time 48.00: AUC = 0.736
Time 60.00: AUC = 0.745
Time 72.00: AUC = 0.745
Time 84.00: AUC = 0.732
Time 96.00: AUC = 0.725
Time 108.00: AUC = 0.724

Mean Dynamic AUC: 0.739


In [11]:
# Expected survival time under the fitted AFT model, wrapped as a one-column
# frame named "AFT" for each split.
aft_expectation_train = aft.predict_expectation(x_train)
aft_expectation_test = aft.predict_expectation(x_test)
preds_train = pd.DataFrame(aft_expectation_train, columns=["AFT"])
preds_test = pd.DataFrame(aft_expectation_test, columns=["AFT"])

# Append the AFT expectation to the selected covariates + targets.
# Column-wise concat aligns on the row index.
# NOTE(review): assumes the predictions keep the row index of the input
# frames -- confirm, otherwise rows would be padded with NaN.
new_train = pd.concat([train[features], preds_train], axis="columns")
new_test = pd.concat([test[features], preds_test], axis="columns")

In [12]:
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index

# Penalised Cox proportional-hazards model fitted on the stacked training
# frame (selected covariates plus the "AFT" expectation column).
cox_model = CoxPHFitter(penalizer=0.1).fit(
    new_train,
    duration_col='time_frame',
    event_col='GRF_STAT_PA',
)

In [15]:
# Expected survival time under the Cox model, as a one-column "COX" frame per
# split. Train and test are scored with the same call: predict_expectation
# takes the covariate frame (and optionally `conditional_after`), not an
# evaluation grid, so no `times` argument is passed.
preds_cox_train = pd.DataFrame(cox_model.predict_expectation(new_train), columns=["COX"])
preds_cox_test = pd.DataFrame(cox_model.predict_expectation(new_test), columns=["COX"])

# Append the Cox expectation next to the existing covariates and AFT column.
# NOTE(review): assumes the predictions keep the row index of the input
# frames so the column-wise concat lines up -- confirm.
new_cox_train = pd.concat([new_train, preds_cox_train], axis=1)
new_cox_test = pd.concat([new_test, preds_cox_test], axis=1)

In [14]:
# Persist both augmented splits as CSV in the working directory, dropping the
# row index from the output.
for frame, out_path in ((new_cox_train, "train_aft.csv"), (new_cox_test, "test_aft.csv")):
    frame.to_csv(out_path, index=False)