In [109]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sksurv.util import Surv
from sksurv.metrics import cumulative_dynamic_auc
from lifelines import WeibullFitter, ExponentialFitter, LogNormalFitter, LogLogisticFitter

In [110]:
# Load the prepared dataset and shift every duration by one in a single
# statement. Keeping the load and the shift together makes the cell idempotent:
# re-running it always yields (raw time_frame + 1), whereas a separate in-place
# `data["time_frame"] + 1` cell adds another unit on every re-execution.
# NOTE(review): the +1 shift presumably exists so that no duration is zero
# before the parametric survival fit further down — confirm against the data.
data = pd.read_csv("data_ready_45.csv").assign(
    time_frame=lambda df: df["time_frame"] + 1
)

In [112]:
# Hold out 20% of the rows for testing. The split is stratified on the event
# indicator so both partitions keep the same event/censoring proportions, and
# the fixed seed makes the partition reproducible.
train, test = train_test_split(
    data,
    test_size=0.2,
    stratify=data["GRF_STAT_PA"],
    random_state=42,
)

In [113]:
# Training covariates: everything except the duration and the event indicator.
x_train = train.drop(columns=["time_frame", "GRF_STAT_PA"])

# Training targets as a plain two-column frame (duration, event indicator) ...
y_train = train[["time_frame", "GRF_STAT_PA"]]

# ... and as the structured survival array built by scikit-survival's `Surv`.
y_real_train = Surv.from_dataframe(event="GRF_STAT_PA", time="time_frame", data=y_train)

In [114]:
# Test-set counterparts of the training objects: the (duration, event) target
# columns and the remaining covariate columns.
target_columns = ["time_frame", "GRF_STAT_PA"]

y = test[target_columns]
x_test = test.drop(columns=target_columns)

In [115]:
from lifelines import LogNormalAFTFitter
from sklearn.linear_model import LassoCV

# --- Feature selection with LASSO -------------------------------------------
# Regress the durations on all training covariates (50-fold CV chooses the
# penalty) and keep only the columns whose coefficient is non-zero.
# NOTE(review): the regression target is `time_frame` alone; the event
# indicator is not used here, so censored and observed durations are treated
# alike during selection — confirm this is intended.
lasso = LassoCV(cv=50).fit(x_train, y_train['time_frame'])
nonzero_mask = lasso.coef_ != 0
selected_features = x_train.columns[nonzero_mask]
print("Selected features:", selected_features)

# --- Log-normal AFT model on the selected covariates -------------------------
# `features` = selected covariates plus the duration and event columns that
# the fitter reads from the same frame.
features = [*selected_features, 'time_frame', 'GRF_STAT_PA']
aft = LogNormalAFTFitter()
aft.fit(train[features], duration_col='time_frame', event_col='GRF_STAT_PA')


Selected features: Index(['PERIP_VASC', 'CREAT_TRR', 'AGE_DON', 'DDAVP_DON', 'CMV_DON', 'BUN_DON',
       'SGOT_DON', 'SGPT_DON', 'TBILI_DON', 'CLIN_INFECT_DON',
       'HGT_CM_DON_CALC', 'WGT_KG_DON_CALC', 'AGE', 'DIAL_TRR', 'DUCT_MGMT_2',
       'DIAG_PA_is5001', 'DAYSWAIT_CHRON_PA', 'ORGAN_isKP', 'MED_COND_TRR_is3',
       'HGT_CM_CALC', 'WGT_KG_CALC', 'PROTEIN_URINE', 'LIPASE', 'AMYLASE',
       'RESUSCIT_DUR', 'INOTROP_SUPPORT_DON', 'YRS_DIAB'],
      dtype='object')


<lifelines.LogNormalAFTFitter: fitted with 16908 total observations, 12979 right-censored observations>

In [104]:
# Time-dependent AUC of the fitted AFT model on the held-out test split.

# Structured survival array for the test targets.
y_real = Surv.from_dataframe("GRF_STAT_PA", "time_frame", y)

# Evaluation horizons, in the units of `time_frame`
# (presumably days: 30 d, 1 y, 2 y, 5 y — TODO confirm).
time_points = [30, 365, 365*2, 365*5]

# Risk scores must come from the test covariates (`x_test`) so that they line
# up with `y_real`; a bare `x` is not defined anywhere in this notebook and
# fails on a fresh kernel.
preds = aft.predict_cumulative_hazard(df=x_test, times=time_points)

# The predictions are transposed before scoring — presumably from
# (times x subjects) to the (subjects x times) layout the metric expects;
# TODO confirm. `auc` holds one value per time point, `mean_auc` the summary.
auc, mean_auc = cumulative_dynamic_auc(y_real_train, y_real, preds.T, time_points)

In [105]:
# Report the AUC at each evaluation horizon, then the mean.
print("Dynamic AUC values at different time points:")
# The loop variable is deliberately NOT called `auc`: reusing that name would
# rebind the notebook-level `auc` array to its last scalar, so re-running this
# cell (or reading `auc` afterwards) would break.
for t, auc_t in zip(time_points, auc):
    print(f"Time {t:.2f}: AUC = {auc_t:.3f}")

print(f"\nMean Dynamic AUC: {mean_auc:.3f}")

Dynamic AUC values at different time points:
Time 30.00: AUC = 0.774
Time 365.00: AUC = 0.716
Time 730.00: AUC = 0.740
Time 1825.00: AUC = 0.714

Mean Dynamic AUC: 0.724


In [126]:
def _aft_expectation_frame(covariates):
    """Return the AFT model's expected durations for `covariates` as a one-column frame named "AFT"."""
    return pd.DataFrame(aft.predict_expectation(covariates), columns=["AFT"])


# Expected durations from the fitted AFT model for both partitions.
# Note: the training rows are scored by the same model that was fitted on them.
preds_train = _aft_expectation_frame(x_train)
preds_test = _aft_expectation_frame(x_test)

# Append the "AFT" column to the selected features (+ duration and event).
# The column-wise concat aligns rows on the index — assumes the prediction
# frames carry the same index as `train` / `test`; TODO confirm.
new_train = pd.concat([train[features], preds_train], axis=1)
new_test = pd.concat([test[features], preds_test], axis=1)

In [127]:
# Persist the augmented train/test frames; the index is not written out.
for _frame, _csv_path in ((new_train, "train_aft.csv"), (new_test, "test_aft.csv")):
    _frame.to_csv(_csv_path, index=False)