In [62]:
import numpy as np
import pandas as pd
from lifelines import (
    ExponentialFitter,
    LogLogisticAFTFitter,
    LogLogisticFitter,
    LogNormalAFTFitter,
    LogNormalFitter,
    WeibullAFTFitter,
    WeibullFitter,
)
from sklearn.model_selection import train_test_split
from sksurv.metrics import cumulative_dynamic_auc
from sksurv.util import Surv

In [63]:
# Load the prepared dataset from a CSV located relative to the working directory.
data = pd.read_csv(filepath_or_buffer="data_ready_45.csv")

In [64]:
# Shift every duration by one unit.
# NOTE(review): presumably done so that no duration is zero (lifelines'
# parametric fitters reject non-positive durations) -- confirm.
# Caution: this cell is not idempotent. It reads and overwrites the same
# column, so re-running it without reloading `data` adds another +1.
data["time_frame"] = data["time_frame"].add(1)

In [65]:
# Hold out 20% of the rows for evaluation. The split is stratified on the
# event indicator column and seeded so it is reproducible across runs.
train, test = train_test_split(
    data,
    test_size=0.2,
    stratify=data["GRF_STAT_PA"],
    random_state=42,
)

In [66]:
# One univariate parametric survival model per candidate distribution.
wb, ex, log, loglogis = (
    WeibullFitter(),
    ExponentialFitter(),
    LogNormalFitter(),
    LogLogisticFitter(),
)

# Fit each candidate on the training split (no covariates) and print its AIC
# so the distributions can be compared; a lower AIC indicates a better fit.
for model in (wb, ex, log, loglogis):
    model.fit(
        durations=train["time_frame"],
        event_observed=train["GRF_STAT_PA"],
    )
    print("The AIC value for", type(model).__name__, "is", model.AIC_)

The AIC value for WeibullFitter is 80735.50265003768
The AIC value for ExponentialFitter is 80733.50500371843
The AIC value for LogNormalFitter is 81672.9421370727
The AIC value for LogLogisticFitter is 80946.41811406214


In [67]:
# Accelerated-failure-time regression on all covariates in `train`, using a
# log-normal distribution for the durations.
# NOTE(review): the univariate AIC comparison printed earlier in this notebook
# ranked the log-normal fit worst of the four candidates -- confirm that
# log-normal (rather than Weibull) is the intended choice here.
# NOTE(review): the recorded output of this cell carries a lifelines warning
# about untrustworthy variances, and some covariates have no standard error
# in the summary. That may indicate collinear covariates; consider
# inspecting them or passing a `penalizer` -- confirm.
lognormal_aft = LogNormalAFTFitter()
lognormal_aft.fit(train, duration_col="time_frame", event_col="GRF_STAT_PA")
lognormal_aft.print_summary(3)

# Backward-compatible alias: later cells refer to this fitted model as
# `weibull_aft`, although it is a LogNormalAFTFitter. Prefer `lognormal_aft`.
weibull_aft = lognormal_aft


It's advisable to not trust the variances reported, and to be suspicious of the fitted parameters too.



0,1
model,lifelines.LogNormalAFTFitter
duration col,'time_frame'
event col,'GRF_STAT_PA'
number of observations,16908
number of events observed,3929
log-likelihood,-40166.423
time fit was run,2024-10-16 12:48:38 UTC

Unnamed: 0,Unnamed: 1,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
mu_,ABO_MAT,-0.222,0.801,0.073,-0.365,-0.079,0.694,0.924,0.0,-3.042,0.002,8.733
mu_,AGE,0.029,1.029,0.003,0.023,0.035,1.024,1.035,0.0,10.049,<0.0005,76.51
mu_,AGE_DON,-0.016,0.984,0.003,-0.021,-0.011,0.979,0.989,0.0,-6.368,<0.0005,32.283
mu_,AMIS,-0.017,0.984,,,,,,0.0,,,
mu_,AMYLASE,0.0,1.0,0.0,-0.0,0.001,1.0,1.001,0.0,0.946,0.344,1.538
mu_,ANTIHYPE_DON,-0.039,0.962,0.051,-0.138,0.06,0.871,1.062,0.0,-0.773,0.439,1.186
mu_,ART_RECON_is2,0.027,1.028,0.076,-0.121,0.175,0.886,1.192,0.0,0.362,0.717,0.48
mu_,BLOOD_INF_CONF_DON,-0.231,0.794,0.111,-0.449,-0.013,0.639,0.988,0.0,-2.072,0.038,4.709
mu_,BMIS,0.027,1.027,,,,,,0.0,,,
mu_,BMI_CALC,-0.034,0.967,0.049,-0.13,0.062,0.878,1.064,0.0,-0.689,0.491,1.027

0,1
Concordance,0.685
AIC,80490.845
log-likelihood ratio test,1336.097 on 77 df
-log2(p) of ll-ratio test,757.737


In [68]:
# Duration and event-indicator columns of the training split ...
y_train = train.loc[:, ["time_frame", "GRF_STAT_PA"]]

# ... converted to the structured (event, time) array that sksurv metrics take.
y_real_train = Surv.from_dataframe(
    event="GRF_STAT_PA",
    time="time_frame",
    data=y_train,
)


In [69]:
# Outcome columns (duration, event indicator) of the held-out split.
y = test.loc[:, ["time_frame", "GRF_STAT_PA"]]

# Covariates only: every column of the held-out split except the two outcomes.
x = test.drop(columns=["time_frame", "GRF_STAT_PA"])

In [70]:
# Structured (event, time) array for the held-out split.
y_real = Surv.from_dataframe(event="GRF_STAT_PA", time="time_frame", data=y)

# Evaluation horizons, expressed in the same unit as `time_frame`.
# NOTE(review): the values look like 30 days and 1, 2 and 5 years -- confirm
# that `time_frame` is measured in days.
time_points = [30, 365, 2 * 365, 5 * 365]

# Predicted cumulative hazard at each horizon, used as the risk score
# (a higher cumulative hazard means a higher predicted risk).
preds = weibull_aft.predict_cumulative_hazard(df=x, times=time_points)

# The metric is given the transposed frame.
# NOTE(review): presumably lifelines returns one row per time point and the
# metric expects one row per subject -- confirm against both APIs.
risk_scores = preds.T

auc, mean_auc = cumulative_dynamic_auc(
    survival_train=y_real_train,
    survival_test=y_real,
    estimate=risk_scores,
    times=time_points,
)

In [71]:
# Report the time-dependent AUC at each evaluation horizon, then the mean.
# The loop variable is deliberately not named `auc`: reusing that name would
# overwrite the AUC array with its last element, so re-running this cell
# would fail when `zip` receives a scalar.
print("Dynamic AUC values at different time points:")
for t, auc_t in zip(time_points, auc):
    print(f"Time {t:.2f}: AUC = {auc_t:.3f}")

print(f"\nMean Dynamic AUC: {mean_auc:.3f}")

Dynamic AUC values at different time points:
Time 30.00: AUC = 0.768
Time 365.00: AUC = 0.716
Time 730.00: AUC = 0.742
Time 1825.00: AUC = 0.718

Mean Dynamic AUC: 0.726
