In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib

from lifelines import CoxPHFitter
from sklearn.model_selection import train_test_split

In [2]:
# Load processed_data.csv into a DataFrame. The modelling cells below read
# the 'Time to Event' and 'Event' columns from this frame.
df = pd.read_csv("processed_data.csv")

# Show the first five rows as this cell's output.
df.head(n=5)

Unnamed: 0,Volume,Mean,Std,Min,Max,Median,SurfaceArea,Elongation,Flatness,Roundness,Time to Event,Event
0,2719,-139.272894,194.164635,-812,154,-56.236328,826.137989,1.436361,1.446431,0.699813,3078,0
1,2150,-105.572558,173.439744,-829,144,-36.021484,1037.374063,1.367921,1.112439,0.781205,70,0
2,1324,-42.965257,172.357348,-815,290,23.177734,755.268235,1.387373,1.089084,0.822009,666,0
3,1547,-74.679379,254.147443,-1024,366,21.595703,912.514223,1.593605,1.63112,0.639694,1172,0
4,16625,-25.891429,107.583454,-783,391,7.496094,2432.30509,1.27808,1.16629,0.735654,1456,1


In [3]:
# Hold out 20% of the rows as a test set. Stratifying on the 'Event' column
# keeps the proportion of each 'Event' value similar in both splits, and the
# fixed random_state makes the split repeatable across runs.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["Event"],
    random_state=42,
)


In [4]:
# Cox proportional-hazards model with a penalty on the coefficients;
# l1_ratio sets the mix between the L1 and L2 parts of that penalty.
cph = CoxPHFitter(l1_ratio=0.5, penalizer=0.1)

# Fit on the training split only: 'Time to Event' is the duration column and
# 'Event' is the event-indicator column; every other column is a covariate.
cph.fit(
    train_df,
    duration_col="Time to Event",
    event_col="Event",
)

# Persist the fitted model to disk (the returned file list is the cell output).
joblib.dump(cph, "cox_model.pkl")

['cox_model.pkl']

In [5]:
# Render the fitted-model summary in the notebook; the same table is
# available programmatically as the DataFrame `cph.summary`.
cph.print_summary()

# Save the full summary table followed by the training concordance index
# to a plain-text report, one newline-terminated entry each.
with open("results_CoxPH.txt", "w") as report:
    print(cph.summary.to_string(), file=report)
    print(f"Concordance Index: {cph.concordance_index_}", file=report)

0,1
model,lifelines.CoxPHFitter
duration col,'Time to Event'
event col,'Event'
penalizer,0.1
l1 ratio,0.5
baseline estimation,breslow
number of observations,115
number of events observed,41
partial log-likelihood,-177.48
time fit was run,2025-05-16 08:33:42 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
Volume,-0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,-0.0,1.0,0.0
Mean,0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0
Std,0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,0.2,0.85,0.24
Min,-0.0,1.0,0.0,-0.01,0.0,0.99,1.0,0.0,-0.45,0.65,0.62
Max,0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,0.28,0.78,0.35
Median,0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,0.24,0.81,0.3
SurfaceArea,0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0
Elongation,0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0
Flatness,-0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,-0.0,1.0,0.0
Roundness,-0.94,0.39,1.34,-3.56,1.69,0.03,5.4,0.0,-0.7,0.48,1.05

0,1
Concordance,0.62
Partial AIC,374.95
log-likelihood ratio test,1.92 on 10 df
-log2(p) of ll-ratio test,0.00


In [6]:
# Score the fitted model on the held-out test split using the
# concordance index, then print the value.
test_ci = cph.score(
    test_df,
    scoring_method="concordance_index",
)
print(test_ci)

0.7015706806282722
