In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib

from lifelines import CoxPHFitter
from sklearn.model_selection import train_test_split

In [2]:
# Read the preprocessed table from the working directory and preview its
# first five rows as the cell's rich output.
df = pd.read_csv(
    'processed_data.csv',
)
df.head(n=5)

Unnamed: 0,Age,Weight (lbs),Gender,Ethnicity,Smoking status,%GG,Tumor Location (choice=RUL),Tumor Location (choice=RML),Tumor Location (choice=RLL),Tumor Location (choice=LUL),...,Std,Min,Max,Median,SurfaceArea,Elongation,Flatness,Roundness,Time to Event,Event
0,79,146.0,0,2,1,0,0,1,1,1,...,194.164635,-812,154,-56.236328,826.137989,1.436361,1.446431,0.699813,3078,0
1,65,195.0,0,1,2,0,0,1,1,1,...,173.439744,-829,144,-36.021484,1037.374063,1.367921,1.112439,0.781205,70,0
2,65,173.5,1,2,0,0,1,1,1,1,...,172.357348,-815,290,23.177734,755.268235,1.387373,1.089084,0.822009,666,0
3,67,173.5,1,2,1,0,1,1,1,0,...,254.147443,-1024,366,21.595703,912.514223,1.593605,1.63112,0.639694,1172,0
4,84,145.0,1,4,1,0,1,0,1,1,...,107.583454,-783,391,7.496094,2432.30509,1.27808,1.16629,0.735654,1456,1


In [3]:
# Hold out 20% of the rows for evaluation. The split is seeded so it is
# reproducible, and stratified on the 'Event' column so that both partitions
# keep a similar proportion of each 'Event' value.
train_df, test_df = train_test_split(
    df,
    stratify=df['Event'],
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit a penalised Cox proportional-hazards model on the training partition.
# `fit` returns the fitter itself, so construction and fitting are chained.
cph = CoxPHFitter(
    penalizer=0.1,
    l1_ratio=0.5,
).fit(
    train_df,
    duration_col='Time to Event',
    event_col='Event',
)

# Persist the fitted model so it can be reloaded without refitting.
joblib.dump(cph, "cox_model.pkl")

# Run lifelines' assumption checks against the training data, plotting the
# diagnostics and flagging covariates whose test p-value falls below 0.05.
cph.check_assumptions(
    train_df,
    show_plots=True,
    p_value_threshold=0.05,
)

['cox_model.pkl']

In [5]:
# Show the fitted-model report in the notebook; the same coefficient table is
# available programmatically as the `cph.summary` DataFrame.
cph.print_summary()

# Save the coefficient table and the fitter's concordance index to a plain
# text file alongside the notebook.
with open("results_CoxPH.txt", mode="w") as report_file:
    summary_text = cph.summary.to_string()
    report_file.write(summary_text + "\n")
    concordance_line = f"Concordance Index: {cph.concordance_index_}\n"
    report_file.write(concordance_line)

0,1
model,lifelines.CoxPHFitter
duration col,'Time to Event'
event col,'Event'
penalizer,0.1
l1 ratio,0.5
baseline estimation,breslow
number of observations,115
number of events observed,41
partial log-likelihood,-171.64
time fit was run,2025-05-06 19:11:49 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
Age,0.01,1.01,0.02,-0.02,0.05,0.98,1.05,0.0,0.62,0.54,0.9
Weight (lbs),-0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,-0.0,1.0,0.0
Gender,0.11,1.12,0.39,-0.65,0.88,0.52,2.4,0.0,0.29,0.77,0.37
Ethnicity,0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0
Smoking status,-0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,-0.0,1.0,0.0
%GG,-0.11,0.9,0.08,-0.26,0.05,0.77,1.05,0.0,-1.32,0.19,2.41
Tumor Location (choice=RUL),0.12,1.13,0.36,-0.58,0.82,0.56,2.27,0.0,0.33,0.74,0.44
Tumor Location (choice=RML),-0.17,0.85,0.54,-1.23,0.9,0.29,2.46,0.0,-0.31,0.76,0.4
Tumor Location (choice=RLL),-0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,-0.0,1.0,0.0
Tumor Location (choice=LUL),-0.0,1.0,0.0,-0.0,0.0,1.0,1.0,0.0,-0.0,1.0,0.0

0,1
Concordance,0.75
Partial AIC,415.27
log-likelihood ratio test,13.60 on 36 df
-log2(p) of ll-ratio test,0.00


In [7]:
# Score the fitted model on the held-out partition using the concordance
# index, then print the value.
test_ci = cph.score(
    test_df,
    scoring_method="concordance_index",
)
print(test_ci)

0.7696335078534031
