In [194]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from lifelines import KaplanMeierFitter
import numpy as np

In [195]:
data = pd.read_csv('test_data.csv')[['D_PFS', 'D_PFS_FLAG']]
pred = data

In [196]:
data = data.rename(index=str, columns={"D_PFS": "failure_time", "D_PFS_FLAG": "status"})

In [197]:
# MinMaxScale the prediction scores, to get values similar to probability/confidence values (not necessary)
scaler = MinMaxScaler()
pred['prediction'] = pred['D_PFS'].values.argsort()
pred['prediction'] = scaler.fit_transform(pred['prediction'].values.reshape(-1, 1))



In [198]:
times=30.5 * np.asarray([14, 16, 18, 20, 22])

In [199]:
data.sort_values('failure_time', axis=0, inplace=True)

In [200]:
data.reset_index(drop=True, inplace=True)

In [201]:
np.shape(data['failure_time'].unique())

(97,)

In [202]:
# remove duplicates using the general technique described by kaplan meier paper
for i, row  in data[data['failure_time'].duplicated()].iterrows():
    if row['status'] == 0:
        data.at[i, 'failure_time'] = row['failure_time']+1
    else:
        data.at[i, 'failure_time'] = row['failure_time']-1

In [203]:
kmf = KaplanMeierFitter()
kmf.fit(data['failure_time'], event_observed=data['status'].values)
kmf.predict(data['failure_time'])

0.000000       1.000000
36.000000      1.000000
54.900000      1.000000
61.000000      0.979381
60.000000      0.989691
84.000000      0.979381
93.000000      0.968962
100.650000     0.958544
109.800000     0.948125
130.000000     0.937706
131.000000     0.927287
149.450000     0.916868
155.000000     0.906449
167.750000     0.896030
196.000000     0.885611
200.000000     0.885611
212.000000     0.875068
220.616667     0.864525
244.000000     0.853982
266.000000     0.853982
267.000000     0.853982
274.000000     0.853982
292.000000     0.843033
295.850000     0.832085
314.150000     0.821136
348.000000     0.821136
350.000000     0.821136
385.000000     0.821136
386.000000     0.821136
390.400000     0.809571
                 ...   
1005.483333    0.497764
1013.616667    0.480599
1101.000000    0.480599
1110.200000    0.462799
1125.450000    0.462799
1128.500000    0.462799
1149.850000    0.462799
1152.900000    0.462799
1163.000000    0.462799
1165.100000    0.462799
1198.650000    0

In [204]:
kmf.survival_function_

Unnamed: 0_level_0,KM_estimate
timeline,Unnamed: 1_level_1
0.000000,1.000000
36.000000,1.000000
54.900000,1.000000
60.000000,0.989691
61.000000,0.979381
84.000000,0.979381
93.000000,0.968962
100.650000,0.958544
109.800000,0.948125
130.000000,0.937706


In [205]:
data['failure_time'].values

array([    0.        ,    36.        ,    54.9       ,    61.        ,
          60.        ,    84.        ,    93.        ,   100.65      ,
         109.8       ,   130.        ,   131.        ,   149.45      ,
         155.        ,   167.75      ,   196.        ,   200.        ,
         212.        ,   220.61666666,   244.        ,   266.        ,
         267.        ,   274.        ,   292.        ,   295.85      ,
         314.15      ,   348.        ,   350.        ,   385.        ,
         386.        ,   390.4       ,   407.        ,   421.        ,
         427.        ,   428.        ,   428.        ,   433.        ,
         435.        ,   441.        ,   442.25      ,   445.        ,
         454.45      ,   459.        ,   521.55      ,   527.        ,
         537.        ,   542.9       ,   567.        ,   579.5       ,
         610.        ,   633.38333344,   669.98333343,   683.2       ,
         701.5       ,   717.        ,   718.        ,   719.        ,
      

In [206]:
kmf.timeline

array([    0.        ,    36.        ,    54.9       ,    60.        ,
          61.        ,    84.        ,    93.        ,   100.65      ,
         109.8       ,   130.        ,   131.        ,   149.45      ,
         155.        ,   167.75      ,   196.        ,   200.        ,
         212.        ,   220.61666666,   244.        ,   266.        ,
         267.        ,   274.        ,   292.        ,   295.85      ,
         314.15      ,   348.        ,   350.        ,   385.        ,
         386.        ,   390.4       ,   407.        ,   421.        ,
         427.        ,   428.        ,   433.        ,   435.        ,
         441.        ,   442.25      ,   445.        ,   454.45      ,
         459.        ,   521.55      ,   527.        ,   537.        ,
         542.9       ,   567.        ,   579.5       ,   610.        ,
         633.38333344,   669.98333343,   683.2       ,   701.5       ,
         717.        ,   718.        ,   719.        ,   728.95      ,
      

In [191]:
kmf = KaplanMeierFitter()
kmf.fit(data['failure_time'], event_observed=data['status'].values, timeline=times)

<lifelines.KaplanMeierFitter: fitted with 100 observations, 50 censored>

In [190]:
kmf.conditional_time_to_event_

Unnamed: 0_level_0,KM_estimate - Conditional time remaining to event
timeline,Unnamed: 1_level_1
427.0,inf
488.0,inf
549.0,inf
610.0,inf
671.0,inf
