In [1]:
import pytensor
import pytensor.tensor as pt

import numpy as np
import pandas as pd

from lifelines.datasets import load_kidney_transplant
from lifelines import CoxPHFitter

from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.util import Surv

In [2]:
df = load_kidney_transplant()

X = df[['age','black_male','white_male','black_female']].to_numpy().astype(np.float64)


time = df['time'].to_numpy().astype(np.float64)
event = df['death'].to_numpy().astype(np.float64)

argsort = time.argsort(kind='mergesort')

time= time[argsort]
event = event[argsort].astype(np.int64)
X = X[argsort]

unique_times, time_return_inverse =  np.unique(time,return_inverse=True)
n_unique_times = len(unique_times)



In [3]:
len(time)

863

In [4]:
def get_index_per_not_censored_times(time,event):
    """used to calcuate  the 'l over m_j' in the efron loss, assumes that the time & event are ordered by time"""
    last_time = None
    current_index = 0
    indexes = []
    
    for t,e in zip(time,event):
        not_censored =  e == 1
        if t == last_time and not_censored:
            current_index = current_index + 1
    
        elif not_censored:
            current_index = 0
        else:
            current_index = -1
    
        last_time = t
    
        indexes.append(current_index)
        
    return np.array(indexes)


# death_per_time = np.bincount(time_return_inverse,weights= event,minlength=n_unique_times)[time_return_inverse]
# index_per_not_censored_times = get_index_per_not_censored_times(time,event)
# l_div_m = np.divide(np.array(index_per_not_censored_times),death_per_time,out=np.zeros(X.shape[0]),where =np.logical_and ( death_per_time != 0 , event == 1 ))

#l_div_m should be calcuated one, before the traning loop, we will calcuate it here before handing it off to pytensor as a parameter

In [5]:
def get_efron_neg_log_likelihood_loss_jacobian_hessian_function() -> pytensor.compile.function.types.Function:
    def reverse_cumsum(a):
        return pt.flip(pt.cumsum(pt.flip(a)))
    
    weights  = pt.vector('weights',dtype='float64')
    data = pt.matrix('data',dtype='float64')
    l_div_m = pt.vector('l_div_m',dtype='float64')
    n_unique_times = pt.scalar('n_unique_times',dtype='int64') 
    event = pt.vector('event',dtype='int64') 
    time_return_inverse = pt.vector('time_return_inverse',dtype='int64')

    p = pt.dot(data,weights)
    p_exp =  pt.exp(p)

    set_at_time_indexed_at_time = pt.bincount(time_return_inverse,weights= p_exp,minlength=n_unique_times)
    set_per_time = set_at_time_indexed_at_time[time_return_inverse]
    risk_set = reverse_cumsum(set_at_time_indexed_at_time)[time_return_inverse]

    loss = - pt.sum(event * (p - np.log(risk_set - (l_div_m * set_per_time))))

    jacobian = pytensor.gradient.jacobian(loss,weights)
    hessian = pytensor.gradient.hessian(loss,weights)
    neg_log_likelihood_loss_jacobian_hessian = pytensor.function(inputs=[weights,data,event,l_div_m,n_unique_times,time_return_inverse],outputs= [loss,jacobian,hessian])

    return neg_log_likelihood_loss_jacobian_hessian


In [6]:
efron_neg_log_likelihood = get_efron_neg_log_likelihood_loss_jacobian_hessian_function()

In [22]:
def train_weights_for_cox_ph_efron(X,event,n_unique_times,time_return_inverse, max_itterations = 100, loss_jacobian_hessian_function=efron_neg_log_likelihood):
    #https://myweb.uiowa.edu/pbreheny/7210/f15/notes/10-27.pdf 
    #according to Dr.Breheny's notes, one should start halfsteping Newton-Raphson for cox when one starts having touble traning, before terminating the training loop
    #"Supposedly" R's survival package does this


    
    death_per_time = np.bincount(time_return_inverse,weights= event,minlength=n_unique_times)[time_return_inverse]
    index_per_not_censored_times = get_index_per_not_censored_times(time,event)
    l_div_m = np.divide(np.array(index_per_not_censored_times),death_per_time,out=np.zeros(X.shape[0]),where =np.logical_and ( death_per_time != 0 , event == 1 ))


    weights = np.zeros(X.shape[1])

    last_loss = np.array(np.inf)
    
    half_step = False
    
    for i in range(max_itterations):
        loss, jacobian, hessian = loss_jacobian_hessian_function(weights,X,event,l_div_m,n_unique_times,time_return_inverse)
        if (loss < last_loss) &  (not half_step):
            last_loss = loss
            weights = weights -  np.dot(np.linalg.inv(hessian),jacobian)
        elif (loss < last_loss) & half_step:
            last_loss = loss
            weights = weights - (0.5 * np.dot(np.linalg.inv(hessian),jacobian))
        else:
            if half_step:
                break
            else:
                half_step = True

    return weights


In [23]:
train_weights_for_cox_ph_efron(X,event,n_unique_times,time_return_inverse)

array([ 0.05067288, -0.03565723,  0.09737128,  0.45374737])

In [9]:
#lifelines only used the efron loss, it also scales X automaticly

llcox = CoxPHFitter().fit(df,'time','death')

In [10]:
llcox.summary['coef'].values


array([ 0.0506714 , -0.03561573,  0.09739612,  0.45378663])

In [11]:
y_sur = Surv().from_arrays(event,time)
CoxPHSurvivalAnalysis(ties='efron').fit(X ,y_sur).coef_

array([ 0.05067238, -0.03560988,  0.09739891,  0.45379433])

In [None]:
#the coefs are all very close

In [12]:
%timeit train_weights_for_cox_ph_efron(X,event,n_unique_times,time_return_inverse)

7.22 ms ± 1.23 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [13]:
%timeit CoxPHSurvivalAnalysis(ties='efron').fit(X ,y_sur)

69.6 ms ± 2.84 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
%timeit CoxPHFitter().fit(df,'time','death')

59.7 ms ± 1.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [15]:
#we are now around 10X faster than lifelines and sksurv, we can get even faster by jiting get_index_per_not_censored_times


In [17]:

from numba import njit
import  numba as nb

signature = nb.types.Array(nb.types.int64,1,'C')(nb.types.Array(nb.types.float64,1,'C'),nb.types.Array(nb.types.int64,1,'C'))


@njit(signature)
def get_index_per_not_censored_times(time,event):
    last_time = np.inf
    current_index = 0
    indexes = []

    
    for i in range(time.shape[0]):
        t = time[i]
        e = event[i]
        
        not_censored =  e == 1
        
        if t == last_time and not_censored:
            current_index = current_index + 1
    
        elif not_censored:
            current_index = 0
        else:
            current_index = -1
    
        last_time = t
    
        indexes.append(current_index)
        
    return np.array(indexes)

In [18]:
%timeit train_weights_for_cox_ph_efron(X,event,n_unique_times,time_return_inverse)

5.81 ms ± 472 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
#we are now around 30X faster than lifelines and sksurv, =)