## Weibull AFT

In [1]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [2]:
import numpy as np
import pymc3 as pm
from pymc3.distributions.timeseries import GaussianRandomWalk
import seaborn as sns
import pandas as pd
from theano import tensor as T
import theano.tensor as tt
import statsmodels
import patsy
import pystan
import matplotlib.pylab as plt

  from ._conv import register_converters as _register_converters


In [3]:
# Load the `flchain` data set from R's `survival` package, keep rows with
# more than 7 units of follow-up time, and draw a fixed-size subsample.
dataset = statsmodels.datasets.get_rdataset(package='survival', dataname='flchain')

# A fixed `random_state` makes the 500-row subsample (and therefore every
# downstream fit) reproducible under Restart & Run All.
d = (
    dataset.data
    .query('futime > 7')
    .sample(500, random_state=42)
    # Move the original row labels into an `index` column.
    .reset_index(level=0)
    # Short names used by the model cells: `t` = follow-up time, `event` = death flag.
    .rename(columns={'futime': 't', 'death': 'event'})
)

In [4]:
# Build the covariate design matrix with patsy. The response frame returned
# alongside it is not used: the model's outcome is the follow-up time `t`,
# with `event` as the censoring indicator.
event_df, x_df = patsy.dmatrices("event ~ age + sex", d, return_type='dataframe')
# The models carry their own intercept term (`mu`), so drop patsy's column.
x_df = x_df.loc[:, x_df.columns != 'Intercept']

N, M = x_df.shape  # number of rows, number of covariates
# `.values` replaces the deprecated `DataFrame.as_matrix()` (removed in pandas 1.0).
x = x_df.values
y = d['t'].values
event = d['event'].values.astype(int)
d.head()

Unnamed: 0,index,age,sex,sample.yr,kappa,lambda,flc.grp,creatinine,mgus,t,event,chapter
0,5401,59,F,1996,1.4,1.34,5,1.0,0,4655,0,
1,771,75,M,1996,3.23,3.61,10,1.3,0,4698,0,
2,1502,71,F,1996,1.16,1.29,4,0.9,0,4837,0,
3,4107,61,F,1997,2.72,2.51,10,0.9,0,4458,0,
4,5355,55,F,1997,1.2,1.19,4,0.8,0,4514,0,


In [None]:
# Prior scales: `sd_mu` is the standard deviation of the intercept `mu`;
# `sd_al` multiplies the raw shape parameter before it is exponentiated.
sd_mu, sd_al = 10.0, 10.0


def weibull_lccdf(value, alpha, beta):
    """Log complementary CDF (log survival function) of a Weibull distribution.

    Computes ``-(value / beta) ** alpha``, i.e. ``log S(value)`` for a Weibull
    with shape ``alpha`` and scale ``beta``. Only arithmetic operators are
    used, so the inputs may be scalars, NumPy arrays or symbolic tensors.
    """
    scaled = value / beta
    return -(scaled ** alpha)


# Weibull accelerated-failure-time regression on the flchain sample, with
# right-censored rows handled through the log survival function.
with pm.Model() as weibull:
    # Non-centred ("raw") parameters; the model quantities are derived below.
    beta_raw = pm.Normal('b0', mu=0., sd=1., shape=M)
    alpha_raw = pm.Normal('a0', mu=0., sd=.1)
    mu = pm.Normal('mu', mu=0., sd=sd_mu)

    # Regression coefficients: a global scale times a per-covariate
    # 1/sqrt(chi-square) factor times a standard normal, which gives each
    # coefficient a heavy-tailed prior.
    tau_s_raw = pm.HalfNormal('tau_s_raw', 10.)
    tau_raw = pm.ChiSquared('tau_raw', 1., shape=M)
    local_scale = tt.sqrt(1. / tau_raw)
    beta = pm.Deterministic('beta', tau_s_raw * local_scale * beta_raw)

    # Weibull shape, kept positive by exponentiating the scaled raw value.
    alpha = pm.Deterministic('alpha', tt.exp(sd_al * alpha_raw))

    # Linear predictor, one entry per row of `x`.
    lp = mu + tt.dot(x, beta)

    # Rows with an observed event contribute the Weibull density ...
    is_observed = event == 1
    scale_observed = tt.exp(-lp[is_observed] / alpha)
    y1 = pm.Weibull('y1', alpha, scale_observed, observed=y[is_observed])

    # ... while censored rows contribute the log survival probability.
    is_censored = event == 0
    scale_censored = tt.exp(-lp[is_censored] / alpha)
    y0 = pm.Potential('y0', weibull_lccdf(y[is_censored], alpha, scale_censored))

In [None]:
# Draw posterior samples for the regression model with NUTS, on a single core.
with weibull:
    trace = pm.sample(draws=1000, tune=1000, init='adapt_diag', cores=1)

Auto-assigning NUTS sampler...
INFO:pymc3:Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
INFO:pymc3:Initializing NUTS using adapt_diag...


In [None]:
# Small right-censored data set used by the remaining cells.
# `time` holds the follow-up times; `event` is 1 where the event was observed
# and 0 where the observation was censored.
# NOTE: `event` rebinds the name used for the flchain sample earlier in the
# notebook, so the first model cell must not be re-run after this one.
time = np.asarray(
    [
        59, 115, 156, 421, 431, 448, 464, 475, 477, 563,
        638, 744, 769, 770, 803, 855, 1040, 1106, 1129, 1206,
        1227, 268, 329, 353, 365, 377,
    ],
    dtype=np.float64,
)
event = np.asarray(
    [
        1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 0,
    ]
)

In [None]:
# Prior scales for the intercept-only model: `sd_mu` is the standard deviation
# of `mu`; `sd_al` multiplies the raw shape parameter before exponentiation.
sd_mu, sd_al = 100.0, 10.0


def weibull_lccdf(value, alpha, beta):
    """Return the Weibull log survival function ``-(value / beta) ** alpha``.

    ``alpha`` is the shape and ``beta`` the scale. The expression uses plain
    arithmetic only, so scalars, NumPy arrays and symbolic tensors all work.
    """
    ratio = value / beta
    powered = ratio ** alpha
    return -powered


# Intercept-only Weibull model for the small censored data set.
with pm.Model() as weibull:
    alpha_raw = pm.Normal('a0', mu=0., sd=.1)
    mu = pm.Normal('mu', mu=0., sd=sd_mu)

    # Shape is exp(sd_al * a0), so it is always positive.
    alpha = pm.Deterministic('alpha', tt.exp(sd_al * alpha_raw))
    # Weibull scale expressed through the intercept and the shape.
    beta = pm.Deterministic('beta', tt.exp(mu / alpha))

    # Observed events enter through the Weibull density.
    is_observed = event == 1
    y1 = pm.Weibull('y1', alpha, beta, observed=time[is_observed])

    # Censored times enter through the log survival function.
    is_censored = event == 0
    y0 = pm.Potential('y0', weibull_lccdf(time[is_censored], alpha, beta))

    trace = pm.sample(draws=1000, tune=1000, init='adapt_diag')

In [None]:
# Posterior densities and sampling traces; the trailing `;` keeps the
# returned axes array from being echoed below the figure.
pm.traceplot(trace);

In [None]:
# Tabulate posterior summary statistics for the trace and display the table.
df = pm.summary(trace)
df

In [None]:
# Alternative parameterisation of the intercept-only Weibull model, with the
# shape `r` given a Gamma prior directly.
with pm.Model() as m:
    alpha = pm.Normal('alpha', 0., 10.)
    r = pm.Gamma('r', 1., .001, testval=.25)
    # Weibull scale derived from the intercept and the shape.
    beta = pm.Deterministic('beta', tt.exp(-alpha / r))

    # Observed events enter through the Weibull density.
    is_observed = event == 1
    y1 = pm.Weibull('y1', r, beta, observed=time[is_observed])

    # Censored times: a Bernoulli "success" with probability S(t) adds
    # log S(t) to the model log-probability. The same term could be added
    # with pm.Potential('y0', weibull_lccdf(time[event == 0], r, beta)).
    is_censored = event == 0
    survival_prob = tt.exp(-tt.pow(time[is_censored] / beta, r))
    y0 = pm.Bernoulli(
        'y0', survival_prob, observed=np.ones(np.sum(is_censored)))


In [None]:
# Sanity check: print each random variable's log-probability at the model's
# test point, to spot non-finite starting values before sampling.
for rv in m.basic_RVs:
    print(rv.name, rv.logp(m.test_point))

In [None]:
# Draw posterior samples for model `m` with NUTS.
with m:
    tr = pm.sample(draws=1000, tune=1000, init='adapt_diag')

In [None]:
# Posterior densities and sampling traces for model `m`; the trailing `;`
# keeps the returned axes array from being echoed below the figure.
pm.traceplot(tr);

In [None]:
# Display posterior summary statistics for model `m`.
pm.summary(tr)

In [None]:
# Work on the log scale: the Gumbel model below is fitted to log survival times.
logtime = np.log(time)

def gumbel_sf(y, mu, sigma):
    """Survival function ``1 - CDF`` of a Gumbel distribution.

    With ``z = (y - mu) / sigma`` the CDF is ``exp(-exp(-z))``, so the
    survival function is ``1 - exp(-exp(-z))``.

    It is evaluated as ``-expm1(-exp(-z))`` rather than by literal
    subtraction. For large ``z`` the CDF is very close to 1, and ``1 - CDF``
    loses precision and eventually rounds to exactly 0, which would give a
    ``-inf`` log-likelihood when used as a Bernoulli probability.
    """
    return -tt.expm1(-tt.exp(-(y - mu) / sigma))


# Gumbel model on the log survival times, with censored observations handled
# through the survival function.
with pm.Model() as model:
    # Scale of the Gumbel distribution.
    s = pm.HalfNormal("s", tau=5.0)

    # Location of the Gumbel distribution.
    gamma = pm.Normal("gamma", 0., 5.0)

    # Observed events: Gumbel density of the log time.
    is_observed = event == 1
    y_obs = pm.Gumbel("y_obs", mu=gamma, beta=s, observed=logtime[is_observed])

    # Censored times: a Bernoulli "success" with probability S(log t) adds
    # log S(log t) to the model log-probability.
    is_censored = event == 0
    survival_prob = gumbel_sf(logtime[is_censored], mu=gamma, sigma=s)
    y_cens = pm.Bernoulli(
        "y_cens", p=survival_prob, observed=np.ones(np.sum(is_censored)))

    trace = pm.sample(draws=1000, tune=1000, init='adapt_diag')

In [None]:
# Posterior densities and sampling traces for the Gumbel model; the trailing
# `;` suppresses the textual repr of the returned axes.
pm.traceplot(trace);

In [None]:
# Tabulate posterior summary statistics for the Gumbel model and display them.
# Note: this rebinds `df`, replacing the summary table of the earlier model.
df = pm.summary(trace)
df