In [1]:
import numpy as np
import patsy
import pymc3 as pm
import statsmodels
import statsmodels.datasets
import theano.tensor as tt

  from ._conv import register_converters as _register_converters


In [2]:
# Fetch the `flchain` dataset that ships with R's `survival` package.
dataset = statsmodels.datasets.get_rdataset(package='survival', dataname='flchain')

# Build the analysis frame as one non-mutating chain. The original cell
# modified the result of `.query()` in place, which raised pandas'
# SettingWithCopyWarning and made the cell non-idempotent on re-run.
d = (
    dataset.data
    .query('futime > 7')        # keep rows whose follow-up time exceeds 7
    .reset_index(level=0)       # move the old index into an `index` column
    .rename(columns={'futime': 't', 'death': 'event'})
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [3]:
# Preview the first five rows of the prepared frame.
d.head(n=5)

Unnamed: 0,index,age,sex,sample.yr,kappa,lambda,flc.grp,creatinine,mgus,t,event,chapter
0,0,97,F,1997,5.7,4.86,10,1.7,0,85,1,Circulatory
1,1,92,F,2000,0.87,0.683,1,0.9,0,1281,1,Neoplasms
2,2,94,F,1997,4.36,3.85,10,1.4,0,69,1,Circulatory
3,3,92,F,1996,2.42,2.22,9,1.0,0,115,1,Circulatory
4,4,93,F,1996,1.32,1.69,6,1.1,0,1039,1,Circulatory


In [4]:
# Build the patsy design matrices for the formula `event ~ age + sex`.
# Note: `y` here is the formula's left-hand side; it is rebound to the
# follow-up time `t` in the next cell.
y, x_df = patsy.dmatrices("event ~ age + sex", data=d, return_type='dataframe')

# Keep every design-matrix column except the one named 'Intercept'.
non_intercept = x_df.columns != 'Intercept'
x_df = x_df.loc[:, non_intercept]

In [5]:
# N = number of rows, M = number of covariate columns in the design matrix.
N, M = x_df.shape

# `DataFrame.as_matrix()` is deprecated and was removed in pandas 1.0;
# `.values` returns the same ndarray and works on old and new pandas alike.
x = x_df.values

# Response: the column renamed from `futime` to `t`.
y = d['t'].values

# Integer indicator from the column renamed from `death` to `event`;
# it is compared against 1 and 0 to build masks in the model cell.
event = d['event'].values.astype(int)

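Reference Stan implementation of the Weibull survival model (from `survivalstan`): [weibull_survival_model.stan](https://github.com/hammerlab/survivalstan/blob/master/survivalstan/stan/weibull_survival_model.stan)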

In [7]:
# Prior scales used by the model below:
#   sd_mu - standard deviation of the Normal prior on `mu`
#   sd_al - multiplier applied to `alpha_raw` inside the exponential defining `alpha`
sd_mu, sd_al = 10.0, 10.0


def weibull_lccdf(value, alpha, beta):
    """Weibull log complementary CDF: ``-(value / beta) ** alpha``.

    Parameters
    ----------
    value : point(s) at which to evaluate.
    alpha : exponent applied to the scaled value (Weibull shape).
    beta : divisor applied to ``value`` (Weibull scale).

    Returns
    -------
    The negated power ``-(value / beta) ** alpha``. Only ``/``, ``**`` and
    unary minus are used, so any operand type supporting those works.
    """
    scaled = value / beta
    return -(scaled ** alpha)


# Weibull regression model. Rows with event == 1 enter through a Weibull
# likelihood; rows with event == 0 contribute the log complementary CDF
# through a Potential term.
with pm.Model() as weibull:
    # Raw parameters with fixed-scale Normal priors.
    beta_raw = pm.Normal('b0', mu=0.0, sd=1.0, shape=M)
    alpha_raw = pm.Normal('a0', mu=0.0, sd=0.1)
    mu = pm.Normal('mu', mu=0.0, sd=sd_mu)

    # One global scale and M per-coefficient scales for the coefficients.
    tau_s_raw = pm.HalfNormal('tau_s_raw', 10.0)
    tau_raw = pm.ChiSquared('tau_raw', 1.0, shape=M)
    local_scale = tt.sqrt(1.0 / tau_raw)
    beta = tau_s_raw * local_scale * beta_raw

    # Exponentiating keeps alpha strictly positive.
    alpha = tt.exp(sd_al * alpha_raw)

    # Linear predictor: intercept-like term `mu` plus covariate effects.
    lp = mu + tt.dot(x, beta)

    # Boolean row masks derived from the event indicator.
    is_event = event == 1
    is_censored = event == 0

    # Second Weibull parameter for each subset of rows.
    scale_event = tt.exp(-(lp[is_event]) / alpha)
    scale_censored = tt.exp(-(lp[is_censored]) / alpha)

    y1 = pm.Weibull('y1', alpha, scale_event, observed=y[is_event])
    y0 = pm.Potential(
        'y0', weibull_lccdf(y[is_censored], alpha, scale_censored))

In [None]:
# Fix the sampler seed so that Restart & Run All reproduces the same draws.
RANDOM_SEED = 42

# Draw posterior samples with PyMC3's defaults (the sampler is auto-assigned).
with weibull:
    trace = pm.sample(random_seed=RANDOM_SEED)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [tau_raw_log__, tau_s_raw_log__, mu, a0, b0]
  0%|          | 0/1000 [00:00<?, ?it/s]

[Weibull Survival Regression](https://discourse.pymc.io/t/weibull-survival-regression-aft/1107/3)

```
time <-c(59, 115, 156, 421, 431, 448, 464, 475, 477, 563, 638, 744, 769, 770, 803, 855, 1040, 1106, 1129, 1206, 1227, 268, 329, 353, 365, 377)
event <- c(1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0)
library(survival)
r <- survreg(Surv(time, event) ~ 1, dist="weibull")
beta <- 1/r$scale
eta <- exp(r$coefficients[1])

> beta
[1] 1.10806
> eta
(Intercept) 
   1225.419 
```

In [None]:
# Toy data set: the same 26 values as the `time` / `event` vectors in the
# R snippet above.
time = np.array(
    [
        59, 115, 156, 421, 431, 448, 464, 475, 477, 563, 638, 744, 769,
        770, 803, 855, 1040, 1106, 1129, 1206, 1227, 268, 329, 353, 365, 377,
    ],
    dtype=np.float64,
)

# NOTE: this rebinds `event`, which earlier held the flchain indicator used
# by the `weibull` model cell; re-running that cell after this one would
# pick up these 26 values instead.
event = np.array(
    [
        1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    ]
)