In [8]:
from __future__ import print_function
import numpy as np
import bayesiancoresets as bc
from scipy.optimize import minimize
from scipy.linalg import solve_triangular
import time
import sys, os
import argparse
import pandas as pd
import pystan
import pickle

#make it so we can import models/etc from parent folder
sys.path.insert(1, os.path.join(sys.path[0], '../../examples/common'))
import mcmc
import results
import plotting
import radon

In [2]:
data_dict_, prior_dict_ = radon.load_data()

stan_representation = radon.weighted_varying_intercept

# load stanfit (or make one if no cache exists)
path_with_data = 'stancache/weighted_radon.pkl'
if os.path.isfile(path_with_data):
    sm = pickle.load(open(path_with_data, 'rb'))
else:
    sm = pystan.StanModel(model_code=stan_representation)
    with open(path_with_data, 'wb') as f: pickle.dump(sm, f)
        
path_without_data = 'stancache/radon_prior.pkl'
if os.path.isfile(path_without_data):
    sm_prior = pickle.load(open(path_without_data, 'rb'))
else:
    sm_prior = pystan.StanModel(model_code=radon.prior_code)
    with open(path_without_data, 'wb') as f: pickle.dump(sm_prior, f)

## load pystan fit object, which has functions to eval log_likelihood
## we don't really need the samples from this stanfit, but it's convenient
## to automatically evaluate log_likelihood (as opposed to making a separate
## function to do it)

print('Creating likelihood evaluator')
stanfit = sm.sampling(data=data_dict_)
# print(stanfit)
likelihood = lambda pts, th: stanfit.log_prob(stanfit.unconstrain_pars(th))

Creating likelihood evaluator


In [14]:
df = stanfit.to_dataframe()
print(df)

      chain  draw  warmup      a[1]      a[2]      a[3]      a[4]      a[5]  \
0         0     0       0  1.189427  1.057700  1.515686  1.698822  1.401264   
1         0     1       0  1.113355  0.858175  1.590796  1.474603  1.584321   
2         0     2       0  1.413810  0.926201  1.500945  1.518574  1.227224   
3         0     3       0  0.970928  0.927210  1.185009  1.368137  1.993872   
4         0     4       0  0.838918  0.913898  1.497293  1.434749  1.122874   
...     ...   ...     ...       ...       ...       ...       ...       ...   
3995      3   995       0  0.995801  1.069703  2.020472  1.791328  1.437761   
3996      3   996       0  1.466538  0.883083  0.932998  1.364765  1.540371   
3997      3   997       0  0.881978  1.053876  1.890008  1.632381  1.282603   
3998      3   998       0  1.687193  0.896655  1.096194  1.400984  1.700222   
3999      3   999       0  0.828805  0.946414  1.819765  1.850988  1.623895   

          a[6]      a[7]  ...  y_hat[917]  y_hat[91

In [18]:
df = stanfit.to_dataframe("a")
# print(df)
df.head()
df.columns

Index(['chain', 'draw', 'warmup', 'a[10]', 'a[11]', 'a[12]', 'a[13]', 'a[14]',
       'a[15]', 'a[16]', 'a[17]', 'a[18]', 'a[19]', 'a[1]', 'a[20]', 'a[21]',
       'a[22]', 'a[23]', 'a[24]', 'a[25]', 'a[26]', 'a[27]', 'a[28]', 'a[29]',
       'a[2]', 'a[30]', 'a[31]', 'a[32]', 'a[33]', 'a[34]', 'a[35]', 'a[36]',
       'a[37]', 'a[38]', 'a[39]', 'a[3]', 'a[40]', 'a[41]', 'a[42]', 'a[43]',
       'a[44]', 'a[45]', 'a[46]', 'a[47]', 'a[48]', 'a[49]', 'a[4]', 'a[50]',
       'a[51]', 'a[52]', 'a[53]', 'a[54]', 'a[55]', 'a[56]', 'a[57]', 'a[58]',
       'a[59]', 'a[5]', 'a[60]', 'a[61]', 'a[62]', 'a[63]', 'a[64]', 'a[65]',
       'a[66]', 'a[67]', 'a[68]', 'a[69]', 'a[6]', 'a[70]', 'a[71]', 'a[72]',
       'a[73]', 'a[74]', 'a[75]', 'a[76]', 'a[77]', 'a[78]', 'a[79]', 'a[7]',
       'a[80]', 'a[81]', 'a[82]', 'a[83]', 'a[84]', 'a[85]', 'a[8]', 'a[9]',
       'accept_stat__', 'stepsize__', 'treedepth__', 'n_leapfrog__',
       'divergent__', 'energy__'],
      dtype='object')