In [1]:
import functools

import numpy as np
from scipy import stats
import tensorflow as tf
import tensorflow_probability as tfp

import prostate

# Print small floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)

# Preview the first rows of the prostate data set.
prostate.load_data().head()

  res = PandasDataFrame.from_items(items)


Unnamed: 0,lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45,lpsa
0,-0.579818,2.769459,50.0,-1.386294,0.0,-1.386294,6.0,0.0,-0.430783
1,-0.994252,3.319626,58.0,-1.386294,0.0,-1.386294,6.0,0.0,-0.162519
2,-0.510826,2.691243,74.0,-1.386294,0.0,-1.386294,7.0,20.0,-0.162519
3,-1.203973,3.282789,58.0,-1.386294,0.0,-1.386294,6.0,0.0,-0.162519
4,0.751416,3.432373,62.0,-1.386294,0.0,-1.386294,6.0,0.0,0.371564


In [12]:
from importlib import reload
from stat570.mcmc import gibbs_sampling

In [28]:
# Re-execute the module so that edits to gibbs_sampling.py take effect without
# restarting the kernel.
reload(gibbs_sampling)

<module 'stat570.mcmc.gibbs_sampling' from '/local/stat570/mcmc/gibbs_sampling.py'>

In [20]:
def prostate_input_fn():
    """Builds a one-element dataset holding the whole prostate table.

    Returns:
      A `tf.data.Dataset` whose single element is a `(features, labels)`
      pair: `features` maps each remaining column name to its list of
      values, and `labels` is the list of values of the 'lpsa' column.
    """
    columns = prostate.load_data().to_dict('list')
    # Split the response off so it is not also presented as a feature.
    response = columns.pop('lpsa')
    return tf.data.Dataset.from_tensors((columns, response))

In [711]:
#from stat570.mcmc.gibbs_sampling import GibbsSamplingKernel
import stat570

ImportError: No module named 'stat570'

In [669]:
import collections

# Side information threaded from one Gibbs transition to the next. Field order
# matters to positional construction and must stay as listed.
GibbsSamplingKernelResults = collections.namedtuple(
    'GibbsSamplingKernelResults',
    ('log_acceptance_correction', 'target_log_prob', 'steps_completed'))

class GibbsSamplingKernel(tfp.mcmc.TransitionKernel):
    """Makes a transition kernel that does sequential Gibbs sampling.

    Each call to `one_step` redraws exactly one component of the state and
    passes the others through unchanged. The component redrawn is the one at
    index `steps_completed mod len(samplers)`, so successive steps cycle
    through the components in order.

    Args:
      samplers: A list of samplers, one per state component. Sampler `i` is
        called with every *other* state component (in state order, as
        positional arguments) and returns a distribution; its `sample()`
        becomes the new value of component `i`.
      target_log_prob_fn: A function to compute the log probability of state.
        It is called with the state components as positional arguments.
      name: String prefix for the name scope opened in `bootstrap_results`.
    """
    def __init__(self, samplers, target_log_prob_fn,
                 name='gibbs_sampling_kernel'):
        self._name = name
        self._samplers = samplers
        self._target_log_prob_fn = target_log_prob_fn

    def one_step(self, current_state, previous_kernel_results):
        """Redraws one state component from its sampler.

        Args:
          current_state: Sequence (list or tuple) of state components, one
            per sampler.
          previous_kernel_results: `GibbsSamplingKernelResults` from the
            previous step (or from `bootstrap_results`).

        Returns:
          next_state: List of state components with one component replaced.
          kernel_results: `GibbsSamplingKernelResults` for the new state.
        """
        # The new state is assembled by list concatenation below. A tuple
        # state would make `head + [sample] + tail` raise TypeError, so
        # normalise to a list first.
        state_parts = list(current_state)

        def update_state(i):
            head, tail = state_parts[:i], state_parts[(i + 1):]
            return head + [self._samplers[i](*(head + tail)).sample()] + tail

        num_samplers = len(self._samplers)
        steps_completed = previous_kernel_results.steps_completed
        # One (predicate, branch) pair per component; the predicate selects
        # the component whose turn it is on this step.
        pred_fn_pairs = [
            (tf.equal(tf.mod(steps_completed, num_samplers), i),
             functools.partial(update_state, i))
            for i in range(num_samplers)]
        next_state = tf.case(pred_fn_pairs)

        target_log_prob = self._target_log_prob_fn(*next_state)
        kernel_results = GibbsSamplingKernelResults(
            target_log_prob=target_log_prob,
            # Negates the change in target log-probability made by this step.
            log_acceptance_correction=(
                -target_log_prob + previous_kernel_results.target_log_prob),
            steps_completed=steps_completed + 1)

        return next_state, kernel_results

    def bootstrap_results(self, init_state):
        """Builds the kernel results that precede the first step.

        Args:
          init_state: Sequence of initial state components.

        Returns:
          `GibbsSamplingKernelResults` with the target log-probability of
          `init_state`, a zero acceptance correction of the same shape, and
          an int64 step counter starting at 0.
        """
        with tf.name_scope(
            '_'.join([self._name, 'bootstrap_results']),
            values=init_state):
            target_log_prob = self._target_log_prob_fn(*init_state)
            return GibbsSamplingKernelResults(
                log_acceptance_correction=tf.zeros_like(target_log_prob),
                target_log_prob=target_log_prob,
                steps_completed=tf.constant(0, dtype=tf.int64))

    @property
    def is_calibrated(self):
        """Always False: this kernel reports itself as uncalibrated."""
        return False

In [25]:
def make_inverse_error_variance_dist(
    prior_concentration, prior_rate, features, labels, beta):
    """Builds the Gamma conditional of the inverse error variance.

    Args:
      prior_concentration: Concentration of the Gamma prior.
      prior_rate: Rate of the Gamma prior.
      features: Design matrix; its leading dimension is the observation count.
      labels: Observed responses.
      beta: Current regression coefficients.

    Returns:
      A `tf.distributions.Gamma` with concentration
      `prior_concentration + n / 2` and rate
      `prior_rate + l2_loss(labels - features . beta)`.
    """
    scope_inputs = [prior_concentration, prior_rate, features, labels, beta]
    with tf.name_scope('make_inverse_error_variance_dist',
                       values=scope_inputs):
        num_observations = tf.cast(tf.shape(features)[0], tf.float32)
        half_num_observations = tf.divide(num_observations, 2.)
        posterior_concentration = prior_concentration + half_num_observations

        # tf.nn.l2_loss already halves the sum of squared residuals.
        residuals = labels - tf.tensordot(features, beta, 1)
        posterior_rate = prior_rate + tf.nn.l2_loss(residuals)

        return tf.distributions.Gamma(
            concentration=posterior_concentration,
            rate=posterior_rate,
            name='posterior_inverse_error_variance')
    
def make_beta_dist(prior_mean, prior_variance, features, labels, inverse_error_variance):
    """Builds the multivariate normal conditional of the coefficients.

    Args:
      prior_mean: Prior mean vector of the coefficients; its static length
        sets the size of the identity matrices used below.
      prior_variance: Prior variance shared by all coefficients.
      features: Design matrix.
      labels: Observed responses.
      inverse_error_variance: Current inverse error variance.

    Returns:
      A `MultivariateNormalFullCovariance` whose precision is
      `inverse_error_variance * X'X + I / prior_variance` and whose mean
      shifts `prior_mean` towards the least-squares solution.
    """
    num_coefficients = int(prior_mean.shape[0])
    scope_inputs = [
        inverse_error_variance, features, labels, prior_mean, prior_variance]
    with tf.name_scope('make_beta_dist', values=scope_inputs):
        features_t = tf.transpose(features)
        gramian = tf.matmul(features_t, features)

        # Least-squares coefficients via a Cholesky solve of the normal
        # equations.
        gramian_chol = tf.linalg.cholesky(gramian)
        cross_moment = tf.matmul(features_t, tf.expand_dims(labels, -1))
        least_squares_beta = tf.squeeze(
            tf.linalg.cholesky_solve(gramian_chol, cross_moment))
        data_precision = gramian*inverse_error_variance

        prior_precision = tf.eye(num_coefficients)/prior_variance
        posterior_precision = data_precision + prior_precision
        # Invert the precision by solving against the identity.
        precision_chol = tf.linalg.cholesky(posterior_precision)
        posterior_covariance = tf.linalg.cholesky_solve(
            precision_chol, tf.eye(num_coefficients))

        data_weight = tf.matmul(posterior_covariance, data_precision)
        shift = tf.tensordot(
            data_weight, least_squares_beta - prior_mean, axes=1)
        posterior_mean = shift + prior_mean

        return tfp.distributions.MultivariateNormalFullCovariance(
            loc=posterior_mean,
            covariance_matrix=posterior_covariance,
            name='posterior_beta')

In [29]:
def model_fn(features, labels, mode, params, config):
    """Bayesian linear regression on 'lcavol', sampled with a Gibbs kernel.

    The generative model built by the inner `forward` function is:
      inverse_error_variance ~ Gamma(prior concentration, prior rate)
      beta ~ Normal(prior mean, prior variance * I)
      labels ~ Normal(features . beta, 1 / sqrt(inverse_error_variance))
    where `features` is a column of ones next to the 'lcavol' column.

    Args:
      features: Mapping of feature names to tensors; only 'lcavol' is read.
      labels: Observed responses.
      mode: A `tf.estimator.ModeKeys` value.
      params: Nested dict. Reads
        `params['prior']['inverse_error_variance']['concentration' | 'rate']`,
        `params['prior']['beta']['mean' | 'variance']`,
        `params['mcmc']['num_results']`,
        `params['mcmc']['initial_state']['inverse_error_variance' | 'beta']`
        and the optional `params['mcmc']['num_burnin_steps']` (default 500).
      config: Unused.

    Returns:
      PREDICT: the sampled value of the 'labels' random variable.
      EVAL: the result of the log joint function (see the comment in the
        body; this branch currently errors).
      Otherwise: the states returned by `tfp.mcmc.sample_chain`, ordered as
        (inverse_error_variance, beta).
    """
    del config  # Unused.

    prior_inverse_error_variance_concentration = (
        params['prior']['inverse_error_variance']['concentration'])
    prior_inverse_error_variance_rate = (
        params['prior']['inverse_error_variance']['rate'])

    prior_beta_mean = tf.constant(params['prior']['beta']['mean'],
                                  dtype=tf.float32)
    prior_beta_variance = tf.constant(params['prior']['beta']['variance'],
                                      dtype=tf.float32)

    def forward(features):
        """Generative model; the `name`s are the log-joint keyword names."""
        inverse_error_variance = tfp.edward2.Gamma(
            concentration=prior_inverse_error_variance_concentration,
            rate=prior_inverse_error_variance_rate,
            name='inverse_error_variance')

        beta = tfp.edward2.MultivariateNormalDiag(
            loc=prior_beta_mean,
            scale_identity_multiplier=tf.sqrt(prior_beta_variance), name='beta')

        return tfp.edward2.Normal(
            loc=tf.tensordot(features, beta, axes=1),
            scale=1/tf.sqrt(inverse_error_variance),
            name='labels')

    features = tf.feature_column.input_layer(
        features, [tf.feature_column.numeric_column('lcavol')])
    # Prepend a column of ones so the first coefficient acts as an intercept.
    features = tf.concat((tf.ones_like(features), features), axis=-1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return forward(features).value

    log_joint_fn = functools.partial(
        tfp.edward2.make_log_joint_fn(lambda: forward(features)),
        labels=labels)

    if mode == tf.estimator.ModeKeys.EVAL:
        return log_joint_fn(labels=labels) # currently will error

    # One conditional sampler per state component, in state order.
    samplers = [
        functools.partial(
            make_inverse_error_variance_dist,
            prior_inverse_error_variance_concentration,
            prior_inverse_error_variance_rate,
            features, labels),
        functools.partial(
            make_beta_dist,
            prior_beta_mean, prior_beta_variance, features, labels),
    ]

    kernel = tfp.mcmc.MetropolisHastings(
        inner_kernel=gibbs_sampling.GibbsSamplingKernel(
            samplers=samplers,
            target_log_prob_fn=lambda inverse_error_variance, beta: log_joint_fn(
                inverse_error_variance=inverse_error_variance,
                beta=beta)))

    mcmc_params = params['mcmc']
    samples, _ = tfp.mcmc.sample_chain(
        num_results=mcmc_params['num_results'],
        current_state=(
            mcmc_params['initial_state']['inverse_error_variance'],
            mcmc_params['initial_state']['beta']),
        kernel=kernel,
        # Burn-in is configurable; 500 matches the previous fixed value.
        num_burnin_steps=mcmc_params.get('num_burnin_steps', 500),
        # One less than the number of samplers, derived rather than
        # hard-coded. Assumes the kernel refreshes one component per step, so
        # that a result is kept once per full sweep -- TODO confirm against
        # stat570.mcmc.gibbs_sampling.
        num_steps_between_results=len(samplers) - 1,
        parallel_iterations=1)

    return samples

In [30]:
# Default hyperparameters and sampler settings for the regression model.
DEFAULT_PARAMS = dict(
    prior=dict(
        # Gamma prior: concentration is also called shape (alpha); rate is
        # usually denoted by beta.
        inverse_error_variance=dict(concentration=0.01, rate=0.01),
        # A single variance enforces equal variance and no covariance.
        beta=dict(mean=[0., 0.], variance=2.),
    ),
    mcmc=dict(
        num_results=2048,
        initial_state=dict(inverse_error_variance=1., beta=[0., 0.]),
    ),
)

# Build the sampling graph once, freeze it, then draw the chain in a session.
graph = tf.Graph()
with graph.as_default():
    dataset = prostate_input_fn().repeat()
    features, labels = dataset.make_one_shot_iterator().get_next()
    states_op = model_fn(
        features=features,
        labels=labels,
        mode=tf.estimator.ModeKeys.TRAIN,
        params=DEFAULT_PARAMS,
        config=tf.estimator.RunConfig())
    init_op = tf.group(tf.global_variables_initializer())
graph.finalize()

with graph.as_default(), tf.Session() as sess:
    sess.run(init_op)
    states = sess.run(states_op)

  res = PandasDataFrame.from_items(items)
  return _inspect.getargspec(target)


In [32]:
# `states` holds the draws fetched from `states_op`: index 0 is the inverse
# error variance chain and index 1 the beta chain (the order of
# `current_state` in `model_fn`).
print(np.mean(states[0]))
# Average over draws (axis 0) to get one mean per coefficient.
print(np.mean(states[1], axis=0))

1.6162782
[1.4999154 0.721838 ]


In [678]:
# NOTE(review): `kernel_results` is not assigned in any visible cell -- this
# relies on leftover kernel state; confirm where it comes from.
len(kernel_results.is_accepted)

1000

In [408]:
# Scratch graph: a scan whose step function branches on a counter variable.
graph = tf.Graph()
with graph.as_default():
    # Non-trainable scalar int64 counter, initialised to zero.
    chain_step = tf.get_variable(
        'chain_step', shape=(), dtype=tf.int64,
        initializer=tf.zeros_initializer(),
        trainable=False, use_resource=True)

    def next_state(state, elem):
        """Adds 4 to `state` when the incremented counter is even, else 3.

        `elem` is ignored; every call increments `chain_step` by one.
        """
        step = tf.assign_add(chain_step, 1)
        is_even = tf.equal(tf.mod(step, 2), 0)
        increment = tf.cond(
            is_even, lambda: tf.constant(4), lambda: tf.constant(3))
        return state + increment

    states = tf.scan(
        next_state,
        elems=tf.range(start=1, limit=10, delta=1),
        initializer=0,
        parallel_iterations=1)

    # Length-50 vector: 11 at index 0 and 3 everywhere else.
    on_value = tf.constant(1 + 10, dtype=tf.int64)
    off_value = tf.constant(1 + 2, dtype=tf.int64)
    tmp = tf.one_hot(
        indices=0, depth=50,
        on_value=on_value, off_value=off_value,
        dtype=tf.int64)

    init_op = tf.global_variables_initializer()
graph.finalize()

with graph.as_default(), tf.Session() as sess:
    sess.run(init_op)
    # Evaluate and print each tensor in turn. `chain_step` is read twice to
    # check that merely reading it does not advance the counter.
    for fetch in (tmp, states, chain_step, chain_step):
        print(sess.run(fetch))

[11  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3]
[ 3  7 10 14 17 21 24 28 31]
9
9


In [322]:
# NOTE(review): `res` is not assigned in any visible cell; presumably res[1]
# holds the kernel results of a chain run -- confirm.
filtered = res[1]
filtered.is_accepted

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [293]:
# Boolean-mask indexing: keep the entries of res[0][1] where `is_accepted` is
# True. NOTE(review): presumably res[0][1] is the beta chain -- confirm.
res[0][1][filtered.is_accepted]

array([[1.8227994 , 0.5643822 ],
       [1.2205217 , 0.88387203],
       [1.441134  , 0.7112676 ],
       [1.8741407 , 0.4992837 ],
       [1.2739804 , 0.890952  ],
       [1.2654287 , 0.7994256 ]], dtype=float32)

In [295]:
# Same mask applied to res[0][0]. NOTE(review): presumably the inverse error
# variance chain -- confirm.
res[0][0][filtered.is_accepted]

array([1.1572111, 1.3124188, 1.0694737, 1.5457894, 1.3813009, 1.5125797],
      dtype=float32)

1.6063379

array([1.4938455 , 0.72335094], dtype=float32)

In [679]:
# Mean of res[0][1] along axis 0. NOTE(review): `res` comes from a cell that
# is not visible here.
np.mean(res[0][1], 0)

array([1.835653  , 0.57863265], dtype=float32)

In [165]:
# NOTE(review): this dumps the whole array, and `res` is not assigned in any
# visible cell. The recorded output looks like a design matrix (a column of
# ones plus one feature), unlike the `res[1]` used with `.is_accepted` in
# another cell -- `res` appears to have meant different things over time.
res[1]

array([[ 1.        , -0.5798185 ],
       [ 1.        , -0.99425226],
       [ 1.        , -0.51082563],
       [ 1.        , -1.2039728 ],
       [ 1.        ,  0.7514161 ],
       [ 1.        , -1.0498221 ],
       [ 1.        ,  0.7371641 ],
       [ 1.        ,  0.6931472 ],
       [ 1.        , -0.7765288 ],
       [ 1.        ,  0.22314355],
       [ 1.        ,  0.25464222],
       [ 1.        , -1.3470737 ],
       [ 1.        ,  1.6134299 ],
       [ 1.        ,  1.4770488 ],
       [ 1.        ,  1.2059708 ],
       [ 1.        ,  1.541159  ],
       [ 1.        , -0.41551545],
       [ 1.        ,  2.2884862 ],
       [ 1.        , -0.56211895],
       [ 1.        ,  0.18232156],
       [ 1.        ,  1.1474024 ],
       [ 1.        ,  2.059239  ],
       [ 1.        , -0.54472715],
       [ 1.        ,  1.7817091 ],
       [ 1.        ,  0.3852624 ],
       [ 1.        ,  1.446919  ],
       [ 1.        ,  0.51282364],
       [ 1.        , -0.40047756],
       [ 1.        ,

In [126]:
# Check the element type of res[0] (`res` comes from a cell not visible here).
res[0].dtype

dtype('float32')

In [110]:
# Mean and variance over all elements of res[1].
# NOTE(review): `res` is not assigned in any visible cell.
print(np.mean(res[1]))
np.var(res[1])

1.0001259


98.34065

In [4]:
# Draw twice from a two-dimensional zero-mean normal with scale sqrt(2) on
# each coordinate, built in a throwaway graph.
with tf.Graph().as_default():
    beta = tfp.edward2.MultivariateNormalDiag(
        loc=tf.zeros(2, dtype=tf.float32),
        scale_identity_multiplier=tf.sqrt(2.),
        name='beta')
    with tf.Session() as sess:
        for draw_index in range(2):
            print(sess.run(beta))

[-2.0591733 -0.5218844]
[ 0.74342984 -0.4585815 ]


In [1]:
import tensorflow as tf
import tensorflow_probability as tfp

# Graph-mode experiment: does a random variable whose location is a TF
# variable follow a later assignment to that variable?
graph = tf.Graph()
with graph.as_default():
    loc = tf.get_variable(        
        'loc', (), initializer=tf.constant_initializer(5.), use_resource=True)
    update_loc_op = tf.assign(loc, -5.)    
    # `.value` is the sampled tensor behind the edward2 random variable.
    norm = tfp.edward2.Normal(loc=loc, scale=0.01).value
    init_op = tf.group(tf.global_variables_initializer())
graph.finalize()

with graph.as_default(), tf.Session() as sess:
    sess.run(init_op)
    # Two separate runs: the recorded output shows two different draws near 5.
    print(sess.run(norm))
    print(sess.run(norm))    
    # Fetch the assignment and a draw in the same run: the recorded output
    # shows the draw near -5, i.e. it used the updated location.
    # NOTE(review): nothing visible here orders the assignment before the
    # sample within a single run -- confirm this is guaranteed.
    print(sess.run((update_loc_op, norm)))

5.0021605
4.9917226
(-5.0, -4.9986978)


In [7]:
# Empirical covariance of 200000 draws from the distribution behind `beta`;
# the recorded output is close to 2 * identity.
# NOTE(review): `.numpy()` needs eager execution, so this `beta` is presumably
# not the graph-mode one built in another cell -- confirm which cell defines it.
np.cov(beta.distribution.sample(200000).numpy().T)

array([[ 1.99481815, -0.00830093],
       [-0.00830093,  2.01050811]])

In [27]:
# One plus a scalar normal draw with mean 5.
# NOTE(review): the name `tmp` is reused for unrelated objects in other cells.
tmp = 1 + tf.random_normal((), mean=5.)

In [42]:
# The recorded output is a concrete eager tensor, so `tmp` already holds a
# fixed value here. NOTE(review): presumably run with eager execution enabled
# -- confirm.
tmp + 4

<tf.Tensor: id=98, shape=(), dtype=float32, numpy=8.377048>

In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.contrib.eager as tfe

# Eager-mode version of the experiment: does the random variable follow a
# later assignment to the variable used as its location?
tf.enable_eager_execution()

loc = tfe.Variable(initial_value=5.)
norm = tfp.edward2.Normal(loc=loc, scale=0.01)

# The recorded output prints the same number twice: the random variable keeps
# the one value it was given when constructed.
print(norm.numpy())
print(norm.numpy())

loc.assign(-5.)

# Recorded output: `loc` is now -5, `norm` still holds its original value, and
# a fresh draw from `norm.distribution` is still near 5 rather than -5.
print(loc.numpy())
print(norm.numpy())
print(norm.distribution.sample())

5.022224
5.022224
-5.0
5.022224
tf.Tensor(5.004547, shape=(), dtype=float32)


In [117]:
# Draw a fresh sample from the distribution behind `norm`.
norm.distribution.sample()

<tf.Tensor: id=760, shape=(), dtype=float32, numpy=4.9960675>

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=4.0>

In [104]:
# NOTE(review): here `tmp` must be an object with a `.distribution` attribute,
# unlike the plain tensor named `tmp` in another cell -- relies on leftover
# kernel state; confirm which cell defines it.
tmp.distribution.sample()

<tf.Tensor: id=443, shape=(), dtype=float32, numpy=5.008044>