# Prostate Cancer

In this notebook, we use blocked Gibbs sampling to examine the relationship between prostate-specific antigen (PSA) and cancer volume.

In [24]:
import copy
import functools

import numpy as np
from scipy import stats
import tensorflow as tf
import tensorflow_probability as tfp

import prostate
from stat570.linear_model import linear_regression
from stat570.mcmc import gibbs_sampling

# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)

# Load the data set through the `prostate` module and preview the first rows.
prostate_data = prostate.load_data()
prostate_data.head()

  res = PandasDataFrame.from_items(items)


Unnamed: 0,lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45,lpsa
0,-0.579818,2.769459,50.0,-1.386294,0.0,-1.386294,6.0,0.0,-0.430783
1,-0.994252,3.319626,58.0,-1.386294,0.0,-1.386294,6.0,0.0,-0.162519
2,-0.510826,2.691243,74.0,-1.386294,0.0,-1.386294,7.0,20.0,-0.162519
3,-1.203973,3.282789,58.0,-1.386294,0.0,-1.386294,6.0,0.0,-0.162519
4,0.751416,3.432373,62.0,-1.386294,0.0,-1.386294,6.0,0.0,0.371564


## TensorFlow `input_fn`

We carry out our computations in TensorFlow, so we'll convert our data into tensors.

In [14]:
def prostate_input_fn():
    """Wraps the whole prostate data set in a single-element dataset.

    Returns:
        A `tf.data.Dataset` whose only element is a `(features, labels)`
        pair. `features` maps every column name of `prostate_data` except
        'lpsa' to the list of that column's values; `labels` is the list of
        'lpsa' values.
    """
    columns = prostate_data.to_dict('list')
    # Pull the response out so that only covariates remain in `columns`.
    response = columns.pop('lpsa')
    return tf.data.Dataset.from_tensors((columns, response))

## Sampling Functions

In Gibbs sampling, we sample from the posterior conditional distributions. The inverse error variance (also known as precision) is gamma-distributed, and the coefficients are normally distributed. It's *blocked* Gibbs sampling since we draw both coefficients at once.

In [15]:
def make_inverse_error_variance_dist(
    prior_concentration, prior_rate, features, labels, beta):
    """Makes the posterior distribution for inverse error variance.

    Conditional on the coefficients `beta`, the Gamma prior is updated to a
    Gamma with concentration `prior_concentration + n/2` and rate
    `prior_rate + sum((labels - features . beta)**2)/2`, where `n` is the
    size of the first dimension of `features`.

    Args:
        prior_concentration: Concentration (shape) of the Gamma prior.
        prior_rate: Rate of the Gamma prior.
        features: Design matrix; its first dimension is taken as the number
            of observations.
        labels: Observed responses.
        beta: Current value of the model coefficients.

    Returns:
        A `tf.distributions.Gamma` named 'posterior_inverse_error_variance'.
    """
    scope_inputs = [prior_concentration, prior_rate, features, labels, beta]
    with tf.name_scope('make_inverse_error_variance_dist', values=scope_inputs):
        num_observations = tf.cast(tf.shape(features)[0], tf.float32)
        posterior_concentration = (
            prior_concentration + tf.divide(num_observations, 2.))

        residuals = labels - tf.tensordot(features, beta, 1)
        # tf.nn.l2_loss is half the sum of squares, which is exactly the
        # residual term of the conditional posterior rate.
        posterior_rate = prior_rate + tf.nn.l2_loss(residuals)

        return tf.distributions.Gamma(
            concentration=posterior_concentration, rate=posterior_rate,
            name='posterior_inverse_error_variance')
    
def make_beta_dist(prior_mean, prior_variance, features, labels, inverse_error_variance):
    """Makes the posterior distribution for model coefficients.

    Conditional on the inverse error variance, the coefficients are
    multivariate normal. With `G = features' features`:

        mle_mean             = G^{-1} features' labels
        mle_precision        = G * inverse_error_variance
        posterior_precision  = mle_precision + I / prior_variance
        posterior_covariance = posterior_precision^{-1}
        posterior_mean       = posterior_covariance . mle_precision
                               . (mle_mean - prior_mean) + prior_mean

    Args:
        prior_mean: Prior mean of the coefficients; its first dimension must
            be statically known because it sizes the identity matrices.
        prior_variance: Scalar prior variance shared by all coefficients.
        features: Design matrix.
        labels: Observed responses.
        inverse_error_variance: Current value of the error precision.

    Returns:
        A `tfp.distributions.MultivariateNormalFullCovariance` named
        'posterior_beta'.
    """
    num_coefficients = int(prior_mean.shape[0])
    scope_inputs = [
        inverse_error_variance, features, labels, prior_mean, prior_variance]
    with tf.name_scope('make_beta_dist', values=scope_inputs):
        features_t = tf.transpose(features)
        gramian = tf.matmul(features_t, features)

        # Least-squares estimate from the normal equations, solved through
        # the Cholesky factor of the Gramian.
        gramian_cholesky = tf.linalg.cholesky(gramian)
        cross_moment = tf.matmul(features_t, tf.expand_dims(labels, -1))
        mle_mean = tf.squeeze(
            tf.linalg.cholesky_solve(gramian_cholesky, cross_moment))
        mle_precision = gramian*inverse_error_variance

        prior_precision = tf.eye(num_coefficients)/prior_variance
        posterior_precision = mle_precision + prior_precision
        # Invert the precision by solving against the identity matrix.
        posterior_cholesky = tf.linalg.cholesky(posterior_precision)
        posterior_covariance = tf.linalg.cholesky_solve(
            posterior_cholesky, tf.eye(num_coefficients))

        # Precision-weighted pull of the least-squares estimate toward the
        # prior mean.
        data_weight = tf.matmul(posterior_covariance, mle_precision)
        posterior_mean = tf.tensordot(
            data_weight, mle_mean - prior_mean, axes=1) + prior_mean

        return tfp.distributions.MultivariateNormalFullCovariance(
            loc=posterior_mean, covariance_matrix=posterior_covariance,
            name='posterior_beta')

## Model

Now, we specify the model. The generative process is specified in `forward`. We build our conditional distributions based on the data and use them to construct the transition kernel for Markov chain Monte Carlo (MCMC) sampling.

The code for [`gibbs_sampling.GibbsSamplingKernel`](https://github.com/ppham27/stat570/blob/master/stat570/mcmc/gibbs_sampling.py) can be found on my [GitHub](https://github.com/ppham27/stat570/blob/master/stat570/mcmc/gibbs_sampling.py).

In [16]:
def model_fn(features, labels, mode, params, config):
    """Builds the Bayesian linear regression of the labels on `lcavol`.

    The generative model is

        inverse_error_variance ~ Gamma(concentration, rate)
        beta                   ~ Normal(prior mean, prior variance * I)
        labels                 ~ Normal(features . beta,
                                        1/sqrt(inverse_error_variance))

    where `features` is the 'lcavol' column with a column of ones prepended,
    so `beta[0]` is the intercept and `beta[1]` the slope.

    Args:
        features: Dict of feature tensors; only the 'lcavol' entry is read.
        labels: Tensor of observed responses.
        mode: A `tf.estimator.ModeKeys` value selecting what is returned.
        params: Nested dict with the entries
            params['prior']['inverse_error_variance']['concentration'],
            params['prior']['inverse_error_variance']['rate'],
            params['prior']['beta']['mean'],
            params['prior']['beta']['variance'],
            params['mcmc']['num_results'],
            params['mcmc']['initial_state']['inverse_error_variance'],
            params['mcmc']['initial_state']['beta'],
            and optionally params['mcmc']['num_burnin_steps'] (default 500).
            The 'mcmc' entries are only read when sampling.
        config: Unused.

    Returns:
        In PREDICT mode, the value of the labels random variable produced by
        `forward` (the precision and coefficients are drawn from their
        priors). In EVAL mode, intended to be the log joint density, but see
        the note in the body. Otherwise, the samples returned by
        `tfp.mcmc.sample_chain`, in the order
        (inverse_error_variance, beta).
    """
    del config  # Unused.

    prior_inverse_error_variance_concentration = (
        params['prior']['inverse_error_variance']['concentration'])
    prior_inverse_error_variance_rate = (
        params['prior']['inverse_error_variance']['rate'])

    prior_beta_mean = tf.constant(params['prior']['beta']['mean'],
                                  dtype=tf.float32)
    prior_beta_variance = tf.constant(params['prior']['beta']['variance'],
                                      dtype=tf.float32)

    def forward(features):
        """Generative process: draws the precision, coefficients and labels."""
        inverse_error_variance = tfp.edward2.Gamma(
            concentration=prior_inverse_error_variance_concentration,
            rate=prior_inverse_error_variance_rate,
            name='inverse_error_variance')

        beta = tfp.edward2.MultivariateNormalDiag(
            loc=prior_beta_mean,
            scale_identity_multiplier=tf.sqrt(prior_beta_variance), name='beta')

        return tfp.edward2.Normal(
            loc=tf.tensordot(features, beta, axes=1), scale=1/tf.sqrt(inverse_error_variance),
            name='labels')

    features = tf.feature_column.input_layer(
        features, [tf.feature_column.numeric_column('lcavol')])
    # Prepend a column of ones so that the first coefficient is the intercept.
    features = tf.concat((tf.ones_like(features), features), axis=-1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return forward(features).value

    # Log joint density with the observed labels already bound.
    log_joint_fn = functools.partial(
        tfp.edward2.make_log_joint_fn(lambda: forward(features)),
        labels=labels)

    if mode == tf.estimator.ModeKeys.EVAL:
        # NOTE: no values for `inverse_error_variance` and `beta` are
        # supplied here, so this call cannot evaluate the joint density.
        return log_joint_fn(labels=labels) # currently will error

    # One conditional-posterior factory per state part, in the same order as
    # `current_state` below.
    samplers = [
        functools.partial(
            make_inverse_error_variance_dist,
            prior_inverse_error_variance_concentration,
            prior_inverse_error_variance_rate,
            features, labels),
        functools.partial(
            make_beta_dist, prior_beta_mean, prior_beta_variance,
            features, labels),
    ]

    kernel = tfp.mcmc.MetropolisHastings(
        inner_kernel=gibbs_sampling.GibbsSamplingKernel(
            samplers=samplers,
            target_log_prob_fn=lambda inverse_error_variance, beta: log_joint_fn(
                inverse_error_variance=inverse_error_variance,
                beta=beta)))

    mcmc_params = params['mcmc']
    samples, _ = tfp.mcmc.sample_chain(
        num_results=mcmc_params['num_results'],
        current_state=(
            mcmc_params['initial_state']['inverse_error_variance'],
            mcmc_params['initial_state']['beta']),
        kernel=kernel,
        # Burn-in is configurable; 500 is the previously hard-coded value.
        num_burnin_steps=mcmc_params.get('num_burnin_steps', 500),
        # One less than the number of samplers. Presumably the Gibbs kernel
        # updates one state part per step, so this keeps one result per
        # full sweep -- confirm against GibbsSamplingKernel.
        num_steps_between_results=len(samplers) - 1,
        parallel_iterations=1)

    return samples

## Running MCMC

We start a TensorFlow session to run the chain. Parameters are taken from the homework. A gamma distribution with $0$ shape and $0$ rate is improper so very small values were used.

In [40]:
# Default priors and MCMC settings passed to `model_fn` as `params`.
DEFAULT_PARAMS = {
    'prior': {
        # Gamma prior on the inverse error variance (precision). Small
        # positive values are used in place of an improper prior.
        'inverse_error_variance': {
            'concentration': 0.01,  # Also called shape and denoted alpha
            'rate': 0.01,  # Usually denoted by beta (not the coefficients).
        },
        # Normal prior on the coefficients, ordered (intercept, slope).
        'beta': {
            'mean': [0., 0.],
            'variance': 2.,  # Enforce equal variance and no covariance.
        },
    },
    'mcmc': {
        'num_results': 2048,  # Number of samples kept from the chain.
        # Starting values of the chain, one entry per state part.
        'initial_state': {
            'inverse_error_variance': 1.,
            'beta': [0., 0.],
        },
    },
}

def get_mle_params():
    """Returns a copy of `DEFAULT_PARAMS` whose chain starts at the MLE.

    A `LinearRegression` model is built from `prostate_data` with 'lcavol'
    as the covariate and 'lpsa' as the response. The chain's initial state
    is then set to the reciprocal of the model's residual variance and to
    its coefficient estimates. `DEFAULT_PARAMS` itself is not modified.

    Returns:
        A deep copy of `DEFAULT_PARAMS` with
        `['mcmc']['initial_state']` overwritten. The values are plain Python
        floats and lists, matching the types used in `DEFAULT_PARAMS`.
    """
    mle_params = copy.deepcopy(DEFAULT_PARAMS)
    mle_model = linear_regression.LinearRegression.from_data_frame(
        prostate_data, ['lcavol'], 'lpsa')
    initial_state = mle_params['mcmc']['initial_state']
    # Store built-in Python numbers rather than NumPy float64 values: the
    # model is built in float32 and NumPy values would most likely be
    # converted to float64 tensors.
    initial_state['inverse_error_variance'] = float(
        1./mle_model.residual_variance_)
    initial_state['beta'] = (
        mle_model.coefficients_['estimate'].values.tolist())
    return mle_params

def get_prior_params(params):
    """Returns `params` unchanged.

    The very same object is returned, not a copy, so mutating the result
    also mutates the argument.

    NOTE(review): this looks like a placeholder (perhaps for an initial
    state drawn from the prior) -- confirm the intent.
    """
    return params

# Parameters whose chain starts at the least-squares estimates.
MLE_PARAMS = get_mle_params()
# NOTE: get_prior_params returns its argument, so PRIOR_PARAMS is the same
# object as DEFAULT_PARAMS, not a copy.
PRIOR_PARAMS = get_prior_params(DEFAULT_PARAMS)

  X = data_frame[covariates].as_matrix()
  y = data_frame[response].as_matrix()


In [21]:



# Build the sampling graph in a dedicated tf.Graph.
# NOTE(review): no random seed is set in this cell, so the sampled values
# differ from run to run.
graph = tf.Graph()
with graph.as_default():
    # The dataset has a single element holding the whole data set; repeat()
    # makes that element available on every fetch.
    features, labels = prostate_input_fn().repeat().make_one_shot_iterator().get_next()
    states_op = model_fn(
        features, labels,
        tf.estimator.ModeKeys.TRAIN, DEFAULT_PARAMS, tf.estimator.RunConfig())
    init_op = tf.group(tf.global_variables_initializer())
# Make the graph read-only so that no ops are added to it afterwards.
graph.finalize()

# Run the chain. `states` holds the samples in the order
# (inverse_error_variance, beta).
with graph.as_default(), tf.Session() as sess:
    sess.run(init_op)
    states = sess.run(states_op)

  return _inspect.getargspec(target)


In [36]:

    
# Display the MLE-initialised parameters (a fresh copy is built on each call).
get_mle_params()
# Disabled scratch inspections. `mle_model` is local to get_mle_params and
# is not available at this level.
#mle_model.residual_variance_
#MLE_PARAMS

  X = data_frame[covariates].as_matrix()
  y = data_frame[response].as_matrix()


{'mcmc': {'initial_state': {'beta': [1.5072974615083856, 0.7193203895350354],
   'inverse_error_variance': 1.6124984637190307},
  'num_results': 2048},
 'prior': {'beta': {'mean': [0.0, 0.0], 'variance': 2.0},
  'inverse_error_variance': {'concentration': 0.01, 'rate': 0.01}}}

In [18]:
# Posterior mean of the inverse error variance (precision).
print(np.mean(states[0]))
# Posterior means of the coefficients, averaged over samples:
# (intercept, lcavol slope).
print(np.mean(states[1], axis=0))

1.6069119
[1.4968944 0.7228822]
