In [47]:
import pandas as pd
import numpy as np
import tensorflow as tf
import scipy
import time

In [48]:
training2010 = pd.read_csv('../data/merged_wp_census_data2_081122.csv')
training2010=training2010.fillna(0)
county_adj = pd.read_csv('../data/countyadj2.csv', index_col = 0)

In [49]:
## This function was taken from online
# Generate samples from a multi-variate normal distribution with provided precision matrix WITHOUT inverting
def mv_normal_sample(mu=0, precision_matrix=None, num_models=1):

    # Precision matrix must be a square matrix
    assert precision_matrix.shape[0] == precision_matrix.shape[1], 'Precision matrix must be a square matrix'

    dim = precision_matrix.shape[0]

    chol_U = scipy.linalg.cholesky(precision_matrix, lower=False)

    # Create num_models iid standard normal vectors
    z_vector_matrix = np.random.normal(loc=0, scale=1, size=[num_models, dim])

    # Sample from the MV normal with precision matrix by solving the Cholesky decomp for each normal vector
    samples = np.squeeze(np.array(
        [scipy.linalg.solve_triangular(a=chol_U, b=z_vector_matrix[i, :], unit_diagonal=False) + mu for i in
         range(num_models)]))

    return (np.transpose(samples))

In [73]:
nchain = 5
tau2 = 100
rho = 0.3

Q = (1/tau2)*(np.diag(county_adj.sum(axis=1)) - rho*county_adj)
Q = tf.constant(Q, dtype = tf.float32)

init_state = tf.constant(np.array([mv_normal_sample(precision_matrix = Q, num_models = 3) for i in range(nchain)]),
                        dtype = tf.float32)

This gradient below works fine since it uses the tensorflow mean function

In [51]:
phi = init_state[0,:,:]
tf.math.reduce_sum(phi, 1)

<tf.Tensor: shape=(3064,), dtype=float32, numpy=
array([-0.3884592,  4.1239996, -5.3311143, ...,  6.8933125,  0.7996433,
       -8.623241 ], dtype=float32)>

In [52]:
with tf.GradientTape() as g:
  g.watch(phi)
  y = tf.reduce_mean(phi)
dy_dx = g.gradient(y,phi)
print(dy_dx)

tf.Tensor(
[[0.00010879 0.00010879 0.00010879]
 [0.00010879 0.00010879 0.00010879]
 [0.00010879 0.00010879 0.00010879]
 ...
 [0.00010879 0.00010879 0.00010879]
 [0.00010879 0.00010879 0.00010879]
 [0.00010879 0.00010879 0.00010879]], shape=(3064, 3), dtype=float32)


This gradient below does not work because it uses the numpy mean function, even though it converts the answer to a tf.Tensor.

In [53]:
with tf.GradientTape() as g:
  g.watch(phi)
  y = tf.constant(np.mean(phi))
dy_dx = g.gradient(y,phi)
print(dy_dx)

None


Below is the loss function that I want to use for an MCMC sampler. This function is the log likelihood of my model. Since the operations are not written in tensorflow the gradient doesn't work. I want to know how to implement this in tensorflow.

In [77]:
models = ['acs', 'pep', 'worldpop']

nchain = 5
tau2 = 100
rho = 0.3

Q = (1/tau2)*(np.diag(county_adj.sum(axis=1)) - rho*county_adj)
Q = tf.constant(Q, dtype = tf.float32)

def target_log_prob_fn_CAR(phi):
    
    Q = (1/tau2)*(np.diag(county_adj.sum(axis=1)) - rho_s*county_adj)
    Q = tf.constant(Q, dtype = tf.float32)
        
    ll = tf.Variable(0.)
    for chain in range(phi.shape[0]):
        # (1) Prob of the CAR random effect values
        ll_chain = -0.5*tf.reduce_mean(tf.linalg.diag_part(
            tf.linalg.matmul(phi[chain,:,:],tf.linalg.matmul(Q, phi[chain,:,:]), transpose_a = True))) 
        ll = ll + ll_chain
    
    # add in determinant values
    ll = ll + 0.5*phi.shape[0]*len(models)*np.linalg.slogdet(Q)[1]
    
    # get exponentiated values and sum across models
    exp_phi = tf.math.exp(phi)
    exp_phi_rows = tf.reduce_sum(exp_phi, 2)
    
    # get model weights and calculate mean estimate
    u = exp_phi/exp_phi_rows[...,None]
      
    tmp = training2010[models].values*u
    n = tf.reduce_sum(tmp, axis = 2)
    
    # update the log likelihood 
    ll = ll + tf.reduce_sum([np.sum(training2010['census']*np.log(n[chain,:]) - n[chain,:]) for chain in range(phi.shape[0])])
    
    return(ll)

def target_log_prob_fn_test1(phi):
    
    Q = (1/tau2)*(np.diag(county_adj.sum(axis=1)) - rho_s*county_adj)
    Q = tf.constant(Q, dtype = tf.float32)
        
    ll = tf.Variable(0.)
    for chain in range(phi.shape[0]):
        # (1) Prob of the CAR random effect values
        ll_chain = -0.5*tf.reduce_mean(tf.linalg.diag_part(
            tf.linalg.matmul(phi[chain,:,:],tf.linalg.matmul(Q, phi[chain,:,:]), transpose_a = True))) 
        ll = ll + ll_chain
    
    # get exponentiated values and sum across models
    exp_phi = tf.math.exp(phi)
    exp_phi_rows = tf.reduce_sum(exp_phi, 2)
    
    # get model weights and calculate mean estimate
    u = exp_phi/exp_phi_rows[...,None]
      
    tmp = training2010[models].values*u
    n = tf.reduce_sum(tmp, axis = 2)
    
    # update the log likelihood 
    ll = ll + tf.reduce_sum([np.sum(training2010['census']*np.log(n[chain,:]) - n[chain,:]) for chain in range(phi.shape[0])])
    
    return(ll)

def target_log_prob_fn_test2(phi):
    
    ll = tf.Variable(0.)
    for chain in range(phi.shape[0]):
        # (1) Prob of the CAR random effect values
        ll_chain = -0.5*tf.reduce_mean(tf.linalg.diag_part(
            tf.linalg.matmul(phi[chain,:,:],tf.linalg.matmul(Q, phi[chain,:,:]), transpose_a = True))) 
        ll = ll + ll_chain
    
    # get exponentiated values and sum across models
    exp_phi = tf.math.exp(phi)
    exp_phi_rows = tf.reduce_sum(exp_phi, 2)
    
    # get model weights and calculate mean estimate
    u = exp_phi/exp_phi_rows[...,None]
      
    tmp = training2010[models].values*u
    n = tf.reduce_sum(tmp, axis = 2)
    
    # update the log likelihood 
    ll = ll + tf.reduce_sum([np.sum(training2010['census']*np.log(n[chain,:]) - n[chain,:]) for chain in range(phi.shape[0])])
    
    return(ll)

def target_log_prob_fn_test3(phi):
    
    ll = tf.Variable(0.)
    for chain in range(phi.shape[0]):
        # (1) Prob of the CAR random effect values
        ll_chain = -0.5*tf.reduce_mean(tf.linalg.diag_part(
            tf.linalg.matmul(phi[chain,:,:],tf.linalg.matmul(Q, phi[chain,:,:]), transpose_a = True))) 
        ll = ll + ll_chain
    
    # get exponentiated values and sum across models
    exp_phi = tf.math.exp(phi)
    exp_phi_rows = tf.reduce_sum(exp_phi, 2)
    
    # get model weights and calculate mean estimate
    u = exp_phi/exp_phi_rows[...,None]
      
    tmp = training2010[models].values*u
    n = tf.reduce_sum(tmp, axis = 2)
    
    # update the log likelihood 
    ll = ll + tf.reduce_sum([np.sum(training2010['census']*np.log(n[chain,:]) - n[chain,:]) for chain in range(phi.shape[0])])
    
    return(ll)

In [78]:
t0 = time.perf_counter()
for i in range(10):
    a = target_log_prob_fn_CAR(init_state)
print(time.perf_counter() - t0)

t0 = time.perf_counter()
for i in range(10):
    b = target_log_prob_fn_test1(init_state)
print(time.perf_counter() - t0)

t0 = time.perf_counter()
for i in range(10):
    c = target_log_prob_fn_test2(init_state)
print(time.perf_counter() - t0)

t0 = time.perf_counter()
for i in range(10):
    d = target_log_prob_fn_test3(init_state)
print(time.perf_counter() - t0)

4.43322470000021
1.8067728999999417
0.2740093999996134
0.2710925000001225


So the majority of the work (63%) is in the slogdet function. Is there a faster way to do this? Not sure how this works. Is this necessary to compute each time? Is there a way to store the values from successive calls? How can I set a global counter of calls? That would be nice to do. Like I can store the values for the same tau2 and rho. That way I won't have to repeat call this function, because it is uneccessary. 

In [61]:
print(a)
print(b)

tf.Tensor(17883193000.0, shape=(), dtype=float32)
tf.Tensor(17883263000.0, shape=(), dtype=float32)


In [81]:
with tf.GradientTape() as g:
  g.watch(init_state)
  y = target_log_prob_fn_CAR(init_state)
dy_dx = g.gradient(y, init_state)
print(dy_dx)

tf.Tensor(
[[[ 0.06293806 -0.19838522 -0.12657136]
  [-0.03091805 -0.04359809 -0.19169998]
  [ 0.13187447  0.18273549 -0.19368437]
  ...
  [ 0.029387   -0.04181785 -0.03695919]
  [-0.16595784 -0.05239505  0.03625642]
  [-0.0399273   0.01138137 -0.00183557]]

 [[ 0.14912575  0.12052111  0.07781777]
  [-0.10916156 -0.07026157  0.14019117]
  [-0.04505573  0.09033561  0.10722545]
  ...
  [ 0.06008033 -0.09975737  0.00951293]
  [ 0.00195231  0.00384467  0.10106154]
  [ 0.1319578   0.08323511  0.01198121]]

 [[-0.06555098 -0.02694318  0.04660968]
  [ 0.07939043  0.03178913  0.15002967]
  [-0.00882181  0.11389777  0.07034828]
  ...
  [-0.09443994 -0.04512857 -0.00781568]
  [-0.00819376 -0.12262242 -0.13971454]
  [-0.03177919 -0.09681055 -0.14664814]]

 [[ 0.08906697  0.0099615  -0.02503883]
  [-0.14669773 -0.06447905  0.08280015]
  [ 0.05262746 -0.07116685 -0.06322917]
  ...
  [ 0.00870429  0.01221565 -0.00885389]
  [-0.01572543  0.03894701 -0.03172914]
  [ 0.13984008  0.11564143 -0.06373934]

In [83]:
with tf.GradientTape() as g:
  g.watch(init_state)
  y = target_log_prob_fn_test3(init_state)
dy_dx = g.gradient(y, init_state)
print(dy_dx)

tf.Tensor(
[[[ 0.09127215 -0.20940161 -0.11793244]
  [-0.01927063 -0.04018708 -0.19272867]
  [ 0.11517505  0.18378654 -0.19533205]
  ...
  [ 0.03702173 -0.05179448 -0.00932288]
  [-0.17377016 -0.0441881   0.00396088]
  [-0.04036826 -0.00908633 -0.02932849]]

 [[ 0.15043935  0.14772579  0.06314655]
  [-0.07581358 -0.06281566  0.13385895]
  [-0.08030345  0.07824104  0.11501669]
  ...
  [ 0.06758705 -0.08850935  0.01304885]
  [ 0.00876922 -0.0262132   0.0916976 ]
  [ 0.15009926  0.09006016 -0.02536286]]

 [[-0.11102764 -0.00179667  0.05647185]
  [ 0.05864335  0.03995913  0.14371407]
  [ 0.02571499  0.11598949  0.07710124]
  ...
  [-0.10117067 -0.04165079 -0.03552025]
  [-0.00370917 -0.10642684 -0.10120192]
  [-0.01930687 -0.10010205 -0.21290722]]

 [[ 0.06365295 -0.00117846 -0.00061857]
  [-0.1136101  -0.0911833   0.11023029]
  [ 0.05521546 -0.04533196 -0.01646566]
  ...
  [ 0.02088382  0.01095701 -0.0116246 ]
  [-0.0087244   0.0754048  -0.03407851]
  [ 0.0948333   0.13605219 -0.05307309]