### Load Libraries

In [1]:
import tensorflow_probability as tfp
from pprint import pprint
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import seaborn as sns
import pandas as pd
from functools import partial
import plotly.express as px
import plotly.graph_objs as go
import datetime
import os

tfd = tfp.distributions
tfb = tfp.bijectors

In [2]:
# User Inputs

ITER = 20000
CHAINS = 4

### Load Data

In [3]:
def load_data():

    # Load Data

    df = pd.read_table("../data/rdata", header=None, delim_whitespace=True)
    df.columns = ["X", "Y"]
    df["index"] = np.where(df.index < 100, "Train", "Test")

    # Create train and test

    X_train = np.array(df.loc[df["index"] == "Train", "X"]).reshape(-1, 1)
    Y_train = np.array(df.loc[df["index"] == "Train", "Y"])
    X_test = np.array(df.loc[df["index"] == "Test", "X"]).reshape(-1, 1)
    Y_test = np.array(df.loc[df["index"] == "Test", "Y"])

    return X_train, X_test, Y_train, Y_test

In [4]:
def bnn_forward(X, params):
    
    """
    Forward pass of the neural network 
    """
    
    w_ih = tf.reshape(params[0:8], (1, 8))
    b_h = tf.reshape(params[8:16], (8,))
    w_ho = tf.reshape(params[16:24], (8, 1))
    b_o = tf.reshape(params[24], (1, ))
    
    # Forward pass
    
    net = tf.nn.tanh(tf.matmul(X, w_ih) + b_h)
    net = tf.matmul(net, w_ho) + b_o
    preds = net[:, 0]
    
    return preds

In [5]:
def log_posterior_unnormalized(params, x, y):
    
    """
    
    Computes the log posterior density for any given params "state vector", and x and y
    
    
    Indexing for reference:
    
    Low level parameters: 
    
    w_ih = params[0:7]
    b_h = params[8:16]
    w_ho = params[16:24]
    b_o = params[24]
    
    Hyperparameters:
    
    w_prec_ih = params[25]
    b_prec_h = params[26]
    w_prec_ho = params[27]
    y_prec = params[28]
    
    """
    
    params = tf.reshape(params, shape=(29, ))
    params = tf.cast(params, dtype=tf.float32)
    
    # Hyperparameter definition
    
    prior_w_prec_ih = tfd.Gamma(concentration=0.025, rate=0.000625)
    prior_b_prec_h = tfd.Gamma(concentration=0.025, rate=0.000625)
    prior_w_prec_ho = tfd.Gamma(concentration=0.025, rate=0.000625)
    prior_y_prec = tfd.Gamma(concentration=0.025, rate=0.000625)

    # Prior definition of weights and biases (centered)

    prior_w_ih = tfd.MultivariateNormalDiag(
        loc=tf.zeros(8, dtype=tf.float32),
        scale_diag=tf.ones(8, dtype=tf.float32)*(1/tf.sqrt(params[25]))
    )

    prior_b_h = tfd.MultivariateNormalDiag(
        loc=tf.zeros(8, dtype=tf.float32),
        scale_diag=tf.ones(8, dtype=tf.float32)*(1/tf.sqrt(params[26]))
    )

    prior_w_ho = tfd.MultivariateNormalDiag(
        loc=tf.zeros(8, dtype=tf.float32),
        scale_diag=tf.ones(8, dtype=tf.float32)*(1/tf.sqrt(params[27]))*(1/np.sqrt(8)), 
        # Scaling factor for GP limit as n_units -> inf
    )

    prior_b_o = tfd.MultivariateNormalDiag(
        loc=tf.zeros(1, dtype=tf.float32),
        scale_diag=tf.ones(1, dtype=tf.float32)*100
    )

    # Prior definition of weights and biases (non-centered)
    
    # Likelihood Function definition
    
    train_predictions = bnn_forward(x, params)
    train_predictions = tf.cast(train_predictions, dtype=tf.float32)

    likelihood = tfd.MultivariateNormalDiag(
        loc=train_predictions, 
        scale_diag=tf.ones_like(train_predictions)*(1 / tf.sqrt(params[28]))
    )

    # Calculate log_posterior_probability upto a constant 
    # (log posterior = log likelihood + log prior - log Z)

    log_prior_prob = prior_w_ih.log_prob(params[0:8]) + \
        prior_b_h.log_prob(params[8:16]) + \
        prior_w_ho.log_prob(params[16:24]) + \
        prior_b_o.log_prob(tf.reshape(params[24], (1, 1))) + \
        prior_w_prec_ih.log_prob(tf.reshape(params[25], (1, 1))) + \
        prior_b_prec_h.log_prob(tf.reshape(params[26], (1, 1))) + \
        prior_w_prec_ho.log_prob(tf.reshape(params[27], (1, 1))) + \
        prior_y_prec.log_prob(tf.reshape(params[28], (1, 1)))
    
    log_likelihood_prob = likelihood.log_prob(y)
    
    log_posterior_prob = log_prior_prob + tf.cast(log_likelihood_prob, dtype=tf.float32)

    return tf.reshape(log_posterior_prob, [])

def log_posterior(params):
    
    """
    Fix X and Y
    """
    
    return log_posterior_unnormalized(params, X_train, Y_train)

# # Test the log posterior function on random data

# test_inputs = tf.random.uniform((29, ), 1, 100, dtype=tf.float32)
# temp = log_posterior_unnormalized(test_inputs, X_train, Y_train)

# log_posterior(test_inputs)

In [6]:
# @tf.function(experimental_compile=True)
def bnn_forward_measurement_noise(X, params):
    """
    Forward pass of the neural network 
    """

    predictive_dist = tfd.Normal(loc=tf.cast(bnn_forward(X, params), dtype=tf.float32),
                                 scale=1/tf.sqrt(params[28]))

    return predictive_dist.sample(10)

### Run HMC/NUTS

In [7]:
# def main(args):

# Load Data

X_train, X_test, Y_train, Y_test = load_data()
X_train = X_train.astype('float32')
Y_train = Y_train.astype('float32')
X_test = X_test.astype('float32')
Y_test = Y_test.astype('float32')

# Define MCMC transition kernels (composition step)

# HMC
hmc_kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=log_posterior,
    step_size=np.float64(5e-3),
    num_leapfrog_steps=1000
)

# NUTS
nuts_kernel = tfp.mcmc.NoUTurnSampler(
    log_posterior, step_size=0.1, max_tree_depth=10
)

# Adaptation inside Dual Averaging Method
adaptive_kernel = tfp.mcmc.SimpleStepSizeAdaptation(
        nuts_kernel,
        num_adaptation_steps=int(500),
        target_accept_prob=np.float64(.75)
)

# Define initial state

initial_state = tf.concat([tf.zeros((1, 24)),  # initial low-level parameters (fixed at 0...)
                           tf.ones((1, 5))],  # hyperparameters (fixed at 1)
                          axis=1)

# Trace function wrapper - compile using XLA for speed purposes (used to be faster?)

@tf.function(experimental_compile=True)
def run_chain(initial_state, num_results=2000, num_burnin_steps=500, seed=1):

    return tfp.mcmc.sample_chain(
        num_results=num_results,
        num_burnin_steps=num_burnin_steps,
        current_state=initial_state,
        kernel=adaptive_kernel,
        trace_fn=lambda cs, kr: kr, 
        seed=seed)

# Run all chains

all_results = dict()
kernel_results = dict()

for chain in range(1, CHAINS + 1):
    
    print(chain)
    samples, kernel_results = run_chain(initial_state=initial_state, num_results=ITER, seed=chain*100)
    print("Acceptance rate:", kernel_results.inner_results.is_accepted.numpy().mean())
    all_results[chain] = samples
    # kernel_results[chain] = kernel_results


TensorFloat-32 matmul/conv are enabled for NVIDIA Ampere+ GPUs. The resulting loss of precision may hinder MCMC convergence. To turn off, run `tf.config.experimental.enable_tensor_float_32_execution(False)`. For more detail, see https://github.com/tensorflow/community/pull/287.



1
Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.
Acceptance rate: 0.94045
2
Acceptance rate: 0.93945
3
Acceptance rate: 0.99075
4
Acceptance rate: 0.98275


### Trace plots of hyperparameters

In [8]:
# Collect all samples into one tensor

samples = tf.concat([all_results[1],
           all_results[2],
           all_results[3],
           all_results[4]], axis=0)

In [9]:
# Predictions

y_train_pred = np.zeros((10, 100, samples.shape[0]))
y_test_pred = np.zeros((10, 100, samples.shape[0]))

for i in range(0, samples.shape[0]):
    y_train_pred[:, :, i] = bnn_forward_measurement_noise(X_train, tf.reshape(samples[i, :, :], (29, )))
    
for i in range(0, samples.shape[0]):
    y_test_pred[:, :, i] = bnn_forward_measurement_noise(X_test, tf.reshape(samples[i, :, :], (29, )))

### Train and Test set predictions

In [10]:
# Write predictions to a data frame

df_predictions_train = pd.DataFrame({
    
    "inputs": X_train.flatten(),
    "targets": Y_train.flatten(),
    "mean": y_train_pred[:, :, :].mean(axis=(0,2)),
    "median": np.quantile(y_train_pred[:, :, :], 0.5, axis=(0,2)),
    "q1": np.quantile(y_train_pred[:, :, :], 0.01, axis=(0,2)),
    "q10": np.quantile(y_train_pred[:, :, :], 0.10, axis=(0,2)),
    "q90": np.quantile(y_train_pred[:, :, :], 0.90, axis=(0,2)),
    "q99": np.quantile(y_train_pred[:, :, :], 0.99, axis=(0,2)),
    "label": "train"

})

df_predictions_test = pd.DataFrame({
    
    "inputs": X_test.flatten(),
    "targets": Y_test.flatten(),
    "mean": y_test_pred[:, :, :].mean(axis=(0,2)),
    "median": np.quantile(y_test_pred[:, :, :], 0.5, axis=(0,2)),
    "q1": np.quantile(y_test_pred[:, :, :], 0.01, axis=(0,2)),
    "q10": np.quantile(y_test_pred[:, :, :], 0.10, axis=(0,2)),
    "q90": np.quantile(y_test_pred[:, :, :], 0.90, axis=(0,2)),
    "q99": np.quantile(y_test_pred[:, :, :], 0.99, axis=(0,2)),
    "label": "test"

})

In [11]:
# Organize results for traces

traces = samples.numpy().reshape((ITER*4, 29))

w_ih_names = ["w_ih_" + str(i) for i in range(0,8)]
b_h_names = ["b_h_" + str(i) for i in range(0, 8)]
w_ho_names = ["w_ho_" + str(i) for i in range(0, 8)]
b_o_names = ["b_o_" + str(i) for i in range(1, 2)]

df_traces = pd.DataFrame(traces)
df_traces.columns = w_ih_names + b_h_names + w_ho_names + b_o_names + ["W_prec_ih"] + ["B_prec_h"] + ["W_prec_ho"] + ["y_prec"]
df_traces["trace"] = 1

df_traces["id"] = df_traces.index

df_traces["trace"] = np.where(np.logical_and(df_traces["id"] >= 0, df_traces["id"] < ITER), 1, 
                                  np.where(np.logical_and(df_traces["id"] >= ITER, df_traces["id"] < 2*ITER), 2, 
                                      np.where(np.logical_and(df_traces["id"] >= 2*ITER, df_traces["id"] < 3*ITER), 3, 
                                              np.where(np.logical_and(df_traces["id"] >= 3*ITER, df_traces["id"] < 4*ITER), 4, 0))))

In [12]:
# Write traces to disk

time = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
new_dir = os.path.join("../output/", "tfprob_c_" + time)
os.mkdir(new_dir)
df_traces.to_feather(f"{new_dir}/df_traces.feather")

# Write predictions to disk

df_predictions = pd.concat([df_predictions_train, df_predictions_test]).reset_index()
df_predictions.drop(f"index", axis=1).to_feather(f"{new_dir}/df_predictions.feather")

In [13]:
print(new_dir)

../output/tfprob_c_2021_03_17_15_59_02
