In [None]:
import numpy as np
import scipy
import tensorflow as tf
import tensorflow_probability as tfp
# Short alias for the TFP distributions module; used by the cells below.
tfd = tfp.distributions
# Run TensorFlow eagerly so tensors expose `.numpy()`, which later cells rely on.
# NOTE(review): `tf.enable_eager_execution` looks like the TF 1.x spelling —
# confirm against the TensorFlow version this notebook is pinned to.
tf.enable_eager_execution()

In [None]:
import altair as alt
# Select Altair's "notebook" renderer for the charts returned by later cells.
alt.renderers.enable("notebook")
import pandas as pd
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

In [None]:
# Smoke test: draw 4 samples from a LogNormal with loc=3 and scale=2.
tfd.LogNormal(loc=3, scale=2).sample(4)

In [None]:
# NOTE(review): `elbo` is defined in a later cell, so on a fresh kernel run
# top-to-bottom this call raises NameError — move it below the definition.
elbo(-0.6, 1.1)

In [None]:
# NOTE(review): no definition of `elbo_grid` is visible in this notebook —
# presumably a leftover from an earlier name of `elbo`; confirm and rename or remove.
elbo_grid(-0.6, 1.1)

In [None]:
# Target distribution that the LogNormal approximations below are fitted to:
# a Gamma with concentration `alpha` and rate `beta`.
alpha = 2.
beta = 5.
gamma = tfd.Gamma(concentration=alpha, rate=beta)

def elbo(loc, scale):
    """Grid approximation of the ELBO of a LogNormal(loc, scale) fit to `gamma`.

    Both log-densities are evaluated on the module-level `x_grid`. The
    variational log-density is passed through a softmax to obtain weights that
    sum to one over the grid, and the weighted sum of ``log p(x) - log q(x)``
    is returned.

    Args:
        loc: location parameter of the LogNormal approximation.
        scale: scale parameter of the LogNormal approximation.

    Returns:
        NumPy scalar holding the weighted sum over the grid.
    """
    log_q = tfd.LogNormal(loc=loc, scale=scale).log_prob(x_grid).numpy()
    log_p = gamma.log_prob(x_grid).numpy()
    # softmax(log q) == q / sum(q) over the grid points.
    grid_weights = scipy.special.softmax(log_q)
    return np.sum(grid_weights * (log_p - log_q))

def elbo_sample(loc, scale, sample_count = 100):
    """Monte Carlo estimate of the ELBO of a LogNormal(loc, scale) fit to `gamma`.

    Draws `sample_count` samples from the variational LogNormal and averages
    ``log p(x) - log q(x)`` over them, where ``p`` is the module-level `gamma`.

    Args:
        loc: location parameter of the LogNormal approximation.
        scale: scale parameter of the LogNormal approximation.
        sample_count: number of samples to average over.

    Returns:
        NumPy scalar holding the sample average of the log ratio.
    """
    # Build the variational distribution once and reuse it for sampling and scoring.
    q = tfd.LogNormal(loc=loc, scale=scale)
    # `sample` takes a sample shape (plus seed/name); the sample dtype follows
    # the distribution's parameters, so no dtype argument is passed here.
    q_sample = q.sample(sample_count)
    return (1/sample_count) * tf.reduce_sum(
        gamma.log_prob(q_sample) - q.log_prob(q_sample)).numpy()

# Evaluation grid shared by `elbo` and `plot_fit`: 50 evenly spaced points on [0.01, 4].
x_grid = np.linspace(start=0.01, stop=4, num=50)

def plot_fit(loc, scale):
    """Plot the target Gamma density against a LogNormal(loc, scale) density.

    Both densities are evaluated on the module-level `x_grid`; the plot title
    carries the grid ELBO returned by `elbo(loc, scale)`.

    Args:
        loc: location parameter of the LogNormal approximation.
        scale: scale parameter of the LogNormal approximation.

    Returns:
        Whatever `DataFrame.plot` returns for the line plot.
    """
    approximation = tfd.LogNormal(loc=loc, scale=scale)
    densities = pd.DataFrame({
        "x": x_grid,
        "gamma": gamma.prob(x_grid).numpy(),
        "lognormal": approximation.prob(x_grid).numpy(),
    })
    title = "elbo: " + str(elbo(loc, scale))
    return densities.plot(x="x", y=["gamma", "lognormal"], kind="line", title=title)

# Show the fit (with the grid ELBO in the title) for loc=-0.6, scale=1.1.
plot_fit(-0.6, 1.1)

In [None]:
def tf_lognormal_multi_elbo_grad(loc, scale, true_distribution, classical, particle_count):
    """Reparameterised gradient of a multi-particle objective w.r.t. (loc, scale).

    Draws `particle_count` standard-normal noise values, maps them to LogNormal
    samples via ``x = exp(loc + scale * epsilon)``, and differentiates one of
    two objectives built from the per-particle log ratios
    ``log p(x_i) - log q(x_i)``:

    * `classical` truthy: ``log(sum_i p(x_i) / q(x_i))`` — the log of the full
      sum of ratios, as in the equation just before (7) in the 2018 ICLR paper.
    * otherwise: the mean of the per-particle log ratios.

    Everything is computed in log space, which is mathematically identical to
    forming the density ratios directly but avoids under/overflow in ``p / q``.

    Args:
        loc: location parameter of the LogNormal approximation.
        scale: scale parameter of the LogNormal approximation.
        true_distribution: target distribution; must provide `log_prob`.
        classical: selects the objective, see above.
        particle_count: number of noise draws (particles); must be >= 1.

    Returns:
        np.ndarray of shape (2,): the gradients w.r.t. loc and scale, in that order.

    Raises:
        ValueError: if `particle_count` is smaller than 1.
    """
    if particle_count < 1:
        raise ValueError("particle_count must be at least 1")

    # A variety of epsilons.
    epsilon = tf.constant(np.random.normal(0., 1., particle_count), dtype=tf.float32)
    tf_loc = tf.constant(loc, dtype=tf.float32)
    tf_scale = tf.constant(scale, dtype=tf.float32)
    with tf.GradientTape() as g:
        # Constants are not tracked automatically, so watch them explicitly.
        g.watch(tf_loc)
        g.watch(tf_scale)
        tf_x = tf.math.exp(tf_loc + tf_scale * epsilon)
        log_ratio = (
            true_distribution.log_prob(tf_x)
            - tfp.distributions.LogNormal(loc=tf_loc, scale=tf_scale).log_prob(tf_x)
        )
        if classical:
            # logsumexp(log p - log q) == log(sum(p / q)).
            y = tf.math.reduce_logsumexp(log_ratio)
        else:
            y = tf.math.reduce_mean(log_ratio)
    # Differentiate outside the tape context so the gradient ops are not recorded.
    return np.array([grad.numpy() for grad in g.gradient(y, [tf_loc, tf_scale])])

def gradient_analysis(loc, scale, classical, particle_count, gradient_count):
    """Sample many stochastic gradients at (loc, scale) and summarise them.

    Calls `tf_lognormal_multi_elbo_grad` against the module-level `gamma`
    `gradient_count` times, prints `describe()` statistics for both gradient
    components, and returns a histogram of the loc component.

    Args:
        loc: location parameter at which gradients are evaluated.
        scale: scale parameter at which gradients are evaluated.
        classical: objective selector forwarded to the gradient function.
        particle_count: particles per gradient estimate.
        gradient_count: number of independent gradient estimates to draw.

    Returns:
        Altair bar chart of binned `loc_grad` values against their counts.
    """
    # One row per gradient estimate: [loc_grad, scale_grad].
    gradient_rows = np.vstack([
        tf_lognormal_multi_elbo_grad(loc, scale, gamma, classical, particle_count)
        for _ in range(gradient_count)
    ])
    gradients_df = pd.DataFrame(gradient_rows, columns=["loc_grad", "scale_grad"])

    print(gradients_df.describe())
    loc_histogram = alt.Chart(gradients_df).mark_bar().encode(
        alt.X("loc_grad", bin=alt.Bin()),
        y='count()',
    )
    return loc_histogram

In [None]:
def gradient_ascent(loc, scale, step_size, particle_count, step_count):
    """Run plain stochastic gradient ascent on (loc, scale) and record the path.

    Each step draws one gradient estimate from `tf_lognormal_multi_elbo_grad`
    (non-classical objective, target `gamma`) using `particle_count` particles
    and moves both parameters by `step_size` times the gradient. The grid ELBO
    from `elbo` is recorded at the start point and after every step.

    Args:
        loc: initial location parameter.
        scale: initial scale parameter.
        step_size: multiplier applied to each gradient component.
        particle_count: particles used for every gradient estimate.
        step_count: number of ascent steps to take.

    Returns:
        pd.DataFrame with columns "loc", "scale", "elbo" and `step_count + 1`
        rows (the initial point followed by one row per step).
    """
    elbos = [[loc, scale, elbo(loc, scale)]]
    for _ in range(step_count):
        # Honour the caller's particle_count rather than a fixed particle budget.
        grad = tf_lognormal_multi_elbo_grad(loc, scale, gamma, False, particle_count)
        # Cast to Python floats so the parameters do not inherit the gradient's dtype.
        loc = loc + step_size * float(grad[0])
        scale = scale + step_size * float(grad[1])
        elbos.append([loc, scale, elbo(loc, scale)])
    return pd.DataFrame(elbos, columns = ["loc", "scale", "elbo"])

# Gradient ascent from (loc, scale) = (-1.6, 1.1) with step_size=0.1,
# particle_count=10 and step_count=10, then plot the recorded ELBO per row.
gradient_results = gradient_ascent(-1.6, 1.1, 0.1, 10, 10)
gradient_results["elbo"].plot.line()

In [None]:
# Plot the fit at the last (loc, scale) row recorded by the gradient-ascent run.
plot_fit(gradient_results.iloc[-1,0], gradient_results.iloc[-1,1])

In [None]:
# Plot the fit at loc=-1.6, scale=0.69.
# NOTE(review): the origin of these values is not shown in the notebook —
# presumably a hand-picked point to compare with the run above; confirm.
plot_fit(-1.6, 0.69)

In [None]:
# Gradient spread at (loc, scale) = (-1.6, 1.1) with classical=True:
# 1000 gradient estimates of 100 particles each.
gradient_analysis(-1.6, 1.1, True, particle_count = 100, gradient_count = 1000)

In [None]:
# Same point, (-1.6, 1.1), with classical=False:
# 1000 gradient estimates of 100 particles each.
gradient_analysis(-1.6, 1.1, False, particle_count = 100, gradient_count = 1000)

In [None]:
# Gradient spread at (loc, scale) = (-0.6, 1.1) with classical=True:
# 1000 gradient estimates of 100 particles each.
gradient_analysis(-0.6, 1.1, True, particle_count = 100, gradient_count = 1000)