### Week 9: Normalising flows pt 1 - bijectors

In [None]:
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

import numpy as np
import matplotlib.pyplot as plt

## Tensorflow bijectors

### Base distribution

In [None]:
# Base density for the flow: a 2-D Gaussian with zero mean and unit diagonal scale.
base_dist = tfd.MultivariateNormalDiag(
    loc=tf.zeros([2], tf.float32),
    scale_diag=tf.constant([1, 1], tf.float32),
)

In [None]:
# Number of points requested from the base distribution in one sample call.
SAMPLE_BATCH_SIZE = 512

In [None]:
# Symbolic tensor of SAMPLE_BATCH_SIZE draws from the base distribution; values
# are only produced when the tensor is evaluated in a session. The op-level seed
# makes the sequence of draws repeat after a kernel restart, so the plots built
# from these samples are reproducible.
z = base_dist.sample(SAMPLE_BATCH_SIZE, seed=0)
print(z)

In [None]:
# An InteractiveSession installs itself as the default session, so
# `Tensor.eval()` works without an explicit session argument.
sess = tf.InteractiveSession()

In [None]:
# Materialise one batch of base samples as a NumPy array via the open session.
z_samples = sess.run(z)
print(type(z_samples))
print(z_samples.shape)

In [None]:
# Scatter plot of the base samples on a square canvas with fixed axis limits.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(z_samples[:, 0], z_samples[:, 1], s=10)
ax.set_title("Base distribution: standard normal")
ax.set_xlim([-4, 4])
ax.set_ylim([-4, 4])
plt.show()

### Transform the distribution

A Bijector is used to transform distributions. Bijectors are the building blocks of a normalising flow.
They are characterised by the following three main methods:

1. `forward`
2. `inverse`
3. `log_det_jacobian` (exposed as `forward_log_det_jacobian` and `inverse_log_det_jacobian`)

By convention, the `forward` operation acts on samples from the base distribution (to generate new samples), while the `inverse` operation maps data back to the base space so that probabilities can be calculated.

For example, the Affine Bijector:

In [None]:
# Per-coordinate affine map: scale each axis by scale_diag, then add shift.
affine_bijector = tfb.Affine(
    shift=[1., -1.],
    scale_diag=[0.5, 1.5],
)

In [None]:
# Push the base samples through the bijector (the result is still a symbolic tensor).
fwd_z = affine_bijector.forward(x=z)

In [None]:
# Fetch both tensors in a single run so that x_samples is the transform of the
# very same draw stored in z_samples (a separate run would draw z again).
z_samples, x_samples = sess.run(fetches=[z, fwd_z])

In [None]:
# Side-by-side scatter plots: base samples (left) and their affine image (right).
fig, (ax, ax2) = plt.subplots(1, 2, figsize=(12, 5))

ax.scatter(z_samples[:, 0], z_samples[:, 1], s=10)
ax.set_title("Base distribution: standard normal")

ax2.scatter(x_samples[:, 0], x_samples[:, 1], s=10, color='r')
ax2.set_title("Transformed distribution: shift [1, -1], scale [0.5, 1.5]")

# Identical limits on both panels so the shift and scaling are directly comparable.
for panel in (ax, ax2):
    panel.set_xlim([-5, 5])
    panel.set_ylim([-5, 5])
plt.show()

In [None]:
# Map the transformed samples back; inverse composed with forward should recover z.
fwd_inv_z = affine_bijector.inverse(y=fwd_z)

In [None]:
# Round-trip check: override the sampled tensor `z` with known values and confirm
# that inverse(forward(z)) reproduces them.
rng = np.random.RandomState(0)  # fixed seed so the check is repeatable
# Cast up front: the graph works in float32, so compare against what is really fed.
latents = rng.random_sample((SAMPLE_BATCH_SIZE, 2)).astype(np.float32)
roundtrip = sess.run(fwd_inv_z, feed_dict={z: latents})
# np.allclose defaults to atol=1e-8, which is below float32 resolution once the
# shift of magnitude 1 has been added and removed again; latents close to zero
# could then fail spuriously. An absolute tolerance of 1e-6 covers that rounding.
print(np.allclose(latents, roundtrip, atol=1e-6))

### Computing probabilities

In [None]:
# Placeholder for a single 2-D point in data space, supplied at run time.
x = tf.placeholder(dtype=tf.float32, shape=(1, 2))

# Log-determinant of the inverse map's Jacobian at x; event_ndims=1 reduces over
# the last axis, i.e. each row is treated as one 2-D event.
log_det_dzdx = affine_bijector.inverse_log_det_jacobian(y=x, event_ndims=1)
log_det_dzdx

In [None]:
# Pull the data-space point back through the bijector into the base space.
inv_x = affine_bijector.inverse(y=x)
inv_x

In [None]:
# Log-density of the pulled-back point under the base distribution.
log_prob_inv_x = base_dist.log_prob(value=inv_x)
log_prob_inv_x

In [None]:
x_fixed_sample = np.array([[1.0, -1.0]])  # Mode of the transformed distribution

# Evaluate the inverse log-det-Jacobian at that point.
sess.run(fetches=log_det_dzdx, feed_dict={x: x_fixed_sample})

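Check: the forward map scales each coordinate independently, so its Jacobian determinant is just the product of the scaling factors, $0.5 \times 1.5$. The inverse log-determinant is therefore $-\log 0.5 - \log 1.5$: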

In [None]:
# Expected inverse log-det-Jacobian: minus the summed logs of the two scale factors.
-(np.log(0.5) + np.log(1.5))

Calculate log probability of `x`:

In [None]:
# Change of variables: log p(x) = log p_base(inverse(x)) + inverse log-det-Jacobian.
# Evaluated at the same point used for the log-det check (x_fixed_sample).
log_prob_x = log_prob_inv_x + log_det_dzdx
sess.run(log_prob_x, feed_dict={x: x_fixed_sample})

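Check: the log-density of the 2-D standard normal at its mode is $\log\frac{1}{2\pi}$; adding the inverse log-det-Jacobian $-\log 0.5 - \log 1.5$ should give the same value: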

In [None]:
# First term: sqrt(1 / (2*pi)**2) = 1 / (2*pi), the density of a 2-D standard normal
# at its mode (the origin). The two subtracted logs are the inverse
# log-det-Jacobian of the 0.5 / 1.5 diagonal scaling.
np.log(np.sqrt(1 / (2 * np.pi)**2)) - np.log(0.5) - np.log(1.5)

In [None]:
# Close the session opened earlier with tf.InteractiveSession().
sess.close()