In [1]:
%%bash
# Pin BOTH packages so a fresh "Restart & Run All" resolves the same pair.
# An unpinned tensorflow-probability upgrade installs the newest release,
# which targets a newer TensorFlow than the 2.0.0 pinned on the next line.
# 0.8.0 is the version this notebook was originally executed with.
pip install -q --upgrade tensorflow-gpu==2.0.0
pip install -q --upgrade tensorflow-probability==0.8.0

ERROR: tensorflow 1.15.0 has requirement tensorboard<1.16.0,>=1.15.0, but you'll have tensorboard 2.0.0 which is incompatible.
ERROR: tensorflow 1.15.0 has requirement tensorflow-estimator==1.15.1, but you'll have tensorflow-estimator 2.0.1 which is incompatible.
ERROR: tensor2tensor 1.14.1 has requirement tensorflow-probability==0.7.0, but you'll have tensorflow-probability 0.8.0 which is incompatible.


In [0]:
%matplotlib inline

import numpy as np

import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow_probability as tfp

# Short aliases for the TFP layers and distributions submodules.
tfpl = tfp.layers
tfd = tfp.distributions

In [0]:
# Ground-truth slope, intercept and input interval of the synthetic problem.
w0 = 0.125
b0 = 5.
x_range = [-20, 60]


def load_dataset(n=150, n_tst=150):
    """Generate a noisy 1-D regression dataset with input-dependent noise.

    The global NumPy RNG is re-seeded with 43 on every call, so repeated
    calls with the same arguments return identical arrays.

    Args:
        n: number of training points, drawn uniformly from ``x_range``.
        n_tst: number of evenly spaced test inputs spanning ``x_range``.

    Returns:
        Tuple ``(y, x, x_tst)`` of float32 arrays with shapes
        ``(n,)``, ``(n, 1)`` and ``(n_tst, 1)``.
    """
    np.random.seed(43)
    lo, hi = x_range
    span = hi - lo

    def noise_scale(inputs):
        # Noise std grows quadratically with the position inside x_range.
        frac = (inputs - lo) / span
        return 3 * (0.25 + frac**2.)

    # Draw order matters for reproducibility: uniform inputs first, then noise.
    x_train = span * np.random.rand(n) + lo
    noise = np.random.randn(n) * noise_scale(x_train)
    y_train = (w0 * x_train * (1. + np.sin(x_train)) + b0) + noise

    x_grid = np.linspace(lo, hi, num=n_tst)

    # Targets stay 1-D; inputs get a trailing feature axis.
    return (
        y_train.astype(np.float32),
        x_train[..., np.newaxis].astype(np.float32),
        x_grid[..., np.newaxis].astype(np.float32),
    )


y, x, x_tst = load_dataset()

In [0]:
def negloglik(y, rv_y):
    """Return the negative log-likelihood of ``y`` under ``rv_y``.

    Args:
        y: observed value(s) passed to ``rv_y.log_prob``.
        rv_y: object exposing a ``log_prob`` method.

    Returns:
        The negated result of ``rv_y.log_prob(y)``.
    """
    return -rv_y.log_prob(y)

In [0]:
def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
    """Build a mean-field Normal posterior over ``kernel_size + bias_size`` weights.

    The returned model holds one variable of length ``2 * n``: the first ``n``
    entries are the locations, the last ``n`` are unconstrained scale
    parameters.

    Args:
        kernel_size: number of kernel weights.
        bias_size: number of bias weights (default 0).
        dtype: dtype forwarded to the variable layer.

    Returns:
        A Keras ``Sequential`` whose output is an ``Independent(Normal)``
        distribution with the last axis reinterpreted as the event.
    """
    n = kernel_size + bias_size
    # Inverse softplus of 1: softplus(c) == 1, so a raw value of 0 maps to a
    # scale of about 1.
    c = np.log(np.expm1(1.0))

    def make_posterior(t):
        loc = t[..., :n]
        # The 1e-5 floor keeps the scale strictly positive.
        scale = 1e-5 + tf.nn.softplus(c + t[..., n:])
        return tfd.Independent(
            tfd.Normal(loc=loc, scale=scale),
            reinterpreted_batch_ndims=1)

    return tfk.Sequential([
        tfpl.VariableLayer(2 * n, dtype=dtype),
        tfpl.DistributionLambda(make_posterior),
    ])

def prior(kernel_size, bias_size=0, dtype=None):
    """Build a fixed, unit-scale Normal prior over ``kernel_size + bias_size`` weights.

    The location is stored in a variable layer created with
    ``trainable=False``, so the optimizer does not update it.

    Args:
        kernel_size: number of kernel weights.
        bias_size: number of bias weights (default 0).
        dtype: dtype forwarded to the variable layer.

    Returns:
        A Keras ``Sequential`` whose output is an ``Independent(Normal)``
        distribution with scale 1 and the last axis reinterpreted as the event.
    """
    n = kernel_size + bias_size

    def make_prior(t):
        return tfd.Independent(
            tfd.Normal(loc=t, scale=1),
            reinterpreted_batch_ndims=1)

    return tfk.Sequential([
        tfpl.VariableLayer(n, dtype=dtype, trainable=False),
        tfpl.DistributionLambda(make_prior),
    ])

In [0]:
# One variational dense unit followed by a unit-scale Normal output head.
# The KL term is weighted by 1 / (number of training examples), which matches
# the per-example mean of the likelihood term taken in the training step.
model = tfk.Sequential([
    tfpl.DenseVariational(
        units=1,
        make_posterior_fn=posterior_mean_field,
        make_prior_fn=prior,
        kl_weight=1 / x.shape[0],
    ),
    tfpl.DistributionLambda(
        make_distribution_fn=lambda t: tfd.Normal(loc=t, scale=1),
    ),
])

In [0]:
# Adam optimizer used by the training step.
optimizer = tfk.optimizers.Adam(learning_rate=1e-2)
# Running mean of the per-step loss; read and reset by the training loop.
train_loss = tfk.metrics.Mean(name="train_loss")

In [0]:
def train_step(x, y):
    """Run one gradient update of the global ``model`` on the batch ``(x, y)``.

    The loss is the mean negative log-likelihood of ``y`` under the model's
    output distribution plus the sum of ``model.losses`` (the KL divergence
    term). The resulting loss is also fed into the ``train_loss`` metric.

    Args:
        x: model inputs.
        y: targets; reshaped to the shape of the model output before scoring.
    """
    with tf.GradientTape() as tape:
        y_dist = model(x)
        # Match the targets to the output shape so log_prob is evaluated
        # element-wise rather than broadcast across mismatched axes.
        targets = tf.reshape(y, y_dist.shape)
        nll = -tf.reduce_mean(y_dist.log_prob(targets))
        kl = sum(model.losses)  # kl divergence loss
        loss = nll + kl

    params = model.trainable_variables
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))

    train_loss(loss)

In [9]:
# Each iteration is one update on the full training set.
for i in range(1000):
    train_step(x, y)
    if i % 200:
        continue
    # Report the mean loss accumulated since the previous report, then
    # start a fresh accumulation window.
    print("epoch: {} loss: {}".format(i, train_loss.result().numpy()))
    train_loss.reset_states()

epoch: 0 loss: 240.15577697753906
epoch: 200 loss: 277.5927734375
epoch: 400 loss: 101.48238372802734
epoch: 600 loss: 55.88340377807617
epoch: 800 loss: 40.153587341308594
