# Likelihood-free variational inference (LFVI) for conditional density estimation

In [None]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import edward as ed
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import os
import tensorflow as tf

from edward.models import Uniform
from observations import mnist
%matplotlib inline

# Fix the seed through Edward before anything random is built or drawn
# (presumably this seeds both NumPy and TensorFlow -- confirm against the Edward docs).
ed.set_seed(44)
# Scratch locations; neither name is referenced again in the cells visible here.
data_dir = "/tmp/data"
out_dir = "/tmp/out"
# Create the output directory if it is not there yet.
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

def generator(array, batch_size):
    """Yield an endless stream of float32 batches along ``array``'s first axis.

    Rows are served in order. When fewer than ``batch_size`` rows remain, the
    batch is completed with rows from the beginning of ``array`` and iteration
    continues from there (wrap-around).

    Args:
        array: NumPy array to batch along axis 0.
        batch_size: Number of rows per batch; expected not to exceed
            ``array.shape[0]``.

    Yields:
        A new ``np.float32`` array holding the next batch. The batch is always
        a copy, so modifying it in place never changes ``array``.
    """
    start = 0  # pointer to where we are in iteration
    while True:
        stop = start + batch_size
        diff = stop - array.shape[0]
        if diff <= 0:
            # The whole batch fits before the end of the array.
            batch = array[start:stop]
            start += batch_size
        else:
            # Wrap around: tail of the array followed by its first `diff` rows.
            batch = np.concatenate((array[start:], array[:diff]))
            start = diff
        # Cast on every path. Casting only the wrap-around batches would hand
        # out in-range batches as views of `array` in its original dtype.
        yield batch.astype(np.float32)


# define experiment, generate toy data

In [None]:
N = 1000  # number of samples drawn for each toy data set (train and test)
M = 1000  # batch size during training
D = 1  # number of columns of an observation x
K = 1  # number of columns of a latent z


def gen_z(N):
    """Draw N latents: standard-normal samples of shape (N, K), scaled by 1/2."""
    standard_draws = np.random.normal(size=(N, K))
    return standard_draws / 2.


def gen_x(z):
    """Simulate observations for latents z.

    Each row is exp(z)/2 - 1/2 plus Gaussian noise of shape (rows, D)
    scaled by 1/25.
    """
    n_rows = z.shape[0]
    mean = np.exp(z) / 2. - 1 / 2.
    noise = np.random.normal(size=(n_rows, D)) / 25.
    return mean + noise

# Draw order from NumPy's global RNG: train latents, test latents,
# then the observation noise for train and for test.
z_train = gen_z(N)
z_test = gen_z(N)
x_train = gen_x(z_train)
x_test = gen_x(z_test)
# Endless stream of M-row batches over the training observations.
x_train_generator = generator(x_train, M)


# translate experiment to Edward

In [None]:
from edward.models import Normal, PointMass

# Log-ratio estimator r(x, z). It needs enough capacity, otherwise nothing works.
def discriminative_network(xdict, zdict, betadict):
    """Return the discriminator output as logits, one value per input row.

    Args:
        xdict: dict holding the x sample under the module-level key ``x``.
        zdict: dict holding the z sample under the module-level key ``z``.
        betadict: accepted for the discriminator call signature; not used.

    Returns:
        Output of a linear 1-unit layer stacked on three 64-unit ReLU layers
        applied to the column-wise concatenation of the x and z samples.
    """
    joint = tf.concat([xdict[x], zdict[z]], 1)
    hidden = joint
    for _ in range(3):
        hidden = tf.layers.dense(hidden, 64, activation=tf.nn.relu)
    logits = tf.layers.dense(hidden, 1, activation=None)
    return logits


# define simple generative model
# Prior: an [M, K] block of independent normals with zero mean and scale 1/2.
z = Normal(loc=tf.zeros([M, K]), scale=tf.ones([M,K])/2.)  # p(z)
# Likelihood: mean exp(z)/2 - 0.5 and scale 1/25, the same form gen_x uses for the toy data.
x = Normal(loc= tf.exp(z)/2. - 0.5, scale=tf.ones([M,D])/25.)    # p(x|z)

# define simple flexible recognition model q(z|x)
def generative_network(eps):
    """Map network inputs to the location (mean) of q(z | x).

    Two tanh hidden layers of width 5 followed by a linear output layer.

    Args:
        eps: Input tensor, one row per example. In this notebook it is the
            [M, D] placeholder ``x_ph``.

    Returns:
        Tensor with K columns, one per latent dimension, used as ``loc`` of ``qz``.
    """
    net = tf.layers.dense(eps, 5, activation=tf.nn.tanh)
    net = tf.layers.dense(net, 5, activation=tf.nn.tanh)
    # The output parameterizes z, so its width is the latent size K rather than
    # the data size D. The two only coincide because D == K == 1 here.
    return tf.layers.dense(net, K, activation=None)

# The placeholder has a fully specified shape, so every feed must have exactly M rows.
x_ph = tf.placeholder(tf.float32, [M, D]) # container for x_train
# q(z | x): normal whose mean comes from the network above and whose scale is fixed at 1/10.
qz = Normal(loc=generative_network(x_ph), scale=tf.ones([K])/10.) # 'single-component MDN with fixed covariance'
# Inverting the likelihood mean x = exp(z)/2 - 1/2 gives z = log(2x + 1), hence the reference below.
#qz = Normal(loc=tf.log(2 * x_ph + 1), scale=tf.ones([M,D])/10.) # this would be the target solution


# ImplicitKLqp does everything ...
# Pair the prior z with its approximation qz, bind the model's x to the
# placeholder, and hand over the log-ratio estimator as discriminator.
inference = ed.ImplicitKLqp(latent_vars={z: qz}, 
                            data={x: x_ph}, 
                            discriminator=discriminative_network,
                            global_vars=None)

# Separate Adam optimizers with identical settings for the two sets of parameters.
optimizer = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9)   # optimizer for q(z|x)
optimizer_d = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9) # optimizer for r(x,z)

# n_iter is read back by the training loop; n_print is presumably the
# progress-report interval -- confirm against the Edward docs.
inference.initialize(
    optimizer=optimizer, optimizer_d=optimizer_d,
    global_step = None, #tf.Variable(0, trainable=False, name="global_step"),
    global_step_d = None, #tf.Variable(0, trainable=False, name="global_step_d"),    
    n_iter=10000, n_print=1000)

In [None]:
# start session
# Fetch the session through Edward, then initialize every global TF variable.
# Re-running this cell after training resets the learned parameters.
sess = ed.get_session()
tf.global_variables_initializer().run()

# visualize initial state of density estimator

In [None]:
# Pick M distinct test points: x_ph has a fixed first dimension of M.
idx = np.random.choice(x_test.shape[0], M, replace=False)

# Sample q(z | x) once and use the same draw for the plot and the statistic.
# Evaluating qz twice would compare two different random samples.
z_before = qz.eval(session=sess, feed_dict={x_ph: x_test[idx]})

plt.plot(x_test[idx], z_before, 'bo')
plt.plot(x_test[idx], z_test[idx], 'ko')

plt.title('q(z | x) before training')
plt.xlabel('x')
plt.ylabel('z')
plt.legend(['est.', 'true'])
plt.show()

# Correlation between the true latents and the sampled estimates (cell output).
np.corrcoef(z_test[idx].T, z_before.T)[1,0]

# fit conditional density model

In [None]:

# Alternate updates for the configured number of iterations, feeding one
# M-row batch of training observations per step.
for _ in range(inference.n_iter):
    x_batch = next(x_train_generator)
    info_dict = inference.update(feed_dict={x_ph: x_batch})
    inference.print_progress(info_dict)

# Sample q(z | x) once on the held-out points chosen before training, so the
# plot and the correlation below describe the same draw.
z_after = qz.eval(session=sess, feed_dict={x_ph: x_test[idx]})

plt.plot(x_test[idx], z_after, 'bo')
plt.plot(x_test[idx], z_test[idx], 'ko')
plt.title('q(z | x) after training')
plt.xlabel('x')
plt.ylabel('z')
plt.legend(['est.', 'true'])
plt.show()

# Correlation between the true latents and the sampled estimates (cell output).
np.corrcoef(z_test[idx].T, z_after.T)[1,0]

In [None]:
# Bare expression: shows the `increment_t` attribute of the inference object as
# the cell output; nothing is called or run here.
# NOTE(review): looks like leftover inspection -- confirm the cell is still needed.
inference.increment_t

# testbench

In [None]:
import six
from edward.util import check_latent_vars, copy, get_session
from edward.models import RandomVariable

# Rebuild by hand the p- and q-sample tensors for the model so the trained
# discriminator can be inspected.
# NOTE(review): the structure appears to mirror what ImplicitKLqp builds
# internally -- confirm against the Edward source.
sess = ed.get_session()
discriminator = discriminative_network

scope = tf.get_default_graph().unique_name("inference")
latent_vars = {z: qz}
# Bind the observed variable to the data placeholder, exactly as in the
# ImplicitKLqp call. Binding it to the prior z would turn the "q" pairs into
# (z_prior, z_q) instead of (x_data, z_q).
data = {x: x_ph}
qbeta_sample = {}  # stays empty: nothing is ever added to it
pz_sample = {}
qz_sample = {}
# Dedicated loop names so the module-level z / qz are not rebound.
for z_rv, qz_rv in six.iteritems(latent_vars):
    # Copy local variables p(z), q(z) to draw samples
    # z' ~ p(z | beta'), z' ~ q(z | beta').
    pz_copy = copy(z_rv, dict_swap=qbeta_sample, scope=scope)
    pz_sample[z_rv] = pz_copy.value()
    qz_sample[z_rv] = qz_rv.value()

# Collect x' ~ p(x | z', beta') and x' ~ q(x).
dict_swap = qbeta_sample.copy()
dict_swap.update(qz_sample)
x_psample = {}
x_qsample = {}
for x_rv, x_data in six.iteritems(data):
    if isinstance(x_rv, tf.Tensor):
        if "Placeholder" not in x_rv.op.type:
            # Copy p(x | z, beta) to get draw p(x | z', beta').
            x_copy = copy(x_rv, dict_swap=dict_swap, scope=scope)
            x_psample[x_rv] = x_copy
            x_qsample[x_rv] = x_data
    elif isinstance(x_rv, RandomVariable):
        # Copy p(x | z, beta) to get draw p(x | z', beta').
        x_copy = copy(x_rv, dict_swap=dict_swap, scope=scope)
        x_psample[x_rv] = x_copy.value()
        x_qsample[x_rv] = x_data

# Share the trained discriminator weights instead of creating a second set.
# Assumes inference.initialize() already created them under the "Disc"
# variable scope -- TODO confirm against Edward's ImplicitKLqp.
with tf.variable_scope("Disc", reuse=True):
    r_psample = discriminator(x_psample, pz_sample, qbeta_sample)

# Deliberately no tf.global_variables_initializer() here: no new variables are
# created above, and re-initializing would throw away the training result.

# One draw from q(z | x) on the next training batch.
x_batch_ = next(x_train_generator)
plt.plot(x_batch_, qz.eval(session=sess, feed_dict={x_ph: x_batch_}), 'bo')
plt.show()

# x_ph has static shape [M, D], so every feed needs exactly M rows.
x_batch = x_train[:M]
plt.plot(x_batch, x_psample[x].eval(feed_dict={x_ph: x_batch}), 'bo')
plt.show()

# Draws from the copied prior do not depend on x_ph, so nothing is fed.
plt.plot(z_train[:M], pz_sample[z].eval(), 'bo')
plt.show()

# Discriminator output on the p-samples for the same batch.
rs = r_psample.eval(feed_dict={x_ph: x_batch})
    
    

In [None]:
# Discriminator output on the p-sample pairs, reusing the existing "Disc" weights.
with tf.variable_scope("Disc", reuse=True):
    r_psample = inference.discriminator(x_psample, pz_sample, qbeta_sample)
# x_ph is declared with M rows; feeding all of x_train relies on N == M.
rs = r_psample.eval(feed_dict={x_ph : x_train})

# Plot against the array that was actually fed so x and y have the same length.
plt.plot(x_train, rs, '.')
plt.show()

In [None]:
# Discriminator output on the q-sample pairs, reusing the existing "Disc" weights.
with tf.variable_scope("Disc", reuse=True):
    r_qsample = inference.discriminator(x_qsample, qz_sample, qbeta_sample)
# x_ph is declared with M rows; feeding all of x_train relies on N == M.
rs = r_qsample.eval(feed_dict={x_ph : x_train})

# Plot against the array that was actually fed so x and y have the same length.
plt.plot(x_train, rs, '.')
plt.show()

In [None]:
# Histogram of 1000 independent draws from q(z | x) at one probe input.
x_batch = next(x_train_generator)
x_batch[0,0] = 10  # overwrite the first entry of the batch with the probe value x = 10
# Each evaluation samples the whole batch; only the probe row's draw is kept.
qs = np.fromiter(
    (qz.eval(session=sess, feed_dict={x_ph : x_batch})[0,0] for _ in range(1000)),
    dtype=np.float64,
    count=1000)
plt.hist(qs)

plt.show()