# LFVI for conditional density estimation

- testing the log-density-ratio network

In [None]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import edward as ed
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import os
import tensorflow as tf

from edward.models import Uniform
from observations import mnist
%matplotlib inline

# Fix the seed once, before any data is drawn or any graph op is built.
ed.set_seed(44)

# Scratch paths; only the output directory is created here.
data_dir, out_dir = "/tmp/data", "/tmp/out"
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

def generator(array, batch_size):
    """Generate batches with respect to array's first axis, forever.

    Batches are consecutive slices of ``array``. When a slice would run past
    the end, it is completed with rows from the beginning (wrap-around), and
    iteration continues from there.

    Args:
        array: NumPy array to slice along axis 0.
        batch_size: Number of rows per batch.

    Yields:
        A ``np.float32`` array holding the next ``batch_size`` rows.
    """
    start = 0  # pointer to where we are in iteration
    while True:
        stop = start + batch_size
        diff = stop - array.shape[0]  # rows by which the slice overshoots
        if diff <= 0:
            batch = array[start:stop]
            start += batch_size
        else:
            # Wrap around: tail of the array followed by its first rows.
            batch = np.concatenate((array[start:], array[:diff]))
            start = diff
        # Cast every batch, not only the wrapped ones, so that the dtype of
        # the yielded arrays does not depend on the position in the array.
        batch = batch.astype(np.float32)
        yield batch


# define experiment, generate toy data

In [None]:
# N: number of toy samples drawn per data split.
# M: batch size during training.
N = 1000
M = 1000
# D: dimensionality of the observation x; K: dimensionality of the latent z.
D = 1
K = 1


def gen_z(N):
    """Draw N latent samples: standard normal draws of shape (N, K), halved."""
    standard_draws = np.random.normal(size=(N, K))
    return standard_draws / 2.


def gen_x(z):
    """Simulate observations for the latents z, one row per row of z.

    The mean is exp(z)/2 - 1/2; additive normal noise of shape
    (z.shape[0], D) is scaled by 1/25.
    """
    n_rows = z.shape[0]
    mean = np.exp(z) / 2. - 1 / 2.
    noise = np.random.normal(size=(n_rows, D)) / 25.
    return mean + noise

# Latents first (train, then test), then the matching observations; the
# order of the draws is kept so the random stream is consumed identically.
z_train = gen_z(N)
z_test = gen_z(N)
x_train = gen_x(z_train)
x_test = gen_x(z_test)

# Endless stream of M-row batches over the training observations.
x_train_generator = generator(x_train, M)


# translate experiment to Edward

In [None]:
from edward.models import Normal, PointMass

# Log-ratio estimator r(x,z). It has to be flexible enough, or nothing
# downstream works!
def discriminative_network(xdict, zdict, betadict):
    """Outputs probability in logits.

    Args:
        xdict: dict keyed by the module-level random variable ``x``; the
            value is the tensor fed to the network as the observation.
        zdict: dict keyed by the module-level random variable ``z``; the
            value is the tensor fed to the network as the latent.
        betadict: accepted for interface compatibility; not used here.

    Returns:
        Output of the final linear layer (1 unit, no activation).
    """
    # Observation and latent are joined along axis 1.
    hidden = tf.concat([xdict[x], zdict[z]], 1)
    # Two ReLU hidden layers of 64 units each, created in this order.
    for _ in range(2):
        hidden = tf.layers.dense(hidden, 64, activation=tf.nn.relu)
    logits = tf.layers.dense(hidden, 1, activation=None)
    return logits


# define simple generative model
# It uses the same means and scales as the NumPy simulators gen_z / gen_x:
# z has mean 0 and scale 1/2; x given z has mean exp(z)/2 - 1/2 and scale 1/25.
z = Normal(loc=tf.zeros([M, K]), scale=tf.ones([M,K])/2.)  # p(z)
x = Normal(loc= tf.exp(z)/2. - 0.5, scale=tf.ones([M,D])/25.)    # p(x|z)

# define recognition model
# Both placeholders have a fixed first dimension M, so every feed must
# supply exactly M rows.
x_ph = tf.placeholder(tf.float32, [M, D]) # container for x_train/x_test
z_ph = tf.placeholder(tf.float32, [M, K]) # container for z_train/z_test
# log(2x + 1) inverts the generative mean x = exp(z)/2 - 1/2:
#qz = Normal(loc=tf.log(2 * x_ph + 1), scale=tf.ones([M,D])/10.) # this would be the target solution
# The active q(z|x) below has mean -cos(3x), which is NOT that inverse, and no
# trainable parameters.
# NOTE(review): presumably a deliberately mismatched q for exercising the
# ratio network - confirm.
# NOTE(review): the scale is shaped [M, D] although z is [M, K]; this only
# lines up because D == K == 1.
qz = Normal(loc=-tf.cos(3*x_ph), scale=tf.ones([M,D])/10.) # fixed q(z|x), not the target solution


In [None]:
import six
from edward.inferences.gan_inference import GANInference
from edward.models import RandomVariable
from edward.util import check_latent_vars, copy, get_session

# Build the inputs for the ratio estimator and instantiate it three times
# with shared weights.
scope = tf.get_default_graph().unique_name("inference")
discriminator=discriminative_network
qbeta_sample = {}  # no global parameters: empty swap dict, passed through unchanged
# "q" pair: x from the placeholder together with a draw of z from qz.
# "p" pair: copies of the model's x and z made under `scope`.
# NOTE(review): assumes both copy() calls resolve to the same copied z inside
# `scope`, so (x, z) is a joint model sample - confirm against edward.util.copy.
x_qsample, x_psample = {x : x_ph},        {x: copy(x, dict_swap=qbeta_sample, scope=scope).value()}
qz_sample, pz_sample = {z: qz.value()},   {z: copy(z, dict_swap=qbeta_sample, scope=scope).value()}

# Test inputs: both x and z are fed through placeholders, so the network can
# be evaluated at arbitrary (x, z) points.
px_test, pz_test = {x: x_ph}, {z: z_ph}

# The first call creates the variables under "Disc"; the next two re-enter the
# scope with reuse=True so all three outputs come from the same network.
with tf.variable_scope("Disc"):
    r_qsample = discriminator(x_qsample, qz_sample, qbeta_sample)
with tf.variable_scope("Disc", reuse=True):
    r_psample = discriminator(x_psample, pz_sample, qbeta_sample)
with tf.variable_scope("Disc", reuse=True):
    r_test = discriminator(px_test, pz_test, qbeta_sample)
    

def log_loss(psample, qsample):
    """Point-wise log loss.

    Args:
        psample: logits that are scored against the label 1.
        qsample: logits that are scored against the label 0.

    Returns:
        Element-wise sum of the two sigmoid cross-entropy terms.
    """
    # Term for the p-side logits, target label 1.
    p_term = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(psample), logits=psample)
    # Term for the q-side logits, target label 0.
    q_term = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.zeros_like(qsample), logits=qsample)
    return p_term + q_term
# Scalar training loss: mean of the point-wise log loss, with the model-side
# outputs labelled 1 and the q-side outputs labelled 0.
ratio_loss = log_loss
loss_d = tf.reduce_mean(ratio_loss(r_psample, r_qsample))

# Only the variables created under the "Disc" scope are trained.
var_list_d = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, scope="Disc")
print('var_list_d', var_list_d)

optimizer_d = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9) # optimizer for r(x,z)

# Explicit gradient computation, paired with the variables for apply_gradients.
grads_d = tf.gradients(loss_d, var_list_d)
grads_and_vars_d = list(zip(grads_d, var_list_d))
# The global-step variable is created inline and not kept under a Python name.
train_d = optimizer_d.apply_gradients(grads_and_vars_d,
global_step=tf.Variable(0, trainable=False, name="global_step_d"))

# Op that adds 1 to a separate, non-trainable "iteration" counter when run.
increment_t = tf.Variable(0, trainable=False, name="iteration").assign_add(1)


In [None]:
# start session
sess = ed.get_session()
# Initialise every variable created so far (network weights and the counters).
tf.global_variables_initializer().run()

# Training feed: the full training set in one go. x_ph is shaped [M, D], so
# this relies on N == M; x_train_generator is not used here.
feed_dict = {x_ph : x_train}


In [None]:
# Train only the ratio estimator and keep the loss from every step.
lss = np.zeros(10000)
fetches = [train_d, increment_t, loss_d]
for i in range(len(lss)):
    # One run of the update op, the iteration counter and the loss.
    _, t, loss_d_ = sess.run(fetches, feed_dict=feed_dict)
    lss[i] = loss_d_

# Loss against step index, with a logarithmic x axis.
plt.semilogx(lss)
plt.show()

In [None]:
# Evaluation grid: 25 x-values by 40 z-values = 1000 points, which matches the
# M rows the placeholders require.
gx, gz = np.meshgrid(np.linspace(-1., 2., 25), np.linspace(-1.5, 1.5, 40))

# Rebinds feed_dict (previously the training feed) to the flattened grid.
feed_dict = {x_ph: gx.reshape(-1,1),
             z_ph: gz.reshape(-1,1)}


plt.figure(figsize=(12,12))
# Left panel: z drawn from q(z|x) (blue) against the true latents (black).
plt.subplot(1,2,1)
# M distinct test indices in random order, to fill the fixed-size placeholder.
idx = np.random.choice(x_test.shape[0], M, replace=False)
plt.plot(x_test[idx], qz.eval(session=sess, feed_dict={x_ph: x_test[idx]}), 'bo')
plt.plot(x_test[idx], z_test[idx], 'ko')
plt.title('q(z | x) before training')
plt.xlabel('x')
plt.ylabel('z')
plt.legend(['est.', 'true'], loc=4)
plt.axis([-1, 2, -1.5, 1.5])

# Right panel: ratio-network output on the grid, reshaped to (40 z, 25 x) and
# flipped vertically so that larger z is at the top. The image axes show
# pixel indices, not x/z values.
plt.subplot(1,2,2)
plt.imshow( np.flipud(r_test.eval(session=sess, feed_dict=feed_dict).reshape(40,25)) ) 
plt.colorbar()
plt.show()

# Correlation between the true test latents and z evaluated from qz.
# NOTE(review): this is a second qz.eval call, so presumably a fresh draw
# rather than the sample plotted above - confirm.
np.corrcoef(z_test[idx].T, qz.eval(session=sess, feed_dict={x_ph: x_test[idx]}).T)[1,0]

# Gallery

In [None]:
# true model (up to covariance)

In [None]:
# offset +1

In [None]:
# offset -1 

In [None]:
# positive slope

In [None]:
# uncorrelated model

In [None]:
# negative slope