##### Copyright 2018 The TensorFlow Authors.

Licensed under the Apache License, Version 2.0 (the "License");

In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License"); { display-mode: "form" }
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Sigmoid Belief Network with TFP

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://drive.google.com/file/d/1-lcrM5lV0TUmJJBRXfXajYDPGswAXkzd/view?usp=sharing"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href=""><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>
<br>
<br>
<br>

Original content from [this repository](https://github.com/blei-lab/edward/blob/master/examples/sigmoid_belief_network.py), created by [the Blei Lab](http://www.cs.columbia.edu/~blei/).

Ported to TensorFlow Probability by Matthew McAteer ([`@MatthewMcAteer0`](https://twitter.com/MatthewMcAteer0)), with help from the TFP team at Google ([`tfprobability@tensorflow.org`](mailto:tfprobability@tensorflow.org)).

---

>[Dependencies & Prerequisites](#scrollTo=2ZtWUjXYRXQi)

>[Introduction](#scrollTo=2ZtWUjXYRXQi)

>>[Data](#scrollTo=2ZtWUjXYRXQi)

>>[Model](#scrollTo=2ZtWUjXYRXQi)

>>[Inference](#scrollTo=2ZtWUjXYRXQi)

>>[Criticism](#scrollTo=2ZtWUjXYRXQi)

>[References](#scrollTo=2ZtWUjXYRXQi)

## Dependencies & Prerequisites

In [0]:
# Install the nightly TensorFlow Probability build and the `observations`
# package (the source of the `caltech101_silhouettes` loader imported below).
# `-q` keeps pip's output quiet.
!pip3 install -q tfp-nightly
!pip3 install -q observations

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import IPython.display
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from observations import caltech101_silhouettes
from scipy.misc import imsave
from tensorflow_probability import distributions as tfd

# import edward as ed
# from edward.models import Bernoulli
# from edward.util import Progbar

In [0]:
def session_options(enable_gpu_ram_resizing=True, enable_xla=True):
    """
    Build the `tf.ConfigProto` used when opening this notebook's sessions.

    Device-placement logging is always switched on; the two arguments toggle
    the optional GPU-memory and XLA settings.

    Args:
      enable_gpu_ram_resizing: when truthy, set `gpu_options.allow_growth`
        so the session does not reserve all GPU memory up front.
      enable_xla: when truthy, set the optimizer's global JIT level to
        `tf.OptimizerOptions.ON_1` (XLA, the Accelerated Linear Algebra
        compiler; see https://www.tensorflow.org/performance/xla/).

    Returns:
      The configured `tf.ConfigProto`.
    """
    session_config = tf.ConfigProto()
    session_config.log_device_placement = True

    if enable_xla:
        jit_level = tf.OptimizerOptions.ON_1
        optimizer_opts = session_config.graph_options.optimizer_options
        optimizer_opts.global_jit_level = jit_level

    if enable_gpu_ram_resizing:
        # Growing GPU memory on demand lets several colabs share one GPU;
        # otherwise a single colab would malloc all of the GPU RAM.
        session_config.gpu_options.allow_growth = True

    return session_config


def reset_sess(config=None):
    """
    Convenience function to create the TF graph & session or reset them.

    Resets the default graph, closes the session currently stored in the
    module-level `sess` (if there is one), and stores a fresh
    `tf.InteractiveSession` in `sess`.

    Args:
      config: session configuration passed to `tf.InteractiveSession`.
        When None, the result of `session_options()` is used.
    """
    import warnings

    if config is None:
        config = session_options()
    global sess
    tf.reset_default_graph()
    # On the very first call there is no previous session yet, so look the
    # name up instead of relying on a bare `except` to hide the NameError.
    previous = globals().get("sess")
    if previous is not None:
        try:
            previous.close()
        except Exception as err:
            # Closing is best-effort: report the problem but still go on to
            # open the new session. KeyboardInterrupt/SystemExit propagate.
            warnings.warn(
                "Could not close the previous session: {}".format(err))
    sess = tf.InteractiveSession(config=config)

    
def evaluate(tensors):
    """
    Evaluate tensors in either Graph mode (default) or Eager mode
    (https://www.tensorflow.org/guide/eager).

    Args:
      tensors: a single tensor, or a list/tuple of tensors. In graph mode
        anything accepted by `Session.run` may be passed.

    Returns:
      In eager mode, the `.numpy()` value of the tensor, or a list/tuple
      (matching the input container) of such values. In graph mode, whatever
      the default session's `run` returns for `tensors`.

    Raises:
      RuntimeError: in graph mode when no default session is installed.
    """
    if tf.executing_eagerly():
        if isinstance(tensors, (list, tuple)):
            # Materialise the values now rather than returning a lazy
            # generator that can only be consumed once.
            values = [t.numpy() for t in tensors]
            return tuple(values) if isinstance(tensors, tuple) else values
        return tensors.numpy()
    session = tf.get_default_session()
    if session is None:
        raise RuntimeError(
            "evaluate() needs a default session in graph mode; "
            "call reset_sess() first.")
    # Deliberately not `with session:` -- leaving such a block closes the
    # session, which would break every later cell that uses it.
    return session.run(tensors)

# Create the notebook's initial default graph and interactive session.
reset_sess()


def strip_consts(graph_def, max_const_size=32):
  """
  Return a copy of `graph_def` with oversized constants replaced by a stub.

  Every node is copied into a new `tf.GraphDef`. For nodes whose op is
  'Const', a `tensor_content` payload longer than `max_const_size` bytes is
  swapped for a short "<stripped N bytes>" placeholder. The input is left
  untouched.
  """
  stripped = tf.GraphDef()
  for source_node in graph_def.node:
    node_copy = stripped.node.add()
    node_copy.MergeFrom(source_node)
    if node_copy.op != 'Const':
      continue
    payload = node_copy.attr['value'].tensor
    n_bytes = len(payload.tensor_content)
    if n_bytes > max_const_size:
      payload.tensor_content = bytes("<stripped %d bytes>"%n_bytes, 'utf-8')
  return stripped


def draw_graph(model, *args, **kwargs):
  """
  Render the TensorFlow graph built by `model` as an inline HTML widget.

  `model(*args, **kwargs)` is called with a brand-new `tf.Graph` as the
  default graph. The resulting graph definition is passed through
  `strip_consts` (constants above 32 bytes are stubbed out) and embedded in
  an iframe that loads the TensorBoard graph viewer, which is then displayed
  through `IPython.display`.
  """
  scratch_graph = tf.Graph()
  with scratch_graph.as_default():
    model(*args, **kwargs)
  stripped_def = strip_consts(scratch_graph.as_graph_def(), max_const_size=32)
  pbtxt_literal = repr(str(stripped_def))
  # A random suffix gives every rendered graph its own element id.
  element_id = 'graph'+str(np.random.rand())
  code = """
      <script>
        function load() {{
          document.getElementById("{id}").pbtxt = {data};
        }}
      </script>
      <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
      <div style="height:600px">
        <tf-graph-basic id="{id}"></tf-graph-basic>
      </div>
  """.format(data=pbtxt_literal, id=element_id)

  # The snippet is placed in the iframe's `srcdoc` attribute, so its double
  # quotes have to be HTML-escaped first.
  escaped_code = code.replace('"', '&quot;')
  iframe = """
      <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
  """.format(escaped_code)
  IPython.display.display(IPython.display.HTML(iframe))

In [0]:
# Visualizing the graph we've constructed
# draw_graph(linear_mixed_effects_model, features_train)

## Introduction

Sigmoid belief network (Neal, 1990) trained on the Caltech 101 Silhouettes data set.

Default settings take ~143s / epoch on a Titan X (Pascal). 

Results on epoch 100:
- Training negative log-likelihood: 209.443
- Test negative log-likelihood: 161.244
- Using n_train_samples=50 converges to test NLL of 157.824.

In [0]:
# tf.flags.DEFINE_string("data_dir", default="/tmp/data", help="")
# tf.flags.DEFINE_string("out_dir", default="/tmp/out", help="")
# tf.flags.DEFINE_integer("batch_size", default=24, help="Batch size during training.")
# tf.flags.DEFINE_list("hidden_sizes", default=[300, 100, 50, 10], help="Hidden size per layer from bottom-up.")
# tf.flags.DEFINE_integer("n_train_samples", default=10, help="Number of samples for training.")
# tf.flags.DEFINE_integer("n_test_samples", default=1000, help="Number of samples to calculate test log-lik.")
# tf.flags.DEFINE_float("step_size", default=1e-3, help="Learning rate step size.")
# tf.flags.DEFINE_integer("n_epoch", default=100, help="")
# tf.flags.DEFINE_integer("n_iter_per_epoch", default=10000, help="")
# FLAGS = tf.flags.FLAGS

# Notebook-level settings, kept as plain variables in place of the
# commented-out tf.flags definitions above.
data_dir = "/tmp/data"            # Passed to the dataset loader
out_dir = "/tmp/out"              # Where the sampled images are written
batch_size = 24                   # Batch size during training
hidden_sizes = [300, 100, 50, 10] # Hidden size per layer from bottom-up
n_train_samples = 10              # Number of samples for training
n_test_samples = 1000             # Number of samples to calculate test log-lik
step_size = 1e-3                  # Learning rate step size
n_epoch = 100                     # Number of passes of the outer training loop
n_iter_per_epoch = 10000          # Training updates per epoch

# Create the output directory up front so the image writes in the training
# loop have somewhere to go.
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

In [0]:
def generator(array, batch_size):
    """
    Endlessly yield batches taken along the first axis of `array`.

    Rows are visited in order and the iteration wraps around to the start of
    the array when it runs off the end, so every batch has exactly
    `batch_size` rows -- including when `batch_size` is larger than the
    array, in which case rows repeat within a batch.

    Args:
      array: NumPy array; batches are taken along axis 0.
      batch_size: positive number of rows per batch.

    Yields:
      An array of `batch_size` rows of `array` (a copy, not a view).

    Raises:
      ValueError: on the first `next()` if `batch_size` is not positive or
        `array` has no rows.
    """
    n_rows = array.shape[0]
    if batch_size <= 0:
        raise ValueError(
            "batch_size must be positive, got {}".format(batch_size))
    if n_rows == 0:
        raise ValueError("cannot draw batches from an empty array")

    start = 0  # index of the first row of the next batch
    while True:
        # Modular indexing handles the wrap-around in one step, however many
        # times the batch laps the array.
        indices = np.arange(start, start + batch_size) % n_rows
        yield array[indices]
        start = (start + batch_size) % n_rows

### Data

In [0]:
# Seed NumPy and TensorFlow so runs are repeatable. This stands in for the
# Edward call `ed.set_seed(42)`, which is unavailable now that Edward is no
# longer imported.
np.random.seed(42)
tf.set_random_seed(42)

# Load the Caltech 101 Silhouettes splits; the labels are discarded.
(x_train, _), (x_test, _), (x_valid, _) = caltech101_silhouettes(
      data_dir)
# Endless stream of training mini-batches.
x_train_generator = generator(x_train, batch_size)
# Placeholder for a batch of flattened 28 x 28 images.
x_ph = tf.placeholder(tf.int32, [None, 28 * 28])

### Model

In [0]:
# Generative model, built from the top layer down. The top layer is a
# Bernoulli with zero logits; each layer below it is a Bernoulli whose logits
# are a dense function of a sample drawn from the layer above.
zs = [0] * len(hidden_sizes)
for l in reversed(range(len(hidden_sizes))):
    if l == len(hidden_sizes) - 1:
        # One row of zero logits per example in the fed batch.
        logits = tf.zeros([tf.shape(x_ph)[0], hidden_sizes[l]])
    else:
        # A tfd distribution is not a tensor, so it has to be sampled
        # explicitly before being fed to the dense layer.
        parent_sample = tf.cast(zs[l + 1].sample(), tf.float32)
        logits = tf.layers.dense(parent_sample, hidden_sizes[l],
                                 activation=None)
    zs[l] = tfd.Bernoulli(logits=logits)

# Pixel likelihood: one Bernoulli per pixel, conditioned on the bottom layer.
bottom_sample = tf.cast(zs[0].sample(), tf.float32)
x = tfd.Bernoulli(logits=tf.layers.dense(bottom_sample, 28 * 28,
                                         activation=None))

### Inference

In [0]:
# Define the variational model with the reverse ordering of the probability
# model: with hidden_sizes = [300, 100, 50, 10], p is 10-50-100-300 from
# top-down and q is 300-100-50-10 from bottom-up.
qzs = [0] * len(hidden_sizes)
for l in range(len(hidden_sizes)):
    if l == 0:
          # The bottom variational layer is conditioned on the fed pixels.
          logits = tf.layers.dense(tf.cast(x_ph, tf.float32),
                               hidden_sizes[l], activation=None)
    else:
          # NOTE(review): qzs[l - 1] is a tfd.Bernoulli object rather than a
          # sampled tensor -- confirm tf.cast accepts it; this probably needs
          # an explicit `.sample()`.
          logits = tf.layers.dense(tf.cast(qzs[l - 1], tf.float32),
                               hidden_sizes[l], activation=None)
    qzs[l] = tfd.Bernoulli(logits=logits)

# NOTE(review): `ed` is never bound in this notebook (the Edward import is
# commented out), so this looks like an unported Edward KLqp step.
# TODO: replace with a TFP-based ELBO and training op before this cell can run.
inference = ed.KLqp({z: qz for z, qz in zip(zs, qzs)}, data={x: x_ph})
optimizer = tf.train.AdamOptimizer(step_size)
inference.initialize(optimizer=optimizer, n_samples=n_train_samples)

# Build tensor for log-likelihood given one variational sample to run
# on test data: the summed log-probability of the fed batch, negated and
# divided by the batch size.
# NOTE(review): `tf.copy` is presumably Edward's `ed.copy` left half-renamed
# -- confirm, and replace with an explicit re-application of the model's
# layers to the variational samples.
x_post = tf.copy(x, {z: qz for z, qz in zip(zs, qzs)})
x_neg_log_prob = (-tf.reduce_sum(x_post.log_prob(x_ph)) /
                    tf.cast(tf.shape(x_ph)[0], tf.float32))


# `.run()` is called without a session argument, so it relies on a default
# session being installed (reset_sess() opens a tf.InteractiveSession).
tf.global_variables_initializer().run()

In [0]:
# Reuse the interactive session opened by reset_sess(): the previous cell
# initialised the variables in it, whereas a brand-new tf.Session() would
# start with every variable uninitialised.
sess = tf.get_default_session()

# `x` is a distribution object, not a tensor, so it cannot be fetched
# directly. Build its sampling op once here rather than adding a new op to
# the graph on every epoch.
x_prior_sample = x.sample()

for epoch in range(n_epoch):
    print("Epoch {}".format(epoch))
    train_loss = 0.0

    # The Keras progress bar stands in for the commented-out
    # `edward.util.Progbar` import.
    pbar = tf.keras.utils.Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
        pbar.update(t)
        x_batch = next(x_train_generator)
        # `inference` is the object built in the Inference cell.
        info_dict = inference.update(feed_dict={x_ph: x_batch})
        train_loss += info_dict['loss']

    # Print per-data point loss, averaged over training epoch.
    train_loss /= n_iter_per_epoch
    train_loss /= batch_size
    print("Training negative log-likelihood: {:0.3f}".format(train_loss))

    # Average the test loss over repeated evaluations on the full test set.
    test_loss = [sess.run(x_neg_log_prob, {x_ph: x_test})
                 for _ in range(n_test_samples)]
    test_loss = np.mean(test_loss)
    print("Test negative log-likelihood: {:0.3f}".format(test_loss))

    # Prior predictive check.
    # Feed the placeholder only to determine the sample size.
    images = sess.run(x_prior_sample, {x_ph: x_batch})
    for m in range(batch_size):
        imsave("{}/{}.png".format(out_dir, m), images[m].reshape(28, 28))


## References

1. Sigmoid belief network (Neal, 1990)

In [0]:
from IPython.core.display import HTML
def css_styling():
    """
    Load the notebook stylesheet and wrap it for display.

    Returns:
      An `HTML` object holding the raw contents of "../styles/custom.css";
      the path is resolved relative to the current working directory.

    Raises:
      IOError/OSError: if the stylesheet cannot be opened or read.
    """
    # `with` closes the file handle even if reading fails.
    with open("../styles/custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)
# Build the stylesheet HTML object; as the cell's final expression it becomes
# the cell's output.
css_styling()

#  "#F15854",  // red
#  "#5DA5DA",  // blue
#  "#FAA43A",  // orange
#  "#60BD68",  // green
#  "#F17CB0",  // pink
#  "#B2912F",  // brown
#  "#B276B2",  // purple
#  "#DECF3F",  // yellow
#  "#4D4D4D",  // gray