##### Copyright 2018 The TensorFlow Authors.

Licensed under the Apache License, Version 2.0 (the "License");

In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License"); { display-mode: "form" }
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Stochastic Block Model with TFP

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://drive.google.com/file/d/1N-Su0zIkLNFf-qPeCFipf1imrb2ErUgD/view?usp=sharing"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href=""><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>
<br>
<br>
<br>


Original content from [this repository](https://github.com/blei-lab/edward), created by [the Blei Lab](http://www.cs.columbia.edu/~blei/).

Ported to TensorFlow Probability by Matthew McAteer ([`@MatthewMcAteer0`](https://twitter.com/MatthewMcAteer0)), with help from the TFP team at Google ([`tfprobability@tensorflow.org`](mailto:tfprobability@tensorflow.org)).

---

>[Dependencies & Prerequisites](#scrollTo=2ZtWUjXYRXQi)

>[Introduction](#scrollTo=2ZtWUjXYRXQi)

>>[Data](#scrollTo=2ZtWUjXYRXQi)

>>[Model](#scrollTo=2ZtWUjXYRXQi)

>>[Inference](#scrollTo=2ZtWUjXYRXQi)

>>[Criticism](#scrollTo=2ZtWUjXYRXQi)

>[References](#scrollTo=2ZtWUjXYRXQi)


## Dependencies & Prerequisites

In [0]:
!pip3 install -q tfp-nightly
!pip3 install -q observations

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import time
from observations.karate import karate
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import seaborn as sns
from IPython.core.pylabtools import figsize

import tensorflow as tf                            # importing Tensorflow

import tensorflow_probability as tfp               # Tensorflow probability
from tensorflow_probability import edward2 as ed   # Edwardlib extension

from sklearn.metrics.cluster import adjusted_rand_score

# Take distributions and bijectors from TensorFlow Probability (imported above
# as `tfp`) instead of the deprecated `tf.contrib.distributions` copies.
tfd = tfp.distributions   # Basic probability distribution toolkit
tfb = tfp.bijectors       # and their modifiers

dtype = np.float32    # A tool to make sure we're inputting the right data type

%matplotlib inline
plt.style.use('fivethirtyeight')        # Styling plots like FiveThirtyEight

%config InlineBackend.figure_format='retina' # improves resolution of plots

import warnings
warnings.filterwarnings('ignore')       # Some Python imports raise deprecation warnings

In [0]:
def session_options(enable_gpu_ram_resizing=True, enable_xla=True):
    """
    Build the `tf.ConfigProto` used to create this notebook's session.

    Device-placement logging is always switched on.

    Args:
      enable_gpu_ram_resizing: when truthy, set `gpu_options.allow_growth`
        so that several colabs can share one GPU instead of the first one
        malloc'ing all of its RAM.
      enable_xla: when truthy, set the global JIT level to `ON_1`, turning
        on XLA (Accelerated Linear Algebra), a domain-specific compiler for
        linear algebra that optimizes TensorFlow computations.
        See https://www.tensorflow.org/performance/xla/.

    Returns:
      The configured `tf.ConfigProto`.
    """
    proto = tf.ConfigProto(log_device_placement=True)
    if enable_xla:
        jit_level = tf.OptimizerOptions.ON_1
        proto.graph_options.optimizer_options.global_jit_level = jit_level
    if enable_gpu_ram_resizing:
        proto.gpu_options.allow_growth = True
    return proto


def reset_sess(config=None):
    """
    Convenience function to create the TF graph & session or reset them.

    Resets the default graph, closes the previously created global `sess`
    (if there is one) and binds a fresh `tf.InteractiveSession` to the
    global name `sess`.

    Args:
      config: optional session configuration; when None, the result of
        `session_options()` is used.
    """
    global sess
    if config is None:
        config = session_options()
    tf.reset_default_graph()
    # On the first call no global `sess` exists yet, so look it up explicitly
    # instead of relying on a bare `except:` to hide the NameError.
    previous = globals().get('sess')
    if previous is not None:
        try:
            previous.close()
        except Exception:
            # Best effort: a session that fails to close must not prevent
            # creating the new one. Unlike a bare `except:`, this lets
            # KeyboardInterrupt/SystemExit propagate.
            pass
    sess = tf.InteractiveSession(config=config)

    
def evaluate(tensors):
    """
    A "Universal" evaluate function for both running either Graph mode (default)
    or Eager mode (https://www.tensorflow.org/guide/eager) in Tensorflow.

    Args:
      tensors: in Eager mode, an iterable of tensors exposing `.numpy()`;
        in Graph mode, anything accepted by `Session.run`.

    Returns:
      In Eager mode, a generator yielding `t.numpy()` for each element of
      `tensors`; in Graph mode, the result of running `tensors` on the
      default session.
    """
    # `tf.executing_eagerly()` replaces the undefined name `context`.
    if tf.executing_eagerly():
        return (t.numpy() for t in tensors)
    # Use the default session directly: entering it with `with` is not needed
    # to call `run`, and leaving a `tf.Session` context closes that session.
    return tf.get_default_session().run(tensors)

# Reset the default graph and create the notebook's global interactive session.
reset_sess()


def strip_consts(graph_def, max_const_size=32):
  """
  Return a copy of `graph_def` with oversized constant payloads replaced.

  Every node is copied into a new `tf.GraphDef`. For `Const` nodes whose
  `tensor_content` is longer than `max_const_size` bytes, the content is
  replaced by a short placeholder recording the original size.
  """
  stripped = tf.GraphDef()
  for source_node in graph_def.node:
    node = stripped.node.add()
    node.MergeFrom(source_node)
    if node.op != 'Const':
      continue
    payload = node.attr['value'].tensor
    n_bytes = len(payload.tensor_content)
    if n_bytes > max_const_size:
      payload.tensor_content = bytes("<stripped %d bytes>" % n_bytes, 'utf-8')
  return stripped


def draw_graph(model, *args, **kwargs):
  """
  Visualize TensorFlow graph.

  Calls `model(*args, **kwargs)` inside a fresh `tf.Graph`, strips large
  constants from the resulting graph definition and displays it in an
  iframe that loads the TensorBoard graph component.

  Args:
    model: callable that builds the ops to visualize.
    *args: positional arguments forwarded to `model`.
    **kwargs: keyword arguments forwarded to `model`.
  """
  # Imported here because the bare name `IPython` is never bound at file
  # level (only names *from* IPython submodules are imported).
  from IPython.display import HTML, display

  graph = tf.Graph()
  with graph.as_default():
    model(*args, **kwargs)
  graph_def = graph.as_graph_def()
  strip_def = strip_consts(graph_def, max_const_size=32)
  # A random element id keeps several rendered graphs from colliding.
  code = """
      <script>
        function load() {{
          document.getElementById("{id}").pbtxt = {data};
        }}
      </script>
      <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
      <div style="height:600px">
        <tf-graph-basic id="{id}"></tf-graph-basic>
      </div>
  """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

  # The snippet is embedded in `srcdoc`, so its double quotes must be escaped.
  iframe = """
      <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
  """.format(code.replace('"', '&quot;'))
  display(HTML(iframe))

## Introduction

### Data & parameters

  Load Zachary's Karate Club [@zachary1977information].
  It is a social network of friendships between 34 members of a karate
  club at a US university from 1970 to 1972. During the study a
  conflict between instructor 'Mr. Hi' and administrator 'Officer' led
  the club to split into two. Half of the members formed a new club
  around 'Mr.  Hi'; other members found a new instructor or quit karate.
  Args:
    path: str.
      Path to directory which either stores file or otherwise file will
      be downloaded and extracted there. Filename is `out.ucidata-zachary`.
  Returns:
    Tuple of adjacency matrix as a np.ndarray `x_train` with 34 rows
    and 34 columns and np.ndarray `y_train` of class memberships (0 for
    'Mr. Hi' and 1 for 'Officer').
 

[Observations](https://github.com/edwardlib/observations) provides
a one line Python API for loading standard data sets in machine
learning. It automates the process from downloading, extracting,
loading, and preprocessing data. Observations helps keep the workflow
reproducible and follow sensible standards.
Observations is a standalone Python library and must be installed
separately from Edward.

In [0]:
# Load the karate-club data through `observations`, using ~/data as the
# storage directory. `x_data` is later supplied as the observed value when the
# MAP loss is built; the name `z` is rebound in the model cell.
x_data, z = karate('~/data')

In [0]:
# Adjacency matrix and class memberships, as described in the text above.
X_data, Z_true = karate("~/data")
N = X_data.shape[0]  # number of vertices
K = 2  # number of clusters
learning_rate = 1e-4  # step size handed to the optimizer in the inference cell
max_steps = 10000  # upper bound on the number of optimization steps
epsilon= 0.001  # training stops once successive losses differ by less than this

def compute_loss(latent_vars, data):
    """
    Build the loss minimized for MAP estimation.

    Args:
      latent_vars: dict mapping each latent variable to the object whose
        `.value` stands in for it.
      data: dict mapping each observed variable to its observed value.

    Returns:
      The negated sum, over every key of `latent_vars` and then `data`, of
      the summed log-probability its `.distribution` assigns to the
      associated value, plus the sum of the collected regularization losses.
    """
    # Value assigned to each variable; observed data overrides a latent entry
    # that shares the same key.
    assigned = {rv: approx.value for rv, approx in latent_vars.items()}
    assigned.update(data.items())

    joint_log_prob = 0.0
    # Latent terms are accumulated first, then the observed terms.
    for group in (latent_vars, data):
        for rv in group.keys():
            dist = rv.distribution.copy()
            joint_log_prob += tf.reduce_sum(dist.log_prob(assigned[rv]))

    penalty = tf.reduce_sum(tf.losses.get_regularization_losses())
    return -joint_log_prob + penalty

### Model

In [0]:
# Generative model, written with Edward2 random variables: they can be fed to
# TensorFlow ops such as `tf.matmul` and expose the `.distribution` attribute
# that `compute_loss` reads.
gamma = ed.Dirichlet(concentration=tf.ones([K]))        # cluster proportions
pi = ed.Beta(concentration1=tf.ones([K, K]),
             concentration0=tf.ones([K, K]))            # block connection probabilities
# One single-count draw per vertex; `N` (the number of vertices) replaces the
# undefined name `V`. `probs=` is explicit because the first positional
# parameter after `total_count` is `logits`.
z = ed.Multinomial(total_count=tf.ones([N]), probs=gamma)
# Edge probability between two vertices is the `pi` entry of their clusters.
x = ed.Bernoulli(probs=tf.matmul(z, tf.matmul(pi, tf.transpose(z))))

### Define the MAP loss

In [0]:
# Point estimates of the latent variables. Edward2 random variables are used
# because `compute_loss` reads each estimate through its `.value` attribute.
# `N` (the number of vertices) replaces the undefined name `V`.
qgamma = ed.VectorDeterministic(
    tf.nn.softmax(tf.get_variable('qgamma/params', [K])))  # gamma must sum to one
qpi = ed.VectorDeterministic(
    tf.nn.sigmoid(tf.get_variable('qpi/params', [K, K])))  # each pi must be between 0 and 1
qz = ed.VectorDeterministic(
    tf.nn.softmax(tf.get_variable('qz/params', [N, K]), axis=1))  # each row of Z (axis=1) must sum to one

latent_vars = {gamma: qgamma, pi: qpi, z: qz}
data = {x: x_data}
MAP_loss = compute_loss(latent_vars, data)

### Inference

In [0]:
# Minimize the MAP loss with Adagrad.
optimizer = tf.train.AdagradOptimizer(learning_rate)
train_op = optimizer.minimize(MAP_loss)

loss_vals = []  # loss recorded at every completed step, for the plot below
with tf.Session() as session:
    start = time.time()
    session.run(tf.global_variables_initializer())
    # This line was indented one space too far, which is an IndentationError.
    for step in range(max_steps):
        _, loss_value = session.run([train_op, MAP_loss])
        duration = time.time() - start
        if step % 100 == 0:
            print("Step: {:>3d} Loss: {:.3f} ({:.3f} sec)".format(step, 
                                                                  loss_value,
                                                                  duration))
        # Stop early once the loss changes by less than `epsilon` between
        # two consecutive steps.
        if step > 0:
            if abs(loss_vals[-1]-loss_value) <  epsilon:
                break
        loss_vals.append(loss_value)

fig = plt.figure()
plt.plot(range(len(loss_vals)), loss_vals)
plt.xlabel("Step")
plt.ylabel("MAP loss")
plt.show()

In [0]:
# Visualizing the graph we've constructed
# draw_graph(linear_mixed_effects_model, features_train)

## References

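1. Zachary, W. W. (1977). An information flow model for conflict and fission in small groups. *Journal of Anthropological Research*, 33(4), 452–473.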

In [0]:
from IPython.core.display import HTML
def css_styling():
    """
    Read `../styles/custom.css` and return its contents wrapped in `HTML`.

    The file is opened in a `with` block so the handle is closed as soon as
    the contents have been read.
    """
    with open("../styles/custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)
css_styling()

#  "#F15854",  // red
#  "#5DA5DA",  // blue
#  "#FAA43A",  // orange
#  "#60BD68",  // green
#  "#F17CB0",  // pink
#  "#B2912F",  // brown
#  "#B276B2",  // purple
#  "#DECF3F",  // yellow
#  "#4D4D4D",  // gray
