##### Copyright 2018 The TensorFlow Authors.

Licensed under the Apache License, Version 2.0 (the "License");

In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License"); { display-mode: "form" }
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Probabilistic Matrix Factorization with TFP

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://drive.google.com/file/d/1xcKgtdaCFO33-g70tbfzVlIh_-NnDU6t/view?usp=sharing"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href=""><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>
<br>
<br>
<br>

Original content from [this repository](https://github.com/blei-lab/edward), created by [the Blei Lab](http://www.cs.columbia.edu/~blei/).

Ported to TensorFlow Probability by Matthew McAteer ([`@MatthewMcAteer0`](https://twitter.com/MatthewMcAteer0)), with help from the TFP team at Google ([`tfprobability@tensorflow.org`](mailto:tfprobability@tensorflow.org)).

---

>[Dependencies & Prerequisites](#scrollTo=2ZtWUjXYRXQi)

>[Introduction](#scrollTo=2ZtWUjXYRXQi)

>>[Data](#scrollTo=2ZtWUjXYRXQi)

>>[Model](#scrollTo=2ZtWUjXYRXQi)

>>[Inference](#scrollTo=2ZtWUjXYRXQi)

>>[Criticism](#scrollTo=2ZtWUjXYRXQi)

>[References](#scrollTo=2ZtWUjXYRXQi)



## Dependencies & Prerequisites

In [0]:
!pip3 install -q tfp-nightly
!pip3 install -q observations

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# from edward.models import Normal

In [0]:
def session_options(enable_gpu_ram_resizing=True, enable_xla=True):
    """
    Build the `tf.ConfigProto` used to create this notebook's session.

    Device placement logging is always switched on.

    Args:
        enable_gpu_ram_resizing: If true, set `gpu_options.allow_growth` so
            that several colabs can connect to the same GPU; otherwise one
            colab allocates all of the GPU RAM.
        enable_xla: If true, set the graph optimizer's global JIT level to
            `tf.OptimizerOptions.ON_1`, which turns on XLA (Accelerated
            Linear Algebra), a domain-specific compiler for linear algebra
            that optimizes TensorFlow computations.
            See https://www.tensorflow.org/performance/xla/.

    Returns:
        The configured `tf.ConfigProto`.
    """
    session_config = tf.ConfigProto()
    session_config.log_device_placement = True

    # Each feature is opt-in: a falsy flag leaves that option untouched.
    if enable_gpu_ram_resizing:
        gpu_settings = session_config.gpu_options
        gpu_settings.allow_growth = True

    if enable_xla:
        jit_settings = session_config.graph_options.optimizer_options
        jit_settings.global_jit_level = tf.OptimizerOptions.ON_1

    return session_config


def reset_sess(config=None):
    """
    Convenience function to create the TF graph & session or reset them.

    Closes the session currently stored in the module-level `sess` (if
    any), resets the default graph, and stores a new
    `tf.InteractiveSession` in `sess`.

    Args:
        config: Optional session configuration passed to
            `tf.InteractiveSession`. When `None`, `session_options()` is
            used.
    """
    if config is None:
        config = session_options()
    global sess
    # Close the previous session *before* resetting the graph: TensorFlow
    # documents calling `tf.reset_default_graph()` while a session is
    # active as undefined behavior.
    try:
        sess.close()
    except Exception:
        # Best effort. On the first call `sess` does not exist yet
        # (NameError), and a session that fails to close must not prevent
        # creating a new one. Unlike a bare `except:`, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    tf.reset_default_graph()
    sess = tf.InteractiveSession(config=config)

    
def evaluate(tensors):
    """
    A "universal" evaluate function that works in both Graph mode (default)
    and Eager mode (https://www.tensorflow.org/guide/eager) in TensorFlow.

    Args:
        tensors: A single tensor, or a list/tuple of tensors.

    Returns:
        In Eager mode, the `.numpy()` value of `tensors`; a list (or tuple)
        of such values when a list (or tuple) was passed. In Graph mode,
        whatever the default session's `run(tensors)` returns.

    Raises:
        RuntimeError: In Graph mode, if no default session is registered.
    """
    if tf.executing_eagerly():
        if isinstance(tensors, (list, tuple)):
            values = [t.numpy() for t in tensors]
            return tuple(values) if isinstance(tensors, tuple) else values
        return tensors.numpy()

    session = tf.get_default_session()
    if session is None:
        raise RuntimeError(
            "evaluate() needs a default session in Graph mode; "
            "call reset_sess() first.")
    # Deliberately not `with session:` -- leaving a `with` block closes the
    # session, which would break every later call that shares it.
    return session.run(tensors)

# Create the notebook's initial default graph and interactive session.
reset_sess()


def strip_consts(graph_def, max_const_size=32):
  """
  Return a copy of `graph_def` with oversized constant payloads removed.

  Every node of `graph_def` is merged into a new node of a fresh
  `tf.GraphDef`. For `Const` nodes whose `tensor_content` is longer than
  `max_const_size` bytes, the content of the copy is replaced by a short
  "<stripped N bytes>" placeholder.

  Args:
    graph_def: Graph definition whose `node` entries are copied.
    max_const_size: Largest `tensor_content` length (in bytes) kept as-is.

  Returns:
    The new, stripped `tf.GraphDef`.
  """
  stripped = tf.GraphDef()
  for source_node in graph_def.node:
    node_copy = stripped.node.add()
    node_copy.MergeFrom(source_node)
    # Only constant nodes carry a payload worth trimming.
    if node_copy.op != 'Const':
      continue
    payload = node_copy.attr['value'].tensor
    n_bytes = len(payload.tensor_content)
    if n_bytes <= max_const_size:
      continue
    payload.tensor_content = bytes("<stripped %d bytes>"%n_bytes, 'utf-8')
  return stripped


def draw_graph(model, *args, **kwargs):
  """
  Visualize TensorFlow graph.

  Calls `model(*args, **kwargs)` inside a fresh `tf.Graph`, strips large
  constants from the resulting graph def with `strip_consts`, and displays
  the result in the notebook as an iframe embedding a `tf-graph-basic`
  element.

  Args:
    model: Callable invoked with the new graph as the default graph.
    *args: Positional arguments forwarded to `model`.
    **kwargs: Keyword arguments forwarded to `model`.
  """
  # Imported here because the notebook's import cell does not import the
  # `IPython` module, so a bare `IPython.display...` lookup raises NameError.
  from IPython.display import HTML, display

  graph = tf.Graph()
  with graph.as_default():
    model(*args, **kwargs)
  graph_def = graph.as_graph_def()
  strip_def = strip_consts(graph_def, max_const_size=32)
  # A random element id keeps several rendered graphs from colliding.
  code = """
      <script>
        function load() {{
          document.getElementById("{id}").pbtxt = {data};
        }}
      </script>
      <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
      <div style="height:600px">
        <tf-graph-basic id="{id}"></tf-graph-basic>
      </div>
  """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

  # The page is embedded through `srcdoc`, so its double quotes are escaped.
  iframe = """
      <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
  """.format(code.replace('"', '&quot;'))
  display(HTML(iframe))

## Introduction

This notebook fits a probabilistic matrix factorization model using variational inference, then visualizes the actual and the estimated rating matrices as heatmaps.

In [0]:
# tf.flags.DEFINE_integer("N", default=50, help="Number of users.")
# tf.flags.DEFINE_integer("M", default=60, help="Number of movies.")
# tf.flags.DEFINE_integer("D", default=3, help="Number of latent factors.")
# FLAGS = tf.flags.FLAGS

# Problem dimensions: the ratings matrix is N x M, factored through
# D-dimensional latent vectors.
N = 50 # Number of users
M = 60 # Number of movies
D = 3  # Number of latent factors

In [0]:
def build_toy_dataset(U, V, N, M, noise_std=0.1):
    """
    Generate a noisy synthetic ratings matrix from latent factors.

    Args:
        U: Latent factor matrix whose transpose is multiplied with `V`.
        V: Latent factor matrix.
        N: Number of rows of the generated noise (and of the result).
        M: Number of columns of the generated noise (and of the result).
        noise_std: Standard deviation of the zero-mean Gaussian noise.

    Returns:
        `U^T V` plus an `(N, M)` array of Gaussian noise drawn from NumPy's
        global random state.
    """
    clean_ratings = np.transpose(U).dot(V)
    gaussian_noise = np.random.normal(0, noise_std, size=(N, M))
    return clean_ratings + gaussian_noise


def get_indicators(N, M, prob_std=0.5):
    """
    Draw a random 0/1 indicator matrix.

    Args:
        N: Number of rows.
        M: Number of columns.
        prob_std: Probability that an entry equals 1.

    Returns:
        An `(N, M)` array of independent single-trial binomial draws taken
        from NumPy's global random state.
    """
    mask_shape = (N, M)
    return np.random.binomial(n=1, p=prob_std, size=mask_shape)


### True Latent Factors

In [0]:
# Seed NumPy's global RNG so the latent factors -- and everything drawn from
# the global RNG after this cell -- are reproducible under Restart & Run All.
np.random.seed(42)

# Ground-truth latent factors: one D-dimensional column per user / movie.
U_true = np.random.randn(D, N)
V_true = np.random.randn(D, M)

### Data

In [0]:
# Noisy N x M ratings matrix generated from the true latent factors.
R_true = build_toy_dataset(U_true, V_true, N, M)
# Random 0/1 mask; it is fed as the indicator `I` during inference.
I_train = get_indicators(N, M)
# Complementary mask: exactly the entries where I_train is 0.
I_test = 1 - I_train

### Model

In [0]:
# Indicator mask placeholder, multiplied element-wise into the mean of R.
I = tf.placeholder(tf.float32, [N, M])
# NOTE(review): `Normal` is not defined anywhere in the visible notebook --
# the `from edward.models import Normal` line in the import cell is
# commented out -- so this cell raises NameError as written. The
# `sample_shape` keyword looks like Edward's random-variable API rather than
# `tfd.Normal`; presumably this cell still needs porting to TFP -- TODO confirm.
U = Normal(loc=0.0, scale=1.0, sample_shape=[D, N])
V = Normal(loc=0.0, scale=1.0, sample_shape=[D, M])
# Ratings: mean is U^T V masked by I, with unit scale for every entry.
R = Normal(loc=tf.matmul(tf.transpose(U), V) * I,
             scale=tf.ones([N, M]))


### Inference

In [0]:
# Variational factors for U and V: Normals whose `loc` is a trainable
# variable and whose `scale` is a trainable variable passed through softplus.
qU = tfd.Normal(loc=tf.get_variable("qU/loc", [D, N]),
              scale=tf.nn.softplus(
                  tf.get_variable("qU/scale", [D, N])))
qV = tfd.Normal(loc=tf.get_variable("qV/loc", [D, M]),
              scale=tf.nn.softplus(
                  tf.get_variable("qV/scale", [D, M])))

# NOTE(review): `ed` is not defined in the visible notebook -- the
# `import edward as ed` line in the import cell is commented out -- so the
# call below raises NameError as written. Presumably this step still needs a
# TFP replacement for Edward's KLqp inference -- TODO confirm.
inference = ed.KLqp({U: qU, V: qV}, data={R: R_true, I: I_train})
inference.run()

### Criticism

In [0]:
# NOTE(review): `qU` and `qV` are `tfd.Normal` distribution objects here, yet
# they are passed straight to `tf.transpose` / `tf.matmul`. Presumably a
# tensor such as `qU.mean()` or `qU.sample()` is intended -- TODO confirm.
qR = tfd.Normal(loc=tf.matmul(tf.transpose(qU), qV),
              scale=tf.ones([N, M]))

print("Mean squared error on test data:")
# NOTE(review): `ed` is not defined in the visible notebook (its import is
# commented out), so this line raises NameError as written.
print(ed.evaluate('mean_squared_error', data={qR: R_true, I: I_test}))

# Heatmap of the generated ratings matrix.
plt.imshow(R_true, cmap='hot')
plt.show()

# Heatmap of the estimated ratings matrix.
# NOTE(review): `.eval()` presumably relies on the default session created by
# `reset_sess()` and on the variables having been initialized -- TODO confirm.
R_est = tf.matmul(tf.transpose(qU), qV).eval()
plt.imshow(R_est, cmap='hot')
plt.show()


In [0]:
# Visualizing the graph we've constructed
# draw_graph(linear_mixed_effects_model, features_train)

## References

1. 

In [0]:
from IPython.core.display import HTML
def css_styling():
    """
    Load the notebook's custom stylesheet.

    Returns:
        An `HTML` display object wrapping the contents of
        "../styles/custom.css" (a path relative to the working directory).

    Raises:
        IOError: If the stylesheet cannot be opened or read.
    """
    # `with` closes the file handle as soon as the contents are read, rather
    # than leaving it open until garbage collection.
    with open("../styles/custom.css", "r") as stylesheet:
        styles = stylesheet.read()
    return HTML(styles)
# Load the custom stylesheet; the returned HTML object is the cell's output.
css_styling()

#  "#F15854",  // red
#  "#5DA5DA",  // blue
#  "#FAA43A",  // orange
#  "#60BD68",  // green
#  "#F17CB0",  // pink
#  "#B2912F",  // brown
#  "#B276B2",  // purple
#  "#DECF3F",  // yellow
#  "#4D4D4D",  // gray