In [28]:
import scipy as sc
import pandas as pd 
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import tensorflow_probability as tfp
import tensorflow as tf

from src.vi import * 

In [14]:
# Short alias for the TensorFlow Probability distributions module.
tfd = tfp.distributions

# Model size parameters (same names as the arguments of `probas` below).
N = 10  # Number of data points
M = 4  # Dimensionality of the latent variables
D = 5  # Dimensionality of the initial (observed) space

def probas(N, M, D):
    """Draw one sample from each prior of the model and print its density.

    Builds the prior distributions of ``z``, ``alpha``, ``sigma`` and ``w``,
    samples each of them once and prints the density of every draw. It then
    simulates ``N`` observations from the Gaussian likelihood centred on
    ``w @ z[i]`` and prints the density of those observations.

    The covariance of ``w`` is built from the *same* ``sigma`` and ``alpha``
    draws whose densities are printed (and whose ``sigma`` is reused by the
    likelihood), so all printed factors belong to one consistent joint sample.

    Calls ``.numpy()`` on the results, so it must run with eager execution.

    Args:
        N: Number of data points (rows of ``z``).
        M: Dimensionality of the latent variables.
        D: Dimensionality of the observations.

    Returns:
        dict mapping ``'z_prior'``, ``'alpha_prior'``, ``'sigma_prior'`` and
        ``'w_prior'`` to the corresponding distribution objects.
    """
    # TODO: check whether we really want distributions here (with a dimension
    # that includes n_samples, or one-dimensional ones).
    with tf.compat.v1.variable_scope(None, default_name="posterior"):
        latent_vars = {}

        # z ~ multivariate normal with identity covariance, one row per datapoint.
        z_prior = tfd.MultivariateNormalFullCovariance(
            loc=tf.zeros([N, M]), covariance_matrix=tf.eye(M))
        latent_vars['z_prior'] = z_prior
        z = z_prior.sample()
        proba_z = tf.reduce_prod(z_prior.prob(z)).numpy()  # p_z
        # Same quantity obtained through log space: exp(sum_i log p(z_i)).
        exp_log_proba_z = tf.exp(tf.reduce_sum(z_prior.log_prob(z)))
        print("p_z: ", proba_z, exp_log_proba_z.numpy())

        # alpha ~ inverse gamma, one independent component per latent dimension.
        alpha_prior = tfd.InverseGamma(concentration=1.0, scale=tf.ones(M))
        latent_vars['alpha_prior'] = alpha_prior
        a = alpha_prior.sample()
        proba_alpha = tf.reduce_prod(alpha_prior.prob(a)).numpy()
        print("p_alpha: ", proba_alpha)

        # sigma ~ log-normal (scalar).
        sigma_prior = tfd.LogNormal(loc=0.0, scale=1.0)
        latent_vars['sigma_prior'] = sigma_prior
        s = sigma_prior.sample()
        proba_sigma = sigma_prior.prob(s).numpy()
        print("p_sigma: ", proba_sigma)

        # w ~ multivariate normal whose covariance is sigma * diag(alpha).
        # Reuse the draws `s` and `a` made above rather than sampling again,
        # otherwise w would be conditioned on values unrelated to the printed
        # p_sigma / p_alpha and to the sigma used by the likelihood.
        w_prior = tfd.MultivariateNormalFullCovariance(
            loc=tf.zeros([D, M]),
            covariance_matrix=s * tf.linalg.diag(a))
        latent_vars['w_prior'] = w_prior
        w = w_prior.sample()
        proba_w = tf.reduce_prod(w_prior.prob(w)).numpy()
        print("proba_w: ", proba_w)

        # Likelihood: x_i ~ Normal(w @ z_i, sigma * I) for every datapoint i.
        # matmul(z, w^T) has shape [N, D], so a single batched distribution
        # covers all N observations at once.
        likelihood = tfd.MultivariateNormalDiag(
            loc=tf.matmul(z, w, transpose_b=True),
            scale_diag=s * tf.ones([D]))
        obs = likelihood.sample()
        # Sum the log-densities before exponentiating: multiplying N small
        # probabilities one by one underflows much earlier.
        proba_lik = tf.exp(tf.reduce_sum(likelihood.log_prob(obs))).numpy()
        print("proba_lik: ", proba_lik)

    # The joint density of this draw is the product of the five printed
    # factors (p_z * p_alpha * p_sigma * proba_w * proba_lik); it is not
    # computed here.
    return latent_vars

In [33]:
def on_process(n_samples, data, latent_variables):
    """Build the score-function loss and gradients from ``n_samples`` draws.

    This mirrors ``build_score_loss_and_gradients`` from the Edward code in
    this file, using TensorFlow Probability distributions: for every sample it
    draws from each approximating distribution, accumulates ``log q`` and
    ``log p`` for that draw, and finally forms the score-function gradient
    ``E[ log q * stop_gradient(log p - log q) ]``.

    Args:
        n_samples: Number of Monte Carlo samples.
        data: dict mapping observed model variables to either an approximating
            distribution (which is sampled) or an observed value (used as is).
            Only keys that are ``tfp.distributions.Distribution`` instances
            are processed.
        latent_variables: dict mapping latent model variables to their
            approximating distributions.

    Returns:
        Tuple ``(loss, grads_and_vars)`` where ``grads_and_vars`` is a list of
        ``(gradient, variable)`` pairs.

    NOTE(review): ``inference``, ``var_list``, ``create_copy`` and
    ``get_descendants`` are not parameters of this function; they must be
    available in the enclosing namespace (presumably via ``from src.vi import
    *`` -- confirm). ``tf.gradients`` only works in graph mode.
    """
    graph = tf.compat.v1.get_default_graph()

    # Per-sample accumulators. Plain Python lists are used because tensors do
    # not support item assignment (`tensor[s] += ...` raises TypeError).
    p_log_prob = [0.0] * n_samples
    q_log_prob = [0.0] * n_samples

    # `tf.Graph().as_default()` returns a context manager, not a graph, so the
    # unique name has to be requested from the default graph itself.
    base_scope = graph.unique_name("inference") + '/'

    for s in range(n_samples):
        scope = base_scope + graph.unique_name("sample")
        # Maps every model variable to the concrete value it takes in this sample.
        dict_swap = {}

        # Observed variables: sample when an approximating distribution is
        # given, otherwise condition on the supplied value.
        for x, qx in data.items():
            if isinstance(x, tfp.distributions.Distribution):
                if isinstance(qx, tfp.distributions.Distribution):
                    with tf.name_scope(scope):
                        # Distribution.copy() yields a new distribution object;
                        # tf.identity() would try to turn it into a tensor.
                        qx_copy = qx.copy()
                    dict_swap[x] = qx_copy.sample()
                else:
                    dict_swap[x] = qx

        # Latent variables: draw from q and accumulate log q of that draw.
        for z, qz in latent_variables.items():
            with tf.name_scope(scope):
                qz_copy = qz.copy()
            dict_swap[z] = qz_copy.sample()
            if hasattr(qz_copy, 'log_prob'):
                # The draw is treated as a constant, as required by the
                # score-function estimator.
                q_log_prob[s] += tf.reduce_sum(
                    inference.scale.get(z, 1.0) *
                    qz_copy.log_prob(tf.stop_gradient(dict_swap[z])))

        # Prior terms. Done in a second pass so that dict_swap is complete
        # before any model variable is copied, and evaluated at the draw from
        # q (not at a fresh draw from the copy).
        for z in latent_variables:
            z_copy = create_copy(z, dict_swap, scope=scope)
            if hasattr(z_copy, 'log_prob'):
                p_log_prob[s] += tf.reduce_sum(
                    inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

        # Likelihood terms, only for the keys that were entered in dict_swap.
        for x in data:
            if isinstance(x, tfp.distributions.Distribution):
                x_copy = create_copy(x, dict_swap, scope=scope)
                p_log_prob[s] += tf.reduce_sum(
                    inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))

    # Keep one entry per sample: the score-function gradient needs the mean of
    # the per-sample products, not the product of the means.
    p_log_prob = tf.stack(p_log_prob)
    q_log_prob = tf.stack(q_log_prob)
    reg_penalty = tf.reduce_sum(tf.compat.v1.losses.get_regularization_losses())

    losses = p_log_prob - q_log_prob
    loss = -(tf.reduce_mean(losses) - reg_penalty)

    # Variables that feed an approximating distribution get the score-function
    # gradient; every other variable gets the plain gradient of the loss.
    q_rvs = list(latent_variables.values())
    q_vars = [v for v in var_list
              if len(get_descendants(tf.convert_to_tensor(v), q_rvs)) != 0]
    q_grads = tf.gradients(
        -(tf.reduce_mean(q_log_prob * tf.stop_gradient(losses)) - reg_penalty),
        q_vars)
    p_vars = [v for v in var_list if v not in q_vars]
    p_grads = tf.gradients(loss, p_vars)
    grads_and_vars = list(zip(q_grads, q_vars)) + list(zip(p_grads, p_vars))

    return loss, grads_and_vars

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import tensorflow as tf

from edward.inferences.variational_inference import VariationalInference
from edward.models import RandomVariable
from edward.util import copy, get_descendants


  ild_reparam_kl_loss_and_gradients(self, var_list)


class ReparameterizationEntropyKLqp(VariationalInference):
  r"""KL(q || p) variational inference, reparameterized, analytic entropy.

  Minimizes

  $\text{KL}( q(z; \lambda) \| p(z \mid x) )$

  using the reparameterization gradient together with an analytic
  entropy term. Every tensor found in the `REGULARIZATION_LOSSES`
  collection is added to the objective as well.
  """
  def __init__(self, latent_vars=None, data=None):
    """Set up the inference problem.

    Args:
      latent_vars: list of RandomVariable or
                   dict of RandomVariable to RandomVariable.
        Random variables to infer. When a list is given, every entry
        is paired with an internally created `Normal` approximation
        whose location and scale are free parameters initialized from
        standard normal draws; such entries must be continuous.
      data: passed through unchanged to the parent constructor.

    Raises:
      AttributeError: if a listed variable has no `support` attribute
        or its support is not one of the continuous kinds.
    """
    if isinstance(latent_vars, list):
      supported = ('01', 'nonnegative', 'simplex', 'real', 'multivariate_real')
      approximations = {}
      with tf.variable_scope(None, default_name="posterior"):
        for rv in latent_vars:
          if getattr(rv, 'support', None) not in supported:
            raise AttributeError(
                "Random variable {} is not continuous or a random "
                "variable with supported continuous support.".format(rv))
          # One free location and one free (softplus-constrained) scale
          # per element of the variable.
          shape = rv.batch_shape.concatenate(rv.event_shape)
          loc_param = tf.Variable(tf.random_normal(shape))
          scale_param = tf.Variable(tf.random_normal(shape))
          approximations[rv] = Normal(loc=loc_param,
                                      scale=tf.nn.softplus(scale_param))
      latent_vars = approximations

    super(ReparameterizationEntropyKLqp, self).__init__(latent_vars, data)

  def initialize(self, n_samples=1, *args, **kwargs):
    """Store the sample count, then run the parent's initialization.

    Args:
      n_samples: int.
        How many draws from the variational model are used to estimate
        the stochastic gradients. Must be positive.

    Raises:
      ValueError: if `n_samples` is zero or negative.
    """
    if n_samples <= 0:
      message = "n_samples should be greater than zero: {}".format(n_samples)
      raise ValueError(message)
    self.n_samples = n_samples
    parent = super(ReparameterizationEntropyKLqp, self)
    return parent.initialize(*args, **kwargs)


class ScoreKLqp(VariationalInference):
  r"""KL(q || p) variational inference with the score function gradient.

  Minimizes

  $\text{KL}( q(z; \lambda) \| p(z \mid x) )$

  using the score function gradient. Every tensor found in the
  `REGULARIZATION_LOSSES` collection is added to the objective as well.
  """
  def __init__(self, latent_vars=None, data=None):
    """Set up the inference problem.

    Args:
      latent_vars: list of RandomVariable or
                   dict of RandomVariable to RandomVariable.
        Random variables to infer. When a list is given, every entry
        is paired with an internally created `Normal` approximation
        whose location and scale are free parameters initialized from
        standard normal draws; such entries must be continuous.
      data: passed through unchanged to the parent constructor.

    Raises:
      AttributeError: if a listed variable has no `support` attribute
        or its support is not one of the continuous kinds.
    """
    if isinstance(latent_vars, list):
      supported = ('01', 'nonnegative', 'simplex', 'real', 'multivariate_real')
      approximations = {}
      with tf.variable_scope(None, default_name="posterior"):
        for rv in latent_vars:
          if getattr(rv, 'support', None) not in supported:
            raise AttributeError(
                "Random variable {} is not continuous or a random "
                "variable with supported continuous support.".format(rv))
          # One free location and one free (softplus-constrained) scale
          # per element of the variable.
          shape = rv.batch_shape.concatenate(rv.event_shape)
          loc_param = tf.Variable(tf.random_normal(shape))
          scale_param = tf.Variable(tf.random_normal(shape))
          approximations[rv] = Normal(loc=loc_param,
                                      scale=tf.nn.softplus(scale_param))
      latent_vars = approximations

    super(ScoreKLqp, self).__init__(latent_vars, data)

  def initialize(self, n_samples=1, *args, **kwargs):
    """Store the sample count, then run the parent's initialization.

    Args:
      n_samples: int.
        How many draws from the variational model are used to estimate
        the stochastic gradients. Must be positive.

    Raises:
      ValueError: if `n_samples` is zero or negative.
    """
    if n_samples <= 0:
      message = "n_samples should be greater than zero: {}".format(n_samples)
      raise ValueError(message)
    self.n_samples = n_samples
    parent = super(ScoreKLqp, self)
    return parent.initialize(*args, **kwargs)

  def build_loss_and_gradients(self, var_list):
    """Delegate to `build_score_loss_and_gradients` for this instance."""
    loss_and_grads = build_score_loss_and_gradients(self, var_list)
    return loss_and_grads


class ScoreKLKLqp(VariationalInference):
  r"""KL(q || p) variational inference, score function, analytic KL.

  Minimizes

  $\text{KL}( q(z; \lambda) \| p(z \mid x) )$

  using the score function gradient together with an analytic KL term.
  Every tensor found in the `REGULARIZATION_LOSSES` collection is added
  to the objective as well.
  """
  def __init__(self, latent_vars=None, data=None):
    """Set up the inference problem.

    Args:
      latent_vars: list of RandomVariable or
                   dict of RandomVariable to RandomVariable.
        Random variables to infer. When a list is given, every entry
        is paired with an internally created `Normal` approximation
        whose location and scale are free parameters initialized from
        standard normal draws; such entries must be continuous.
      data: passed through unchanged to the parent constructor.

    Raises:
      AttributeError: if a listed variable has no `support` attribute
        or its support is not one of the continuous kinds.
    """
    if isinstance(latent_vars, list):
      supported = ('01', 'nonnegative', 'simplex', 'real', 'multivariate_real')
      approximations = {}
      with tf.variable_scope(None, default_name="posterior"):
        for rv in latent_vars:
          if getattr(rv, 'support', None) not in supported:
            raise AttributeError(
                "Random variable {} is not continuous or a random "
                "variable with supported continuous support.".format(rv))
          # One free location and one free (softplus-constrained) scale
          # per element of the variable.
          shape = rv.batch_shape.concatenate(rv.event_shape)
          loc_param = tf.Variable(tf.random_normal(shape))
          scale_param = tf.Variable(tf.random_normal(shape))
          approximations[rv] = Normal(loc=loc_param,
                                      scale=tf.nn.softplus(scale_param))
      latent_vars = approximations

    super(ScoreKLKLqp, self).__init__(latent_vars, data)

  def initialize(self, n_samples=1, kl_scaling=None, *args, **kwargs):
    r"""Store sampling and KL-scaling options, then run the parent's
    initialization.

    Args:
      n_samples: int.
        How many draws from the variational model are used to estimate
        the stochastic gradients. Must be positive.
      kl_scaling: dict of RandomVariable to tf.Tensor.
        Optional weights for the KL divergence terms of the ELBO. If a
        KL term has the form

        $\alpha_p \mathbb{E}_{q(z\mid x, \lambda)} [
              \log q(z\mid x, \lambda) - \log p(z)],$

        pass {$p(z)$: $\alpha_p$}, where $\alpha_p$ is a tensor with a
        broadcastable shape; it multiplies the batchwise KL terms
        element-wise. `None` is stored as an empty dict.

    Raises:
      ValueError: if `n_samples` is zero or negative.
    """
    if n_samples <= 0:
      message = "n_samples should be greater than zero: {}".format(n_samples)
      raise ValueError(message)
    self.n_samples = n_samples
    self.kl_scaling = {} if kl_scaling is None else kl_scaling
    parent = super(ScoreKLKLqp, self)
    return parent.initialize(*args, **kwargs)

  def build_loss_and_gradients(self, var_list):
    """Delegate to `build_score_kl_loss_and_gradients` for this instance."""
    loss_and_grads = build_score_kl_loss_and_gradients(self, var_list)
    return loss_and_grads


class ScoreEntropyKLqp(VariationalInference):
  r"""KL(q || p) variational inference, score function, analytic entropy.

  Minimizes

  $\text{KL}( q(z; \lambda) \| p(z \mid x) )$

  using the score function gradient together with an analytic entropy
  term. Every tensor found in the `REGULARIZATION_LOSSES` collection is
  added to the objective as well.
  """
  def __init__(self, latent_vars=None, data=None):
    """Set up the inference problem.

    Args:
      latent_vars: list of RandomVariable or
                   dict of RandomVariable to RandomVariable.
        Random variables to infer. When a list is given, every entry
        is paired with an internally created `Normal` approximation
        whose location and scale are free parameters initialized from
        standard normal draws; such entries must be continuous.
      data: passed through unchanged to the parent constructor.

    Raises:
      AttributeError: if a listed variable has no `support` attribute
        or its support is not one of the continuous kinds.
    """
    if isinstance(latent_vars, list):
      supported = ('01', 'nonnegative', 'simplex', 'real', 'multivariate_real')
      approximations = {}
      with tf.variable_scope(None, default_name="posterior"):
        for rv in latent_vars:
          if getattr(rv, 'support', None) not in supported:
            raise AttributeError(
                "Random variable {} is not continuous or a random "
                "variable with supported continuous support.".format(rv))
          # One free location and one free (softplus-constrained) scale
          # per element of the variable.
          shape = rv.batch_shape.concatenate(rv.event_shape)
          loc_param = tf.Variable(tf.random_normal(shape))
          scale_param = tf.Variable(tf.random_normal(shape))
          approximations[rv] = Normal(loc=loc_param,
                                      scale=tf.nn.softplus(scale_param))
      latent_vars = approximations

    super(ScoreEntropyKLqp, self).__init__(latent_vars, data)

  def initialize(self, n_samples=1, *args, **kwargs):
    """Store the sample count, then run the parent's initialization.

    Args:
      n_samples: int.
        How many draws from the variational model are used to estimate
        the stochastic gradients. Must be positive.

    Raises:
      ValueError: if `n_samples` is zero or negative.
    """
    if n_samples <= 0:
      message = "n_samples should be greater than zero: {}".format(n_samples)
      raise ValueError(message)
    self.n_samples = n_samples
    parent = super(ScoreEntropyKLqp, self)
    return parent.initialize(*args, **kwargs)

  def build_loss_and_gradients(self, var_list):
    """Delegate to `build_score_entropy_loss_and_gradients` for this
    instance."""
    loss_and_grads = build_score_entropy_loss_and_gradients(self, var_list)
    return loss_and_grads



def build_reparam_loss_and_gradients(inference, var_list):
  r"""Build the reparameterized negative-ELBO loss and its gradients.

  Automatic differentiation of the returned loss gives a stochastic
  gradient of

  $-\text{ELBO} =
      -\mathbb{E}_{q(z; \lambda)} [ \log p(x, z) - \log q(z; \lambda) ]$

  based on the reparameterization trick [@kingma2014auto]. The
  expectation is a Monte Carlo average over `inference.n_samples`
  draws from $q(z;\lambda)$.

  Args:
    inference: object providing `n_samples`, `data`, `latent_vars`,
      `scale`, `logging` and `_summary_key`.
    var_list: variables to differentiate the loss with respect to.

  Returns:
    Tuple `(loss, grads_and_vars)`, where `grads_and_vars` is a list of
    `(gradient, variable)` pairs in the order of `var_list`.
  """
  graph = tf.get_default_graph()
  per_sample_log_p = []
  per_sample_log_q = []
  base_scope = graph.unique_name("inference") + '/'

  for _ in range(inference.n_samples):
    # Each sample lives in its own uniquely named scope below "inference/".
    scope = base_scope + graph.unique_name("sample")
    log_p = 0.0
    log_q = 0.0

    # dict_swap replaces conditioning on a prior or observed variable
    # with conditioning on a specific value.
    dict_swap = {}
    for x, qx in six.iteritems(inference.data):
      if not isinstance(x, RandomVariable):
        continue
      if isinstance(qx, RandomVariable):
        dict_swap[x] = copy(qx, scope=scope).value()
      else:
        dict_swap[x] = qx

    for z, qz in six.iteritems(inference.latent_vars):
      # A fresh copy of q(z) yields a new posterior sample.
      qz_copy = copy(qz, scope=scope)
      z_sample = qz_copy.value()
      dict_swap[z] = z_sample
      log_q += tf.reduce_sum(
          inference.scale.get(z, 1.0) * qz_copy.log_prob(z_sample))

    # Prior terms, evaluated at the posterior samples.
    for z in six.iterkeys(inference.latent_vars):
      z_copy = copy(z, dict_swap, scope=scope)
      log_p += tf.reduce_sum(
          inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

    # Likelihood terms.
    for x in six.iterkeys(inference.data):
      if not isinstance(x, RandomVariable):
        continue
      x_copy = copy(x, dict_swap, scope=scope)
      log_p += tf.reduce_sum(
          inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))

    per_sample_log_p.append(log_p)
    per_sample_log_q.append(log_q)

  p_log_prob = tf.reduce_mean(per_sample_log_p)
  q_log_prob = tf.reduce_mean(per_sample_log_q)
  reg_penalty = tf.reduce_sum(tf.losses.get_regularization_losses())

  if inference.logging:
    summaries = (("loss/p_log_prob", p_log_prob),
                 ("loss/q_log_prob", q_log_prob),
                 ("loss/reg_penalty", reg_penalty))
    for tag, value in summaries:
      tf.summary.scalar(tag, value, collections=[inference._summary_key])

  elbo = p_log_prob - q_log_prob
  loss = -(elbo - reg_penalty)

  grads = tf.gradients(loss, var_list)
  return loss, list(zip(grads, var_list))


def build_reparam_kl_loss_and_gradients(inference, var_list):
  r"""Build the reparameterized loss with an analytic KL term, and its
  gradients.

  Automatic differentiation of the returned loss gives a stochastic
  gradient of

  .. math::

    -\text{ELBO} =  - ( \mathbb{E}_{q(z; \lambda)} [ \log p(x \mid z) ]
          - \text{KL}(q(z; \lambda) \| p(z)) )

  based on the reparameterization trick [@kingma2014auto]. The KL term
  is assumed to be analytic; the expected log-likelihood is a Monte
  Carlo average over `inference.n_samples` draws from $q(z;\lambda)$.

  Args:
    inference: object providing `n_samples`, `data`, `latent_vars`,
      `scale`, `kl_scaling`, `logging` and `_summary_key`.
    var_list: variables to differentiate the loss with respect to.

  Returns:
    Tuple `(loss, grads_and_vars)`, where `grads_and_vars` is a list of
    `(gradient, variable)` pairs in the order of `var_list`.
  """
  graph = tf.get_default_graph()
  per_sample_log_lik = []
  base_scope = graph.unique_name("inference") + '/'

  for _ in range(inference.n_samples):
    scope = base_scope + graph.unique_name("sample")
    log_lik = 0.0

    # dict_swap replaces conditioning on a prior or observed variable
    # with conditioning on a specific value.
    dict_swap = {}
    for x, qx in six.iteritems(inference.data):
      if not isinstance(x, RandomVariable):
        continue
      if isinstance(qx, RandomVariable):
        dict_swap[x] = copy(qx, scope=scope).value()
      else:
        dict_swap[x] = qx

    for z, qz in six.iteritems(inference.latent_vars):
      # A fresh copy of q(z) yields a new posterior sample.
      dict_swap[z] = copy(qz, scope=scope).value()

    # Likelihood terms.
    for x in six.iterkeys(inference.data):
      if not isinstance(x, RandomVariable):
        continue
      x_copy = copy(x, dict_swap, scope=scope)
      log_lik += tf.reduce_sum(
          inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))

    per_sample_log_lik.append(log_lik)

  p_log_lik = tf.reduce_mean(per_sample_log_lik)

  # One (optionally scaled) analytic KL term per latent variable.
  kl_terms = [
      tf.reduce_sum(inference.kl_scaling.get(z, 1.0) * kl_divergence(qz, z))
      for z, qz in six.iteritems(inference.latent_vars)]
  kl_penalty = tf.reduce_sum(kl_terms)

  reg_penalty = tf.reduce_sum(tf.losses.get_regularization_losses())

  if inference.logging:
    summaries = (("loss/p_log_lik", p_log_lik),
                 ("loss/kl_penalty", kl_penalty),
                 ("loss/reg_penalty", reg_penalty))
    for tag, value in summaries:
      tf.summary.scalar(tag, value, collections=[inference._summary_key])

  loss = -(p_log_lik - kl_penalty - reg_penalty)

  grads = tf.gradients(loss, var_list)
  return loss, list(zip(grads, var_list))

def build_score_loss_and_gradients(inference, var_list):
  r"""Build the loss and the score-function gradients
  [@paisley2012variational].

  The expectation is a Monte Carlo average over `inference.n_samples`
  draws from $q(z;\lambda)$.

  Args:
    inference: object providing `n_samples`, `data`, `latent_vars`,
      `scale`, `logging` and `_summary_key`.
    var_list: variables to compute gradients for.

  Returns:
    Tuple `(loss, grads_and_vars)`. `grads_and_vars` lists the
    `(gradient, variable)` pairs of the variables that feed the
    variational distributions first, followed by all remaining ones.
  """
  graph = tf.get_default_graph()
  per_sample_log_p = []
  per_sample_log_q = []
  base_scope = graph.unique_name("inference") + '/'

  for _ in range(inference.n_samples):
    scope = base_scope + graph.unique_name("sample")
    log_p = 0.0
    log_q = 0.0

    # dict_swap replaces conditioning on a prior or observed variable
    # with conditioning on a specific value.
    dict_swap = {}
    for x, qx in six.iteritems(inference.data):
      if not isinstance(x, RandomVariable):
        continue
      if isinstance(qx, RandomVariable):
        dict_swap[x] = copy(qx, scope=scope).value()
      else:
        dict_swap[x] = qx

    for z, qz in six.iteritems(inference.latent_vars):
      # A fresh copy of q(z) yields a new posterior sample.
      qz_copy = copy(qz, scope=scope)
      z_sample = qz_copy.value()
      dict_swap[z] = z_sample
      # The sample is held constant inside log q.
      log_q += tf.reduce_sum(
          inference.scale.get(z, 1.0) *
          qz_copy.log_prob(tf.stop_gradient(z_sample)))

    # Prior terms, evaluated at the posterior samples.
    for z in six.iterkeys(inference.latent_vars):
      z_copy = copy(z, dict_swap, scope=scope)
      log_p += tf.reduce_sum(
          inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

    # Likelihood terms.
    for x in six.iterkeys(inference.data):
      if not isinstance(x, RandomVariable):
        continue
      x_copy = copy(x, dict_swap, scope=scope)
      log_p += tf.reduce_sum(
          inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))

    per_sample_log_p.append(log_p)
    per_sample_log_q.append(log_q)

  # Keep one entry per sample; the gradient below needs them unreduced.
  p_log_prob = tf.stack(per_sample_log_p)
  q_log_prob = tf.stack(per_sample_log_q)
  reg_penalty = tf.reduce_sum(tf.losses.get_regularization_losses())

  if inference.logging:
    tf.summary.scalar("loss/p_log_prob", tf.reduce_mean(p_log_prob),
                      collections=[inference._summary_key])
    tf.summary.scalar("loss/q_log_prob", tf.reduce_mean(q_log_prob),
                      collections=[inference._summary_key])
    tf.summary.scalar("loss/reg_penalty", reg_penalty,
                      collections=[inference._summary_key])

  losses = p_log_prob - q_log_prob
  loss = -(tf.reduce_mean(losses) - reg_penalty)

  # Variables with a variational random variable among their descendants
  # get the score-function surrogate; the rest get the plain loss gradient.
  q_rvs = list(six.itervalues(inference.latent_vars))
  q_vars = []
  for v in var_list:
    if len(get_descendants(tf.convert_to_tensor(v), q_rvs)) != 0:
      q_vars.append(v)
  surrogate = -(
      tf.reduce_mean(q_log_prob * tf.stop_gradient(losses)) - reg_penalty)
  q_grads = tf.gradients(surrogate, q_vars)

  p_vars = [v for v in var_list if v not in q_vars]
  p_grads = tf.gradients(loss, p_vars)

  grads_and_vars = list(zip(q_grads, q_vars))
  grads_and_vars += list(zip(p_grads, p_vars))
  return loss, grads_and_vars