In [73]:
import pandas as pd
import numpy as np

In [112]:
import tensorflow as tf
from tensorflow_probability import edward2 as ed
import tensorflow_probability as tfp

In [113]:
# Show the TensorFlow version this notebook was run against.
tf.__version__

'2.0.0'

# Model

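A small synthetic dataset: two sources each make one claim about each of two objects, and the two sources disagree on every object.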

In [101]:
# One row per claim: which source asserted which value for which object.
claims = pd.DataFrame(
    data={
        'source_id': [0, 0, 1, 1],
        'object_id': [0, 1, 1, 0],
        'value': [0, 1, 0, 1],
    }
)

In [161]:
def compute_problem_size(claims):
    """Derive the dimensions of the problem from the claims table.

    Parameters
    ----------
    claims: pd.DataFrame
        a data frame that has columns [source_id, object_id, value]

    Returns
    -------
    tuple
        ``(n_sources, n_objects, domain_size)``: the number of distinct source ids,
        the number of distinct object ids, and the largest per-object maximum of
        ``value`` plus one.
    """
    distinct_counts = claims.nunique()
    # Largest claimed value per object, plus one (values are presumably 0-based indices).
    per_object_domain = claims.groupby('object_id')['value'].max() + 1
    return (
        distinct_counts['source_id'],
        distinct_counts['object_id'],
        per_object_domain.max(),
    )

def model(claims):
    """Generative model over source honesty and the hidden truth of each object.

    @TODO: how to perform subsample with this model. Is this possible?

    Ideas:
        1. Because each object has a different domain, their distributions cannot be
        batched directly. We relax this by expanding every object's domain to the
        biggest domain among all objects. For example, if three objects have domain
        sizes [3, 2, 5], each of them is given a domain of size 5.

    Parameters
    ----------
    claims: pd.DataFrame
        a data frame that has columns [source_id, object_id, value]

    Returns
    -------
    tuple
        ``(s, o)``: the Bernoulli random variable ``s_honest`` (one entry per source)
        and the Categorical random variable ``o_object`` (one entry per object).
    """
    n_sources, n_objects, domain_size = compute_problem_size(claims)

    # data sources: s ~ Bernoulli(honest_probs), initialised at 0.5 for every source
    honest_probs = tf.Variable(initial_value=tf.ones(n_sources) * 0.5, name='honest_probs')
    s = ed.Bernoulli(name='s_honest', probs=honest_probs)

    # hidden truth: o ~ Categorical(object_probs), initialised uniform over the shared domain
    object_probs = tf.Variable(
        initial_value=tf.ones((n_objects, domain_size)) / domain_size,
        name='object_probs',
    )
    o = ed.Categorical(name='o_object', probs=object_probs)

    # Return both random variables; returning `o` twice would drop the source variable.
    return s, o

In [171]:
# Run the model once under a tape so that every random variable it creates
# is recorded in `trace`, keyed by the variable's name.
with ed.tape() as trace:
    model(claims)
trace

OrderedDict([('s_honest',
              <ed.RandomVariable 's_honest' shape=(2,) dtype=int32 numpy=array([0, 0], dtype=int32)>),
             ('o_object',
              <ed.RandomVariable 'o_object' shape=(2,) dtype=int32 numpy=array([1, 1], dtype=int32)>)])

# $\log p(x,z)$

In [173]:
# Turn the generative model into a function that evaluates its log joint probability.
# It is called with the model's own arguments plus one value per named random variable.
log_prob_fn = ed.make_log_joint_fn(model)

In [174]:
# Evaluate the log joint at one assignment: both sources 0, both objects taking value 1.
log_prob_fn(claims, **{'s_honest': [0, 0], 'o_object': [1,1]})

<tf.Tensor: id=1666, shape=(), dtype=float32, numpy=-2.7725887>

In [176]:
# Sanity check by hand: with this data there are two Bernoulli(0.5) sources and two
# objects with a uniform 2-value categorical, so each of the four variables contributes log(0.5).
tf.math.log(0.5) + tf.math.log(0.5) + tf.math.log(0.5) + tf.math.log(0.5)

<tf.Tensor: id=1677, shape=(), dtype=float32, numpy=-2.7725887>