### Latent Dirichlet Allocation


In [1]:
import numpy as np
from scipy.special import digamma

import tensorflow as tf
import edward as ed
from edward.models import Dirichlet, Multinomial, Categorical, Binomial, Empirical

In [165]:
# Task: infer the topic mixture of a document. For each word in the document, a random topic is
# selected according to the topic probabilities, and then a random word is selected according to
# the word probabilities of the selected topic. The word probabilities are assumed to be known
# for each topic; only the topic mixture is unknown.

# Problem dimensions.
N = 100  # document length, in words
T = 3    # how many topics there are
W = 5    # vocabulary size (distinct words)

# Per-topic word distributions: row t is used as the word probabilities of topic t,
# so every row sums to 1.
beta = np.array(
    [
        [0.9, 0.1, 0.0, 0.0, 0.0],  # topic 0
        [0.0, 0.1, 0.8, 0.1, 0.0],  # topic 1
        [0.0, 0.0, 0.0, 0.1, 0.9],  # topic 2
    ],
    dtype=np.float32,
)

# Topic mixture used when generating the document (one probability per topic).
theta = np.array([0.1, 0.2, 0.7])

# Generate document
def gen_document(n, topic_probs=None, word_probs=None):
    """Sample a synthetic document together with the topic behind each word.

    Every word is produced in two steps: a topic is drawn from ``topic_probs``,
    then a word index is drawn from that topic's row of ``word_probs``.

    Parameters
    ----------
    n : int
        Number of words to generate. ``0`` yields two empty integer arrays.
    topic_probs : array-like of shape (n_topics,), optional
        Probability of each topic. Defaults to the module-level ``theta``.
    word_probs : array-like of shape (n_topics, n_words), optional
        Row ``t`` holds the word probabilities of topic ``t``. Defaults to the
        module-level ``beta``.

    Returns
    -------
    document : numpy.ndarray of shape (n,)
        Integer word indices in ``[0, n_words)``.
    topics : numpy.ndarray of shape (n,)
        Integer topic indices in ``[0, n_topics)``; ``topics[i]`` is the topic
        that generated ``document[i]``.

    Raises
    ------
    ValueError
        If ``word_probs`` is not 2-D, if it does not have one row per entry of
        ``topic_probs``, or if ``np.random.choice`` rejects the probabilities.
    """
    # The globals are only looked up when the caller did not pass a distribution,
    # so the function can be reused with any topic/word model.
    topic_probs = np.asarray(theta if topic_probs is None else topic_probs)
    word_probs = np.asarray(beta if word_probs is None else word_probs)

    if word_probs.ndim != 2 or topic_probs.shape != word_probs.shape[:1]:
        raise ValueError(
            "word_probs must be 2-D with one row per entry of topic_probs; got "
            "shapes {} and {}".format(word_probs.shape, topic_probs.shape)
        )
    n_topics, n_words = word_probs.shape

    topics = np.random.choice(np.arange(n_topics), p=topic_probs, size=n)
    # Each word has its own distribution (chosen by its topic), so draw them one by one.
    words = [np.random.choice(n_words, p=word_probs[t]) for t in topics]
    # An explicit integer dtype keeps the result integral even when n == 0
    # (np.array([]) alone would default to float64).
    document = np.array(words, dtype=int)
    return document, topics
    
# Draw one document of N words, keeping the topic that generated each word.
document, topics = gen_document(n=N)

(array([0, 1, 2]), array([ 8, 24, 68]))
[2 2 0 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 1 2 1 2 2 0 2 2 1 2 1 2 0 2 1 2 0 2
 2 1 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 0 0 2 2 2 1 2 1 1 2 2 2 2 2 1 2 2 2 1 2
 2 1 1 1 2 1 1 2 2 0 2 2 2 1 2 2 1 2 2 0 2 2 1 1 2 2]
