# PGF for LDA

In [1]:
import algopy
from algopy import UTPM

import numpy as np
import numpy.random as rn

from pyaudi import gdual_double as gdual
from pyaudi import exp, log

from scipy.stats import binom, multinomial, nbinom, poisson
from scipy.misc import factorial

### Define dummy data

In [2]:
# Fixed, hand-picked alternative to the random draws below (uncomment to use):
# phi = np.array([[0.1, 0.8, 0.1], [0.5, 0.2, 0.3]]) # distribution over words
# theta = np.array([0.3, 0.7])                       # distribution over topics
# K, V = phi.shape      # K = number of topics, V = size of vocab
# N = 4                 # number of tokens in a document

# Seed the global NumPy RNG so that "Restart & Run All" always draws the same
# phi / theta (and the same layer matrices further down the notebook).
# NOTE: outputs recorded before this seed was introduced will not match a
# re-run; re-execute all cells to refresh them.
SEED = 0
rn.seed(SEED)

N = 4   # number of tokens in a document
K = 2   # number of topics
V = 3   # number of unique word types

concen1 = 0.1 # concentration param. for topics.  when 0 < alpha < 1
              # topics are low entropy (i.e., peaked around a single val)
              # when alpha > 1, topics are high entropy
phi = rn.dirichlet(np.ones(V) * concen1, size=K)   # row k = topic k's distribution over words
assert phi.shape == (K, V)

concen2 = 1.  # concentration param. for document dist over topics
theta = rn.dirichlet(np.ones(K) * concen2)         # the document's distribution over topics
assert theta.shape == (K,)

### True single-word marginals
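Let $Y_v$ be the count of word type $w_v$ in a document, where $v$ is the word index. Each of the $N$ tokens is independently word $w_v$ with probability $p_v = \sum_k \theta_k \phi_{kv}$, so $Y_v \sim \mathrm{Binomial}(N, p_v)$. Compute the true $P(Y_v = y_v)$ for $y_v = 0, 1, \ldots, N$.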

In [3]:
# Per-token word probabilities p[v] = sum_k theta[k] * phi[k, v], stored as a
# (V, 1) column so that it broadcasts against the (V, N + 1) grid of counts.
p = np.dot(theta, phi).reshape(V, 1)
# One row of candidate counts 0..N for every word type.
x = [range(N + 1)] * V
# Entry [v, n] is the binomial probability P(Y_v = n); shape = (V, N + 1).
true_marginals = binom.pmf(x, N, p)

### PGF single-word marginals

In [4]:
def pgf_marginal(v, y_v, phi, theta):
    """Compute P(Y_v = y_v) = P(count(w_v) = y_v) from the document PGF.

    The PGF ``(theta . phi . u) ** N`` is propagated through algopy's UTPM
    (truncated Taylor) arithmetic with ``u[v]`` set to the series variable
    (value 0, first-order coefficient 1) and every other entry of ``u`` set
    to 1.  The coefficient of degree ``y_v`` of the result is returned.

    Reads the notebook-level globals ``V`` (vocabulary size) and ``N``
    (number of tokens per document).
    """
    n_coeffs = y_v + 1                       # keep Taylor coefficients 0..y_v
    series_var = UTPM(np.zeros((n_coeffs, 1)))
    if n_coeffs > 1:
        # With a single coefficient (y_v == 0) there is no first-order slot.
        series_var.data[1, 0] = 1

    pgf_args = algopy.ones(V, dtype=series_var)
    pgf_args[v] = series_var
    per_topic = phi.dot(pgf_args)
    per_token = theta.dot(per_topic)
    doc_pgf = np.power(per_token, N)
    return doc_pgf.data[:, 0][y_v]

# Oberve 3 tokens of word type w_0
y_v, v = 3, 0
print 'PGF marginal:', pgf_marginal(v, y_v, phi, theta)
print 'True marginal:', true_marginals[0, 3]

PGF marginal: 0.000113297381357
True marginal: 0.000113297381357


Observe a document of length $N$ with word counts $y = [y_0, \ldots, y_{V-1}]$, where $\sum_{i=0}^{V-1} y_i = N$. Find the **single-word** marginal probabilities.

In [5]:
def pgf_marginals(y, phi, theta):
    """Compute [P(Y_0 = y[0]), ..., P(Y_{V-1} = y[V-1])] in one pass.

    A (V, V) argument matrix is built with the series variable on the
    diagonal and ones elsewhere, so each column evaluates the document PGF
    with a different single word type "switched on".  All columns share the
    truncation order ``max(y)``; entry ``i`` of the result is the coefficient
    of degree ``y[i]`` of the i-th PGF.

    Reads the notebook-level globals ``V`` (vocabulary size) and ``N``
    (number of tokens per document).
    """
    n_coeffs = np.max(y) + 1                 # enough coefficients for the largest count
    series_var = UTPM(np.zeros((n_coeffs, 1)))
    if n_coeffs > 1:
        series_var.data[1, :] = 1

    pgf_args = algopy.ones((V, V), dtype=series_var)
    np.fill_diagonal(pgf_args, series_var)
    per_topic = phi.dot(pgf_args)
    per_token = theta.dot(per_topic)
    doc_pgfs = np.power(per_token, N)
    return [doc_pgf.data[:, 0][y[idx]] for idx, doc_pgf in enumerate(doc_pgfs)]

# Observe 2 tokens of w_0, 1 token of w_1, and 1 token of w_2 (counts sum to N = 4)
y = np.array([2, 1, 1])
print 'PGF marginals:', pgf_marginals(y, phi, theta)
# Reference: pick true_marginals[v, y[v]] for every word type v
print 'True marginals:', true_marginals[np.arange(V), y]

PGF marginals: [0.0053473798060094981, 0.39462619988631753, 0.02841427351342719]
True marginals: [ 0.00534738  0.3946262   0.02841427]


In [6]:
# Observe 1 w_0, 0 w_1, and 3 w_2's (includes a zero count)
y = np.array([1, 0, 3])
print 'PGF marginals:', pgf_marginals(y, phi, theta)
# Reference: pick true_marginals[v, y[v]] for every word type v
print 'True marginals:', true_marginals[np.arange(V), y]

PGF marginals: [0.11217076274951471, 0.45889452046541912, 0.41322547091094586]
True marginals: [ 0.11217076  0.45889452  0.41322547]


In [7]:
# Observe 0 w_0, 4 w_1's, and 0 w_2 (all N = 4 tokens are the same word type)
y = np.array([0, 4, 0])
print 'PGF marginals:', pgf_marginals(y, phi, theta)
# Reference: pick true_marginals[v, y[v]] for every word type v
print 'True marginals:', true_marginals[np.arange(V), y]

PGF marginals: [0.88236765988183263, 0.00098031371918897445, 0.001862735974190883]
True marginals: [ 0.88236766  0.00098031  0.00186274]


### PGF joint marginals

In [8]:
def pgf_joint_marginal(v, w, y_v, y_w, phi, theta):
    """Compute P(Y_v = y_v, Y_w = y_w) for two word types ``v`` and ``w``.

    The document PGF ``(theta . phi . u) ** N`` is evaluated with pyaudi
    gduals: ``u[v]`` and ``u[w]`` are the symbolic variables "v" and "w"
    expanded around 0, every other entry of ``u`` is 1.  The mixed partial
    derivative of order ``(y_v, y_w)`` divided by ``y_v! * y_w!`` is returned.

    Reads the notebook-level global ``N`` (number of tokens per document).
    """
    _, vocab_size = phi.shape
    max_order = y_v + y_w                    # truncation order of the expansions

    pgf_args = [1] * vocab_size
    pgf_args[v] = gdual(0, "v", max_order)
    pgf_args[w] = gdual(0, "w", max_order)

    per_topic = phi.dot(pgf_args)
    per_token = theta.dot(per_topic)
    doc_pgf = np.power(per_token, N)

    # Taylor coefficient = mixed derivative / product of factorials.
    normaliser = factorial(y_v) * factorial(y_w)
    return doc_pgf.get_derivative([y_v, y_w])/normaliser

def true_joint_marginal(v, w, y_v, y_w, N, p):
    """Exact P(Y_v = y_v, Y_w = y_w) for a document of ``N`` tokens.

    All word types other than ``v`` and ``w`` are collapsed into one "rest"
    category, which turns the joint into a three-category multinomial pmf.

    Parameters
    ----------
    v, w : int
        Indices of the two (distinct) word types.
    y_v, y_w : int
        Queried counts of word types ``v`` and ``w``.
    N : int
        Number of tokens in the document.
    p : sequence of float
        Per-token probability of every word type (1-D).

    Returns
    -------
    float
        The multinomial probability of observing the given counts.

    Raises
    ------
    ValueError
        If ``v == w``; the three-category collapse would count that word twice.
    """
    if v == w:
        raise ValueError("v and w must be different word indices")
    word_probs = np.asarray(p)
    counts = np.array([y_v, y_w, N - (y_v + y_w)])
    # Last category: every word type that is neither v nor w.
    probs = np.array([word_probs[v], word_probs[w],
                      1 - (word_probs[v] + word_probs[w])])
    return multinomial.pmf(counts, n=N, p=probs)

y_v, v = 4, 0 # 4 counts of word 0
y_w, w = 0, 2 # 0 counts of word 2
print 'PGF marginal:', pgf_joint_marginal(v, w, y_v, y_w, phi, theta)
# p is stored as a (V, 1) column; flatten it so p[v] and p[w] are scalars
print 'True marginal:', true_joint_marginal(v, w, y_v, y_w, N, p.reshape(-1))

PGF marginal: 9.00181286536e-07
True marginal: 9.00181286536e-07


## With growth
Add growth:
- $n = $ number of tokens in the document (observed)
- $m = \sum_{i=1}^n x_i$, where $x_i \sim \mathrm{Logarithmic}(\rho)$ (log-series distribution with PGF $\ln(1 - \rho s) / \ln(1 - \rho)$)
- $\mathbf{y} \sim \mathrm{Mult}(m, \boldsymbol{\theta}^T \mathbf{\Phi})$

In [9]:
rho = 0.3 # growth parameter
def pgf_marginal_growth(v, y_v, phi, theta, rho):
    """Compute P(Y_v = y_v) = P(count(w_v) = y_v) under the growth model.

    Same construction as the no-growth marginal, except that the per-token
    PGF ``s`` is first passed through ``log(1 - rho*s) / log(1 - rho)``
    before being raised to the power ``N``.  The coefficient of degree
    ``y_v`` of the resulting truncated Taylor series is returned.

    Reads the notebook-level globals ``V`` (vocabulary size) and ``N``
    (number of observed tokens); ``rho`` is the argument, not the global.
    """
    n_coeffs = y_v + 1                       # keep Taylor coefficients 0..y_v
    series_var = UTPM(np.zeros((n_coeffs, 1)))
    if n_coeffs > 1:
        series_var.data[1, 0] = 1

    pgf_args = algopy.ones(V, dtype=series_var)
    pgf_args[v] = series_var
    per_topic = phi.dot(pgf_args)
    per_token = theta.dot(per_topic)
    grown = np.log(1 - rho*per_token) / np.log(1 - rho)
    doc_pgf = np.power(grown, N)
    return doc_pgf.data[:, 0][y_v]

# Observe 3 tokens of word type w_0
y_v, v = 3, 0
print 'PGF marginal:', pgf_marginal_growth(v, y_v, phi, theta, rho)
# No reference value is printed here: true_marginals holds the binomial
# (no-growth) probabilities -- presumably why the check below was disabled.
#print 'True marginal:', true_marginals[0, 3]

PGF marginal: 0.000291974822456


In [10]:
def pgf_marginals_growth(y, phi, theta, rho):
    """Compute [P(Y_0 = y[0]), ..., P(Y_{V-1} = y[V-1])] under the growth model.

    Vectorised over word types: a (V, V) argument matrix carries the series
    variable on its diagonal and ones elsewhere, the per-token PGF ``s`` is
    passed through ``log(1 - rho*s) / log(1 - rho)`` and raised to the power
    ``N``.  Entry ``i`` of the result is the coefficient of degree ``y[i]``
    of the i-th series.

    Reads the notebook-level globals ``V`` (vocabulary size) and ``N``
    (number of observed tokens).
    """
    n_coeffs = np.max(y) + 1                 # enough coefficients for the largest count
    series_var = UTPM(np.zeros((n_coeffs, 1)))
    if n_coeffs > 1:
        series_var.data[1, :] = 1

    pgf_args = algopy.ones((V, V), dtype=series_var)
    np.fill_diagonal(pgf_args, series_var)
    per_topic = phi.dot(pgf_args)
    per_token = theta.dot(per_topic)
    grown = np.log(1 - rho*per_token) / np.log(1 - rho)
    doc_pgfs = np.power(grown, N)
    return [doc_pgf.data[:, 0][y[idx]] for idx, doc_pgf in enumerate(doc_pgfs)]

# Observe 3 tokens of w_0, 1 token of w_1, and 1 token of w_2
# (5 counts in total, i.e. more than the N = 4 observed tokens)
y = np.array([3, 1, 1])
print 'PGF marginals:', pgf_marginals_growth(y, phi, theta, rho)

PGF marginals: [0.00029197482245636443, 0.39729172458116163, 0.016694560632954176]


## NB LDA

In [11]:
# Gamma prior on each topic weight theta_k
a = 1    # shape parameter of theta_k
b = 0.5  # rate parameter of theta_k
# Special case: every topic spreads its mass uniformly over the V word types,
# which gives closed-form reference values for the checks below.
phi_special = np.ones((K, V)) / float(V)

### Single-word marginals

In [12]:
def log_pgf(s, p):
    """Apply ``log(1 - p*s_k) / log(1 - p)`` to every element ``s_k`` of ``s``.

    The numerator is built element by element with pyaudi's ``log`` because
    a vectorised ``log(1 - p*s)`` does not work on an array of gduals.
    Returns a NumPy array with one entry per element of ``s``.
    """
    numerators = []
    for s_k in list(s):
        numerators.append(log(1 - p*s_k))
    return np.array(numerators) / np.log(1 - p)

def pgf_marginal_nb(v, y_v, phi, a, b):
    """Compute P(Y_v = y_v) under the NB-LDA model via its joint PGF.

    ``u[v]`` is the pyaudi variable "v" expanded around 0 up to order
    ``y_v``; every other entry of ``u`` is 1.  The joint PGF is
    ``exp(a * (sum_k s_k - K) * log(1 + 1/b))`` with
    ``s = log_pgf(phi . u, 1/(1+b))``; its ``y_v``-th derivative divided by
    ``y_v!`` is returned.
    """
    n_topics, vocab_size = phi.shape

    # Argument vector: symbolic at position v, constant 1 elsewhere
    pgf_args = [1] * vocab_size
    pgf_args[v] = gdual(0, "v", y_v)

    # Joint PGF
    per_topic = log_pgf(phi.dot(pgf_args), 1.0 / (1+b))
    exponent = a * (np.sum(per_topic) - n_topics) * np.log(1 + (1.0/b))
    joint_pgf = exp(exponent)

    # Taylor coefficient of degree y_v
    return joint_pgf.get_derivative([y_v])/factorial(y_v)

def true_marginal_nb(v, y_v, phi, a, b):
    """Exact P(Y_v = y_v) as the convolution of one negative binomial per topic.

    Topic ``k`` contributes a count distributed as
    ``nbinom(a, 1 - phi[k, v] / (b + phi[k, v]))``; ``Y_v`` is the sum of these
    K counts, so its pmf is the K-fold convolution of the per-topic pmfs.
    Works for any number of topics K >= 1 (the original was limited to K = 2).

    Parameters
    ----------
    v : int
        Word-type index (column of ``phi``).
    y_v : int
        Queried count, >= 0.
    phi : array-like, shape (K, V)
        Topic-word probabilities.
    a, b : float
        Shape and rate parameters of the gamma prior on the topic weights.
    """
    phi_v = np.asarray(phi)[:, v].reshape((-1, 1))
    support = np.arange(y_v + 1)
    # Row k holds P(count from topic k = 0..y_v); shape (K, y_v + 1) by broadcasting.
    per_topic_pmfs = nbinom.pmf(support, a, 1 - (phi_v/(b+phi_v)))
    total_pmf = per_topic_pmfs[0]
    for topic_pmf in per_topic_pmfs[1:]:
        # Entries 0..y_v of a convolution only depend on entries 0..y_v of its
        # inputs, so truncating after every step loses nothing we need.
        total_pmf = np.convolve(total_pmf, topic_pmf)[:y_v + 1]
    return total_pmf[y_v]

y_v, v = 5, 1  # query: 5 tokens of word type 1

# Special case
# (uniform phi_special: the reference is a single negative binomial with shape K*a)
print 'PGF marginal:', pgf_marginal_nb(v, y_v, phi_special, a, b)
print 'True marginal:', nbinom.pmf(y_v, K*a, 1 - (1.0 / (V*b + 1)))

# General case
# (random phi: the reference convolves the per-topic negative binomials)
print 'PGF marginal:', pgf_marginal_nb(v, y_v, phi, a, b)
print 'True marginal:', true_marginal_nb(v, y_v, phi, a, b)

PGF marginal: 0.0221184
True marginal: 0.0221184
PGF marginal: 0.0010046740107
True marginal: 0.0010046740107


### Joint marginals

In [13]:
def pgf_joint_marginal_nb(y, phi, a, b):
    """Compute P((Y_1, Y_2, Y_3) = y) under the NB-LDA model; assumes V = 3.

    Each of the three word types gets its own pyaudi variable ("v", "w",
    "x"), expanded around 0 up to order ``sum(y)``.  The joint PGF
    ``exp(a * (sum_k s_k - K) * log(1 + 1/b))`` with
    ``s = log_pgf(phi . u, 1/(1+b))`` is differentiated ``y`` times and
    divided by the product of the factorials of ``y``.
    """
    max_order = np.sum(y)
    n_topics, _ = phi.shape

    # One symbolic variable per word type
    pgf_args = [gdual(0, symbol, max_order) for symbol in ("v", "w", "x")]

    # Joint PGF
    per_topic = log_pgf(phi.dot(pgf_args), 1.0 / (1+b))
    exponent = a * (np.sum(per_topic) - n_topics) * np.log(1 + (1.0/b))
    joint_pgf = exp(exponent)

    # Mixed Taylor coefficient of multi-degree y
    return joint_pgf.get_derivative(y)/np.prod(factorial(y))

y = np.array([1, 2, 1]) # observe 1 word 0, 2 word 1's, 1 word 2
print 'PGF marginal:', pgf_joint_marginal_nb(y, phi_special, a, b)
print 'True marginal:', np.prod(nbinom.pmf(y, K*a, 1 - (1.0 / (V*b + 1))))

PGF marginal: 0.0162576842961
True marginal: 0.0143327232


### Deep NB LDA

For d = 1 to D:
- $l_k^{(0)} \sim Poisson(a \ln(1 + 1/b))$
- $y_k^{(d)}|l_k^{(d-1)} \sim SumLog(l_k^{(d-1)}, 1/(1 + b))$ (I think this is wrong for $d > 1$)
- $\{y_{kv}^{(d)}\}_v \sim Mult(y_k^{(d)}, \{\phi_{kv}^{(d)}\}_v)$
- $l_v^{(d)} = \sum_k y_{kv}^{(d)}$
- $y_v = l_v^{(D)}$

In [14]:
def _draw_layer(n_rows, n_cols, concentration):
    """Draw ``n_rows`` symmetric-Dirichlet rows of length ``n_cols``."""
    return rn.dirichlet(np.ones(n_cols) * concentration, size=n_rows)

# D = 4 layers; layer widths from the top layer (K0) down to the vocabulary (V)
K0, K1, K2, K3, V = 4, 4, 5, 5, 3
phi1 = _draw_layer(K0, K1, 0.8) # (K0, K1)
phi2 = _draw_layer(K1, K2, 0.5) # (K1, K2)
phi3 = _draw_layer(K2, K3, 0.2) # (K2, K3)
phi4 = _draw_layer(K3, V, 0.1)  # (K3, V)

# Ordered from the top layer to the word layer
phi_layer = [phi1, phi2, phi3, phi4]

In [15]:
def pgf_marginal_deep(v, y_v, phi, a, b):
    """Compute P(Y_v = y_v) for the deep NB-LDA model.

    ``phi`` is a list of layer matrices ordered from the top layer to the
    word layer.  Starting from the word-level argument vector (pyaudi
    variable "v" at position ``v``, ones elsewhere), the PGF is composed from
    the last layer back to the first: each step multiplies by the layer
    matrix and applies ``log_pgf(., 1/(1+b))``.  The top-level PGF is
    ``exp(a * (sum(s) - K0) * log(1 + 1/b))``; its ``y_v``-th derivative
    divided by ``y_v!`` is returned.
    """
    n_top, _ = phi[0].shape
    _, vocab_size = phi[-1].shape

    # Word-level argument vector
    state = [1] * vocab_size
    state[v] = gdual(0, "v", y_v)

    # Compose the layers, word layer first
    log_param = 1.0/ (1+b)
    for layer in reversed(phi):
        state = log_pgf(layer.dot(state), log_param)
    joint_pgf = exp(a * (np.sum(state) - n_top) * np.log(1 + (1.0/b)))

    # Taylor coefficient of degree y_v
    return joint_pgf.get_derivative([y_v])/factorial(y_v)

# Query: 60 tokens of word type 1 (no closed-form reference is printed for the deep model)
v, y_v = 1, 60
print 'PGF marginal:', pgf_marginal_deep(v, y_v, phi_layer, a, b)

PGF marginal: 0.00131163596697


In [16]:
def pgf_joint_marginal_deep(y, phi, a, b):
    """Compute the joint probability of the word counts ``y`` in the deep model.

    Hard-wired to three word types: the word-level arguments are the pyaudi
    variables "v", "w" and "x", each expanded around 0 up to order
    ``sum(y)``.  The PGF is composed from the last layer of ``phi`` back to
    the first (matrix product followed by ``log_pgf(., 1/(1+b))``), wrapped
    in ``exp(a * (sum(s) - K0) * log(1 + 1/b))``, differentiated ``y`` times
    and divided by the product of the factorials of ``y``.
    """
    max_order = np.sum(y)
    n_top, _ = phi[0].shape
    _, vocab_size = phi[-1].shape

    # One symbolic variable per word type
    state = [gdual(0, symbol, max_order) for symbol in ("v", "w", "x")]

    # Compose the layers, word layer first
    log_param = 1.0/ (1+b)
    for layer in reversed(phi):
        state = log_pgf(layer.dot(state), log_param)
    joint_pgf = exp(a * (np.sum(state) - n_top) * np.log(1 + (1.0/b)))

    # Mixed Taylor coefficient of multi-degree y
    return joint_pgf.get_derivative(y)/np.prod(factorial(y))

# Observe 5 tokens of word 0, 20 of word 1 and 10 of word 2
y = np.array([5, 20, 10])
print 'PGF marginal:', pgf_joint_marginal_deep(y, phi_layer, a, b)

PGF marginal: 6.85912292561e-05
