Skip to content

Commit

Permalink
finish lda
Browse files Browse the repository at this point in the history
  • Loading branch information
sk1ll3r committed Jun 17, 2015
1 parent f9a34a0 commit 8f0a8c8
Show file tree
Hide file tree
Showing 6 changed files with 355 additions and 42 deletions.
Binary file modified bayes.pdf
Binary file not shown.
6 changes: 3 additions & 3 deletions models/lda.tex
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ \section{Latent Dirichlet allocation}
\begin{align}
\vec \alpha &\in (0, \infty)^T, \text{ usually} = \alpha\vec 1 \\
\vec \gamma &\in (0, \infty)^W, \text{ usually} = \gamma\vec 1 \\
\vec \pi_d \given \alpha &\sim \Dir(\vec \alpha), d = 1, \dotsc, D \\
\vec \pi_d \given \vec \alpha &\sim \Dir(\vec \alpha), d = 1, \dotsc, D \\
\vec \beta_t \given \vec \gamma &\sim \Dir(\vec \gamma), t = 1, \dotsc, T \\
z_{n, d} \given \{\vec\pi_{\delta}\} &\sim \Cat(\vec \pi_d), n = 1, \dotsc, N_d, d = 1, \dotsc, D \\
w_{n, d} \given z_{n, d}, \{\vec \beta_t\} &\sim \Cat\left(\vec \beta_{z_{n, d}}\right), n = 1, \dotsc, N_d, d = 1, \dotsc, D.
Expand All @@ -25,8 +25,8 @@ \section{Latent Dirichlet allocation}
The joint probability is
\begin{align}
\hspace{2em}&\hspace{-2em}
p\left(\{\vec \pi_d\}, \{z_{n, d}\}, \{w_{n, d}\}, \{\beta_t\} \given \alpha, \gamma\right) \\
&= \left(\prod_d p(\vec \pi_d \given \alpha)\right) \left(\prod_n \prod_d p(z_{n,d} \given \vec \pi_d)\right) \left(\prod_n \prod_d p(w_{n, d} \given z_{n, d}, \{\beta_t\})\right) \left(\prod_t p(\beta_t \given \gamma)\right) \\
p\left(\{\vec \pi_d\}, \{z_{n, d}\}, \{w_{n, d}\}, \{\beta_t\} \given \vec \alpha, \vec \gamma\right) \\
&= \left(\prod_d p(\vec \pi_d \given \vec \alpha)\right) \left(\prod_n \prod_d p(z_{n,d} \given \vec \pi_d)\right) \left(\prod_n \prod_d p(w_{n, d} \given z_{n, d}, \{\vec \beta_t\})\right) \left(\prod_t p(\vec \beta_t \given \vec \gamma)\right) \\
&= \left(\prod_d \Dir(\vec \pi_d \given \vec \alpha)\right) \left(\prod_n \prod_d \Cat(z_{n, d} \given \vec \pi_d) \Cat(w_{n, d} \given \vec \beta_{z_{n, d}})\right) \left(\prod_t \Dir(\vec \beta_t \given \vec \gamma)\right)
\end{align}

Expand Down
3 changes: 1 addition & 2 deletions models/lda/collapsed-gibbs.tex
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@ \subsection{Collapsed Gibbs sampling for LDA}
\input{models/lda/collapsed-gibbs/marginalise}
\input{models/lda/collapsed-gibbs/beta-integral}
\input{models/lda/collapsed-gibbs/pi-integral}

\subsubsection{Gibbs updates}
\input{models/lda/collapsed-gibbs/gibbs-updates}
311 changes: 311 additions & 0 deletions models/lda/collapsed-gibbs/gibbs-updates.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
\subsubsection{Gibbs updates: word topics $z_{n, d}$}
We know that the posterior density $p\left(\{z_{n, d}\} \given \{w_{n, d}\}; \vec \alpha, \vec \gamma\right)$ is proportional to the joint $p\left(\{z_{n, d}\}, \{w_{n, d}\} \given \vec \alpha, \vec \gamma\right)$. Hence we can write
\begin{align}
p\left(\{z_{n, d}\} \given \{w_{n, d}\}; \vec \alpha, \vec \gamma\right)
&\propto
\left[
\left(
\frac
{
\Gamma(W \gamma)
}
{
\Gamma(\gamma)^W
}
\right)^T
\prod_t
\frac
{
\prod_w \Gamma\left(\gamma + \zeta_{w, t}\right)
}
{
\Gamma\left(C_t + W\gamma\right)
}
\right]
\left[
\left(
\frac
{
\Gamma(T \alpha)
}
{
\Gamma(\alpha)^T
}
\right)^D
\prod_d
\frac
{
\prod_t \Gamma\left(\alpha + \xi_{d, t}\right)
}
{
\Gamma\left(N_d + T \alpha\right)
}
\right].
\end{align}

Piggybacking on this result, we express $p\left(\{z_{\eta, \delta}\} \setminus z_{n, d} \given \{w_{\eta, \delta}\}; \vec \alpha, \vec \gamma\right)$ for a fixed word position $(n, d)$. We reuse the formula above with $\{z_{\eta, \delta}\} \setminus z_{n, d}$ playing the role of $\{z_{n, d}\}$, i.e.\ with every count computed as if word $(n, d)$ had been removed:
\begin{align}
&p\left(\{z_{\eta, \delta}\} \setminus z_{n, d} \given \{w_{\eta, \delta}\}; \vec \alpha, \vec \gamma\right) \nonumber\\
&\propto
\left[
\left(
\frac
{
\Gamma({W^-} \gamma)
}
{
\Gamma(\gamma)^{W^-}
}
\right)^{T^-}
\prod_t
\frac
{
\prod_w \Gamma\left(\gamma + \zeta_{w, t}^-\right)
}
{
\Gamma\left(C_t^- + {W^-}\gamma\right)
}
\right]
\left[
\left(
\frac
{
\Gamma({T^-} \alpha)
}
{
\Gamma(\alpha)^{T^-}
}
\right)^{D^-}
\prod_d
\frac
{
\prod_t \Gamma\left(\alpha + \xi_{d, t}^-\right)
}
{
\Gamma\left(N_d^- + {T^-} \alpha\right)
}
\right],
\end{align}
where
\begin{align}
W^- &= W \\
T^- &= T \\
D^- &= D \\
\zeta_{w, t}^- &= \sum_{\substack{\eta, \delta \\ z_{\eta, \delta} = t \\ (\eta, \delta) \neq (n, d)}} \I(w_{\eta, \delta} = w) \\
&=
\begin{cases}
\zeta_{w, t} - 1 & \text{ if $(w_{n, d}, z_{n, d}) = (w, t)$,} \\
\zeta_{w, t} & \text{ otherwise}.
\end{cases} \\
C_t^- &= \sum_{\substack{\eta, \delta \\ (\eta, \delta) \neq (n, d)}} \I(z_{\eta, \delta} = t) \\
&=
\begin{cases}
C_t - 1 & \text{ if $z_{n, d} = t$} \\
C_t & \text{ otherwise}.
\end{cases} \\
\xi_{\delta, t}^- &= \sum_{\substack{\eta \\ (\eta, \delta) \neq (n, d)}} \I(z_{\eta, \delta} = t) \\
&=
\begin{cases}
\xi_{\delta, t} - 1 & \text{ if $(\delta, z_{n, d}) = (d, t)$} \\
\xi_{\delta, t} & \text{ otherwise.}
\end{cases} \\
N_{\delta}^- &=
\begin{cases}
N_{\delta} - 1 & \text{ if $\delta = d$} \\
N_{\delta} & \text{ otherwise.}
\end{cases}
\end{align}

Hence, if $\omega := w_{n, d}$,
\begin{align}
&p\left(z_{n, d} = \tau \given \{z_{\eta, \delta}\} \setminus z_{n, d}, \{w_{\eta, \delta}\}; \vec \alpha, \vec \gamma\right) \\
&\propto \frac{p\left(\{z_{\eta, \delta}\} \given \{w_{\eta, \delta}\}; \vec \alpha, \vec \gamma\right)}{p\left(\{z_{\eta, \delta}\} \setminus z_{n, d} \given \{w_{\eta, \delta}\}; \vec \alpha, \vec \gamma\right)} \\
&=
\frac{
\left[
\left(
\frac
{
\Gamma(W \gamma)
}
{
\Gamma(\gamma)^W
}
\right)^T
\prod_t
\frac
{
\prod_w \Gamma\left(\gamma + \zeta_{w, t}\right)
}
{
\Gamma\left(C_t + W\gamma\right)
}
\right]
\left[
\left(
\frac
{
\Gamma(T \alpha)
}
{
\Gamma(\alpha)^T
}
\right)^D
\prod_d
\frac
{
\prod_t \Gamma\left(\alpha + \xi_{d, t}\right)
}
{
\Gamma\left(N_d + T \alpha\right)
}
\right]
}{
\left[
\left(
\frac
{
\Gamma({W^-} \gamma)
}
{
\Gamma(\gamma)^{W^-}
}
\right)^{T^-}
\prod_t
\frac
{
\prod_w \Gamma\left(\gamma + \zeta_{w, t}^-\right)
}
{
\Gamma\left(C_t^- + {W^-}\gamma\right)
}
\right]
\left[
\left(
\frac
{
\Gamma({T^-} \alpha)
}
{
\Gamma(\alpha)^{T^-}
}
\right)^{D^-}
\prod_d
\frac
{
\prod_t \Gamma\left(\alpha + \xi_{d, t}^-\right)
}
{
\Gamma\left(N_d^- + {T^-} \alpha\right)
}
\right]
} \\
&=
\frac{
\left[
\prod_t
\frac
{
\prod_w \Gamma\left(\gamma + \zeta_{w, t}\right)
}
{
\Gamma\left(C_t + W\gamma\right)
}
\right]
\left[
\prod_d
\frac
{
\prod_t \Gamma\left(\alpha + \xi_{d, t}\right)
}
{
\Gamma\left(N_d + T \alpha\right)
}
\right]
}{
\left[
\prod_t
\frac
{
\prod_w \Gamma\left(\gamma + \zeta_{w, t}^-\right)
}
{
\Gamma\left(C_t^- + {W^-}\gamma\right)
}
\right]
\left[
\prod_d
\frac
{
\prod_t \Gamma\left(\alpha + \xi_{d, t}^-\right)
}
{
\Gamma\left(N_d^- + {T^-} \alpha\right)
}
\right]
} \\
&=
\frac{
\left[
\prod_t \prod_w \frac{
\Gamma(\gamma + \zeta_{w, t})
}{
\Gamma(\gamma + \zeta_{w, t}^-)
}
\right]
\left[
\prod_d \prod_t \frac{
\Gamma(\alpha + \xi_{d, t})
}{
\Gamma(\alpha + \xi_{d, t}^-)
}
\right]
}{
\left[
\prod_t \frac{
\Gamma(C_t + W \gamma)
}{
\Gamma(C_t^- + W \gamma)
}
\right]
\left[
\prod_d \frac{
\Gamma(N_d + T \alpha)
}{
\Gamma(N_d^- + T \alpha)
}
\right]
} \\
&=
\frac{
\frac{
\Gamma(\gamma + \zeta_{\omega, \tau})
}{
\Gamma(\gamma + \zeta_{\omega, \tau} - 1)
}
\cdot
\frac{
\Gamma(\alpha + \xi_{d, \tau})
}{
\Gamma(\alpha + \xi_{d, \tau} - 1)
}
}{
\frac{
\Gamma(C_{\tau} + W \gamma)
}{
\Gamma(C_{\tau} - 1 + W \gamma)
}
\cdot
\frac{
\Gamma(N_d + T \alpha)
}{
\Gamma(N_d - 1 + T \alpha)
}
} \\
&=
\frac{
(\gamma + \zeta_{\omega, \tau} - 1)(\alpha + \xi_{d, \tau} - 1)
}{
(W \gamma + C_{\tau} - 1)(N_d - 1 + T \alpha)
}.
\end{align}
Loading

0 comments on commit 8f0a8c8

Please sign in to comment.