In [None]:
import daft
import matplotlib.pyplot as plt
%matplotlib notebook

In [None]:
# Build and render a daft diagram of the model: y_i depends on x_i, w, b and
# the noise variance; w and b each have a mean and a variance hyperparameter.

# Canvas: overall shape, origin, grid scale, label font size, observed styling.
pgm = daft.PGM(
    shape=[4.5, 3.5],
    origin=[0, 0],
    grid_unit=4,
    observed_style='shaded',
    label_params={'fontsize': 18},
)

# Per-observation nodes: y_i is flagged observed; x_i uses the fixed style
# with a label offset of (0, -30).
pgm.add_node(daft.Node("y", r"$y_i$", 1, 1.4, observed=True, scale=2))
pgm.add_node(
    daft.Node("x", r"$\mathbf{x}_{i}$", 0.25, 1.4,
              fixed=True, offset=(0, -30), scale=2)
)

# Plate captioned with the index range i = 1..N.
pgm.add_plate(
    daft.Plate([0.0, 1, 1.5, 0.9], shift=-0.1, label=r"$i=1,2,\ldots,N$")
)

# Parameters w and b. Each gets two fixed-style parents placed 0.2 to the
# left (mean) and 0.2 to the right (variance label) of the parameter's x.
for param, param_label, mean_label, var_label, centre_x in (
    ("w", r"$w$", r"$\mu_w$", r"$\sigma_w^{2}$", 0.5),
    ("b", r"$b$", r"$\mu_b$", r"$\sigma_b^{2}$", 1.5),
):
    pgm.add_node(daft.Node(param, param_label, centre_x, 2.4, scale=2))
    pgm.add_node(
        daft.Node(param + "_mu", mean_label, centre_x - 0.2, 3,
                  scale=2, fixed=True, offset=(0, 10))
    )
    pgm.add_node(
        daft.Node(param + "_sd", var_label, centre_x + 0.2, 3,
                  scale=2, fixed=True, offset=(0, 6))
    )

# Noise variance, drawn in the fixed style at the same height as y_i.
pgm.add_node(
    daft.Node("epsilon", r"$\sigma_\epsilon^2$", 2, 1.4,
              scale=2, fixed=True, offset=(0, 10))
)

# Directed edges, listed as (parent, child) in the order they are added.
for parent, child in (
    ("b_mu", "b"),
    ("b_sd", "b"),
    ("b", "y"),
    ("w_mu", "w"),
    ("w_sd", "w"),
    ("w", "y"),
    ("x", "y"),
    ("epsilon", "y"),
):
    pgm.add_edge(parent, child)

# Legend: one unlabeled scale-1 sample node per style, each followed by its
# caption. x0 is the reference x position for the legend column.
x0 = 3
for legend_key, row_y, style_flags, caption in (
    ("latent", 1, {}, "= latent variable"),
    ("observed", 1.3, {"observed": True}, "= observed variable"),
    ("hyperparameter", 1.6, {"fixed": True}, "= hyperparameter"),
):
    pgm.add_node(legend_key, "", x0 - 0.5, row_y, 1, **style_flags)
    pgm.add_text(x0 - 0.225, row_y - 0.08, caption)

pgm.render()

## Link dump: background reading on maximum likelihood, KL divergence, and variational inference

https://www.inference.vc/maximum-likelihood-for-representation-learning-2/

https://www.reddit.com/r/MachineLearning/comments/9g1rxs/d_how_is_the_log_marginal_likelihood_of/

https://colinraffel.com/blog/gans-and-divergence-minimization.html

https://www.inference.vc/maximum-likelihood-for-representation-learning-2/

https://medium.com/@jonathan_hui/gan-why-it-is-so-hard-to-train-generative-advisory-networks-819a86b3750b


https://www.tuananhle.co.uk/notes/reverse-forward-kl.html

https://blog.evjang.com/2016/08/variational-bayes.html

https://dibyaghosh.com/blog/probability/kldivergence.html

https://wiseodd.github.io/techblog/2016/12/21/forward-reverse-kl/

http://willwolf.io/2018/03/31/gaussian-algebra-to-gaussian-processes-part-1/

http://andymiller.github.io/2016/11/23/vb.html

http://mlg.eng.cam.ac.uk/zoubin/talks/uai05tutorial-b.pdf