Commit
Merge remote branch 'origin/master'
jsalvatier committed May 11, 2012
2 parents 75fda8d + 517d3b2 commit 623dbaa
Showing 3 changed files with 94 additions and 1 deletion.
88 changes: 88 additions & 0 deletions examples/tutorial.py
@@ -0,0 +1,88 @@
from mcex import *
import numpy as np
import theano

data = np.random.normal(size = (2, 20))
"""
the first thing you interact with is the model class
The model class is a dead simple class (see core.py), it contains a list of random variables (model.vars)
for parameters and a list of factors that go into the posterior (model.factors), and no methods.
"""
model = Model()
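# For orientation, Model amounts to something like the following
# (a paraphrase of the description above, not the verbatim core.py source):
#
#   class Model(object):
#       def __init__(self):
#           self.vars = []     # free random variables (the parameters)
#           self.factors = []  # log-likelihood terms of the posterior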

"""
To add a parameter to the model we use a function, in this case AddVar.
We must pass AddVar the model we are adding a parameter to, the name of the parameter,
the prior distribution for the parameter and optionally the shape and dtype of the parameter.
The Addvar returns a Theano variable which represents the parameter we have added to the model.
The AddVar function is also very simple (see core.py), it creates a Theano variable, adds it to the model
list and adds the likelihood factor to the model list.
The distribution functions (see distributions.py), such as the one we see here, Normal(mu, tau), take some parameters and return
a function that takes a value for the parameter (here, x) and returns the log likelihood. The parameters to
distributions may be either model parameters (see AddData section) or constants.
"""
x = AddVar(model, 'x', Normal(mu = .5, tau = 2.**-2), (2,1))
z = AddVar(model, 'z', Beta(alpha = 10, beta =5.5))
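# For intuition, a distribution constructor is essentially a closure
# (a sketch of the idea using the normal log density; see distributions.py
# for the actual implementation):
#
#   def Normal(mu, tau):
#       def logp(value):
#           # elementwise log N(value | mu, 1/tau), summed into a scalar
#           return ((-tau * (value - mu) ** 2 + np.log(tau / np.pi / 2.)) / 2.).sum()
#       return logp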


"""
In order to add observed data to the model, we use the AddData function. It works much the same
as the AddVar function. It takes the model we're adding the data to, the data, and the distribution for that data.
"""
AddData(model, data, Normal(mu = x, tau = .75**-2))
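# AddData is even simpler than AddVar; in core.py it is a single line:
#   model.factors.append(distribution(data))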


"""
We need to specify a point in parameter space as our starting point for fitting.
To specify a point in the parameter space, we use a
dictionary of parameter names-> parameter values
"""
chain = {'x' : np.array([[0.2],[.3]]),
         'z' : np.array([.5])}

"""
The maximum a posteriori point (MAP) may be a better starting point than the one we have chosen.
The find_MAP function uses an optimization algorithm on the log posterior to find and return the local maximum.
"""
chain = find_MAP(model, chain)
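# Under the hood this is ordinary numerical optimization: minimize the negative
# log posterior over the parameter values, starting from `chain` (a description
# of the idea; see find_MAP in mcex for the actual algorithm used).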

"""
The step method we will use (Hamiltonian Monte Carlo) requires a covariance matrix. It is helpful if it
approximates the true covariance matrix. For distributions which are somewhat normal-like the inverse hessian
at the MAP (or points close to it) will approximate this covariance matrix.
the approx_cov() function works similarly to the find_MAP function and returns the inverse hessian at a given point.
"""
hmc_cov = approx_cov(model, chain)
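# Why the inverse Hessian works: for a Gaussian log posterior,
#   logp(x) = -(1/2) * (x - mu)' C^-1 (x - mu) + const,
# the Hessian of logp is -C^-1, so the covariance C is the inverse of the
# negative Hessian. Near the MAP, a normal-like posterior is approximately
# Gaussian, which makes this a reasonable covariance estimate.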


"""
To sample from the posterior, we must choose a sampling method.
"""
step_method = hmc_step(model, model.vars, hmc_cov)
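# (In HMC this covariance typically scales the momentum distribution and hence
# the leapfrog proposals; an assumption about this implementation, see hmc_step.)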

ndraw = 3000  # use an int here: it sizes the storage arrays below
"""
In order to store the samples we get, we need a storage object. NpHistory stores the samples in history arrays.
It needs a list of variables to care about and a maximum number of samples to store.
"""
history = NpHistory(model.vars, ndraw)
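# A plausible sketch of what NpHistory does (an assumption, not the verbatim
# source): preallocate, for each variable, an array of shape
# (ndraw,) + variable_shape and write each new sample into the next row.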

"""
To use the step functions, we use the sample function. It takes a number of steps to do,
a step method, a starting point and a storage object for the resulting samples. It returns the final state of
the step method and the total time in seconds that the sampling took. The samples are now in history.
"""
state, t = sample(ndraw, step_method, chain, history)
print "took :", t, " seconds"

"""
To use more than one sampler, look at the compound_step step method which takes a list of step methods.
The history object can be indexed by names of variables returning an array with the first index being the sample
and the other indexes the shape of the parameter. Thus the shape of history['x'].shape == (ndraw, 2,1)
"""
print np.mean(history['x'], axis = 0)
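# To combine several step methods, something along these lines should work
# (a sketch based on the description of compound_step above; step_a and
# step_b are hypothetical step methods):
#
#   step_method = compound_step([step_a, step_b])
#   state, t = sample(ndraw, step_method, chain, history)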
4 changes: 3 additions & 1 deletion mcex/core.py
@@ -31,9 +31,11 @@ def __init__(self):
 def AddData(model, data, distribution):
     model.factors.append(distribution(data))

-def AddVar(model, name, distribution, shape = 1, dtype = 'float64'):
+def AddVar(model, name, distribution, shape = 1, dtype = 'float64', test = None):
     var = FreeVariable(name, shape, dtype)
     model.vars.append(var)
+    if test is not None:
+        var.tag.test_value = test
     model.factors.append(distribution(var))
     return var
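
The new test argument attaches a Theano test value to the freshly created variable. A
hypothetical usage with the updated signature:

    x = AddVar(model, 'x', Normal(mu = .5, tau = 2.**-2), (2,1), test = np.zeros((2,1)))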

3 changes: 3 additions & 0 deletions readme.rst
@@ -8,6 +8,8 @@ The hope is that some of the lessons learned in this experimental package lead t
 in PyMC. People are encouraged to fork MCEx to try out their own designs and improvements
 as well as make criticisms.

+For a tutorial on basic inference, see tutorial.py in the examples folder.
+
 Some design decisions

 +----------------------------------+---------------------------------------+---------------------------------------------------+
@@ -28,4 +30,5 @@ Some design decisions
 | Functional style design          | - Allows easier exchange of           | - Design less similar to Object Oriented design   |
 |                                  |   components                          |                                                   |
 +----------------------------------+---------------------------------------+---------------------------------------------------+
+