In [None]:
%matplotlib inline
# from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import seaborn
# Apply seaborn's 'darkgrid' style to the figures drawn in the cells below.
seaborn.set_style('darkgrid')

In [None]:
import numpy as np
import emcee
import scipy.stats as stats

An unknown constant intrinsic quantity $\alpha$ is measured via multiple noisy observations $\{x_i\}$,
each drawn independently from a Gaussian of known width $\sigma$ centred on $\alpha$:

$$
P(x_i~|~\alpha,  \sigma) = \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left[\frac{-[x_i-\alpha]^2}{2\sigma^2}\right]
$$


Multiplying these for all $i$ gives the likelihood:
$$
P(\{x_i\}~|~\alpha, \sigma) = (2\pi\sigma^2)^{-N/2} \exp
    \left[- \frac{1}{2\sigma^2} \sum_{i=1}^N [x_i - \alpha]^2
    \right]
$$
so, taking the natural logarithm, the log-likelihood is:
$$
\ln\left[P(\{x_i\}~|~\alpha, \sigma)\right] =   -0.5 N \ln[2\pi\sigma^2] + \left[- 0.5 \sum_{i=1}^N [x_i - \alpha]^2 / \sigma^2 \right]
$$
or, equivalently, as a single sum over the observations (the form used in the code below):
$$
\qquad =  - 0.5 \sum_{i=1}^N \left[ \ln[2\pi\sigma^2] + [x_i - \alpha]^2 / \sigma^2  \right]
$$

In [None]:
# Draw the hidden "true" value of alpha uniformly from the half-open interval [0, 1).
# Un-comment the fixed value underneath to pin it to a known number instead.
alpha = np.random.random_sample()
# alpha=0.95
alpha

In [None]:
# Standard deviation of the Gaussian measurement noise.
sigma = 0.2


def log_likelihood(alpha, x, sigma):
    """Gaussian log-likelihood of the observations ``x`` given ``alpha``.

    Evaluates  -0.5 * sum( ln(2*pi*sigma^2) + (x - alpha)^2 / sigma^2 ),
    i.e. the normalisation term is included for every observation.

    Parameters
    ----------
    alpha : value of the quantity being estimated (the Gaussian mean).
    x     : observed values; combined with ``alpha`` by NumPy broadcasting.
    sigma : standard deviation of the measurement noise.

    Returns
    -------
    The summed log-likelihood as a single number.
    """
    variance = sigma ** 2
    per_point = np.log(2 * np.pi * variance) + (x - alpha) ** 2 / variance
    return -0.5 * np.sum(per_point)

In [None]:
def log_prior(alpha):
    """Log of a flat prior on ``alpha`` over the half-open interval [0, 1).

    Returns 0.0 when ``0 <= alpha < 1`` and ``-np.inf`` for any other value.
    """
    in_support = 0. <= alpha < 1.
    return 0.0 if in_support else -np.inf

def log_prob(alpha, x, sigma):
    """Un-normalised log-posterior: log-prior plus log-likelihood.

    The likelihood is only evaluated when the prior term is finite;
    otherwise ``-np.inf`` is returned straight away.
    """
    prior_term = log_prior(alpha)
    if np.isfinite(prior_term):
        return prior_term + log_likelihood(alpha, x, sigma)
    return -np.inf

In [None]:
# Simulate the measurements: n_samples independent draws from a Gaussian
# centred on the true alpha with standard deviation sigma.
n_samples = 7
noise_model = stats.norm(loc=alpha, scale=sigma)
data = noise_model.rvs(size=n_samples)
# data = np.array([ 0.80008131,  0.942178  ])
data

In [None]:
import scipy.optimize as op


def neg_likelihood(*args):
    """Negated log-likelihood, so that minimising it maximises the likelihood."""
    return -log_likelihood(*args)


# Restrict the search to the interval on which the flat prior is non-zero.
# An unconstrained optimum can fall outside it when the data scatter past 0 or 1;
# walkers started from such a point all have log-probability -inf and the
# sampler cannot move. With the bounds, result.x is the maximum of the
# posterior (the value the later plots label 'MAP').
# The prior excludes the single end point 1.0, which the closed bound includes.
alpha_bounds = [(0.0, 1.0)]
alpha_guess = 0.
result = op.minimize(neg_likelihood, alpha_guess, args=(data, sigma), bounds=alpha_bounds)
# result.x is a length-1 array holding the estimate of alpha.
result.x
# result['x']

In [None]:
from statsmodels.nonparametric.kde import KDEUnivariate
# A kernel density estimate is only built when there are more than five data points.
# `dens` is read again by a later plotting cell under the same length check.
if len(data)>5:
    dens = KDEUnivariate(data)
    dens.fit()
    # Draw the fitted density over the KDE's own support grid.
    plt.plot(dens.support,dens.density)
#     plt.plot(dens.support,dens.cdf)
# plt.plot(data,np.full_like(data, 0.1),'|', color='k', mew=2)
# Rug plot: one black tick per observation.
seaborn.rugplot(data, 
                color='k'
                )
# Red vertical line at the true alpha used to simulate the data.
plt.axvline(alpha, c='r', label='Truth')

In [None]:
# plt.hist(data, normed=True)
# Red line: the true alpha used to simulate the data.
plt.axvline(alpha, c='r', label='Truth')
# Dashed yellow line: the optimiser's estimate of alpha (labelled 'MAP').
plt.axvline(result.x, ls='--', label='MAP', c='y')
# Gaussian N(alpha, sigma) evaluated on 100 points spanning +/- 3 sigma around alpha.
pdf_support = np.linspace(alpha-3*sigma,alpha+3*sigma,100)
plt.plot(pdf_support,stats.norm(loc=alpha,scale=sigma).pdf(pdf_support))
# Overlay the KDE; `dens` only exists when the KDE cell's identical length check passed.
if len(data)>5:
    plt.plot(dens.support,dens.density)
plt.legend()

In [None]:
ndim = 1        # number of parameters in the model
nwalkers = 50   # number of MCMC walkers
nburn = 100     # "burn-in" period to let chains stabilize
nsteps = 500    # number of MCMC steps to take

# Start every walker in a tiny Gaussian ball (scale 1e-5) around the
# optimiser's estimate of alpha.
pos = [result.x + 1e-5 * np.random.randn(ndim) for _ in range(nwalkers)]

# Sample the posterior defined by log_prob, passing the data and noise level
# through as extra arguments.
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, args=(data, sigma))
sampler.reset()
_ = sampler.run_mcmc(pos, nsteps)

# Trace plot: one line per walker, showing its position at every step.
for trace in sampler.chain:
    plt.plot(trace)

In [None]:
# Autocorrelation time reported by the sampler, rounded up to whole steps.
autocorr_time = sampler.get_autocorr_time()
acorr = np.ceil(autocorr_time)
acorr

In [None]:
# Display the sampler's acceptance fraction as a quick health check on the run.
sampler.acceptance_fraction

In [None]:
# Discard the first 200 steps of every walker, then keep one sample per
# autocorrelation time so the retained samples are roughly independent.
# (The `nburn` value set with the sampler configuration is not used here.)
burn_in = 200
# `acorr` holds floating-point values, and a slice step must be a plain integer.
thin = max(1, int(np.max(acorr)))
thinned_samples = sampler.chain[:, burn_in::thin, :]
for walker in thinned_samples:
    plt.plot(walker)
# Number of samples retained per walker.
print(len(thinned_samples[0]))

In [None]:
# Pool the thinned samples from every walker into one flat array.
# samples = sampler.chain[:,150::,:].ravel()
samples = thinned_samples.ravel()
posterior_mean = np.mean(samples)

# Normalised histogram of the posterior samples. `density=True` is the
# replacement for the `normed=True` keyword, which matplotlib no longer accepts.
plt.hist(samples, density=True, alpha=0.5)
plt.axvline(alpha, c='r', ls=':', label='Truth')
plt.axvline(result.x, ls='--', lw=3, label='MAP')
plt.axvline(posterior_mean, c='k', label='MC mean', ls='--')

# Empirical cumulative distribution of the samples, with guide lines at the
# 16th, 50th and 84th percentiles.
cumulative_freq = np.arange(len(samples)) / float(len(samples))
plt.plot(np.sort(samples), cumulative_freq, label='Cumulative Freq')
plt.axhline(0.16, ls=':')
plt.axhline(0.5, ls=':')
plt.axhline(0.84, ls=':')

# Gaussian the data were drawn from, N(alpha, sigma). The grid spans the same
# +/- 3 sigma window as the x-limits below so all 100 points fall in view.
pdf_support = np.linspace(alpha - 3 * sigma, alpha + 3 * sigma, 100)
plt.plot(pdf_support, stats.norm(loc=alpha, scale=sigma).pdf(pdf_support), label="True dist", ls='-.')
plt.legend()

# plt.ylim(0,1)
plt.xlim(alpha - 3 * sigma, alpha + 3 * sigma)

# Truth, Monte Carlo mean and optimiser estimate, then the offset of the
# Monte Carlo mean from the truth in units of sigma. Written so the output is
# the same under Python 2 and Python 3.
print("{} {} {}".format(alpha, posterior_mean, result.x))
print((alpha - posterior_mean) / sigma)