# Physics 21, Spring 2021
# Shubh Agrawal, Class of 2022
# Assignment 4b

In [8]:
import numpy as np
from matplotlib import pyplot as pl
import pymc3 as pm

# Part 1

We redo the coin tossing problem with $H_{true} = 3$

In [33]:
# 0 = heads, 1 = tails
def tossNCoins(H, n):
    return np.random.choice([1, 0], size = n, p = [H, 1-H])

H_true = 0.3
N = 512

data = tossNCoins(H_true, N)

In [59]:
def vary_number_of_chains_uniform_prior(num_chains = [1, 2, 4, 8]):
    iters = 1000
    pl.figure()
    for i, num_chain in enumerate(num_chains):
        with pm.Model() as model:
            # define uniform prior
            theta = pm.Uniform('theta', lower=0, upper=1)

            # define the likelihood
            likelihood = pm.Bernoulli('likelihood', theta, observed=data)

            # get the samples
            trace = pm.sample(iters, chains=num_chain)
            
            pl.hist(trace['theta'], bins = 100, density=True, histtype="step", label=num_chain)
            
    pl.title(r"uniform prior, varying number of chains with $n_{iters}=1000$")
    pl.xlabel(r"$\theta$")
    pl.ylabel("posterior density")
    pl.legend()
            

In [63]:
def vary_number_of_chains_gaussian_prior(num_chains = [1, 2, 4, 8]):
    iters = 1000
    pl.figure()
    for i, num_chain in enumerate(num_chains):
        with pm.Model() as model:
            # define uniform prior
            theta = pm.Normal('theta', mu=0.5, sigma=1)

            # define the likelihood
            likelihood = pm.Bernoulli('likelihood', theta, observed=data)

            # get the samples
            trace = pm.sample(iters, chains=num_chain)
            
            pl.hist(trace['theta'], bins = 100, density=True, histtype="step", label=num_chain)
            
    pl.title(r"gaussian prior, varying number of chains with $n_{iters}=1000$")
    pl.xlabel(r"$\theta$")
    pl.ylabel("posterior density")
    pl.legend()
            

In [60]:
%matplotlib notebook
vary_number_of_chains_uniform_prior()

<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [theta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 1 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 8 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [theta]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 20 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (8 chains in 2 jobs)
NUTS: [theta]


Sampling 8 chains for 1_000 tune and 1_000 draw iterations (8_000 + 8_000 draws total) took 33 seconds.


In [64]:
%matplotlib notebook
vary_number_of_chains_gaussian_prior()

<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [theta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 1 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 8 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [theta]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 14 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (8 chains in 2 jobs)
NUTS: [theta]


Sampling 8 chains for 1_000 tune and 1_000 draw iterations (8_000 + 8_000 draws total) took 35 seconds.


With either case increases the effective number of samples gives us a smoother curve."

In [68]:
def vary_number_of_iters_uniform_prior(num_iters = [1000, 2000, 4000, 8000]):
    num_chain = 2
    pl.figure()
    for i, iters in enumerate(num_iters):
        with pm.Model() as model:
            # define uniform prior
            theta = pm.Uniform('theta', lower=0, upper=1)

            # define the likelihood
            likelihood = pm.Bernoulli('likelihood', theta, observed=data)

            # get the samples
            trace = pm.sample(iters, chains=num_chain)
            
            pl.hist(trace['theta'], bins = 100, density=True, histtype="step", label=iters)
            
    pl.title(r"uniform prior, varying number of iterations with $n_{chains}=2$")
    pl.xlabel(r"$\theta$")
    pl.ylabel("posterior density")
    pl.legend()
            

In [69]:
def vary_number_of_iters_gaussian_prior(num_iters = [1000, 2000, 4000, 8000]):
    num_chain = 2
    pl.figure()
    for i, iters in enumerate(num_iters):
        with pm.Model() as model:
            # define uniform prior
            theta = pm.Normal('theta', mu=0.5, sigma=1)

            # define the likelihood
            likelihood = pm.Bernoulli('likelihood', theta, observed=data)

            # get the samples
            trace = pm.sample(iters, chains=num_chain)
            
            pl.hist(trace['theta'], bins = 100, density=True, histtype="step", label=iters)
            
    pl.title(r"gaussian prior, varying number of iterations with $n_{chains}=2$")
    pl.xlabel(r"$\theta$")
    pl.ylabel("posterior density")
    pl.legend()
            

In [70]:
%matplotlib notebook
vary_number_of_iters_uniform_prior()

<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 8 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 10 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 4_000 draw iterations (2_000 + 8_000 draws total) took 10 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 8_000 draw iterations (2_000 + 16_000 draws total) took 14 seconds.


In [71]:
%matplotlib notebook
vary_number_of_iters_gaussian_prior()

<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 8 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 11 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 4_000 draw iterations (2_000 + 8_000 draws total) took 12 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 8_000 draw iterations (2_000 + 16_000 draws total) took 20 seconds.


With either case increases the effective number of samples gives us a smoother curve.

In [79]:
def vary_mu(mus = [0.2, 0.4, 0.6, 0.8]):
    sigma = 0.1
    num_chain = 2
    iters = 5000
    pl.figure()
    for i, mu in enumerate(mus):
        with pm.Model() as model:
            # define uniform prior
            theta = pm.Normal('theta', mu=mu, sigma=sigma)

            # define the likelihood
            likelihood = pm.Bernoulli('likelihood', theta, observed=data)

            # get the samples
            trace = pm.sample(iters, chains=num_chain)
            
            pl.hist(trace['theta'], bins = 100, density=True, histtype="step", label=mu)
            
    pl.title(r"gaussian prior, $\sigma = 0.1$ varying $\mu$")
    pl.xlabel(r"$\theta$")
    pl.ylabel("posterior density")
    pl.legend()
            

In [80]:
def vary_sigma(sigmas = [0.05, 0.1, 0.2, 0.5]):
    mu = 0.2
    num_chain = 2
    iters = 5000
    pl.figure()
    for i, sigma in enumerate(sigmas):
        with pm.Model() as model:
            # define uniform prior
            theta = pm.Normal('theta', mu=mu, sigma=sigma)

            # define the likelihood
            likelihood = pm.Bernoulli('likelihood', theta, observed=data)

            # get the samples
            trace = pm.sample(iters, chains=num_chain)
            
            pl.hist(trace['theta'], bins = 100, density=True, histtype="step", label=sigma)
            
    pl.title(r"gaussian prior, $\mu = 0.5$ varying $\sigma$")
    pl.xlabel(r"$\theta$")
    pl.ylabel("posterior density")
    pl.legend()

In [170]:
%matplotlib notebook
vary_mu()

<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 5_000 draw iterations (2_000 + 10_000 draws total) took 16 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 5_000 draw iterations (2_000 + 10_000 draws total) took 13 seconds.
The acceptance probability does not match the target. It is 0.8799827275249745, but should be close to 0.8. Try to increase the number of tuning steps.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 5_000 draw iterations (2_000 + 10_000 draws total) took 14 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 5_000 draw iterations (2_000 + 10_000 draws total) took 18 seconds.


The posteriors are sort-of biased towards the prior means, but they work pretty well still!

In [82]:
%matplotlib notebook
vary_sigma()

<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 5_000 draw iterations (2_000 + 10_000 draws total) took 18 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 5_000 draw iterations (2_000 + 10_000 draws total) took 13 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 5_000 draw iterations (2_000 + 10_000 draws total) took 11 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [theta]


Sampling 2 chains for 1_000 tune and 5_000 draw iterations (2_000 + 10_000 draws total) took 11 seconds.


The posteriors are sort-of biased towards the prior mean for narrower priors, but they work pretty well still!

# Part 2

As before, the random variable we deal with is $\theta_k$, which is distributed uniformly. We can write: $$\tan(\theta_k) = \frac{x_k - \alpha}{\beta}$$
$$x_k = \alpha + \beta\tan(\theta_k)$$

Now, as due to normalization $\int p(\theta)d\theta = 1 = \int p(x) dx$. Doing a substitution, $p(\theta) = p(x) \frac{dx}{d\theta}$, or:
$$p(x) = p(\theta) \frac{d\theta}{dx} \propto \frac{1}{\beta \sec^2(\theta_k)} = \frac{1}{\beta + \beta \tan^2(\theta_k)} = \frac{1}{\beta + (x-\alpha)^2/ \beta} = \frac{\beta}{\beta^2 + (x-\alpha)^2}$$
We will use this $p(x)$ to get our likelihood function.

In [83]:
def simulateX_k(alpha_true, beta_true, n):
    theta = np.random.random(n) * np.pi - (np.pi / 2)
    return alpha_true + beta_true * np.tan(theta)

In [84]:
alpha_true = 1
beta_true = 1 
n = 512

trials = simulateX_k(alpha_true, beta_true, n)

In [115]:
def vary_number_of_chains_lighthouse(num_chains = [1, 2, 4, 8]):
    iters = 1000
    for i, num_chain in enumerate(num_chains):
        pl.figure(i)
        with pm.Model() as model:
            # define uniform prior
            alpha = pm.Uniform('alpha', lower=-5, upper=5)
            beta = pm.Uniform('beta', lower=0, upper=5)

            # define the likelihood
            likelihood = pm.Cauchy('likelihood', alpha=alpha, beta=beta, observed=trials)

            # get the samples
            trace = pm.sample(iters, chains=num_chain)
            
            heatmap, xedges, yedges = np.histogram2d(trace["alpha"], trace["beta"], bins=50)
            extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
            pl.clf()
            pl.imshow(heatmap.T, extent=extent, origin='lower')
            pl.title(r'$\alpha-\beta$ posterior heatmap for {} chain(s)'.format(num_chain))
            pl.ylabel(r'$\alpha$')
            pl.xlabel(r'$\beta$')
            pl.show()
    

In [116]:
%matplotlib notebook
vary_number_of_chains_lighthouse()

<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta, alpha]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 2 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks


<IPython.core.display.Javascript object>

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 13 seconds.


<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 20 seconds.


<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (8 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 8 chains for 1_000 tune and 1_000 draw iterations (8_000 + 8_000 draws total) took 40 seconds.


In [131]:
def vary_number_of_iterations_lighthouse(num_iters = [1000, 2000, 4000, 8000]):
    num_chain = 2
    for i, iters in enumerate(num_iters):
        pl.figure(i)
        with pm.Model() as model:
            # define uniform prior
            alpha = pm.Uniform('alpha', lower=-5, upper=5)
            beta = pm.Uniform('beta', lower=0, upper=5)

            # define the likelihood
            likelihood = pm.Cauchy('likelihood', alpha=alpha, beta=beta, observed=trials)

            # get the samples
            trace = pm.sample(iters, chains=num_chain)
            
            heatmap, xedges, yedges = np.histogram2d(trace["alpha"], trace["beta"], bins=50)
            extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
            pl.clf()
            pl.imshow(heatmap.T, extent=extent, origin='lower')
            pl.title(r'$\alpha-\beta$ posterior heatmap for {} iterations'.format(iters))
            pl.ylabel(r'$\alpha$')
            pl.xlabel(r'$\beta$')
            pl.show()

In [132]:
%matplotlib notebook
vary_number_of_iterations_lighthouse()

<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 10 seconds.


<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 11 seconds.


<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 4_000 draw iterations (2_000 + 8_000 draws total) took 15 seconds.


<IPython.core.display.Javascript object>

  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 8_000 draw iterations (2_000 + 16_000 draws total) took 24 seconds.


Again, as before, the heat map peak becomes more and more significant when we incease the effective number of samples

In [158]:
def get_most_probable_xy(num_chains = [1, 2, 4], num_iters = [100, 500, 1000, 2000]):
    Xs = {}
    Ys = {}
    for num_chain in num_chains:
        max_xs = []
        max_ys = []
        for i, iters in enumerate(num_iters):
            with pm.Model() as model:
                # define uniform prior
                alpha = pm.Uniform('alpha', lower=-5, upper=5)
                beta = pm.Uniform('beta', lower=0, upper=5)

                # define the likelihood
                likelihood = pm.Cauchy('likelihood', alpha=alpha, beta=beta, observed=trials)

                # get the samples
                trace = pm.sample(iters, chains=num_chain)
                
                cx, bx = np.histogram(trace["alpha"], bins = 100)
                cy, by = np.histogram(trace["beta"], bins = 100)
                
                max_xs += [bx[np.argmax(cx)]]
                max_ys += [by[np.argmax(cy)]]
                
        Xs[num_chain] = max_xs
        Ys[num_chain] = max_ys
    return Xs, Ys

In [159]:
Xs, Ys = get_most_probable_xy()

  trace = pm.sample(iters, chains=num_chain)
Only 100 samples in chain.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta, alpha]


Sampling 1 chain for 1_000 tune and 100 draw iterations (1_000 + 100 draws total) took 1 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta, alpha]


Sampling 1 chain for 1_000 tune and 500 draw iterations (1_000 + 500 draws total) took 1 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta, alpha]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 2 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta, alpha]


Sampling 1 chain for 1_000 tune and 2_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks
Only 100 samples in chain.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 100 draw iterations (2_000 + 200 draws total) took 8 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 500 draw iterations (2_000 + 1_000 draws total) took 9 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 10 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 12 seconds.
  trace = pm.sample(iters, chains=num_chain)
Only 100 samples in chain.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 100 draw iterations (4_000 + 400 draws total) took 17 seconds.
The acceptance probability does not match the target. It is 0.8819922637384807, but should be close to 0.8. Try to increase the number of tuning steps.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 500 draw iterations (4_000 + 2_000 draws total) took 17 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 19 seconds.
  trace = pm.sample(iters, chains=num_chain)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 23 seconds.


In [173]:
num_iters = [100, 500, 1000, 2000]
for i, num_chain in enumerate(Xs.keys()):    
    max_xs = Xs[num_chain]
    max_ys = Ys[num_chain]
    pl.figure(1)
    pl.plot(num_iters, max_xs, ".-", label = f"{num_chain} chains")
    pl.figure(2)
    pl.plot(num_iters, max_ys, ".-", label = f"{num_chain} chains")

pl.figure(1)            
pl.title(r'most probable $\alpha$ in posterior')
pl.ylabel(r'$\alpha$')
pl.xlabel(r'number of iterations')
pl.axhline(y = alpha_true, color = "black")
pl.legend()
pl.show()
pl.figure(2)
pl.title(r'most probable $\beta$ in posterior')
pl.ylabel(r'$\beta$')
pl.xlabel(r'number of iterations')
pl.axhline(y = beta_true, color = "black")
pl.legend()
pl.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Increasing iteration number seems to have a much larger effect than increasing the number of chains.

# Adding an interloper

In [192]:
alpha_true = 3
beta_true = 3
n = 512

inter_alpha = 1
inter_beta = 1

trials = simulateX_k(alpha_true, beta_true, n)

In [193]:
with pm.Model() as model:
    # define uniform prior
    alpha = pm.Uniform('alpha', lower=-5, upper=5)
    beta = pm.Uniform('beta', lower=0, upper=5)

    # define the likelihood
    likelihood = pm.Cauchy('likelihood', alpha=alpha, beta=beta, observed=trials)

    # get the samples
    trace = pm.sample(5000, chains=4)

    heatmap, xedges, yedges = np.histogram2d(trace["alpha"], trace["beta"], bins=50)
    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
    pl.clf()
    pl.imshow(heatmap.T, extent=extent, origin='lower')
    pl.title(r'$\alpha-\beta$ without an interloper'.format(num_chain))
    pl.ylabel(r'$\alpha$')
    pl.xlabel(r'$\beta$')
    pl.show()

  trace = pm.sample(5000, chains=4)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 32 seconds.


<IPython.core.display.Javascript object>

In [194]:
trials += simulateX_k(inter_alpha, inter_beta, n)
np.random.shuffle(trials)

with pm.Model() as model:
    # define uniform prior
    alpha = pm.Uniform('alpha', lower=-5, upper=5)
    beta = pm.Uniform('beta', lower=0, upper=5)

    # define the likelihood
    likelihood = pm.Cauchy('likelihood', alpha=alpha, beta=beta, observed=trials)

    # get the samples
    trace = pm.sample(5000, chains=4)

    heatmap, xedges, yedges = np.histogram2d(trace["alpha"], trace["beta"], bins=50)
    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
    pl.clf()
    pl.imshow(heatmap.T, extent=extent, origin='lower')
    pl.title(r'$\alpha-\beta$ with an interloper'.format(num_chain))
    pl.ylabel(r'$\alpha$')
    pl.xlabel(r'$\beta$')
    pl.show()

  trace = pm.sample(5000, chains=4)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 33 seconds.
The acceptance probability does not match the target. It is 0.898001669967243, but should be close to 0.8. Try to increase the number of tuning steps.


<IPython.core.display.Javascript object>

MCMC is surprising not able to detect two sources of the flashes, instead it simply predicts a higher $\alpha$ and higher $\beta$. This might be because that sum of two Cauchy random variables is also a Cauchy distribution (as the trials are basically values from two independent Cauchy distributions). Notably, for if $X$ is sampled from Cauchy($\alpha_1$, $\beta_2$) and $Y$ from Cauchy($\alpha_2$, $\beta_2$), their sum $X+Y$ is from Cauchy($\alpha_1 + \alpha_2$, $\beta_1 + \beta_2$). This is the kind of behaviour we are seeing in this heatmap, as $\alpha_{t} + \alpha_{i} = 3 + 1 = 4 = \beta_{t} + \beta_{i}$, and this heat map has a peak close to $(4,4)$.

It might be important to note that if the interloper position changes with time, we will see this peak move (if the speed is low enough to get separate integration times for the trails). In that sense, we can extract both lighthouse and interloper position as a function of time.