In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import scipy.stats as ss
from scipy.interpolate import interp1d
from scipy.special import expit
from fractions import Fraction
from empiricaldist import Pmf, Cdf
import matplotlib.pyplot as plt
from collections import Counter
import statsmodels.formula.api as smfa
import pymc3 as pm

In [3]:
def pmf_from_dist(dist, qs):
    """Build a normalized Pmf by evaluating a distribution's density on a grid.

    dist: object exposing a `pdf` method (e.g. a frozen SciPy distribution)
    qs: quantities at which the density is evaluated

    returns: Pmf indexed by `qs`, normalized in place before being returned
    """
    pmf = Pmf(dist.pdf(qs), qs)
    # The raw densities are only relative weights; normalize() rescales them.
    pmf.normalize()
    return pmf

In [4]:
# Gamma hyperparameters shared by the two frozen distributions below.
alpha, beta = 1.4, 1
# Same shape parameter twice: once with SciPy's default scale, once with the
# scale spelled out as 1 / beta.
dist1 = ss.gamma(a=alpha)
dist2 = ss.gamma(a=alpha, scale=1 / beta)

In [5]:
# Grid of 101 evenly spaced candidate rates covering [0, 10].
lams = np.linspace(0, 10, num=101)
# Discretize both gamma parameterizations on the same grid.
prior1, prior2 = (pmf_from_dist(dist, lams) for dist in (dist1, dist2))

In [6]:
# Element-wise check that the two discretized priors agree at every grid point.
prior1 == prior2

0.0     True
0.1     True
0.2     True
0.3     True
0.4     True
        ... 
9.6     True
9.7     True
9.8     True
9.9     True
10.0    True
Name: , Length: 101, dtype: bool

In [7]:
# Use the first gamma parameterization as the prior from here on.
prior_dist = dist1

In [8]:
# Seeded generator so the prior and prior-predictive draws are identical on
# every Restart & Run All instead of changing with each execution.
prior_rng = np.random.default_rng(42)
# 1000 rates drawn from the prior, then one Poisson count per drawn rate.
sample_prior = prior_dist.rvs(1000, random_state=prior_rng)
sample_prior_pred = ss.poisson.rvs(sample_prior, random_state=prior_rng)

In [9]:
# Empirical Pmf built from the simulated prior-predictive counts.
pmf_prior_pred = Pmf.from_seq(sample_prior_pred)

In [10]:
# Prior-only model: a gamma-distributed rate feeding a Poisson count.
# The hyperparameters reuse `alpha` and `beta` from the SciPy prior cell rather
# than repeating the literals, so the two priors cannot drift apart.
with pm.Model() as model:
    lam = pm.Gamma('lam', alpha=alpha, beta=beta)
    goals = pm.Poisson('goals', lam)

In [11]:
# Draw 1000 prior-predictive samples; the fixed seed makes the draws repeatable
# across kernel restarts.
with model:
    trace = pm.sample_prior_predictive(1000, random_seed=42)

In [12]:
# Draws of `lam` from the PyMC3 prior-predictive run; show the array shape.
sample_prior_pymc = trace['lam']
sample_prior_pymc.shape

(1000,)

In [13]:
# Draws of `goals` from the same prior-predictive run; show the array shape.
sample_prior_pred_pymc = trace['goals']
sample_prior_pred_pymc.shape

(1000,)

In [14]:
# Same gamma-Poisson structure as the prior-only model, now conditioned on an
# observed count of 4. The hyperparameters reuse `alpha` and `beta` from the
# SciPy prior cell instead of repeating the literals.
with pm.Model() as model2:
    lam = pm.Gamma('lam', alpha=alpha, beta=beta)
    goals = pm.Poisson('goals', lam, observed=4)

In [15]:
# Sampler options: return a MultiTrace rather than InferenceData, and fix the
# seed so the posterior draws are reproducible between runs.
options = dict(return_inferencedata=False, random_seed=42)
with model2:
    trace2 = pm.sample(500, **options)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [lam]


Sampling 4 chains for 1_000 tune and 500 draw iterations (4_000 + 2_000 draws total) took 22 seconds.


In [16]:
# Posterior draws of `lam` collected by pm.sample on model2.
sample_post_pymc = trace2['lam']

In [17]:
# Posterior-predictive draws based on the sampled trace; the fixed seed keeps
# the simulated counts repeatable across runs.
with model2:
    post_pred = pm.sample_posterior_predictive(trace2, random_seed=42)

In [18]:
# Simulated `goals` values from the posterior-predictive run.
sample_post_pred_pymc = post_pred['goals']

In [21]:
# The spreadsheet is expected in a `data` folder under the current working
# directory.
filename = 'WHR20_DataForFigure2.1.xlsx'
data_path = Path.cwd().joinpath('data', filename)
df = pd.read_excel(data_path)

In [22]:
# Pull out the 'Ladder score' column and preview its first rows.
score = df['Ladder score']
score.head()

0    7.8087
1    7.6456
2    7.5599
3    7.5045
4    7.4880
Name: Ladder score, dtype: float64