In [1]:
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns

### Code 6.13

In [2]:
from scipy.stats import norm, binom

# Simulate a plant-growth experiment: initial height, an anti-fungal treatment,
# presence of fungus, and final height.
# A single seeded generator is threaded through every draw so that
# Restart Kernel -> Run All reproduces exactly the same simulated data.
SIM_SEED = 613
sim_rng = np.random.RandomState(SIM_SEED)

# number of plants
N = 100

# initial heights
h0 = norm.rvs(loc=10, scale=2, size=N, random_state=sim_rng)

# assign treatment and fungus growth: first half is control, second half is treated
treatment = np.zeros(N)
treatment[N // 2:] = 1

# probability of fungus is 0.5 for control plants and 0.1 for treated plants
fungus = binom.rvs(n=1, p=0.5 - 0.4 * treatment, loc=0, size=N, random_state=sim_rng)

# expected growth is 5 without fungus and 2 with fungus; treatment never enters directly
h1 = h0 + norm.rvs(loc=5 - 3 * fungus, scale=1, size=N, random_state=sim_rng)

df = pd.DataFrame({'h0': h0, 'h1': h1, 'treatment': treatment, 'fungus': fungus})
df.describe(percentiles=[0.055, 0.945])

Unnamed: 0,h0,h1,treatment,fungus
count,100.0,100.0,100.0,100.0
mean,9.844935,13.744618,0.5,0.36
std,2.181787,2.907541,0.502519,0.482418
min,3.404173,7.1748,0.0,0.0
5.5%,7.042,9.21534,0.0,0.0
50%,9.547138,13.890746,0.5,0.0
94.5%,13.292273,18.189353,1.0,1.0
max,14.58672,20.29509,1.0,1.0


## Model 1 

$$
\begin{align*}
    h_{1 i} & \sim N(\mu_i, \sigma) \\
    \mu_i   & = h_{0 i} \times p \\
     p      & \sim \text{LogNormal}(0, 0.25) \\
     \sigma & \sim \text{Exponential}(1)
\end{align*}
$$

### Code 6.14

In [3]:
from scipy.stats import lognorm


# Prior predictive check for the growth proportion p ~ LogNormal(0, 0.25).
# Seeded so the summary table is reproducible on every run.
PRIOR_SEED = 614

# mu=0 and sd=0.25 corresponds to s=0.25 and scale=np.exp(mu)
sim_p = lognorm.rvs(
    s=0.25,
    scale=np.exp(0),
    size=10000,
    random_state=np.random.RandomState(PRIOR_SEED),
)

df_sim_p = pd.DataFrame({'sim_p': sim_p})
df_sim_p.describe(percentiles=[0.055, 0.945]).round(2)

Unnamed: 0,sim_p
count,10000.0
mean,1.04
std,0.26
min,0.37
5.5%,0.67
50%,1.01
94.5%,1.5
max,2.74


### Code 6.15

In [4]:
# Model 1: final height is the initial height scaled by one shared growth proportion p.
with pm.Model() as m_6_6:
    # LogNormal prior keeps the growth proportion strictly positive.
    p = pm.Lognormal('p', mu=0, sigma=0.25)
    sigma = pm.Exponential('sigma', lam=1)

    # Plain NumPy arrays are passed for both predictor and outcome so the model
    # graph does not depend on pandas index handling.
    mu = pm.Deterministic('mu', df['h0'].values * p)
    # NOTE: this rebinds the notebook-level name `h1` to the observed random variable.
    h1 = pm.Normal('h1', mu=mu, sigma=sigma, observed=df['h1'].values)

    # Fixed seed so Restart & Run All reproduces the same posterior draws.
    trace_6_6 = pm.sample(2000, tune=2000, random_seed=615)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, p]


Sampling 4 chains for 2_000 tune and 2_000 draw iterations (8_000 + 8_000 draws total) took 6 seconds.


In [5]:
# Posterior summary of the free parameters of m_6_6 with 89% highest-density intervals.
az.summary(
    trace_6_6,
    hdi_prob=0.89,
    var_names=['p', 'sigma'],
)



Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat
p,1.379,0.019,1.349,1.41,0.0,0.0,7122.0,7122.0,7124.0,5757.0,1.0
sigma,1.863,0.134,1.651,2.069,0.002,0.001,6176.0,6025.0,6454.0,5213.0,1.0


## Model 2

$$
\begin{align*}
    h_{1 i} & \sim N(\mu_i, \sigma) \\
    \mu_i   & = h_{0 i} \times p_i \\
    p_i     & = \alpha + \beta_T \cdot T_i + \beta_F \cdot F_i \\
    \alpha  & \sim \text{LogNormal}(0, 0.25) \\
    \beta_T & \sim N(0, 0.5) \\
    \beta_F & \sim N(0, 0.5) \\
    \sigma  & \sim \text{Exponential}(1)
\end{align*}
$$

### Code 6.16

In [6]:
# Model 2: the growth proportion is a linear function of treatment and fungus.
with pm.Model() as m_6_7:
    a = pm.Lognormal('a', mu=0, sigma=0.25)
    b_T = pm.Normal('b_T', mu=0, sigma=0.5)
    b_F = pm.Normal('b_F', mu=0, sigma=0.5)
    sigma = pm.Exponential('sigma', lam=1)

    # Use plain NumPy arrays (as m_6_6 does for h0) rather than pandas Series, so the
    # arithmetic with model variables does not rely on implicit Series conversion.
    p = a + b_T * df['treatment'].values + b_F * df['fungus'].values
    mu = pm.Deterministic('mu', df['h0'].values * p)
    # NOTE: this rebinds the notebook-level name `h1` to the observed random variable.
    h1 = pm.Normal('h1', mu=mu, sigma=sigma, observed=df['h1'].values)

    # Fixed seed so Restart & Run All reproduces the same posterior draws.
    trace_6_7 = pm.sample(2000, tune=2000, random_seed=616)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, b_F, b_T, a]


Sampling 4 chains for 2_000 tune and 2_000 draw iterations (8_000 + 8_000 draws total) took 8 seconds.


In [7]:
# Posterior summary of the free parameters of m_6_7 with 89% highest-density intervals.
az.summary(
    trace_6_7,
    hdi_prob=0.89,
    var_names=['a', 'b_T', 'b_F', 'sigma'],
)



Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat
a,1.504,0.027,1.459,1.544,0.0,0.0,3566.0,3565.0,3586.0,3767.0,1.0
b_T,-0.04,0.03,-0.087,0.009,0.0,0.0,4185.0,3868.0,4195.0,4713.0,1.0
b_F,-0.296,0.031,-0.344,-0.245,0.0,0.0,4117.0,4076.0,4115.0,5015.0,1.0
sigma,1.296,0.094,1.145,1.441,0.001,0.001,4496.0,4496.0,4462.0,3870.0,1.0


## Discussion

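The treatment variable seems to have no association with the final height $h_{1i}$, although we explicitly set up the simulated data so that treatment improves growth by reducing fungus. This happens because the model also conditions on fungus, a post-treatment variable through which the entire effect of treatment flows; once fungus is known, treatment carries no additional information. Fungus seems to have had a negative effect on the final height, which is as expected.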