In [1]:
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import pymc3 as pm
import warnings

# Ignore a bunch of Arviz/pymc3 deprecated warnings
warnings.filterwarnings('ignore')

In [2]:
# Location of the data directory, relative to this notebook.
DATA_FOLDER = "../../data"
# File name of the tulips data set inside DATA_FOLDER.
TULIPS_DATA = "tulips.csv"

### Code 8.19

In [3]:
# Read the tulips data from the configured folder and preview the first rows.
tulips_csv = f'{DATA_FOLDER}/{TULIPS_DATA}'
df = pd.read_csv(tulips_csv)
df.head()

Unnamed: 0,bed,water,shade,blooms
0,a,1,1,0.0
1,a,1,2,0.0
2,a,1,3,111.04
3,a,2,1,183.47
4,a,2,2,59.16


In [4]:
# Summary statistics for the numeric columns only (the 'bed' label is skipped).
numeric_cols = ['water', 'shade', 'blooms']
df[numeric_cols].describe()

Unnamed: 0,water,shade,blooms
count,27.0,27.0,27.0
mean,2.0,2.0,128.993704
std,0.83205,0.83205,92.683923
min,1.0,1.0,0.0
25%,1.0,1.0,71.115
50%,2.0,2.0,111.04
75%,3.0,3.0,190.3
max,3.0,3.0,361.66


## Model 1

$$
\begin{align*}
~ B_i & \sim N(\mu_i, \sigma) \\
\mu_i & = \alpha + \beta_W (W_i - \bar{W}) + \beta_S (S_i - \bar{S}) 
\end{align*}
$$

### Code 8.20

In [5]:
# Rescale the outcome by its maximum, so zero stays meaningful and the largest
# value becomes 1, and center both predictors on their sample means.
df['blooms_std'] = df['blooms'].div(df['blooms'].max())
df['water_cent'] = df['water'].sub(df['water'].mean())
df['shade_cent'] = df['shade'].sub(df['shade'].mean())

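At this point, $B_i \in [0, 1]$ and both $W_i - \bar{W}$ and $S_i - \bar{S}$ take on values in $\{-1, 0, 1\}$. The prior for $\alpha$ is centered at $0.5$ with standard deviation $0.25$, which places roughly $95\%$ of the probability mass (two standard deviations either side of the mean) in the interval $[0, 1]$:
$$
    \alpha \sim N(0.5, 0.25).
$$
The priors for $\beta_W$ and $\beta_S$ are set to $N(0, 0.25)$ so that roughly $95\%$ of the mass is on slopes in the interval $[-0.5, 0.5]$. The remaining parameter, $\sigma$, is given an $\text{Exponential}(1)$ prior.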

In [6]:
# Main-effects model m_8_4: standardized blooms regressed on centered water
# and centered shade, with no interaction term.
with pm.Model() as m_8_4:
    # Priors: intercept centered at 0.5, slopes centered at 0 (all with sd 0.25).
    a = pm.Normal('a', mu=0.5, sigma=0.25)
    bw = pm.Normal('bw', mu=0, sigma=0.25)
    bs = pm.Normal('bs', mu=0, sigma=0.25)

    # Linear predictor, stored in the trace under the name 'mu'.
    mu = pm.Deterministic('mu', a + bw * df['water_cent'] + bs * df['shade_cent'])
    sigma = pm.Exponential('sigma', lam=1)

    # Likelihood of the observed standardized blooms.
    blooms_std = pm.Normal('blooms_std', mu=mu, sigma=sigma, observed=df['blooms_std'])

    # A fixed seed makes the posterior draws (and any summary computed from
    # them) repeatable when the notebook is re-run from a fresh kernel.
    trace_8_4 = pm.sample(1000, tune=1000, random_seed=84)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, bs, bw, a]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 2 seconds.


In [7]:
# Posterior summary for m_8_4 with 89% HDIs; only the listed parameters are
# reported, so the deterministic 'mu' is left out of the table.
params_8_4 = ['a', 'bw', 'bs', 'sigma']
az.summary(trace_8_4, var_names=params_8_4, hdi_prob=0.89)

Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a,0.359,0.035,0.303,0.416,0.001,0.0,4927.0,2862.0,1.0
bw,0.203,0.043,0.134,0.27,0.001,0.0,4772.0,2888.0,1.0
bs,-0.112,0.042,-0.175,-0.044,0.001,0.0,4900.0,3058.0,1.0
sigma,0.177,0.027,0.135,0.217,0.0,0.0,4375.0,2559.0,1.0


## Model 2 (Simple Interaction)

$$
\begin{align*}
~ B_i      & \sim N(\mu_i, \sigma) \\
\mu_i      & = \alpha + \beta_W (W_i - \bar{W}) + \beta_S (S_i - \bar{S}) 
                      + \beta_{WS} (W_i - \bar{W}) (S_i - \bar{S}) \\
\alpha     & \sim N(0.5, 0.25) \\
\beta_W    & \sim N(0, 0.25) \\
\beta_S    & \sim N(0, 0.25) \\
\beta_{WS} & \sim N(0, 0.25) \\
\sigma     & \sim \text{Exponential}(1)
\end{align*}
$$

### Code 8.24

In [8]:
# Interaction model m_8_5: the main effects of centered water and shade plus
# their product term.
with pm.Model() as m_8_5:
    # Priors: intercept centered at 0.5, all slopes centered at 0 (sd 0.25).
    a = pm.Normal('a', mu=0.5, sigma=0.25)
    bw = pm.Normal('bw', mu=0, sigma=0.25)
    bs = pm.Normal('bs', mu=0, sigma=0.25)
    bws = pm.Normal('bws', mu=0, sigma=0.25)

    # Linear predictor with the water x shade interaction, stored in the
    # trace under the name 'mu'.
    mu = pm.Deterministic('mu', a + bw * df['water_cent'] + bs * df['shade_cent'] +
                          bws * df['water_cent'] * df['shade_cent'])
    sigma = pm.Exponential('sigma', lam=1)

    # Likelihood of the observed standardized blooms.
    blooms_std = pm.Normal('blooms_std', mu=mu, sigma=sigma, observed=df['blooms_std'])

    # A fixed seed makes the posterior draws (and any summary computed from
    # them) repeatable when the notebook is re-run from a fresh kernel.
    trace_8_5 = pm.sample(1000, tune=1000, random_seed=85)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, bws, bs, bw, a]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 3 seconds.


In [9]:
# Posterior summary for m_8_5 with 89% HDIs; only the listed parameters are
# reported, so the deterministic 'mu' is left out of the table.
params_8_5 = ['a', 'bw', 'bs', 'bws', 'sigma']
az.summary(trace_8_5, var_names=params_8_5, hdi_prob=0.89)

Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a,0.359,0.027,0.317,0.403,0.0,0.0,5140.0,2919.0,1.0
bw,0.206,0.033,0.151,0.256,0.001,0.0,4370.0,2922.0,1.0
bs,-0.113,0.034,-0.165,-0.058,0.0,0.0,5407.0,2896.0,1.0
bws,-0.142,0.041,-0.216,-0.083,0.001,0.0,4824.0,3285.0,1.0
sigma,0.143,0.022,0.109,0.176,0.0,0.0,3548.0,2598.0,1.0
