# Hierarchical Bayesian Modeling

See the introduction in the lecture notes.

More details can be found in *Bayesian Data Analysis* by Andrew Gelman et al.

In [None]:
library(rstan)

## The data

In [None]:
# One row per school: a label (a-h), the estimated treatment effect, and the
# standard error of that estimate.
df <- data.frame(
  school = letters[1:8],
  effect = c(28.39, 7.94, -2.75, 6.82, -0.64, 0.63, 18.01, 12.16),
  stderr = c(14.9, 10.2, 16.3, 11.0, 9.4, 11.4, 10.4, 17.6)
)

In [None]:
library(ggplot2)

In [None]:
# Bar chart of each school's estimated effect, with an error bar spanning
# the estimate +/- 1.96 standard errors.
ggplot(data = df, mapping = aes(x = school, y = effect)) +
  geom_col() +
  geom_errorbar(
    mapping = aes(
      ymin = effect - 1.96 * stderr,
      ymax = effect + 1.96 * stderr
    )
  )

## Pool the effects

In [None]:
# Pooled effect: average of the school effects weighted by 1 / stderr^2.
effect_pool <- with(df, sum(effect / stderr^2) / sum(1 / stderr^2))

In [None]:
# Standard error of the pooled effect: square root of the reciprocal of the
# summed 1 / stderr^2 weights.
stderr_pool <- with(df, sqrt(1 / sum(1 / stderr^2)))

In [None]:
# Stack the original per-school estimates on top of the pooled estimate,
# tagging each row with a `group` label so the two can be plotted together.
# The pooled effect and standard error are scalars, so they are recycled to
# one row per school. School labels are taken from `df` rather than
# re-typed, so the two halves stay aligned if the data changes.
df2 <- rbind(
  cbind(group = "original", df),
  data.frame(
    group = "pooled",
    school = df$school,
    effect = effect_pool,
    stderr = stderr_pool
  )
)

In [None]:
# Dodged bar chart comparing the groups in `df2` for every school, with
# error bars spanning the estimate +/- 1.96 standard errors.
# Bars and error bars share one explicit dodge width: with a bare
# position = "dodge", each layer is dodged by its own element width, which
# leaves the whiskers off-centre relative to their bars.
ggplot(df2, aes(school, effect, fill = group)) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_errorbar(
    aes(ymin = effect - 1.96 * stderr, ymax = effect + 1.96 * stderr),
    position = position_dodge(width = 0.9)
  )

## Bayesian hierarchical model

In [None]:
# Stan program for the hierarchical model, kept as a string so it can be
# passed as model code to stan().
#   data:       J schools, observed effects y[J], and their standard
#               errors sigma[J]
#   parameters: per-school effects theta[J], population mean mu, and
#               population scale tau (constrained to be non-negative)
#   model:      theta ~ normal(mu, tau);  y ~ normal(theta, sigma)
# NOTE(review): the model block states no priors for mu or tau; confirm that
# relying on Stan's defaults here is intended.
# NOTE(review): `real y[J]` looks like Stan's older array declaration
# syntax; verify that the installed rstan/Stan version still accepts it.
stan.code = "
data {
  int<lower=0> J;         // number of schools
  real y[J];              // estimated treatment effects
  real<lower=0> sigma[J]; // s.e. of effect estimates
}
parameters {
  real theta[J];
  real mu;
  real<lower=0> tau;
}
model {
  theta ~ normal (mu, tau);
  y ~ normal (theta, sigma);
}"

In [None]:
# Inputs for the Stan program's data block: number of schools, observed
# effects, and their standard errors.
stan.data <- list(
  J = nrow(df),
  y = df$effect,
  sigma = df$stderr
)

In [None]:
# Fit the model defined in `stan.code` to `stan.data`, using stan()'s
# default settings.
fit <- stan(
  model_code = stan.code,
  data = stan.data
)

In [None]:
# Show the summary of the fitted model object.
summary(fit)

In [None]:
# Pull the sampled parameter values out of the fitted model. The call is
# namespace-qualified so it always resolves to rstan's extract(), even if
# another attached package defines a function with the same name.
la <- rstan::extract(fit)

In [None]:
# List the names of the components extracted from the fit.
names(la)

In [None]:
# Dimensions of the extracted theta values. The code below summarises this
# object column by column, one column per school; rows are presumably the
# individual posterior draws (confirm against the rstan documentation).
dim(la$theta)

In [None]:
# Column-wise mean of the theta draws: one point estimate per school.
effect_true <- colMeans(la$theta)

In [None]:
# Column-wise standard deviation of the theta draws: one spread estimate per
# school, used below in place of a standard error.
stderr_true <- apply(la$theta, MARGIN = 2, FUN = sd)

In [None]:
# Stack the original per-school estimates on top of the hierarchical-model
# estimates, tagging each row with a `group` label so the two can be plotted
# together. School labels are taken from `df` rather than re-typed, so the
# two halves stay aligned if the data changes.
df2 <- rbind(
  cbind(group = "original", df),
  data.frame(
    group = "bayes",
    school = df$school,
    effect = effect_true,
    stderr = stderr_true
  )
)

In [None]:
# Dodged bar chart comparing the groups in `df2` for every school, with
# error bars spanning the estimate +/- 1.96 standard errors.
# Bars and error bars share one explicit dodge width: with a bare
# position = "dodge", each layer is dodged by its own element width, which
# leaves the whiskers off-centre relative to their bars.
ggplot(df2, aes(school, effect, fill = group)) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_errorbar(
    aes(ymin = effect - 1.96 * stderr, ymax = effect + 1.96 * stderr),
    position = position_dodge(width = 0.9)
  )

## Better than standard errors: we have posterior draws that capture the uncertainty!

In [None]:
# For each school (column of theta), the fraction of draws in which the
# effect is greater than zero.
colMeans(la$theta > 0)