# Setup

In [14]:
import os
from pathlib import Path
import random

import numpy as np
import pandas as pd
import cmdstanpy
import arviz
import seaborn as sns

In [15]:
# rpy2 bridges Python to an embedded R session; it is used in this notebook to
# read the course's .RDS data files.
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
# Turn on rpy2's pandas conversion so R data frames come back as pandas objects.
# NOTE(review): `activate()` is reported as deprecated in newer rpy2 releases in
# favour of a local converter context; confirm against the installed version.
pandas2ri.activate()
# Bind R's `readRDS` function to a Python name so it can be called directly.
readRDS = robjects.r['readRDS']

Using libraries at paths:
- /home/nels/R/x86_64-pc-linux-gnu-library/4.1
- /usr/local/lib/R/site-library
- /usr/lib/R/site-library
- /usr/lib/R/library


In [8]:
# Fix both random sources up front so the stochastic cells are reproducible:
# the NumPy generator (`rng`) and the stdlib `random` module.
rng = np.random.default_rng(seed=42)
random.seed(a=42)

# Course data

In [17]:
# Load the two R-serialised course datasets in one pass; the order of the paths
# matches the order of the names being assigned.
pest_data, standata_hier = (
    readRDS(rds_path)
    for rds_path in ('../data/pest_data.RDS', '../data/standata_hier.RDS')
)

In [18]:
# Display the loaded pest data; the rendered table elides the middle rows.
pest_data

Unnamed: 0,building_id,date,traps,floors,sq_footage_p_floor,live_in_super,monthly_average_rent,average_tenant_age,age_of_building,total_sq_foot,month,complaints,log_sq_foot_1e4
1,37,17181.0,8.0,8.0,5149.008112,0.0,3846.949050,53.877424,47.0,41192.064892,1.0,1.0,1.415661
2,37,17211.0,8.0,8.0,5149.008112,0.0,3846.949050,53.877424,47.0,41192.064892,2.0,3.0,1.415661
3,37,17241.0,9.0,8.0,5149.008112,0.0,3846.949050,53.877424,47.0,41192.064892,3.0,0.0,1.415661
4,37,17271.0,10.0,8.0,5149.008112,0.0,3846.949050,53.877424,47.0,41192.064892,4.0,1.0,1.415661
5,37,17301.0,11.0,8.0,5149.008112,0.0,3846.949050,53.877424,47.0,41192.064892,5.0,0.0,1.415661
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,98,17391.0,3.0,13.0,4557.786883,1.0,3785.183548,42.138240,39.0,59251.229483,8.0,6.0,1.779201
117,98,17421.0,2.0,13.0,4557.786883,1.0,3785.183548,42.138240,39.0,59251.229483,9.0,16.0,1.779201
118,98,17451.0,2.0,13.0,4557.786883,1.0,3785.183548,42.138240,39.0,59251.229483,10.0,5.0,1.779201
119,98,17481.0,2.0,13.0,4557.786883,1.0,3785.183548,42.138240,39.0,59251.229483,11.0,5.0,1.779201


# Prior Predictive Checks

In [12]:
def simple_poisson_dgp(traps, alpha_mean, alpha_sd, beta_mean, beta_sd, generator=None):
    """Simulate complaint counts from the prior of a simple Poisson regression.

    A single intercept ``alpha`` and a single slope ``beta`` are drawn from
    independent normal priors. One Poisson count is then drawn per entry of
    ``traps`` with rate ``exp(alpha + beta * traps)``.

    Parameters
    ----------
    traps : array-like or scalar
        Number of traps for each observation. A scalar is treated as a
        single observation.
    alpha_mean, alpha_sd : float
        Mean and standard deviation of the normal prior on the intercept.
    beta_mean, beta_sd : float
        Mean and standard deviation of the normal prior on the slope.
    generator : numpy.random.Generator, optional
        Source of randomness. When omitted, the notebook-level ``rng`` is
        used, which is the original behaviour. Passing a generator explicitly
        makes the function usable without that global.

    Returns
    -------
    numpy.ndarray
        Simulated counts, one per entry of ``traps``.
    """
    if generator is None:
        # Fall back to the notebook-wide generator so existing calls behave
        # exactly as before.
        generator = rng
    # Coerce once so lists, pandas Series and scalars are all handled alike.
    traps = np.atleast_1d(np.asarray(traps, dtype=float))
    n = len(traps)
    # One shared intercept and slope per simulated dataset (shape (1,) each,
    # broadcast against `traps` below).
    alpha = generator.normal(loc=alpha_mean, scale=alpha_sd, size=1)
    beta = generator.normal(loc=beta_mean, scale=beta_sd, size=1)
    # Log link: the Poisson rate is the exponential of the linear predictor.
    complaints = generator.poisson(lam=np.exp(alpha + beta * traps), size=n)
    return complaints

## Sample from the priors

In [24]:
# One simulated dataset from the prior predictive, using standard-normal priors
# on both the intercept and the slope.
simple_poisson_dgp(pest_data['traps'], alpha_mean=0, alpha_sd=1, beta_mean=0, beta_sd=1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

In [40]:
# take 1000 samples: each row of the frame is one simulated dataset drawn with
# standard-normal priors on alpha and beta
prior_preds = pd.DataFrame([
    simple_poisson_dgp(pest_data['traps'], alpha_mean=0, alpha_sd=1, beta_mean=0, beta_sd=1)
    for _draw in range(1000)
])

In [41]:
# Sanity check: one row per simulated dataset, one column per entry of `traps`.
prior_preds.shape

(1000, 120)

In [37]:
# Per-column minimum over all simulated datasets, then summarised across columns.
prior_preds.min().describe()

count    120.0
mean       0.0
std        0.0
min        0.0
25%        0.0
50%        0.0
75%        0.0
max        0.0
dtype: float64

In [38]:
# Per-column maximum over all simulated datasets, then summarised across columns.
# Very large values here suggest the priors permit implausibly large counts.
prior_preds.max().describe()

count    1.200000e+02
mean     4.666883e+13
std      3.071287e+14
min      9.000000e+01
25%      4.275984e+08
50%      9.559405e+09
75%      2.137155e+11
max      2.388114e+15
dtype: float64

In [39]:
# Per-column mean over all simulated datasets, then summarised across columns.
prior_preds.mean().describe()

count    1.200000e+02
mean     4.703973e+10
std      3.093706e+11
min      2.632000e+00
25%      4.616992e+05
50%      9.993859e+06
75%      2.196015e+08
max      2.405517e+12
dtype: float64