# Regression and Other Stories: Congress
Predictive uncertainty for congressional elections. See Chapters 10 and 15 in Regression and Other Stories.

In [1]:
import arviz as az
from bambi import Model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.formula.api as smf

In [2]:
congress = pd.read_csv("https://raw.githubusercontent.com/avehtari/ROS-Examples/master/Congress/data/congress.csv")
inconsistent = ((congress["inc88"] ==-1) & (congress["v86"] > 0.5) |  ((congress["inc88"]==1) & (congress["v86"] < 0.5)))

congress.head()

Unnamed: 0,inc86,inc88,inc90,v86,v88,v90,v86_adj,v88_adj,v90_adj
0,1,1,1,0.745036,0.772443,0.714029,0.745036,0.772443,0.714029
1,1,1,1,0.673845,0.636182,0.59705,0.673845,0.636182,0.59705
2,1,1,0,0.696457,0.664928,0.521043,0.696457,0.664928,0.521043
3,-1,-1,-1,0.46459,0.273834,0.234377,0.46459,0.273834,0.234377
4,-1,-1,0,0.391095,0.263613,0.477439,0.391095,0.263613,0.477439


In [3]:
data88 = pd.DataFrame(dict(vote=congress["v88_adj"], past_vote=congress["v86_adj"], inc=congress["inc88"]))
model = Model(data88)
fit88 = model.fit('vote ~ past_vote + inc', samples=1000, chains=4)

Auto-assigning NUTS sampler...
INFO:pymc3:Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
INFO:pymc3:Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [vote_sd, inc, past_vote, Intercept]
INFO:pymc3:NUTS: [vote_sd, inc, past_vote, Intercept]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 9 seconds.
INFO:pymc3:Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 9 seconds.
The number of effective samples is smaller than 25% for some parameters.
INFO:pymc3:The number of effective samples is smaller than 25% for some parameters.


In [4]:
func_dict = {"Median": np.median,
            "MAD_SD":stats.median_abs_deviation,
             }
coefs = az.summary(fit88, stat_funcs=func_dict, extend=False, round_to=2)
coefs

Unnamed: 0,Median,MAD_SD
Intercept[0],0.24,0.01
past_vote[0],0.52,0.02
inc[0],0.1,0.0
vote_sd,0.07,0.0


In [5]:
# TODO: Rest of this notebook
# Figure out how to run posterior predictive simulations in bambi