# Week 4 Lecture 1 - Backdoor paths

McElreath's lectures for the whole book are available here: https://github.com/rmcelreath/stat_rethinking_2022

An R/Stan repo of code is available here: https://vincentarelbundock.github.io/rethinking2/

An excellent port to Python/PyMC Code is available here: https://github.com/dustinstansbury/statistical-rethinking-2023

You are encouraged to work through both of these versions to reinforce what we're doing in class.

In [None]:
# Import python packages
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import scipy as sp 
import random as rd
import pymc as pm
from matplotlib import pyplot as plt


# Helper functions
def stdize(x):
    """Return ``x`` centred on its mean and scaled by its standard deviation.

    The scale is NumPy's default ``np.std`` (population form, ``ddof=0``).
    """
    centre = np.mean(x)
    spread = np.std(x)
    return (x - centre) / spread


def indexall(L):
    """Map each element of ``L`` to an integer code.

    Returns a pair ``(uniques, codes)``: ``uniques`` is a list of the distinct
    elements of ``L`` in order of first appearance, and ``codes`` is a NumPy
    array giving, for every element of ``L``, its position in ``uniques``.
    """
    uniques = []
    for item in L:
        if item in uniques:
            continue
        uniques.append(item)
    codes = np.array([uniques.index(item) for item in L])
    return uniques, codes

## Return to Waffle Houses

From the waffle house data we can assert a full causal graph:

![](wafflez.jpg)

And by importing the data, we can check the implications of our assumptions, and how playing with the elemental confounds changes the effect sizes we see in our statistical model.

In [None]:
# Load the Waffle House / divorce dataset (semicolon-delimited file)
ddata = pd.read_csv('WaffleDivorce.csv', delimiter=";")
# Preview the first five rows
ddata.head(5)

In [None]:
# Z-score every column used in the models below (mean 0, SD 1 via stdize)
AgeMarriage = stdize(ddata["MedianAgeMarriage"].to_numpy())
MarriageRate = stdize(ddata["Marriage"].to_numpy())
PropSlaves1860 = stdize(ddata["PropSlaves1860"].to_numpy())
Divorce = stdize(ddata["Divorce"].to_numpy())
WaffleHouses = stdize(ddata["WaffleHouses"].to_numpy())
South = stdize(ddata["South"].to_numpy())

With this diagram and data, let's see how blocking the backdoor path by conditioning on S, versus conditioning on A and M, changes the effect of W on D.

In [None]:
with pm.Model() as Smod:
    # Priors on the regression coefficients:
    # intercept, Waffle House slope, and South slope
    β0 = pm.Normal('Intercept', mu=0, sigma=0.2)
    βWH = pm.Normal('WaffleHouses', mu=0, sigma=0.5)
    βS = pm.Normal('South', mu=0, sigma=0.5)
    # Expected standardized divorce rate: D ~ W + S
    μ = β0+βWH*WaffleHouses+βS*South
    # Observation-level standard deviation, flat prior on [0, 10]
    σ = pm.Uniform('SD_obs', lower=0, upper=10)
    # Gaussian likelihood for the observed divorce rates
    Yi = pm.Normal('Yi', mu=μ, sigma=σ, observed=Divorce)

In [None]:
with pm.Model() as AMmod:
    # Priors on the regression coefficients:
    # intercept, Waffle House slope, age-at-marriage slope, marriage-rate slope
    β0 = pm.Normal('Intercept', mu=0, sigma=0.2)
    βWH = pm.Normal('WaffleHouses', mu=0, sigma=0.5)
    βA = pm.Normal('AgeMarriage', mu=0, sigma=0.5)
    βM = pm.Normal('MarriageRate', mu=0, sigma=0.5)
    # Expected standardized divorce rate: D ~ W + A + M
    μ = β0+βWH*WaffleHouses+βA*AgeMarriage+βM*MarriageRate
    # Observation-level standard deviation, flat prior on [0, 10]
    σ = pm.Uniform('SD_obs', lower=0, upper=10)
    # Gaussian likelihood for the observed divorce rates
    Yi = pm.Normal('Yi', mu=μ, sigma=σ, observed=Divorce)

In [None]:
with pm.Model() as Amod:
    # Priors on the regression coefficients:
    # intercept, Waffle House slope, and age-at-marriage slope
    β0 = pm.Normal('Intercept', mu=0, sigma=0.2)
    βWH = pm.Normal('WaffleHouses', mu=0, sigma=0.5)
    βA = pm.Normal('AgeMarriage', mu=0, sigma=0.5)
    # Expected standardized divorce rate: D ~ W + A
    μ = β0+βWH*WaffleHouses+βA*AgeMarriage
    # Observation-level standard deviation, flat prior on [0, 10]
    σ = pm.Uniform('SD_obs', lower=0, upper=10)
    # Gaussian likelihood for the observed divorce rates
    Yi = pm.Normal('Yi', mu=μ, sigma=σ, observed=Divorce)

In [None]:
with pm.Model() as Mmod:
    # Priors on the regression coefficients:
    # intercept, Waffle House slope, and marriage-rate slope
    β0 = pm.Normal('Intercept', mu=0, sigma=0.2)
    βWH = pm.Normal('WaffleHouses', mu=0, sigma=0.5)
    βM = pm.Normal('MarriageRate', mu=0, sigma=0.5)
    # Expected standardized divorce rate: D ~ W + M
    μ = β0+βWH*WaffleHouses+βM*MarriageRate
    # Observation-level standard deviation, flat prior on [0, 10]
    σ = pm.Uniform('SD_obs', lower=0, upper=10)
    # Gaussian likelihood for the observed divorce rates
    Yi = pm.Normal('Yi', mu=μ, sigma=σ, observed=Divorce)

In [None]:
# Sample each model's posterior with 1000 draws. Passing the model explicitly
# is equivalent to calling pm.sample inside that model's `with` context.
trace_s = pm.sample(draws=1000, model=Smod)
trace_am = pm.sample(draws=1000, model=AMmod)
trace_a = pm.sample(draws=1000, model=Amod)
trace_m = pm.sample(draws=1000, model=Mmod)

In [None]:
# Posterior summary table for the model that conditions on South (Smod)
pm.summary(trace_s)

In [None]:
# Forest plot of the posterior from the South model
pm.plot_forest(trace_s)
# Vertical reference line at zero (i.e. no effect)
plt.axvline(0)
plt.tight_layout()
# Write the figure to disk at 300 dpi
plt.savefig('waffleS.jpg',dpi=300)

In [None]:
# Posterior summary table for the model that conditions on age at marriage and marriage rate (AMmod)
pm.summary(trace_am)

In [None]:
# Posterior summary table for the model that conditions on age at marriage only (Amod)
pm.summary(trace_a)

In [None]:
# Posterior summary table for the model that conditions on marriage rate only (Mmod)
pm.summary(trace_m)

In [None]:
# The `az` alias is never imported in this notebook, so the original
# `az.style.use(...)` / `az.plot_forest(...)` calls raised NameError.
# ArviZ registers its style sheets with matplotlib when it is imported
# (PyMC imports ArviZ), so the theme can be selected through pyplot, and
# PyMC re-exports the forest plot as `pm.plot_forest` (already used above).
plt.style.use("arviz-darkgrid")

# Overlay the posteriors from the South, A&M and M models on one forest plot
# so the Waffle House coefficient can be compared across adjustment sets.
axes = pm.plot_forest(
    [trace_s, trace_am, trace_m],
    model_names=["Southern", "Marriage A&M", "Marriage M"],
)
axes[0].set_title("Waffle House effects model")
# Vertical reference line at zero (i.e. no effect)
plt.axvline(0)
plt.show()

Hmm, we can see that conditioning on South negates the Waffle House effect, as expected. However, conditioning on M and A doesn't do the same thing... so there must be other, unmeasured backdoor paths through which W still appears to have an effect on D.