In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np

# --- Simulated data, DGP 1: W enters the outcome Y directly -----------------
# Seed the legacy global NumPy generator so that Restart & Run All reproduces
# the same draws (and therefore the same estimates) every time.
SEED = 0
np.random.seed(SEED)

p_e = 0.2   # P(e == 1)
N = 10000   # number of simulated units

# Unit-level effect of X on Y, drawn from Uniform(0, 1), so its mean is 0.5.
delta = np.random.uniform(0, 1, size=N)

# W drives Z: P(Z = 1 | W) = sigmoid(W).
w = np.random.normal(size=N)
z = np.random.binomial(1, p=1. / (1. + np.exp(-w)), size=N)

# e selects how X is assigned for each unit (see x below).
e = np.random.binomial(1, p=p_e, size=N)

# U enters both the assignment of X (when e == 1) and the outcome Y.
u = np.random.normal(size=N)

# When e == 1, X ~ Bernoulli(sigmoid(delta - u)); when e == 0, X is a copy of Z.
# The Bernoulli draw is made for every unit and then masked by e.
x = (e == 1) * np.random.binomial(1, p=1. / (1. + np.exp(-delta + u))) + (e == 0) * z

# Outcome: Y = U + W + delta * X + standard normal noise.
y = u + w + delta * x + np.random.normal(size=N)

df = pd.DataFrame({"X": x, "Z": z, "E": e, "U": u, "Y": y, "W": w})

The naive difference in means is badly biased as an estimate of the true average effect, $E[\delta]=0.5$ (each unit's $\delta$ is drawn from $\mathrm{Uniform}(0,1)$).

In [3]:
# Naive estimate: mean of Y among units with X == 1 minus mean of Y among
# units with X == 0. Group once and average only the Y column rather than
# averaging every column of the frame twice.
mean_y_by_x = df.groupby('X')['Y'].mean()
mean_y_by_x[1] - mean_y_by_x[0]

1.0497250567995196

And the TSLS estimator, using $Z$ as the instrument for $X$, is even worse:

In [4]:
import statsmodels.api as sm

# Two-stage least squares done by hand, with Z as the instrument for X.
# NOTE: the second-stage OLS treats the stage-1 fitted values as ordinary
# data, so the standard errors in its summary are not proper 2SLS standard
# errors; read the point estimate only.
df['intercept'] = 1.  # constant column so both regressions include an intercept

# Stage 1: regress the treatment X on the instrument Z and keep the fitted values.
first_stage = sm.OLS(endog=df['X'], exog=df[['Z', 'intercept']]).fit()
# Raw string: '\h' is an invalid escape sequence in a normal string literal
# and triggers a warning on recent Python versions. The column name is unchanged.
df[r'$\hat{X}$'] = first_stage.predict()

# Stage 2: regress the outcome Y on the stage-1 fitted values.
model = sm.OLS(endog=df['Y'], exog=df[[r'$\hat{X}$', 'intercept']])
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,Y,R-squared:,0.124
Model:,OLS,Adj. R-squared:,0.124
Method:,Least Squares,F-statistic:,1418.0
Date:,"Tue, 16 Mar 2021",Prob (F-statistic):,2.3299999999999998e-290
Time:,19:54:30,Log-Likelihood:,-19507.0
No. Observations:,10000,AIC:,39020.0
Df Residuals:,9998,BIC:,39030.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
$\hat{X}$,1.6012,0.043,37.659,0.000,1.518,1.685
intercept,-0.5677,0.028,-20.315,0.000,-0.623,-0.513

0,1,2,3
Omnibus:,5.776,Durbin-Watson:,2.017
Prob(Omnibus):,0.056,Jarque-Bera (JB):,5.754
Skew:,-0.052,Prob(JB):,0.0563
Kurtosis:,3.054,Cond. No.,3.27


Can we control for $W$ to block the path $Z \leftarrow W \rightarrow Y$, and recover a conditional exclusion restriction?

In [8]:
import statsmodels.api as sm

# Two-stage least squares done by hand, with Z as the instrument for X and W
# included as a control in both stages.
# NOTE: the second-stage OLS treats the stage-1 fitted values as ordinary
# data, so the standard errors in its summary are not proper 2SLS standard
# errors; read the point estimate only.
df['intercept'] = 1.  # constant column so both regressions include an intercept

# Stage 1: regress the treatment X on the instrument Z and the control W.
first_stage = sm.OLS(endog=df['X'], exog=df[['Z', 'W', 'intercept']]).fit()
# Raw string: '\h' is an invalid escape sequence in a normal string literal
# and triggers a warning on recent Python versions. The column name is unchanged.
df[r'$\hat{X}$'] = first_stage.predict()

# Stage 2: regress the outcome Y on the stage-1 fitted values and W.
model = sm.OLS(endog=df['Y'], exog=df[[r'$\hat{X}$', 'W', 'intercept']])
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,Y,R-squared:,0.375
Model:,OLS,Adj. R-squared:,0.375
Method:,Least Squares,F-statistic:,2995.0
Date:,"Tue, 16 Mar 2021",Prob (F-statistic):,0.0
Time:,19:55:56,Log-Likelihood:,-17823.0
No. Observations:,10000,AIC:,35650.0
Df Residuals:,9997,BIC:,35670.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
$\hat{X}$,0.5542,0.039,14.040,0.000,0.477,0.632
W,1.0006,0.016,63.464,0.000,0.970,1.031
intercept,-0.0277,0.025,-1.107,0.268,-0.077,0.021

0,1,2,3
Omnibus:,2.961,Durbin-Watson:,1.996
Prob(Omnibus):,0.227,Jarque-Bera (JB):,2.932
Skew:,-0.041,Prob(JB):,0.231
Kurtosis:,3.02,Cond. No.,3.66


Yes! Does it also work for the other DGP, in which $W$ affects $Y$ only through $U$?

In [9]:
import pandas as pd
import numpy as np

# --- Simulated data, DGP 2: W affects the outcome Y only through U ----------
# Seed the legacy global NumPy generator so that Restart & Run All reproduces
# the same draws (and therefore the same estimates) every time.
SEED_ALT_DGP = 1
np.random.seed(SEED_ALT_DGP)

p_e = 0.2   # P(e == 1)
N = 10000   # number of simulated units

# Unit-level effect of X on Y, drawn from Uniform(0, 1), so its mean is 0.5.
delta = np.random.uniform(0, 1, size=N)

# W drives Z: P(Z = 1 | W) = sigmoid(W).
w = np.random.normal(size=N)
z = np.random.binomial(1, p=1. / (1. + np.exp(-w)), size=N)

# e selects how X is assigned for each unit (see x below).
e = np.random.binomial(1, p=p_e, size=N)

# U is centred on W with unit variance: U ~ Normal(W, 1).
u = np.random.normal(loc=w, scale=1., size=N)

# When e == 1, X ~ Bernoulli(sigmoid(delta - u)); when e == 0, X is a copy of Z.
# The Bernoulli draw is made for every unit and then masked by e.
x = (e == 1) * np.random.binomial(1, p=1. / (1. + np.exp(-delta + u))) + (e == 0) * z

# Outcome: Y = U + delta * X + standard normal noise (no direct W term).
y = u + delta * x + np.random.normal(size=N)

df = pd.DataFrame({"X": x, "Z": z, "E": e, "U": u, "Y": y, "W": w})

In [10]:
import statsmodels.api as sm

# Two-stage least squares done by hand, with Z as the instrument for X and W
# included as a control in both stages.
# NOTE: the second-stage OLS treats the stage-1 fitted values as ordinary
# data, so the standard errors in its summary are not proper 2SLS standard
# errors; read the point estimate only.
df['intercept'] = 1.  # constant column so both regressions include an intercept

# Stage 1: regress the treatment X on the instrument Z and the control W.
first_stage = sm.OLS(endog=df['X'], exog=df[['Z', 'W', 'intercept']]).fit()
# Raw string: '\h' is an invalid escape sequence in a normal string literal
# and triggers a warning on recent Python versions. The column name is unchanged.
df[r'$\hat{X}$'] = first_stage.predict()

# Stage 2: regress the outcome Y on the stage-1 fitted values and W.
model = sm.OLS(endog=df['Y'], exog=df[[r'$\hat{X}$', 'W', 'intercept']])
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,Y,R-squared:,0.363
Model:,OLS,Adj. R-squared:,0.363
Method:,Least Squares,F-statistic:,2848.0
Date:,"Tue, 16 Mar 2021",Prob (F-statistic):,0.0
Time:,19:56:34,Log-Likelihood:,-17874.0
No. Observations:,10000,AIC:,35750.0
Df Residuals:,9997,BIC:,35780.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
$\hat{X}$,0.4754,0.040,11.941,0.000,0.397,0.553
W,1.0150,0.015,65.767,0.000,0.985,1.045
intercept,0.0068,0.025,0.270,0.787,-0.042,0.056

0,1,2,3
Omnibus:,0.163,Durbin-Watson:,2.012
Prob(Omnibus):,0.922,Jarque-Bera (JB):,0.137
Skew:,-0.002,Prob(JB):,0.934
Kurtosis:,3.018,Cond. No.,3.63


It does! We have a conditional instrument.