In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import numpy as np

# --- Simulated instrumental-variable data-generating process (DGP) ---
# Seed the global NumPy RNG so every draw below is reproducible when the
# notebook is re-run from a fresh kernel.
SEED = 0
np.random.seed(SEED)

N = 1000000   # number of simulated units
p_e = 0.2     # P(E = 1): share of units whose treatment ignores the instrument

u = np.random.uniform(0, 1, size=N)         # confounder: enters Y, delta and self-selection
z = np.random.binomial(1, p=0.5, size=N)    # binary instrument, assigned by a fair coin
e = np.random.binomial(1, p=p_e, size=N)    # E = 1: self-selects; E = 0: X follows Z
delta = np.random.normal(u)                 # per-unit treatment effect, drawn from N(u, 1)

# Units with E = 1 choose treatment with probability logistic(delta - u + 0.5);
# units with E = 0 take exactly the treatment the instrument assigns.
p_self_select = 1. / (1. + np.exp(-delta + u - 0.5))
x = (e == 1) * np.random.binomial(1, p=p_self_select) + (e == 0) * z

# Outcome: confounder + unit-level effect on the treated + standard normal noise.
y = u + delta * x + np.random.normal(size=N)

df = pd.DataFrame({"X": x, "Z": z, "E": e, "U": u, "Y": y})

# Let's keep the same DGP, but make the potential causal states explicit

In [None]:
import pandas as pd
import numpy as np

# --- Same DGP, with the potential treatment states X_0 and X_1 made explicit ---
# Seed the global NumPy RNG so every draw below is reproducible when the
# notebook is re-run from a fresh kernel.
SEED = 0
np.random.seed(SEED)

N = 1000000   # number of simulated units
p_e = 0.2     # P(E = 1): share of units whose treatment ignores the instrument

u = np.random.uniform(0, 1, size=N)         # confounder: enters Y, delta and self-selection
z = np.random.binomial(1, p=0.5, size=N)    # binary instrument, assigned by a fair coin
e = np.random.binomial(1, p=p_e, size=N)    # E = 1: self-selects; E = 0: X follows Z
delta = np.random.normal(u)                 # per-unit treatment effect, drawn from N(u, 1)

# Self-selected treatment choice, used only by units with E = 1.
epsilon_x = np.random.binomial(1, p=1. / (1. + np.exp(-delta + u - 0.5)))

# Potential treatments: x_0 is the treatment taken when Z = 0, x_1 when Z = 1.
# E = 1 units take epsilon_x either way; E = 0 units take 0 under Z = 0 and 1 under Z = 1.
x_0 = (e == 1) * epsilon_x + (e == 0) * 0
x_1 = (e == 1) * epsilon_x + (e == 0) * 1
assert np.all(x_1 >= x_0), "the instrument should never push a unit out of treatment"

# Observed treatment is the potential treatment selected by the realised instrument.
x = (z == 1) * x_1 + (z == 0) * x_0

# Outcome: confounder + unit-level effect on the treated + standard normal noise.
y = u + delta * x + np.random.normal(size=N)

df = pd.DataFrame({"X": x, "Z": z, "E": e, "U": u, "Y": y, "X_0": x_0, "X_1": x_1, "delta": delta})

# what is $\kappa$ here?

X = X_0 + (X_1 - X_0)*Z = X_0 + (e==0)*Z

so $\kappa$ = (e==0).

# Do we have compliers? Defiers? Always and never takers?

In [None]:
# Size of each principal stratum, keyed by the potential treatments (X_0, X_1):
# (0, 0) never-takers, (0, 1) compliers, (1, 1) always-takers, (1, 0) defiers.
df.groupby(['X_0', 'X_1'])[['X']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,X
X_0,X_1,Unnamed: 2_level_1
0,0,79363
0,1,800047
1,1,120590


So, out of 1,000,000 units, we have around 80,000 never-takers, 120,000 always-takers, and 800,000 compliers (and no defiers).

In [None]:
def naive_effect(df):
    """Naive treatment-effect estimate: difference in mean outcome by treatment.

    Computes mean(Y | X = 1) - mean(Y | X = 0) with no adjustment for
    confounding.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a treatment column 'X' taking the values 0 and 1 and a
        numeric outcome column 'Y'. Other columns are ignored.

    Returns
    -------
    float
        Mean of 'Y' among rows with X == 1 minus mean of 'Y' among rows with
        X == 0.

    Raises
    ------
    KeyError
        If 'X' or 'Y' is missing, or if either treatment group is empty.
    """
    # Aggregate only the outcome column, once: averaging every column of the
    # frame is wasteful and fails when the frame carries non-numeric columns.
    mean_y_by_x = df.groupby('X')['Y'].mean()
    # .loc makes the lookup explicitly label-based (treatment value, not position).
    return mean_y_by_x.loc[1] - mean_y_by_x.loc[0]

def wald_effect(df):
    """Wald (instrumental-variable) estimate: cov(Y, Z) / cov(X, Z).

    Expects `df` to provide the columns 'Y' (outcome), 'X' (treatment) and
    'Z' (instrument). Returns the ratio of the two sample covariances.
    """
    instrument = df['Z']
    # Off-diagonal entry of each 2x2 sample covariance matrix.
    reduced_form = np.cov(df['Y'], instrument)[0, 1]
    first_stage = np.cov(df['X'], instrument)[0, 1]
    return reduced_form / first_stage

# Show the naive difference in means next to the Wald (IV) estimate for the simulated data.
naive_effect(df), wald_effect(df)

(0.5794967707954743, 0.5026175523594267)

In [None]:
# Average of the per-unit treatment effect `delta` over the whole simulated population.
delta.mean()

0.49956892722023577

Here the IV (Wald) estimate (≈ 0.50) is close to the average effect $E[\delta]$ (≈ 0.50), while the naive estimate (≈ 0.58) is biased. But what effect is the IV estimator actually estimating?

In [None]:
# Average per-unit effect within each principal stratum, keyed by (X_0, X_1):
# (0, 0) never-takers, (0, 1) compliers, (1, 1) always-takers.
df.groupby(['X_0', 'X_1'])[['delta']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,delta
X_0,X_1,Unnamed: 2_level_1
0,0,0.002794
0,1,0.499815
1,1,0.825917


# In this example we had a real experiment for the compliers, and a confounded experiment for the always- and never-takers. The IV estimator converged to the average effect $E[\delta]$, because the complier effect was equal to the average effect! What if the complier effect were confounded?

In [None]:
import pandas as pd
import numpy as np

# --- DGP in which compliance itself depends on the confounder U ---
# Seed the global NumPy RNG so every draw below is reproducible when the
# notebook is re-run from a fresh kernel.
SEED = 0
np.random.seed(SEED)

N = 100000    # number of simulated units
p_e = 0.2     # P(E = 1): share of units whose treatment ignores the instrument

u = np.random.uniform(0, 1, size=N)         # confounder: enters Y, delta, self-selection and compliance
z = np.random.binomial(1, p=0.5, size=N)    # binary instrument, assigned by a fair coin
e = np.random.binomial(1, p=p_e, size=N)    # E = 1: self-selects regardless of Z
delta = np.random.normal(u)                 # per-unit treatment effect, drawn from N(u, 1)

# Self-selected treatment choice, used only by units with E = 1.
epsilon_x = np.random.binomial(1, p=1. / (1. + np.exp(-delta + u - 0.5)))

# Potential treatments: x_0 is the treatment taken when Z = 0, x_1 when Z = 1.
# E = 0 units are untreated under Z = 0 and, under Z = 1, take treatment only
# with probability u, so who complies now depends on the confounder.
x_0 = (e == 1) * epsilon_x + (e == 0) * 0
x_1 = (e == 1) * epsilon_x + (e == 0) * np.random.binomial(1, p=u)
assert np.all(x_1 >= x_0), "the instrument should never push a unit out of treatment"

# Observed treatment is the potential treatment selected by the realised instrument.
x = (z == 1) * x_1 + (z == 0) * x_0

# Outcome: confounder + unit-level effect on the treated + standard normal noise.
y = u + delta * x + np.random.normal(size=N)

df = pd.DataFrame({"X": x, "Z": z, "E": e, "U": u, "Y": y, "X_0": x_0, "X_1": x_1, "delta": delta})

# what is $\kappa$ here?

X = X_0 + (X_1 - X_0)*Z = X_0 + (e==0)*np.random.binomial(1, p=u)*Z

so $\kappa$ = (e==0)*np.random.binomial(1, p=u)

# We still have no defiers (only compliers, always-takers, and never-takers) ...

In [None]:
# Size of each principal stratum, keyed by the potential treatments (X_0, X_1):
# (0, 0) never-takers, (0, 1) compliers, (1, 1) always-takers, (1, 0) defiers.
df.groupby(['X_0', 'X_1'])[['X']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,X
X_0,X_1,Unnamed: 2_level_1
0,0,47828
0,1,40132
1,1,12040


# but now the IV estimator is biased!!


In [None]:

def naive_effect(df):
    """Naive treatment-effect estimate: difference in mean outcome by treatment.

    Computes mean(Y | X = 1) - mean(Y | X = 0) with no adjustment for
    confounding.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a treatment column 'X' taking the values 0 and 1 and a
        numeric outcome column 'Y'. Other columns are ignored.

    Returns
    -------
    float
        Mean of 'Y' among rows with X == 1 minus mean of 'Y' among rows with
        X == 0.

    Raises
    ------
    KeyError
        If 'X' or 'Y' is missing, or if either treatment group is empty.
    """
    # Aggregate only the outcome column, once: averaging every column of the
    # frame is wasteful and fails when the frame carries non-numeric columns.
    mean_y_by_x = df.groupby('X')['Y'].mean()
    # .loc makes the lookup explicitly label-based (treatment value, not position).
    return mean_y_by_x.loc[1] - mean_y_by_x.loc[0]

def wald_effect(df):
    """Wald (instrumental-variable) estimate: cov(Y, Z) / cov(X, Z).

    Expects `df` to provide the columns 'Y' (outcome), 'X' (treatment) and
    'Z' (instrument). Returns the ratio of the two sample covariances.
    """
    instrument = df['Z']
    # Off-diagonal entry of each 2x2 sample covariance matrix.
    reduced_form = np.cov(df['Y'], instrument)[0, 1]
    first_stage = np.cov(df['X'], instrument)[0, 1]
    return reduced_form / first_stage

# Show the naive difference in means next to the Wald (IV) estimate for the simulated data.
naive_effect(df), wald_effect(df)

(0.8837384697251055, 0.6536798918821379)

In [None]:
# Average of the per-unit treatment effect `delta` over the whole simulated population.
delta.mean()

0.5014072803110581

# Again, we've measured the effect only on the compliers

In [None]:
# Average per-unit effect within each principal stratum, keyed by (X_0, X_1):
# (0, 0) never-takers, (0, 1) compliers, (1, 1) always-takers.
df.groupby(['X_0', 'X_1'])[['delta']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,delta
X_0,X_1,Unnamed: 2_level_1
0,0,0.281969
0,1,0.665878
1,1,0.824895


In [None]:
Heterogeneous treatment effects (\delta \not\perp {U, \epsilon_Y}) --> in general E[\delta|G=d] != E[\delta|G=c] != E[\delta|G=a] != E[\delta|G=n],
so, with no defiers, the IV (Wald) estimator measures the complier effect
E[\delta|G=c], rather than E[\delta]


In [None]:
Homogeneous treatment effects --> E[\delta|G=d] = E[\delta|G=c] = E[\delta|G=a] = E[\delta|G=n],
we measure
E[\delta]