## Matching estimators


In [1]:
import pandas as pd

from fastmatch import Matching
from sklearn.linear_model import LinearRegression


### LaLonde observational data

In [2]:
# Read the observational sample and print its (rows, columns) shape.
lalonde_obs = pd.read_parquet("lalonde_obs.parquet")
print(lalonde_obs.shape)

# w: the `treatment` column, y: the `earnings1978` column,
# X: every remaining column, all as raw arrays for the estimators below.
w = lalonde_obs["treatment"].values
y = lalonde_obs["earnings1978"].values
X = lalonde_obs.drop(columns=["treatment", "earnings1978"]).values

(16177, 10)


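Naive difference in means (mean outcome where `w == 1` minus mean outcome where `w == 0`, with no covariate adjustment)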

In [3]:
# Unadjusted comparison: mean of y where w == 1 minus mean of y where w == 0.
obs_treated_mean = y[w == 1].mean()
obs_control_mean = y[w == 0].mean()
obs_treated_mean - obs_control_mean

-8497.516142636992

Basic Matching estimate

In [4]:
# Matching estimator configured with "ATT" and k=5, no bias-correction model.
# presumably k is the number of matches per unit — confirm against fastmatch docs.
m = Matching("ATT", k=5)

# The value returned by fit is what the cell displays.
m.fit(y, w, X)

(1003.3374105837856, nan)

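### Bias-corrected matching

Recommended over the basic matching estimate above: an outcome model (here `LinearRegression`) is passed to `Matching` via `bias_corr_mod`.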

In [5]:
# Outcome model handed to the matcher through `bias_corr_mod`.
omod = LinearRegression()

# Same "ATT" / k=5 configuration as the basic estimate, now with the outcome model attached.
m = Matching("ATT", k=5, bias_corr_mod=omod)

# The value returned by fit is what the cell displays.
m.fit(y, w, X)

(1510.862970754748, 693.1519202024355)

### LaLonde experimental data

In [6]:
# Read the experimental sample and print its (rows, columns) shape.
lalonde_exp = pd.read_parquet("lalonde_exp.parquet")
print(lalonde_exp.shape)

# NOTE: this rebinds w, y and X, replacing the observational arrays;
# every cell below this point operates on the experimental sample.
w = lalonde_exp["treatment"].values
y = lalonde_exp["earnings1978"].values
X = lalonde_exp.drop(columns=["treatment", "earnings1978"]).values

(445, 10)


In [7]:
# Unadjusted comparison on the experimental sample:
# mean of y where w == 1 minus mean of y where w == 0.
exp_treated_mean = y[w == 1].mean()
exp_control_mean = y[w == 0].mean()
exp_treated_mean - exp_control_mean

1794.342404270271

In [8]:
# Basic matching on the experimental sample: "ATT" with k=5, no bias-correction model.
m = Matching("ATT", k=5)

# The value returned by fit is what the cell displays.
m.fit(y, w, X)

(1718.2983673945955, nan)

In [9]:
# Outcome model handed to the matcher through `bias_corr_mod`.
omod = LinearRegression()

# NOTE(review): this cell requests "ATE", whereas every other matching cell in
# this notebook requests "ATT" — confirm the switch is intentional before
# comparing this result with the other estimates.
m = Matching("ATE", k=5, bias_corr_mod=omod)

# The value returned by fit is what the cell displays.
m.fit(y, w, X)

(1624.4637368736574, 611.5369266216888)