# Matching and Weighting - In Progress

In [38]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from causalml.match import NearestNeighborMatch

In [40]:
# Load the dataset from the CSV file (path is relative to the working directory).
df = pd.read_csv('matchandweight.csv')
# Jupyter renders only the final expression of a cell, so a bare `df.head()` in
# the middle of the cell is evaluated and silently thrown away. Display the
# preview explicitly and keep the row count as the cell's value.
display(df.head())
len(df)

1000

In [42]:
# Estimate propensity scores: regress the treatment indicator on the covariates
# with a default-configured logistic regression.
covariate_cols = ['age', 'income', 'education_years']
X = df[covariate_cols]
y = df['treatment']
model = LogisticRegression().fit(X, y)

# predict_proba returns one column per class; column 1 is taken as the
# probability of receiving treatment (treatment appears coded 0/1 in the data).
treated_proba = model.predict_proba(X)[:, 1]
df['propensity_score'] = treated_proba
df.head(10)

Unnamed: 0,age,income,education_years,treatment,outcome,propensity_score
0,55.96057,70990.331549,10.649643,1,152339.853676,0.494437
1,48.340828,63869.505244,11.710963,1,152799.125155,0.494995
2,57.772262,50894.455549,10.41516,0,121284.340854,0.496012
3,68.276358,40295.948334,11.384077,1,102945.315002,0.496842
4,47.19016,60473.349704,8.212771,0,128573.308851,0.495261
5,47.190357,55902.280781,12.426587,0,120951.910319,0.495619
6,68.950554,63427.8983,12.002411,0,137565.643316,0.495029
7,59.209217,59527.577025,10.365823,0,125282.460267,0.495335
8,44.366307,65743.29073,13.318491,1,141945.0215,0.494848
9,56.510721,41971.471827,13.87514,0,107429.889624,0.496711


In [44]:
# Matching based on propensity score.
# Nearest-neighbour matching on the `propensity_score` column, with replacement
# (replace=True) and one match per unit (ratio=1); random_state is pinned.
nnm = NearestNeighborMatch(replace=True, ratio=1, random_state=1663)
matched_data = nnm.match(data=df, treatment_col='treatment', score_cols=['propensity_score'])
# Jupyter renders only the final expression of a cell, so a bare
# `matched_data.head(10)` here would be computed and discarded. Display the head
# explicitly; the tail remains the cell's value.
display(matched_data.head(10))
matched_data.tail(10)

Unnamed: 0,age,income,education_years,treatment,outcome,propensity_score
411,36.504295,58343.45014,15.075864,0,133778.477345,0.495428
968,49.518105,47076.375795,8.358793,0,91428.496328,0.496311
591,44.268111,6179.742748,11.085397,0,39230.236002,0.499516
773,61.670653,52919.111829,8.902763,0,134722.435283,0.495853
363,54.820541,35403.965887,14.225377,0,106832.209186,0.497225
832,37.950311,64297.049509,15.125778,0,132727.534544,0.494961
536,39.67504,57704.001599,13.951633,0,118833.699388,0.495478
25,51.331071,56544.855045,12.367699,0,125861.346086,0.495569
798,50.339821,66140.107079,11.280741,0,148257.0283,0.494817
478,86.94657,36687.61733,9.322788,0,100305.170431,0.497125


In [46]:
# Inverse Probability of Treatment Weights:
#   treated rows     -> 1 / propensity_score
#   all other rows   -> 1 / (1 - propensity_score)
propensity = df['propensity_score']
treated_mask = df['treatment'] == 1
weight_if_treated = 1 / propensity
weight_if_untreated = 1 / (1 - propensity)
df['weight'] = np.where(treated_mask, weight_if_treated, weight_if_untreated)
df

Unnamed: 0,age,income,education_years,treatment,outcome,propensity_score,weight
0,55.960570,70990.331549,10.649643,1,152339.853676,0.494437,2.022503
1,48.340828,63869.505244,11.710963,1,152799.125155,0.494995,2.020223
2,57.772262,50894.455549,10.415160,0,121284.340854,0.496012,1.984172
3,68.276358,40295.948334,11.384077,1,102945.315002,0.496842,2.012712
4,47.190160,60473.349704,8.212771,0,128573.308851,0.495261,1.981222
...,...,...,...,...,...,...,...
995,46.626796,66052.253575,12.154961,1,133442.365413,0.494824,2.020922
996,71.572238,49602.181110,12.515505,0,145206.789933,0.496113,1.984571
997,57.690114,36771.880232,9.516479,1,95271.710205,0.497118,2.011594
998,43.145852,47553.995541,12.668353,0,117780.813728,0.496273,1.985204


In [48]:
# Estimate the Average Treatment Effect as the difference between the
# weight-normalised mean outcomes of the two treatment arms.
treated_rows = df[df['treatment'] == 1]
control_rows = df[df['treatment'] == 0]

# np.average divides by the sum of the weights, so each arm's mean is normalised
# by its own total weight.
weighted_outcome_treated = np.average(treated_rows['outcome'], weights=treated_rows['weight'])
weighted_outcome_control = np.average(control_rows['outcome'], weights=control_rows['weight'])

ate = weighted_outcome_treated - weighted_outcome_control
print(f"Estimated ATE: {ate}")

Estimated ATE: 6795.826325162227
