In [49]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from causalml.match import NearestNeighborMatch

df = pd.read_csv('matchandweight.csv')

In [51]:
model = LogisticRegression()
X = df[['age', 'income', 'education_years']]
y = df['treatment']

model.fit(X, y)
df['propensity_score'] = model.predict_proba(X)[:, 1]  #Probability of receiving treatment
df.head()

Unnamed: 0,age,income,education_years,treatment,outcome,propensity_score
0,55.96057,70990.331549,10.649643,1,152339.853676,0.494437
1,48.340828,63869.505244,11.710963,1,152799.125155,0.494995
2,57.772262,50894.455549,10.41516,0,121284.340854,0.496012
3,68.276358,40295.948334,11.384077,1,102945.315002,0.496842
4,47.19016,60473.349704,8.212771,0,128573.308851,0.495261


In [55]:
#Matching based on propensity score
nnm = NearestNeighborMatch(replace=True, ratio=1, random_state=1663)
matched_data = nnm.match(data=df, treatment_col='treatment', score_cols=['propensity_score'])

matched_data.head()

Unnamed: 0,age,income,education_years,treatment,outcome,propensity_score
0,55.96057,70990.331549,10.649643,1,152339.853676,0.494437
1,48.340828,63869.505244,11.710963,1,152799.125155,0.494995
3,68.276358,40295.948334,11.384077,1,102945.315002,0.496842
8,44.366307,65743.29073,13.318491,1,141945.0215,0.494848
11,44.411243,52963.99407,10.47455,1,111423.439722,0.495849


In [57]:
#Calculating the Inverse Probability of Treatment Weight 
df['weight'] = np.where(df['treatment'] == 1, 1 / df['propensity_score'], 1 / (1 - df['propensity_score']))
df

Unnamed: 0,age,income,education_years,treatment,outcome,propensity_score,weight
0,55.960570,70990.331549,10.649643,1,152339.853676,0.494437,2.022503
1,48.340828,63869.505244,11.710963,1,152799.125155,0.494995,2.020223
2,57.772262,50894.455549,10.415160,0,121284.340854,0.496012,1.984172
3,68.276358,40295.948334,11.384077,1,102945.315002,0.496842,2.012712
4,47.190160,60473.349704,8.212771,0,128573.308851,0.495261,1.981222
...,...,...,...,...,...,...,...
995,46.626796,66052.253575,12.154961,1,133442.365413,0.494824,2.020922
996,71.572238,49602.181110,12.515505,0,145206.789933,0.496113,1.984571
997,57.690114,36771.880232,9.516479,1,95271.710205,0.497118,2.011594
998,43.145852,47553.995541,12.668353,0,117780.813728,0.496273,1.985204


In [45]:
#Using weights to estimate the Average Treatment Effect 
weighted_outcome_treated = np.average(df[df['treatment'] == 1]['outcome'], weights=df[df['treatment'] == 1]['weight'])
weighted_outcome_control = np.average(df[df['treatment'] == 0]['outcome'], weights=df[df['treatment'] == 0]['weight'])
ate = weighted_outcome_treated - weighted_outcome_control
print(f"Estimated ATE: {ate}")

Estimated ATE: 6795.826325161266
