In [5]:
import numpy as np
import pandas as pd
from causalml.match import NearestNeighborMatch
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

# Load the study data; the propensity-model cell reads the columns
# age, income, education_years and treatment from this frame.
df = pd.read_csv(filepath_or_buffer='matchandweight.csv')

          age        income  education_years  treatment        outcome  \
0   55.960570  70990.331549        10.649643          1  152339.853676   
1   48.340828  63869.505244        11.710963          1  152799.125155   
3   68.276358  40295.948334        11.384077          1  102945.315002   
8   44.366307  65743.290730        13.318491          1  141945.021478   
11  44.411243  52963.994070        10.474550          1  111423.439722   

    propensity_score  
0           0.494437  
1           0.494995  
3           0.496842  
8           0.494848  
11          0.495849  


In [11]:
# Estimate each unit's propensity score: the probability of receiving
# treatment given the covariates, using a logistic regression.
#
# The frame is loaded as `df`, but this cell and the later ones write to and
# read from `data`. Bind `data` here as a copy so the cell runs on a fresh
# kernel and the loaded frame is not mutated.
data = df.copy()

X = data[['age', 'income', 'education_years']]
y = data['treatment']

model = LogisticRegression()
model.fit(X, y)

# predict_proba returns one column per class in model.classes_; for a 0/1
# treatment indicator, column 1 is P(treatment == 1 | X).
data['propensity_score'] = model.predict_proba(X)[:, 1]

In [13]:
# Match treated and control rows on the estimated propensity score.
# random_state is fixed so repeated runs give the same matches.
# NOTE(review): the output recorded below this cell shows a `weight` column
# that only the IPTW cell creates, so it was captured after that cell had run.
match_settings = dict(replace=True, ratio=1, random_state=1663)
nnm = NearestNeighborMatch(**match_settings)

matched_data = nnm.match(
    data=data,
    treatment_col='treatment',
    score_cols=['propensity_score'],
)

preview = matched_data.head()
print(preview)

          age        income  education_years  treatment        outcome  \
0   55.960570  70990.331549        10.649643          1  152339.853676   
1   48.340828  63869.505244        11.710963          1  152799.125155   
3   68.276358  40295.948334        11.384077          1  102945.315002   
8   44.366307  65743.290730        13.318491          1  141945.021478   
11  44.411243  52963.994070        10.474550          1  111423.439722   

    propensity_score    weight  
0           0.494437  2.022503  
1           0.494995  2.020223  
3           0.496842  2.012712  
8           0.494848  2.020823  
11          0.495849  2.016742  


In [15]:
# Inverse Probability of Treatment Weighting (IPTW).
# Treated rows get weight 1 / p and control rows get 1 / (1 - p), where p is
# the estimated propensity score stored in data['propensity_score'].
# This cell needs numpy as `np`, which the imports cell must provide.
is_treated = data['treatment'] == 1
data['weight'] = np.where(is_treated,
                          1 / data['propensity_score'],
                          1 / (1 - data['propensity_score']))

# Select each arm once (after the weight column exists) instead of
# re-filtering the frame for every outcome/weight lookup.
treated_rows = data[is_treated]
control_rows = data[data['treatment'] == 0]

# np.average divides by the sum of the weights, so each arm's value is a
# normalized weighted mean of the outcome.
weighted_outcome_treated = np.average(treated_rows['outcome'], weights=treated_rows['weight'])
weighted_outcome_control = np.average(control_rows['outcome'], weights=control_rows['weight'])

# Average Treatment Effect (ATE): difference between the two weighted means.
ate = weighted_outcome_treated - weighted_outcome_control
print(f"Estimated ATE using IPTW: {ate}")

Estimated ATE using IPTW: 6795.826325286602
