In [1]:
import pandas as pd
from causal_inference.models import *
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the course data, keep only the rows where format_blended == 0, drop that
# column (it is constant after the filter), and discard rows with missing values.
df = (
    pd.read_csv('data/course/online_classroom.csv')
    .loc[lambda frame: frame.format_blended == 0]
    .drop(columns=['format_blended'])
    .dropna()
)
df.head()

Unnamed: 0,gender,asian,black,hawaiian,hispanic,unknown,white,format_ol,falsexam
0,0,0.0,0.0,0.0,0.0,0.0,1.0,0,63.29997
1,1,0.0,0.0,0.0,0.0,0.0,1.0,0,79.96
4,1,0.0,0.0,0.0,0.0,0.0,1.0,1,83.3
5,0,1.0,0.0,0.0,0.0,0.0,0.0,1,88.34996
7,1,1.0,0.0,0.0,0.0,0.0,0.0,0,90.0


In [3]:
# Mean falsexam score for each value of format_ol (raw, unadjusted comparison).
df.groupby('format_ol')[['falsexam']].mean()

Unnamed: 0_level_0,falsexam
format_ol,Unnamed: 1_level_1
0,78.779073
1,74.383355


In [4]:
# Split the frame into the pieces passed to the models' fit() calls below:
# T is the format_ol column, y is the falsexam column, and X is every
# remaining column.
treatment_col, outcome_col = 'format_ol', 'falsexam'
T = df[treatment_col]
y = df[outcome_col]
X = df.drop(columns=[treatment_col, outcome_col])

# Biased Model

In [5]:
# Fit BiasedModel on treatment and outcome only -- no covariates are passed.
# In the recorded run the printed ATE equals the raw difference between the
# two group means shown earlier in the notebook.
biased_model = BiasedModel().fit(T, y)
ci_lower, ci_upper = biased_model.confidence_interval
print(
    f'ATE: {biased_model.ate:.3f}',
    f'Confidence Interval: [{ci_lower:.3f}, {ci_upper:.3f}]',
    sep='\n',
)

ATE: -4.396
Confidence Interval: [-7.872, -0.920]


In [6]:
# Fit LinearModel with covariates X, treatment T, and outcome y, then report
# its ATE and confidence interval.
# NOTE(review): presumably a regression-adjustment estimator -- confirm in
# causal_inference.models.
linear_model = LinearModel().fit(X, T, y)
ci_lower, ci_upper = linear_model.confidence_interval
print(
    f'ATE: {linear_model.ate:.3f}',
    f'Confidence Interval: [{ci_lower:.3f}, {ci_upper:.3f}]',
    sep='\n',
)

ATE: -4.241
Confidence Interval: [-7.745, -0.738]


# Matching

In [7]:
# Fit KNNModel with covariates X, treatment T, and outcome y, then report its
# ATE and confidence interval.
# NOTE(review): presumably nearest-neighbour matching, per the section
# heading -- confirm in causal_inference.models.
knn_model = KNNModel().fit(X, T, y)
ci_lower, ci_upper = knn_model.confidence_interval
print(
    f'ATE: {knn_model.ate:.3f}',
    f'Confidence Interval: [{ci_lower:.3f}, {ci_upper:.3f}]',
    sep='\n',
)

ATE: -5.520
Confidence Interval: [-9.467, 0.351]


# Propensity Model

In [8]:
# Fit PropensityModel with covariates X, treatment T, and outcome y, handing it
# a default-configured scikit-learn LogisticRegression.
# NOTE(review): presumably the LogisticRegression is used as the propensity-score
# classifier -- confirm in causal_inference.models.
# LogisticRegression is imported in the notebook's top import cell rather than
# mid-notebook, so all dependencies are visible in one place.
propensity_model = PropensityModel(LogisticRegression()).fit(X, T, y)
ci_lower, ci_upper = propensity_model.confidence_interval
print(f'ATE: {propensity_model.ate:.3f}')
print(f'Confidence Interval: [{ci_lower:.3f}, {ci_upper:.3f}]')

ATE: -4.300
Confidence Interval: [-8.655, -0.901]


# Doubly Robust Estimator

In [9]:
# Fit DoublyRobustEstimator with covariates X, treatment T, and outcome y,
# handing it a default-configured scikit-learn LogisticRegression.
# NOTE(review): presumably the LogisticRegression serves as the propensity
# component of the estimator -- confirm in causal_inference.models.
# LogisticRegression is imported in the notebook's top import cell; the
# duplicate mid-notebook import that used to live here was removed.
dre_model = DoublyRobustEstimator(LogisticRegression()).fit(X, T, y)
ci_lower, ci_upper = dre_model.confidence_interval
print(f'ATE: {dre_model.ate:.3f}')
print(f'Confidence Interval: [{ci_lower:.3f}, {ci_upper:.3f}]')

ATE: -4.064
Confidence Interval: [-7.756, -0.522]
