In [155]:
import sklearn
import econml
import numpy as np
import scipy as sp

* Linear Case

In [156]:
# Example 1
import numpy as np
N = 100
σ_t = 1
σ_y = 1
σ_x = 1
θ = 0.5
α_t = 0.5
α_y = 0.5
x = np.random.normal(0,σ_x,N)
e_t = np.random.normal(0,σ_t,N)
e_y = np.random.normal(0,σ_y,N)
t = α_t * x + e_t
y = α_y * x + θ * t + e_y
x = x.reshape(-1, 1)
y = y.reshape(-1, 1)
t = t.reshape(-1, 1)

In [157]:
# Simple Regression - Yeilds biased estimates of ATE
import statsmodels.api as sm
mod = sm.OLS(y, np.c_[t])
res = mod.fit()
print(res.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.485
Model:                            OLS   Adj. R-squared (uncentered):              0.480
Method:                 Least Squares   F-statistic:                              93.13
Date:                Sat, 03 Dec 2022   Prob (F-statistic):                    6.34e-16
Time:                        18:59:10   Log-Likelihood:                         -129.43
No. Observations:                 100   AIC:                                      260.9
Df Residuals:                      99   BIC:                                      263.5
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [158]:
# Multivariate regression - yeilds unbiased estimate of ATE 
import statsmodels.api as sm
mod = sm.OLS(y, np.c_[t,x])
res = mod.fit()
print(res.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.564
Model:                            OLS   Adj. R-squared (uncentered):              0.555
Method:                 Least Squares   F-statistic:                              63.47
Date:                Sat, 03 Dec 2022   Prob (F-statistic):                    2.09e-18
Time:                        18:59:11   Log-Likelihood:                         -121.04
No. Observations:                 100   AIC:                                      246.1
Df Residuals:                      98   BIC:                                      251.3
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [159]:
# Multivariate regression - yeilds unbiased estimate of ATE 
from econml.dml import LinearDML
est = LinearDML(random_state=1)
est.fit(y, t, X=None,W=x)
est.summary()

Coefficient Results:  X is None, please call intercept_inference to learn the constant!


0,1,2,3,4,5,6
,point_estimate,stderr,zstat,pvalue,ci_lower,ci_upper
cate_intercept,0.617,0.071,8.707,0.0,0.478,0.756


* Non-linear Case

In [160]:
# Example 2
import numpy as np

# Hyperparameters
N = 50000
σ_t = 1
σ_y = 1
σ_x = 1
θ = 0.5
α_t = 0.5
α_y = 0.5

x = np.random.normal(0,σ_x,N)
e_t = np.random.normal(0,σ_t,N)
e_y = np.random.normal(0,σ_y,N)
t = np.exp(α_t * x) + e_t
y = np.exp(α_y * x) + θ * t + e_y

x = x.reshape(-1, 1)
y = y.reshape(-1, 1)
t = t.reshape(-1, 1)

In [161]:
# Multivariate regression - gives biased estimates of ATE
import statsmodels.api as sm
mod = sm.OLS(y, np.c_[t,x])
res = mod.fit()
print(res.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.680
Model:                            OLS   Adj. R-squared (uncentered):              0.680
Method:                 Least Squares   F-statistic:                          5.324e+04
Date:                Sat, 03 Dec 2022   Prob (F-statistic):                        0.00
Time:                        18:59:15   Log-Likelihood:                         -82368.
No. Observations:               50000   AIC:                                  1.647e+05
Df Residuals:                   49998   BIC:                                  1.648e+05
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [162]:
# DML regression - still yeilds unbiased estimate of ATE 
from econml.dml import LinearDML
est = LinearDML(random_state=1)
est.fit(y, t, X=None,W=x)
est.summary()

Coefficient Results:  X is None, please call intercept_inference to learn the constant!


0,1,2,3,4,5,6
,point_estimate,stderr,zstat,pvalue,ci_lower,ci_upper
cate_intercept,0.536,0.005,117.357,0.0,0.527,0.545


* High Dimensional Case

In [149]:
# Example 1
import numpy as np
N = 10000
σ_t = 1
σ_y = 1
σ_x = 1
θ = 0.5
K = 50
x = np.random.normal(0,σ_x,(N,K))
α_t = np.random.normal(0,1,K)
α_y = np.random.normal(0,1,K)
e_t = np.random.normal(0,σ_t,N)
e_y = np.random.normal(0,σ_y,N)
t = np.dot(α_t,x.T) + e_t
y = np.dot(α_y,x.T) + θ * t + e_y
#x = x.reshape(-1, 1)
y = y.reshape(-1, 1)
t = t.reshape(-1, 1)

In [152]:
# Multivariate regression - works!
import statsmodels.api as sm
mod = sm.OLS(y, np.c_[t,x])
res = mod.fit_regularized()
print(res.RegularizedResults)

AttributeError: 'RegularizedResults' object has no attribute 'RegularizedResults'

In [153]:
print(res.RegularizedResults())

AttributeError: 'RegularizedResults' object has no attribute 'RegularizedResults'

In [154]:
# DML regression - still yeilds unbiased estimate of ATE 
from econml.dml import LinearDML
est = LinearDML(random_state=1)
est.fit(y, t, X=None,W=x)
est.summary()

Coefficient Results:  X is None, please call intercept_inference to learn the constant!


0,1,2,3,4,5,6
,point_estimate,stderr,zstat,pvalue,ci_lower,ci_upper
cate_intercept,0.514,0.01,51.964,0.0,0.494,0.533


* Example

In [9]:
from econml.dml import LinearDML
import numpy as np
import scipy.special
np.set_printoptions(suppress=True)
np.random.seed(123)
X = np.random.normal(size=(1000, 5))
T = np.random.binomial(1, scipy.special.expit(X[:, 0]))
y = (1 + .5*X[:, 0]) * T + X[:, 0] + np.random.normal(size=(1000,))
est = LinearDML(discrete_treatment=True)
est.fit(y, T, X=X, W=None)
est.summary()

0,1,2,3,4,5,6
,point_estimate,stderr,zstat,pvalue,ci_lower,ci_upper
X0,0.397,0.082,4.858,0.0,0.237,0.558
X1,-0.003,0.068,-0.046,0.963,-0.136,0.13
X2,0.013,0.067,0.202,0.84,-0.117,0.144
X3,0.014,0.068,0.206,0.837,-0.12,0.148
X4,-0.091,0.07,-1.297,0.195,-0.228,0.046

0,1,2,3,4,5,6
,point_estimate,stderr,zstat,pvalue,ci_lower,ci_upper
cate_intercept,0.992,0.068,14.572,0.0,0.859,1.125


In [15]:
import statsmodels.api as sm
mod = sm.OLS(y, sm.add_constant(np.c_[T, X], prepend=False))
res = mod.fit()
print(res.summary())
print(res.params[0])
print(res.bse[0])

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.130
Model:                            OLS   Adj. R-squared:                  0.124
Method:                 Least Squares   F-statistic:                     24.65
Date:                Tue, 06 Dec 2022   Prob (F-statistic):           2.47e-27
Time:                        13:20:40   Log-Likelihood:                -1411.5
No. Observations:                1000   AIC:                             2837.
Df Residuals:                     993   BIC:                             2871.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.5114      0.071      7.221      0.0

* Example

In [19]:
from econml.dml import LinearDML
import numpy as np
import scipy.special
np.set_printoptions(suppress=True)
np.random.seed(123)
X = np.random.normal(size=(1000, 5))
T = np.random.binomial(1, scipy.special.expit(X[:, 0]))
y = 0.5 * T + np.log(X[:, 0]+5) + np.random.normal(size=(1000,))
est = LinearDML(discrete_treatment=True)
est.fit(y, T, X=None, W=X)
est.summary()

Coefficient Results:  X is None, please call intercept_inference to learn the constant!


0,1,2,3,4,5,6
,point_estimate,stderr,zstat,pvalue,ci_lower,ci_upper
cate_intercept,0.511,0.067,7.585,0.0,0.379,0.643


In [18]:
import statsmodels.api as sm
mod = sm.OLS(y, sm.add_constant(np.c_[T, X], prepend=False))
res = mod.fit()
print(res.summary())
print(res.params[0])
print(res.bse[0])

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.130
Model:                            OLS   Adj. R-squared:                  0.124
Method:                 Least Squares   F-statistic:                     24.65
Date:                Tue, 06 Dec 2022   Prob (F-statistic):           2.47e-27
Time:                        13:20:56   Log-Likelihood:                -1411.5
No. Observations:                1000   AIC:                             2837.
Df Residuals:                     993   BIC:                             2871.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.5114      0.071      7.221      0.0

In [55]:
from econml.dml import LinearDML
import numpy as np
import scipy.special
np.set_printoptions(suppress=True)
np.random.seed(123)
X = np.random.normal(size=(1000, 5))
β1 = np.random.normal(size=(1, 5))
β2 = np.random.normal(size=(1, 5))
T = np.random.binomial(1, scipy.special.expit(np.dot(β1, X.T)))
y = 0.5 * T + np.log(np.dot(β2,X.T)+10) + np.random.normal(size=(1000,))
est = LinearDML(discrete_treatment=True)
print(y.shape, T.shape, X.shape)
est.fit(y.T, T.T, X=None, W=X)
est.summary()

(1, 1000) (1, 1000) (1000, 5)
Coefficient Results:  X is None, please call intercept_inference to learn the constant!


0,1,2,3,4,5,6
,point_estimate,stderr,zstat,pvalue,ci_lower,ci_upper
cate_intercept,0.305,0.077,3.938,0.0,0.153,0.457


In [56]:
help(LinearDML)

Help on class LinearDML in module econml.dml.dml:

class LinearDML(econml._cate_estimator.StatsModelsCateEstimatorMixin, DML)
 |  LinearDML(*, model_y='auto', model_t='auto', featurizer=None, treatment_featurizer=None, fit_cate_intercept=True, linear_first_stages=True, discrete_treatment=False, categories='auto', cv=2, mc_iters=None, mc_agg='mean', random_state=None)
 |  
 |  The Double ML Estimator with a low-dimensional linear final stage implemented as a statsmodel regression.
 |  
 |  Parameters
 |  ----------
 |  model_y: estimator or 'auto', default 'auto'
 |      The estimator for fitting the response to the features. Must implement
 |      `fit` and `predict` methods.
 |      If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
 |  
 |  model_t: estimator or 'auto', default 'auto'
 |      The estimator for fitting the treatment to the features.
 |      If estimator, it must implement `fit` and `predict` methods;
 |      If 'auto', :class:`~skle

In [60]:
from econml.dml import LinearDML
import numpy as np
import scipy.special
np.set_printoptions(suppress=True)
np.random.seed(123)
X = np.random.normal(size=(1000, 5))
β1 = np.random.normal(size=(1, 5))
β2 = np.random.normal(size=(1, 5))
T = np.random.binomial(1, scipy.special.expit(np.dot(β1, X.T)))
y = 0.5 * T + np.log(np.dot(β2,X.T)+10) + np.random.normal(size=(1000,))
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier

est = LinearDML(discrete_treatment=True, model_y = RandomForestRegressor(500), model_t = RandomForestClassifier(500))
print(y.shape, T.shape, X.shape)
est.fit(y.T.ravel(), T.T.ravel(), X=None, W=X)
est.summary()

(1, 1000) (1, 1000) (1000, 5)
Coefficient Results:  X is None, please call intercept_inference to learn the constant!


0,1,2,3,4,5,6
,point_estimate,stderr,zstat,pvalue,ci_lower,ci_upper
cate_intercept,0.299,0.088,3.407,0.001,0.127,0.471


In [61]:
import statsmodels.api as sm
mod = sm.OLS(y.T, sm.add_constant(np.c_[T.T, X], prepend=False))
res = mod.fit()
print(res.summary())
print(res.params[0])
print(res.bse[0])

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.061
Model:                            OLS   Adj. R-squared:                  0.055
Method:                 Least Squares   F-statistic:                     10.73
Date:                Tue, 06 Dec 2022   Prob (F-statistic):           1.40e-11
Time:                        13:31:56   Log-Likelihood:                -1410.1
No. Observations:                1000   AIC:                             2834.
Df Residuals:                     993   BIC:                             2869.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.3145      0.085      3.711      0.0

In [None]:
df = np.c_

In [52]:
import numpy as np
from doubleml.datasets import make_plr_CCDDHNR2018
from doubleml import DoubleMLData

np.random.seed(1234)
dml_data_bonus = DoubleMLData(df, y_col=y.T,
                                  d_cols=T.T,
                                  x_cols=X)
print(dml_data_bonus)
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LassoCV
learner = RandomForestRegressor(n_estimators = 500, max_features = 'sqrt', max_depth= 8)
ml_l_bonus = clone(learner)
ml_m_bonus = clone(learner)
learner = LassoCV()
ml_l_sim = clone(learner)
ml_m_sim = clone(learner)
def non_orth_score(y, d, l_hat, m_hat, g_hat, smpls):
    u_hat = y - g_hat
    psi_a = -np.multiply(d, d)
    psi_b = np.multiply(d, u_hat)
    return psi_a, psi_b

from doubleml import DoubleMLPLR
np.random.seed(3141)
obj_dml_plr_bonus = DoubleMLPLR(dml_data_bonus, ml_l_bonus, ml_m_bonus)
obj_dml_plr_bonus.fit();
print(obj_dml_plr_bonus)

NameError: name 'df' is not defined