In [2]:
%load_ext autoreload
%autoreload 2
from econml._ortho_learner import _crossfit

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Lasso
class Wrapper:
    """Adapter giving a plain estimator the ``fit(X, y, W)`` / ``predict(X, y, W)`` call shape.

    The wrapped model only ever sees ``X`` (and ``y`` when fitting); the extra
    arguments are accepted so both methods can be called with the same
    argument list, and are otherwise ignored.
    """

    def __init__(self, model):
        # Keep a reference to the caller's estimator (no copy is made).
        self._model = model

    def fit(self, X, y, W=None):
        """Fit the wrapped model on ``(X, y)`` and return this wrapper; ``W`` is unused."""
        inner = self._model
        inner.fit(X, y)
        return self

    def predict(self, X, y, W=None):
        """Return the wrapped model's predictions for ``X``; ``y`` and ``W`` are unused."""
        predictions = self._model.predict(X)
        return predictions
# Seed NumPy's global RNG so the synthetic data below is repeatable.
np.random.seed(123)
# 5000 samples with 3 standard-normal features.
X = np.random.normal(size=(5000, 3))
# Outcome is the first feature plus standard-normal noise.
y = X[:, 0] + np.random.normal(size=(5000,))
# Materialize the KFold(2) splits as a list so they can be handed to _crossfit.
folds = list(KFold(2).split(X, y))
model = Lasso(alpha=0.01)
# y is passed both positionally and as W; Wrapper.fit/predict ignore W, so only (X, y) reach Lasso.
# NOTE(review): Wrapper.fit/predict accept no Z argument, so _crossfit presumably drops
# None-valued keyword arguments before calling them — confirm against _crossfit.
nuisance, model_list, fitted_inds = _crossfit(Wrapper(model),
                                 folds,
                                 X, y, W=y, Z=None)
print(nuisance)
print(model_list)
# Bare last expression so the notebook displays fitted_inds.
fitted_inds

(array([-1.1057289 , -1.53756637, -2.4518278 , ...,  1.10628792,
       -1.82966233, -1.78227335]),)
[<__main__.Wrapper object at 0x1297feef0>, <__main__.Wrapper object at 0x112869c50>]


array([   0,    1,    2, ..., 4997, 4998, 4999])

In [71]:
import numpy as np
from sklearn.linear_model import LinearRegression
from econml._ortho_learner import _OrthoLearner
class ModelNuisance:
    """First-stage model pair that residualizes the outcome ``Y`` and treatment ``T`` on ``W``."""

    def __init__(self, model_t, model_y):
        self._model_t, self._model_y = model_t, model_y

    def fit(self, Y, T, W=None):
        """Fit the treatment model on ``(W, T)``, then the outcome model on ``(W, Y)``."""
        # Treatment model first, outcome model second.
        for learner, target in ((self._model_t, T), (self._model_y, Y)):
            learner.fit(W, target)
        return self

    def predict(self, Y, T, W=None):
        """Return the tuple ``(Y - E[Y|W], T - E[T|W])`` using the fitted models' predictions."""
        y_hat = self._model_y.predict(W)
        t_hat = self._model_t.predict(W)
        return Y - y_hat, T - t_hat
class ModelFinal:
    """Final stage: no-intercept linear regression of the outcome residual on the treatment residual."""

    def __init__(self):
        # No state until fit() stores the regression in self.model.
        return

    def fit(self, Y, T, W=None, nuisances=None):
        """Regress ``Y_res`` on ``T_res`` (both taken from ``nuisances``) and keep the fitted model."""
        y_resid, t_resid = nuisances
        regressor = LinearRegression(fit_intercept=False)
        # The treatment residual is reshaped to a single feature column.
        self.model = regressor.fit(t_resid.reshape(-1, 1), y_resid)
        return self

    def predict(self, X=None):
        """Return the fitted slope as a scalar; ``X`` is accepted but unused."""
        slope = self.model.coef_[0]
        return slope

    def score(self, Y, T, W=None, nuisances=None):
        """Mean squared error of the final regression on the supplied residuals."""
        y_resid, t_resid = nuisances
        fitted = self.model.predict(t_resid.reshape(-1, 1))
        return np.mean((y_resid - fitted) ** 2)
# Seed the global RNG so the synthetic data (and the recorded outputs) are repeatable.
np.random.seed(123)
X = np.random.normal(size=(100, 3))
# Outcome = column 0 + column 1 + Gaussian noise with standard deviation 0.1.
y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.1, size=(100,))
est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()),
                    ModelFinal(),
                    n_splits=2, discrete_treatment=False, random_state=None)
# Column 0 is the treatment and the remaining columns are the controls W, so the
# coefficient of the treatment in the data-generating line above is 1.
est.fit(y, X[:, 0], W=X[:, 1:])

<econml._ortho_learner._OrthoLearner at 0x12d8b6be0>

In [72]:
# Display the estimated constant marginal effect (recorded output is a scalar close to 1).
est.const_marginal_effect()

1.0236499258047582

In [73]:
# Effect of moving the treatment from T0=0 to T1=1 (recorded output is a length-1 array).
est.effect(T0=0, T1=1)

array([1.02364993])

In [74]:
# Score the estimator on the same (y, T, W) that were passed to est.fit, i.e. in-sample.
est.score(y, X[:, 0], W=X[:, 1:])

0.00727995424098179

In [75]:
# Inspect the LinearRegression object that ModelFinal.fit stored in its `model` attribute.
est.model_final.model

LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,
         normalize=False)

In [76]:
# Fitted slope of the outcome residual on the treatment residual.
est.model_final.model.coef_

array([1.02364993])

In [77]:
# Score attribute stored on the estimator; the recorded value differs slightly from the
# est.score(...) call on the same data — presumably it is computed during fit; confirm.
est.score_

0.007568302109999707

In [11]:
class ModelNuisance:
    """First-stage model pair for a treatment supplied as a 2-D indicator matrix.

    The treatment model must expose ``predict_proba``; the outcome model must
    expose ``predict``.
    """

    def __init__(self, model_t, model_y):
        self._model_t, self._model_y = model_t, model_y

    def fit(self, Y, T, W=None):
        """Fit the treatment model on integer labels derived from ``T`` and the outcome model on ``Y``."""
        # Weight column j of T by j + 1 and sum across columns, giving one label per row
        # (an all-zero row maps to label 0).
        # Presumably T is one-hot with the baseline column dropped — confirm with the caller.
        column_codes = np.arange(1, T.shape[1] + 1)
        labels = np.matmul(T, column_codes)
        self._model_t.fit(W, labels)
        self._model_y.fit(W, Y)
        return self

    def predict(self, Y, T, W=None):
        """Return ``(Y - predicted Y, T - predicted probabilities without the first class column)``."""
        outcome_residual = Y - self._model_y.predict(W)
        class_probabilities = self._model_t.predict_proba(W)
        # Drop the first probability column before subtracting from T.
        return outcome_residual, T - class_probabilities[:, 1:]

class ModelFinal:
    """Final stage: no-intercept linear regression of the outcome residual on the treatment residual.

    ``predict`` returns the fitted slope as a ``(1, 1)`` array.
    """

    def __init__(self):
        # No state until fit() stores the regression in self.model.
        return

    def fit(self, Y, T, W=None, nuisances=None):
        """Fit the residual-on-residual regression.

        ``nuisances`` is unpacked as ``(Y_res, T_res)``; ``Y``, ``T`` and ``W``
        are accepted but not used. Returns ``self``.
        """
        Y_res, T_res = nuisances
        # The treatment residual is reshaped to a single feature column.
        self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)
        return self

    def predict(self, X=None):
        """Return the fitted slope wrapped in a ``(1, 1)`` array.

        ``X`` is optional and ignored; it is accepted so that the method can be
        called either with no arguments or with a feature argument, matching
        the ``predict(self, X=None)`` signature used by the continuous-treatment
        ``ModelFinal`` defined earlier in this notebook.
        """
        # theta needs to be of dimension (1, d_t) if T is (n, d_t)
        return np.array([[self.model.coef_[0]]])

    def score(self, Y, T, W=None, nuisances=None):
        """Mean squared error of the final regression on the supplied ``(Y_res, T_res)``."""
        Y_res, T_res = nuisances
        return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)

# Seed the global RNG so the synthetic data (and the recorded outputs) are repeatable.
np.random.seed(123)
X = np.random.normal(size=(100, 3))
import scipy.special
from sklearn.linear_model import LogisticRegression
# Binary treatment drawn with probability expit(X[:, 0]).
T = np.random.binomial(1, scipy.special.expit(X[:, 0]))
sigma = 0.01
# Outcome = treatment + column 0 + Gaussian noise with standard deviation sigma,
# so the treatment coefficient in the data-generating process is 1.
y = T + X[:, 0] + np.random.normal(0, sigma, size=(100,))
# NOTE(review): T is 1-D here while ModelNuisance.fit reads T.shape[1]; presumably
# _OrthoLearner expands T to a 2-D encoding when discrete_treatment=True — confirm.
est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), ModelFinal(),
                    n_splits=2, discrete_treatment=True, random_state=None)
est.fit(y, T, W=X)

<econml._ortho_learner._OrthoLearner at 0x12981d048>

In [12]:
# Display the estimated constant marginal effect (recorded output is a (1, 1) array close to 1).
est.const_marginal_effect()

array([[1.00123159]])

In [13]:
# Effect with the estimator's default T0/T1 arguments (recorded output is a length-1 array).
est.effect()

array([1.00123159])

In [14]:
# Score the estimator on the same (y, T, W) that were passed to est.fit, i.e. in-sample.
est.score(y, T, W=X)

0.002569588332146612

In [15]:
# Scalar slope of the final regression (the value ModelFinal.predict wraps in an array).
est.model_final.model.coef_[0]

1.0012315874866917

In [16]:
# Inspect the LinearRegression object that ModelFinal.fit stored in its `model` attribute.
est.model_final.model

LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,
         normalize=False)

In [17]:
# Full coefficient array of the final regression (a single entry, since it has one feature column).
est.model_final.model.coef_

array([1.00123159])

In [18]:
# Score attribute stored on the estimator; the recorded value differs slightly from the
# est.score(...) call on the same data — presumably it is computed during fit; confirm.
est.score_

0.0031604059708364245

In [30]:
# Coefficients of the outcome regression inside the first entry of est.models_nuisance
# (the recorded output has one value per column of W).
est.models_nuisance[0]._model_y.coef_

array([1.28171346, 0.03749846, 0.10120681])

In [31]:
from econml.dml import LinearDMLCateEstimator
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, LassoCV, Lasso
import numpy as np
# NOTE(review): this cell does not call np.random.seed, so the draws below depend on
# whatever RNG state earlier cells left behind; the recorded outputs are not repeatable
# from this cell alone.
X = np.random.normal(size=(100000, 3))
# Outcome is column 0 plus standard-normal noise.
y = X[:, 0] + np.random.normal(size=(100000,))
est = LinearDMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())
# Column 0 is the treatment; column 1 and columns 2+ are the next two positional
# arguments (presumably features X and controls W — confirm against the fit signature).
est.fit(y, X[:, 0], X[:, [1]], X[:, 2:], inference='statsmodels')
print(X[:1])
# Every query below uses the first sample's column-1 value as the feature input.
print(est.effect(X[:1, [1]]))
print(est.effect_interval(X[:1, [1]]))
print(est.coef_)
print(est.coef__interval())
print(est.const_marginal_effect(X[:1, [1]]))

[[1.64721067 1.68104532 0.07458696]]
[1.00979079]
(array([0.99962896]), array([1.01995263]))
[1.00096897 0.00524782]
(array([9.95750740e-01, 5.09187652e-05]), array([1.0061872 , 0.01044472]))
[1.00979079]


In [32]:
from econml.dml import SparseLinearDMLCateEstimator
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, LassoCV, Lasso, MultiTaskLassoCV
import numpy as np
# NOTE(review): this cell does not call np.random.seed, so the draws below depend on
# whatever RNG state earlier cells left behind.
X = np.random.normal(size=(5000, 10))
# Outcome is column 0 plus standard-normal noise.
y = X[:, 0] + np.random.normal(size=(5000,))
est = SparseLinearDMLCateEstimator(model_y=MultiTaskLassoCV(cv=3), model_t=LassoCV(cv=3),
                                   model_final=MultiTaskLassoCV(cv=3, fit_intercept=False))
# Columns 1..dx-1 and columns dx.. are passed as the third and fourth positional arguments.
dx = 8
# The outcome is duplicated into two identical columns, giving a two-output problem for
# the multi-task models; column 0 is the treatment.
est.fit(np.hstack([y.reshape(-1,1), y.reshape(-1,1)]), X[:, 0], X[:, 1:dx], X[:, dx:])
print(X[:1])
# Queries below use the first sample's columns 1..dx-1 as the feature input.
print(est.effect(X[:1, 1:dx]))
print(est.model_final.coef_)
print(est.const_marginal_effect(X[:1, 1:dx]))

[[ 0.89256841  0.27329632  1.20577242 -1.19175328  0.15390398 -0.23511674
  -0.55378668 -2.64852199  0.73121095 -0.73312251]]
[[0.99664838 0.99664838]]
[[ 0.99040771  0.00504131  0.         -0.0034206  -0.01300061 -0.01033077
  -0.01303745  0.00259075]
 [ 0.99040771  0.00504131  0.         -0.0034206  -0.01300061 -0.01033077
  -0.01303745  0.00259075]]
[[0.99664838 0.99664838]]


In [101]:
import numpy as np
from sklearn.linear_model import LinearRegression
from econml._rlearner import _RLearner
from sklearn.base import clone
class ModelFirst:
    """First-stage wrapper that fits and predicts on the column-wise concatenation of ``X`` and ``W``."""

    def __init__(self, model):
        # Work on an unfitted copy so the caller's estimator is never mutated.
        self._model = clone(model, safe=False)

    @staticmethod
    def _stack(X, W):
        # Feature matrix handed to the wrapped model: X columns followed by W columns.
        return np.hstack([X, W])

    def fit(self, X, W, Y, sample_weight=None):
        """Fit the wrapped model on ``[X, W]`` against ``Y``; ``sample_weight`` is accepted but unused."""
        features = self._stack(X, W)
        self._model.fit(features, Y)
        return self

    def predict(self, X, W):
        """Return the wrapped model's predictions on ``[X, W]``."""
        features = self._stack(X, W)
        return self._model.predict(features)
class ModelFinal:
    """Final stage: no-intercept regression of ``Y_res`` on the features scaled by ``T_res``."""

    def fit(self, X, T_res, Y_res, sample_weight=None, sample_var=None):
        """Fit on ``X * T_res`` (row-wise scaling); ``sample_weight`` and ``sample_var`` are unused."""
        treatment_column = T_res.reshape(-1, 1)
        # Broadcasting multiplies every feature in a row by that row's treatment residual.
        interacted = X * treatment_column
        self.model = LinearRegression(fit_intercept=False).fit(interacted, Y_res)
        return self

    def predict(self, X):
        """Return the fitted regression's prediction on the raw features ``X``."""
        fitted = self.model
        return fitted.predict(X)
# Seed the global RNG so the synthetic data (and the recorded outputs) are repeatable.
np.random.seed(123)
X = np.random.normal(size=(1000, 3))
# Outcome = column 0 + column 1 + Gaussian noise with standard deviation 0.01.
y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,))
# Two separate ModelFirst wrappers (each clones its LinearRegression) plus the final model.
est = _RLearner(ModelFirst(LinearRegression()),
                ModelFirst(LinearRegression()),
                ModelFinal(),
                n_splits=2, discrete_treatment=False, random_state=None)
# Column 0 is the treatment; the feature matrix is a single column of ones, so the
# final regression on X * T_res has exactly one coefficient. Remaining columns are W.
est.fit(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:])

<econml._rlearner._RLearner at 0x12db09dd8>

In [102]:
# Constant marginal effect at a single all-ones feature row (recorded output is close to 1).
est.const_marginal_effect(np.ones((1,1)))

array([0.99963147])

In [110]:
# Effect of moving the treatment from 0 to 10 at the all-ones feature row; the recorded
# output is ten times the constant marginal effect shown for the same row.
est.effect(np.ones((1,1)), T0=0, T1=10)

array([9.99631472])

In [111]:
# Score the estimator on the same arguments that were passed to est.fit, i.e. in-sample.
est.score(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:])

9.736380060274913e-05

In [105]:
# Inspect the LinearRegression object that ModelFinal.fit stored in its `model` attribute.
est.model_final.model

LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,
         normalize=False)

In [106]:
# Coefficient of the final regression (one entry, because the feature matrix had one column).
est.model_final.model.coef_

array([0.99963147])

In [107]:
# Score attribute stored on the estimator; the recorded value differs slightly from the
# est.score(...) call on the same data — presumably it is computed during fit; confirm.
est.score_

9.826232040878233e-05

In [108]:
# Unwrap each ModelFirst in est.models_y to show the underlying regressor
# (the recorded output lists two, matching n_splits=2).
[mdl._model for mdl in est.models_y]

[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
          normalize=False),
 LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
          normalize=False)]

In [109]:
# Unwrap each ModelFirst in est.models_t to show the underlying regressor
# (the recorded output lists two, matching n_splits=2).
[mdl._model for mdl in est.models_t]

[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
          normalize=False),
 LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
          normalize=False)]

In [114]:
from econml.dml import LinearDMLCateEstimator

# Seed the global RNG so the synthetic data (and the recorded outputs) are repeatable.
np.random.seed(123)
X = np.random.normal(size=(1000, 3))
# Outcome = column 0 + column 1 + Gaussian noise with standard deviation 0.01.
y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,))
est = LinearDMLCateEstimator(model_y=LinearRegression(),
                             model_t=LinearRegression())
# Column 0 is the treatment and the remaining columns are controls W; no feature
# matrix is passed here.
est.fit(y, X[:, 0], W=X[:, 1:], inference='statsmodels')

<econml.dml.LinearDMLCateEstimator at 0x12db2a208>

In [115]:
# Effect with the estimator's default arguments (recorded output is a length-1 array close to 1).
est.effect()

array([1.00089549])

In [116]:
# Interval for the effect above, returned as a (lower, upper) pair of arrays in the recorded output.
est.effect_interval()

(array([0.99404817]), array([1.0077428]))

In [117]:
# Fitted outcome models held by the estimator (the recorded output lists two LinearRegression objects).
est.models_y

[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
          normalize=False),
 LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
          normalize=False)]

In [118]:
# Fitted treatment models held by the estimator (the recorded output lists two LinearRegression objects).
est.models_t

[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
          normalize=False),
 LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
          normalize=False)]

In [119]:
# Final-stage coefficient; in the recorded output it equals the value shown by est.effect().
est.coef_

array([1.00089549])

In [121]:
# Interval for the coefficient; in the recorded output it equals the est.effect_interval() result.
est.coef__interval()

(array([0.99404817]), array([1.0077428]))

In [127]:
# Peek at a private attribute of the final model (presumably the coefficient's standard
# error, going by its name — confirm); private attributes may change between versions.
est.model_final._param_stderr

array([0.00416287])