Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vasilis/randomstate #325

Merged
merged 7 commits into from
Nov 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 7 additions & 2 deletions econml/_ortho_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ def __init__(self, model_nuisance, model_final, *,
self._n_splits = n_splits
self._discrete_treatment = discrete_treatment
self._discrete_instrument = discrete_instrument
self._init_random_state = random_state
self._random_state = check_random_state(random_state)
if discrete_treatment:
if categories != 'auto':
Expand Down Expand Up @@ -535,6 +536,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, *, sample_weight=None, sample_var=No
-------
self : _OrthoLearner instance
"""
self._random_state = check_random_state(self._init_random_state)
Y, T, X, W, Z, sample_weight, sample_var, groups = check_input_arrays(
Y, T, X, W, Z, sample_weight, sample_var, groups)
self._check_input_dims(Y, T, X, W, Z, sample_weight, sample_var, groups)
Expand Down Expand Up @@ -651,7 +653,7 @@ def effect_inference(self, X=None, *, T0=0, T1=1):
return super().effect_inference(X, T0=T0, T1=T1)
effect_inference.__doc__ = LinearCateEstimator.effect_inference.__doc__

def score(self, Y, T, X=None, W=None, Z=None):
def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None):
"""
Score the fitted CATE model on a new data set. Generates nuisance parameters
for the new data set based on the fitted nuisance models created at fit time.
Expand All @@ -673,6 +675,8 @@ def score(self, Y, T, X=None, W=None, Z=None):
Controls for each sample
Z: optional (n, d_z) matrix or None (Default=None)
Instruments for each sample
sample_weight: optional(n,) vector or None (Default=None)
Weights for each sample

Returns
-------
Expand Down Expand Up @@ -703,7 +707,8 @@ def score(self, Y, T, X=None, W=None, Z=None):
for it in range(len(nuisances)):
nuisances[it] = np.mean(nuisances[it], axis=0)

return self._model_final.score(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, nuisances=nuisances))
return self._model_final.score(Y, T, nuisances=nuisances,
**filter_none_kwargs(X=X, W=W, Z=Z, sample_weight=sample_weight))

@property
def model_final(self):
Expand Down
40 changes: 24 additions & 16 deletions econml/dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,10 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn

Parameters
----------
model_y: estimator
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods. Must be a linear model for correctness when linear_first_stages is ``True``.
`fit` and `predict` methods.
If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

model_t: estimator or 'auto' (default is 'auto')
The estimator for fitting the treatment to the features.
Expand Down Expand Up @@ -435,11 +436,14 @@ def __init__(self,

# TODO: consider whether we need more care around stateful featurizers,
# since we clone it and fit separate copies
if model_y == 'auto':
model_y = WeightedLassoCVWrapper(random_state=random_state)
if model_t == 'auto':
if discrete_treatment:
model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold())
model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold(random_state=random_state),
random_state=random_state)
else:
model_t = WeightedLassoCVWrapper()
model_t = WeightedLassoCVWrapper(random_state=random_state)
self.bias_part_of_coef = fit_cate_intercept
self.fit_cate_intercept = fit_cate_intercept
super().__init__(model_y=_FirstStageWrapper(model_y, True,
Expand Down Expand Up @@ -490,9 +494,10 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML):

Parameters
----------
model_y: estimator, optional (default is :class:`.WeightedLassoCVWrapper`)
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
Expand Down Expand Up @@ -545,7 +550,7 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML):
"""

def __init__(self,
model_y=WeightedLassoCVWrapper(), model_t='auto',
model_y='auto', model_t='auto',
featurizer=None,
fit_cate_intercept=True,
linear_first_stages=True,
Expand Down Expand Up @@ -615,10 +620,10 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):

Parameters
----------
model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
Expand Down Expand Up @@ -686,7 +691,7 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):
"""

def __init__(self,
model_y=WeightedLassoCVWrapper(), model_t='auto',
model_y='auto', model_t='auto',
alpha='auto',
max_iter=1000,
tol=1e-4,
Expand All @@ -701,7 +706,8 @@ def __init__(self,
alpha=alpha,
fit_intercept=False,
max_iter=max_iter,
tol=tol)
tol=tol,
random_state=random_state)
super().__init__(model_y=model_y,
model_t=model_t,
model_final=model_final,
Expand Down Expand Up @@ -765,11 +771,12 @@ class _RandomFeatures(TransformerMixin):
def __init__(self, dim, bw, random_state):
self._dim = dim
self._bw = bw
self._random_state = check_random_state(random_state)
self._random_state = random_state

def fit(self, X):
self.omegas = self._random_state.normal(0, 1 / self._bw, size=(shape(X)[1], self._dim))
self.biases = self._random_state.uniform(0, 2 * np.pi, size=(1, self._dim))
random_state = check_random_state(self._random_state)
self.omegas = random_state.normal(0, 1 / self._bw, size=(shape(X)[1], self._dim))
self.biases = random_state.uniform(0, 2 * np.pi, size=(1, self._dim))
return self

def transform(self, X):
Expand All @@ -782,9 +789,10 @@ class KernelDML(DML):

Parameters
----------
model_y: estimator, optional (default is :class:`<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
Expand Down Expand Up @@ -834,10 +842,10 @@ class KernelDML(DML):
by :mod:`np.random<numpy.random>`.
"""

def __init__(self, model_y=WeightedLassoCVWrapper(), model_t='auto', fit_cate_intercept=True,
def __init__(self, model_y='auto', model_t='auto', fit_cate_intercept=True,
dim=20, bw=1.0, discrete_treatment=False, categories='auto', n_splits=2, random_state=None):
super().__init__(model_y=model_y, model_t=model_t,
model_final=ElasticNetCV(fit_intercept=False),
model_final=ElasticNetCV(fit_intercept=False, random_state=random_state),
featurizer=_RandomFeatures(dim, bw, random_state),
fit_cate_intercept=fit_cate_intercept,
discrete_treatment=discrete_treatment,
Expand Down
35 changes: 23 additions & 12 deletions econml/drlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,18 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc

Parameters
----------
model_propensity : scikit-learn classifier
model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
where T is a shape (n, ) array.
If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

model_regression : scikit-learn regressor
model_regression : scikit-learn regressor or 'auto', optional (default='auto')
Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
`predict` methods. If different models per treatment arm are desired, see the
:class:`.MultiModelWrapper` helper class.
If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

model_final :
estimator for the final cate model. Trained on regressing the doubly robust potential outcomes
Expand Down Expand Up @@ -358,15 +360,20 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc

"""

def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCVWrapper(cv=3),
def __init__(self, model_propensity='auto',
model_regression='auto',
model_final=StatsModelsLinearRegression(),
multitask_model_final=False,
featurizer=None,
min_propensity=1e-6,
categories='auto',
n_splits=2,
random_state=None):
if model_propensity == 'auto':
model_propensity = LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto',
random_state=random_state)
if model_regression == 'auto':
model_regression = WeightedLassoCVWrapper(cv=3, random_state=random_state)
self._multitask_model_final = multitask_model_final
super().__init__(_ModelNuisance(model_propensity, model_regression, min_propensity),
_ModelFinal(model_final, featurizer, multitask_model_final),
Expand Down Expand Up @@ -585,16 +592,18 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):

Parameters
----------
model_propensity : scikit-learn classifier
model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
where T is a shape (n, ) array.
If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

model_regression : scikit-learn regressor
model_regression : scikit-learn regressor or 'auto', optional (default='auto')
Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
`predict` methods. If different models per treatment arm are desired, see the
:class:`.MultiModelWrapper` helper class.
If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

featurizer : :term:`transformer`, optional, default None
Must support fit_transform and transform. Used to create composite features in the final CATE regression.
Expand Down Expand Up @@ -678,8 +687,8 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
"""

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCVWrapper(cv=3),
model_propensity='auto',
model_regression='auto',
featurizer=None,
fit_cate_intercept=True,
min_propensity=1e-6,
Expand Down Expand Up @@ -781,16 +790,18 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):

Parameters
----------
model_propensity : scikit-learn classifier
model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
where T is a shape (n, ) array.
If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

model_regression : scikit-learn regressor
model_regression : scikit-learn regressor or 'auto', optional (default='auto')
Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
`predict` methods. If different models per treatment arm are desired, see the
:class:`.MultiModelWrapper` helper class.
If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

featurizer : :term:`transformer`, optional, default None
Must support fit_transform and transform. Used to create composite features in the final CATE regression.
Expand Down Expand Up @@ -887,8 +898,8 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
"""

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCVWrapper(cv=3),
model_propensity='auto',
model_regression='auto',
featurizer=None,
fit_cate_intercept=True,
alpha='auto',
Expand Down