Skip to content

Commit

Permalink
Vasilis/randomstate (#325)
Browse files Browse the repository at this point in the history
* fixed random state in dml and ortho learner

* fixed random state to be stateless and enable refitting the same instance with no change. Fixed some bugs in orthoiv related to passing W and to passing sample_weights to score in IntentToTreatDRIV. Fixed some bugs related to random_state in orthoiv.

* fixed intenttotreatdriv scoring. fixed use of sample_weight at fit time in driv

Co-authored-by: Vasilis <vasy@microsoft.com>
  • Loading branch information
vsyrgkanis and vasilismsr committed Nov 20, 2020
1 parent 697c595 commit e1abf19
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 64 deletions.
9 changes: 7 additions & 2 deletions econml/_ortho_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ def __init__(self, model_nuisance, model_final, *,
self._n_splits = n_splits
self._discrete_treatment = discrete_treatment
self._discrete_instrument = discrete_instrument
self._init_random_state = random_state
self._random_state = check_random_state(random_state)
if discrete_treatment:
if categories != 'auto':
Expand Down Expand Up @@ -535,6 +536,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, *, sample_weight=None, sample_var=No
-------
self : _OrthoLearner instance
"""
self._random_state = check_random_state(self._init_random_state)
Y, T, X, W, Z, sample_weight, sample_var, groups = check_input_arrays(
Y, T, X, W, Z, sample_weight, sample_var, groups)
self._check_input_dims(Y, T, X, W, Z, sample_weight, sample_var, groups)
Expand Down Expand Up @@ -651,7 +653,7 @@ def effect_inference(self, X=None, *, T0=0, T1=1):
return super().effect_inference(X, T0=T0, T1=T1)
effect_inference.__doc__ = LinearCateEstimator.effect_inference.__doc__

def score(self, Y, T, X=None, W=None, Z=None):
def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None):
"""
Score the fitted CATE model on a new data set. Generates nuisance parameters
for the new data set based on the fitted nuisance models created at fit time.
Expand All @@ -673,6 +675,8 @@ def score(self, Y, T, X=None, W=None, Z=None):
Controls for each sample
Z: optional (n, d_z) matrix or None (Default=None)
Instruments for each sample
sample_weight: optional (n,) vector or None (Default=None)
    Weights for each sample
Returns
-------
Expand Down Expand Up @@ -703,7 +707,8 @@ def score(self, Y, T, X=None, W=None, Z=None):
for it in range(len(nuisances)):
nuisances[it] = np.mean(nuisances[it], axis=0)

return self._model_final.score(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, nuisances=nuisances))
return self._model_final.score(Y, T, nuisances=nuisances,
**filter_none_kwargs(X=X, W=W, Z=Z, sample_weight=sample_weight))

@property
def model_final(self):
Expand Down
40 changes: 24 additions & 16 deletions econml/dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,10 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn
Parameters
----------
model_y: estimator
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods. Must be a linear model for correctness when linear_first_stages is ``True``.
`fit` and `predict` methods.
If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
model_t: estimator or 'auto' (default is 'auto')
The estimator for fitting the treatment to the features.
Expand Down Expand Up @@ -435,11 +436,14 @@ def __init__(self,

# TODO: consider whether we need more care around stateful featurizers,
# since we clone it and fit separate copies
if model_y == 'auto':
model_y = WeightedLassoCVWrapper(random_state=random_state)
if model_t == 'auto':
if discrete_treatment:
model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold())
model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold(random_state=random_state),
random_state=random_state)
else:
model_t = WeightedLassoCVWrapper()
model_t = WeightedLassoCVWrapper(random_state=random_state)
self.bias_part_of_coef = fit_cate_intercept
self.fit_cate_intercept = fit_cate_intercept
super().__init__(model_y=_FirstStageWrapper(model_y, True,
Expand Down Expand Up @@ -490,9 +494,10 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML):
Parameters
----------
model_y: estimator, optional (default is :class:`.WeightedLassoCVWrapper`)
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
Expand Down Expand Up @@ -545,7 +550,7 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML):
"""

def __init__(self,
model_y=WeightedLassoCVWrapper(), model_t='auto',
model_y='auto', model_t='auto',
featurizer=None,
fit_cate_intercept=True,
linear_first_stages=True,
Expand Down Expand Up @@ -615,10 +620,10 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):
Parameters
----------
model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
Expand Down Expand Up @@ -686,7 +691,7 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):
"""

def __init__(self,
model_y=WeightedLassoCVWrapper(), model_t='auto',
model_y='auto', model_t='auto',
alpha='auto',
max_iter=1000,
tol=1e-4,
Expand All @@ -701,7 +706,8 @@ def __init__(self,
alpha=alpha,
fit_intercept=False,
max_iter=max_iter,
tol=tol)
tol=tol,
random_state=random_state)
super().__init__(model_y=model_y,
model_t=model_t,
model_final=model_final,
Expand Down Expand Up @@ -765,11 +771,12 @@ class _RandomFeatures(TransformerMixin):
def __init__(self, dim, bw, random_state):
self._dim = dim
self._bw = bw
self._random_state = check_random_state(random_state)
self._random_state = random_state

def fit(self, X):
self.omegas = self._random_state.normal(0, 1 / self._bw, size=(shape(X)[1], self._dim))
self.biases = self._random_state.uniform(0, 2 * np.pi, size=(1, self._dim))
random_state = check_random_state(self._random_state)
self.omegas = random_state.normal(0, 1 / self._bw, size=(shape(X)[1], self._dim))
self.biases = random_state.uniform(0, 2 * np.pi, size=(1, self._dim))
return self

def transform(self, X):
Expand All @@ -782,9 +789,10 @@ class KernelDML(DML):
Parameters
----------
model_y: estimator, optional (default is :class:`<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
Expand Down Expand Up @@ -834,10 +842,10 @@ class KernelDML(DML):
by :mod:`np.random<numpy.random>`.
"""

def __init__(self, model_y=WeightedLassoCVWrapper(), model_t='auto', fit_cate_intercept=True,
def __init__(self, model_y='auto', model_t='auto', fit_cate_intercept=True,
dim=20, bw=1.0, discrete_treatment=False, categories='auto', n_splits=2, random_state=None):
super().__init__(model_y=model_y, model_t=model_t,
model_final=ElasticNetCV(fit_intercept=False),
model_final=ElasticNetCV(fit_intercept=False, random_state=random_state),
featurizer=_RandomFeatures(dim, bw, random_state),
fit_cate_intercept=fit_cate_intercept,
discrete_treatment=discrete_treatment,
Expand Down
35 changes: 23 additions & 12 deletions econml/drlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,18 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc
Parameters
----------
model_propensity : scikit-learn classifier
model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
where T is a shape (n, ) array.
If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.
model_regression : scikit-learn regressor
model_regression : scikit-learn regressor or 'auto', optional (default='auto')
Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
`predict` methods. If different models per treatment arm are desired, see the
:class:`.MultiModelWrapper` helper class.
If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
model_final :
estimator for the final cate model. Trained on regressing the doubly robust potential outcomes
Expand Down Expand Up @@ -358,15 +360,20 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc
"""

def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCVWrapper(cv=3),
def __init__(self, model_propensity='auto',
model_regression='auto',
model_final=StatsModelsLinearRegression(),
multitask_model_final=False,
featurizer=None,
min_propensity=1e-6,
categories='auto',
n_splits=2,
random_state=None):
if model_propensity == 'auto':
model_propensity = LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto',
random_state=random_state)
if model_regression == 'auto':
model_regression = WeightedLassoCVWrapper(cv=3, random_state=random_state)
self._multitask_model_final = multitask_model_final
super().__init__(_ModelNuisance(model_propensity, model_regression, min_propensity),
_ModelFinal(model_final, featurizer, multitask_model_final),
Expand Down Expand Up @@ -585,16 +592,18 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
Parameters
----------
model_propensity : scikit-learn classifier
model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
where T is a shape (n, ) array.
If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.
model_regression : scikit-learn regressor
model_regression : scikit-learn regressor or 'auto', optional (default='auto')
Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
`predict` methods. If different models per treatment arm are desired, see the
:class:`.MultiModelWrapper` helper class.
If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
featurizer : :term:`transformer`, optional, default None
Must support fit_transform and transform. Used to create composite features in the final CATE regression.
Expand Down Expand Up @@ -678,8 +687,8 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
"""

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCVWrapper(cv=3),
model_propensity='auto',
model_regression='auto',
featurizer=None,
fit_cate_intercept=True,
min_propensity=1e-6,
Expand Down Expand Up @@ -781,16 +790,18 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
Parameters
----------
model_propensity : scikit-learn classifier
model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
where T is a shape (n, ) array.
If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.
model_regression : scikit-learn regressor
model_regression : scikit-learn regressor or 'auto', optional (default='auto')
Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
`predict` methods. If different models per treatment arm are desired, see the
:class:`.MultiModelWrapper` helper class.
If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
featurizer : :term:`transformer`, optional, default None
Must support fit_transform and transform. Used to create composite features in the final CATE regression.
Expand Down Expand Up @@ -887,8 +898,8 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
"""

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCVWrapper(cv=3),
model_propensity='auto',
model_regression='auto',
featurizer=None,
fit_cate_intercept=True,
alpha='auto',
Expand Down

0 comments on commit e1abf19

Please sign in to comment.