[MRG+2] TransformedTargetRegressor #9041
Changes from 25 commits
@@ -1,4 +1,3 @@
.. currentmodule:: sklearn.preprocessing

.. _preprocessing_targets:

@@ -7,6 +6,72 @@
Transforming the prediction target (``y``)
==========================================

Transforming target in regression
---------------------------------

:class:`TransformTargetRegressor` transforms the target before fitting a
regression model and maps the predictions back to the original space. It
takes as arguments the regressor that will be used for prediction and the
transformer that will be applied to the target variable::

    >>> import numpy as np
    >>> from sklearn.datasets import load_boston
    >>> from sklearn import preprocessing
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.model_selection import train_test_split
    >>> boston = load_boston()
    >>> X = boston.data
    >>> y = boston.target
    >>> transformer = preprocessing.StandardScaler()
    >>> regressor = LinearRegression()
    >>> regr = preprocessing.TransformTargetRegressor(regressor=regressor,
    ...                                               transformer=transformer)

Review comment: indentation

    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    >>> regr.fit(X_train, y_train)  # doctest: +ELLIPSIS
    TransformTargetRegressor(...)
    >>> print('R2 score:', regr.score(X_test, y_test))  # doctest: +ELLIPSIS
    R2 score: 0.63...
Review comment: Maybe as a comparison you could add the score of the linear model on the original target variable. I get the following:
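Independently of the diff above, the wrap-transform-invert mechanics this section documents can be sketched in plain NumPy. This is a hypothetical sketch (the helper names are illustrative, not the scikit-learn API): fit a linear model on ``func(y)``, then map predictions back through ``inverse_func``.

```python
import numpy as np

# Hypothetical sketch of the meta-estimator's mechanics, not the sklearn
# implementation: fit a linear model on func(y), then map predictions
# back through inverse_func.
def fit_transformed(X, y, func, inverse_func):
    Xb = np.c_[X, np.ones(len(X))]               # add an intercept column
    coef, *_ = np.linalg.lstsq(Xb, func(y), rcond=None)

    def predict(X_new):
        Xb_new = np.c_[X_new, np.ones(len(X_new))]
        return inverse_func(Xb_new @ coef)       # back to the original space

    return predict

X = np.arange(1, 5, dtype=float).reshape(-1, 1)
y = np.exp(2 * X).ravel()                        # exactly linear in log-space
predict = fit_transformed(X, y, np.log, np.exp)
print(np.allclose(predict(X), y))                # prints True
```

Because the target is exactly linear in log-space, the least-squares fit on the transformed target recovers the relationship that a plain linear fit on ``y`` could not.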
The transformer can also be replaced by a function and an inverse function. We
can define the following two functions::

Review comment: Rephrase: "For simple transformations, instead of a" — because it's not clear to me what it means, "the transformer can be replaced". Plus, it can't always: not when it's stateful.

    >>> from __future__ import division
    >>> def func(x):
    ...     return np.log(x)
    >>> def inverse_func(x):
    ...     return np.exp(x)

Subsequently, the object is created as::

    >>> regr = preprocessing.TransformTargetRegressor(regressor=regressor,
    ...                                               func=func,

Review comment: indentation

    ...                                               inverse_func=inverse_func)
    >>> regr.fit(X_train, y_train)  # doctest: +ELLIPSIS
    TransformTargetRegressor(...)
    >>> print('R2 score:', regr.score(X_test, y_test))  # doctest: +ELLIPSIS
    R2 score: 0.64...
By default, the provided functions are checked at each fit to be the inverse of
each other.

Review comment: *functions

However, it is possible to bypass this checking by setting
``check_inverse`` to ``False``::

    >>> def inverse_func(x):
    ...     return x
    >>> regr = preprocessing.TransformTargetRegressor(regressor=regressor,
    ...                                               func=func,
    ...                                               inverse_func=inverse_func,
    ...                                               check_inverse=False)

Review comment: Unrelated, but I wonder if we should have this
Review comment: I would not be against it.
Review comment: I like the idea of moving

    >>> regr.fit(X_train, y_train)  # doctest: +ELLIPSIS
    TransformTargetRegressor(...)
    >>> print('R2 score:', regr.score(X_test, y_test))  # doctest: +ELLIPSIS
    R2 score: -4.50...
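The round-trip test that ``check_inverse`` performs can be illustrated standalone. This is a hedged sketch (``roundtrip_ok`` is an illustrative name, not scikit-learn API) of a subsampled inverse check, seeded for reproducibility:

```python
import numpy as np

# Hedged sketch of a round-trip inverse check similar in spirit to
# check_inverse (roundtrip_ok is an illustrative name, not sklearn API).
def roundtrip_ok(y, func, inverse_func, atol=1e-4):
    rng = np.random.RandomState(0)               # seeded, unlike the diff
    idx = rng.choice(len(y), size=min(10, len(y)), replace=False)
    return np.allclose(y[idx], inverse_func(func(y[idx])), atol=atol)

y = np.linspace(1.0, 10.0, 50)
print(roundtrip_ok(y, np.log, np.exp))           # prints True: exp inverts log
print(roundtrip_ok(y, np.log, lambda x: x))      # prints False: identity does not
```

Using the identity as a fake inverse, as in the doctest above, fails this check, which is exactly why the doctest has to set ``check_inverse=False``.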
.. note::

    The transformation can be triggered by setting either ``transformer`` or
    the pair of functions ``func`` and ``inverse_func``. However, setting both
    options will raise an error.

Review comment: the pair of functions — clearer to me
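The mutual-exclusion rule in the note can be sketched as a few lines of validation logic. ``check_options`` is a hypothetical name used only for illustration; the real class raises a similar ``ValueError`` at fit time:

```python
# Illustrative stand-in for the validation described in the note above;
# check_options is a hypothetical name, not part of scikit-learn.
def check_options(transformer=None, func=None, inverse_func=None):
    if transformer is not None and (func is not None or
                                    inverse_func is not None):
        raise ValueError("'transformer' and 'func'/'inverse_func' "
                         "cannot both be set.")

check_options(func=abs, inverse_func=abs)        # a pair of functions: fine
try:
    check_options(transformer=object(), func=abs)
except ValueError as exc:
    print("raised:", exc)
```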
Label binarization
------------------

----
@@ -31,6 +31,15 @@ Changelog
New features
............

- Added the :class:`sklearn.preprocessing.TransformTargetRegressor`, which wraps
  a regressor and applies a transformation to the target before fitting,
  finally transforming the regressor's predictions back to the original
  space. :issue:`9041` by `Andreas Müller`_ and

Review comment: possibly we can reuse the exact phrasing from my comment 5 min ago
Review comment: Guillaume is missing. EDIT: actually the entry is duplicated but in slightly different versions.

- Added the :class:`sklearn.preprocessing.TransformedTargetRegressor`, which
  is a meta-estimator to regress on a modified ``y``, for example to perform
  regression in log-space. :issue:`9041` by `Andreas Müller`_ and
  :user:`Guillaume Lemaitre <glemaitre>`.

- Validation that input data contains no NaN or inf can now be suppressed
  using :func:`config_context`, at your own risk. This will save on runtime,
  and may be particularly useful for prediction time. :issue:`7548` by
----
@@ -28,6 +28,8 @@
from .label import LabelEncoder
from .label import MultiLabelBinarizer

from .target import TransformTargetRegressor

Review comment: unhelpful whitespace

from .imputation import Imputer

@@ -45,6 +47,7 @@
    'OneHotEncoder',
    'RobustScaler',
    'StandardScaler',
    'TransformTargetRegressor',
    'add_dummy_feature',
    'PolynomialFeatures',
    'binarize',
----
@@ -0,0 +1,203 @@
# Authors: Andreas Mueller <andreas.mueller@columbia.edu>

Review comment: extra space before andreas
Review comment: I do like me some extra space
Review comment: here's some for you

#          Guillaume Lemaitre <guillaume.lemaitre@inria.fr>
# License: BSD 3 clause
import numpy as np

from ..base import BaseEstimator, RegressorMixin, clone
from ..linear_model import LinearRegression
from ..utils.fixes import signature
from ..utils.validation import check_is_fitted, check_array
from ._function_transformer import FunctionTransformer

__all__ = ['TransformTargetRegressor']
class TransformTargetRegressor(BaseEstimator, RegressorMixin):
    """Meta-estimator to regress on a transformed target.

    Useful for applying a non-linear transformation in regression
    problems. This transformation can be given as a Transformer such as the
    QuantileTransformer, or as a function and its inverse such as ``np.log``
    and ``np.exp``.

Review comment: perhaps just call these log and exp, leaving out the np

    The computation during ``fit`` is::

Review comment: can we remove this blank line?

        regressor.fit(X, func(y))

    or::

Review comment: and this etc?

        regressor.fit(X, transformer.transform(y))

    The computation during ``predict`` is::

        inverse_func(regressor.predict(X))

    or::

        transformer.inverse_transform(regressor.predict(X))
    Parameters
    ----------
    regressor : object, (default=LinearRegression())
        Regressor object such as derived from ``RegressorMixin``. This
        regressor will be cloned during fitting.

Review comment: "This regressor will automatically be cloned each time prior to fitting."

    transformer : object, (default=None)
        Estimator object such as derived from ``TransformerMixin``. Cannot be
        set at the same time as ``func`` and ``inverse_func``. If ``None`` and
        ``func`` and ``inverse_func`` are ``None`` as well, the transformer
        will be an identity transformer. The transformer will be cloned during
        fitting.

    func : function, optional
        Function to apply to ``y`` before passing to ``fit``. Cannot be set at
        the same time as ``transformer``. If ``None`` and ``transformer`` is
        ``None`` as well, the function used will be the identity function.

    inverse_func : function, optional
        Function to apply to the prediction of the regressor. Cannot be set at
        the same time as ``transformer``. If ``None`` and ``transformer`` is

Review comment: "at the same time as" (and the same for func)

        ``None`` as well, the function used will be the identity function. The
        inverse function is used to return to the same space as the original
        training labels during prediction.

    check_inverse : bool, (default=True)
        Whether to check that ``transform`` followed by ``inverse_transform``
        or ``func`` followed by ``inverse_func`` leads to the original targets.

Review comment: nitpick: data -> targets.
    Attributes
    ----------
    regressor_ : object
        Fitted regressor.

    transformer_ : object
        Transformer used in ``fit`` and ``predict``.

Review comment: "Transformer used"

    y_ndim_ : int
        Number of targets.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.preprocessing import TransformTargetRegressor
    >>> tt = TransformTargetRegressor(regressor=LinearRegression(),
    ...                               func=np.log, inverse_func=np.exp)
    >>> X = np.arange(4).reshape(-1, 1)
    >>> y = np.exp(2 * X).ravel()
    >>> tt.fit(X, y)
    ... # doctest: +NORMALIZE_WHITESPACE
    TransformTargetRegressor(check_inverse=True,

Review comment: Is it useful to show this?

        func=<ufunc 'log'>,
        inverse_func=<ufunc 'exp'>,
        regressor=LinearRegression(copy_X=True,
            fit_intercept=True,
            n_jobs=1,
            normalize=False),
        transformer=None)
    >>> tt.score(X, y)
    1.0
    >>> tt.regressor_.coef_
    array([ 2.])

    """
    def __init__(self, regressor=None, transformer=None,
                 func=None, inverse_func=None, check_inverse=True):
        self.regressor = regressor
        self.transformer = transformer
        self.func = func
        self.inverse_func = inverse_func
        self.check_inverse = check_inverse
    def _fit_transformer(self, y, sample_weight):
        if (self.transformer is not None and
                (self.func is not None or self.inverse_func is not None)):
            raise ValueError("'transformer' and functions 'func'/"
                             "'inverse_func' cannot both be set.")

Review comment: drop "Both" or move it to become "cannot both be set" (without "at the same time").

        elif self.transformer is not None:
            self.transformer_ = clone(self.transformer)
        else:
            self.transformer_ = FunctionTransformer(
                func=self.func, inverse_func=self.inverse_func, validate=False)
        fit_parameters = signature(self.transformer_.fit).parameters

Review comment: We should really have a helper for that but whatever...
Review comment: good point! does this break if the transformer's fit comes from a @if_delegate_has_method??
Review comment: should not, I think...
Review comment: (see discussion here)
Review comment: Can a fit ever come from an "@if_delegate_has_method"? I would understand for a predict or a transform, but a fit?
Review comment: good point, makes no sense to have it
Review comment: I think you are looking for
Review comment: I think this is the wrong strategy. We should pass sample_weight iff it is not None, or we should never pass in weights until we have a prop routing API. Passing on the basis of the signature is brittle (it is silent when the weights are not passed even if the user passed in weights explicitly), and would make the code a mess if more properties were supported. Unless I've missed some motivation for this.
Review comment: In fact, I could not find a transformer with::

    support_sample_weight = has_fit_parameter(self.regressor_, 'sample_weight')
    if support_sample_weight:
        if sample_weight is None:
            current_sample_weight = np.ones((y.shape[0],))
        else:
            current_sample_weight = sample_weight

In case that transformers should handle

        if "sample_weight" in fit_parameters:
            self.transformer_.fit(y, sample_weight=sample_weight)
        else:
            self.transformer_.fit(y)
        if self.check_inverse:

Review comment: does it make sense to move this check inside
Review comment: So this will remain here regardless of its being present in FunctionTransformer? And will remain an error rather than a warning?
Review comment: I changed it in the FunctionTransformer but I forgot to change it here.

            n_subsample = min(10, y.shape[0])
            subsample_idx = np.random.choice(range(y.shape[0]),
                                             size=n_subsample, replace=False)

Review comment: Unprotected np.random: we need to use a random_state

            if not np.allclose(
                    y[subsample_idx],
                    self.transformer_.inverse_transform(
                        self.transformer_.transform(y[subsample_idx])),
                    atol=1e-4):

Review comment: what's the default tolerance? maybe we want to lower that a little bit?
Review comment: this is surprisingly high tolerance to me
Review comment: 1e-7 seems better to you?

                raise ValueError("The provided functions or transformer are"
                                 " not strictly inverse of each other. If"
                                 " you are sure you want to proceed regardless,"
                                 " set 'check_inverse=False'")
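The signature-based ``sample_weight`` detection debated above can be reproduced with the standard library. scikit-learn exposes a helper for this purpose, ``sklearn.utils.validation.has_fit_parameter`` (the one the reviewer alludes to), but ``inspect`` alone is enough to show the idea; the class names below are made up for illustration:

```python
import inspect

# Sketch of signature inspection for an optional fit parameter. The two
# transformer classes are hypothetical stand-ins, not sklearn estimators.
def accepts_sample_weight(estimator):
    return "sample_weight" in inspect.signature(estimator.fit).parameters

class PlainTransformer:
    def fit(self, y):
        return self

class WeightedTransformer:
    def fit(self, y, sample_weight=None):
        return self

print(accepts_sample_weight(PlainTransformer()))     # prints False
print(accepts_sample_weight(WeightedTransformer()))  # prints True
```

As the review notes, this approach is brittle: it silently drops weights the user passed explicitly whenever the inner estimator does not advertise the parameter, which is why passing weights only when they are not ``None`` was suggested instead.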
    def fit(self, X, y, sample_weight=None):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target values.

        sample_weight : array-like, shape (n_samples,), optional
            Array of weights that are assigned to individual samples.
            If not provided, then each sample is given unit weight.

        Returns
        -------
        self : object
            Returns self.
        """
        y = check_array(y, ensure_2d=False)
        self.y_ndim_ = y.ndim

Review comment: Should probably document this attribute.

        if y.ndim == 1 and self.func is None:
            y_2d = y.reshape(-1, 1)

Review comment: I suspect we don't want to do this when
Review comment: I come back on this point. I am not sure this is a great idea, since it changes the behaviour between passing a function and passing a transformer. We could still make the transform to 2d and build the FunctionTransformer with. I don't see a case in which the user would define a function which works on a 1D array but would fail on a 2D array.

        else:
            y_2d = y
        self._fit_transformer(y_2d, sample_weight)
        if self.regressor is None:
            self.regressor_ = LinearRegression()
        else:
            self.regressor_ = clone(self.regressor)
        if sample_weight is not None:
            self.regressor_.fit(X, self.transformer_.fit_transform(y_2d),
                                sample_weight=sample_weight)
        else:
            self.regressor_.fit(X, self.transformer_.fit_transform(y_2d))
        return self
    def predict(self, X):
        """Predict using the base regressor, applying the inverse.

        The regressor is used to predict, and the ``inverse_func`` or
        ``inverse_transform`` is applied before returning the prediction.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
            Samples.

        Returns
        -------
        y_hat : array, shape = (n_samples,)
            Predicted values.
        """
        check_is_fitted(self, "regressor_")
        pred = self.transformer_.inverse_transform(self.regressor_.predict(X))
        if self.y_ndim_ == 1 and self.func is None:

Review comment: why the second condition?
Review comment: To avoid the useless reshaping for

            return pred.ravel()

Review comment: I prefer ".squeeze" rather than ".ravel": ravel is too flexible and can hide errors

        else:
            return pred
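The ``.squeeze`` versus ``.ravel`` point from the review is easy to demonstrate with plain NumPy: ``ravel`` flattens unconditionally, which can mask a multi-output shape bug, while ``squeeze`` only removes size-1 axes.

```python
import numpy as np

# Why the reviewer prefers squeeze over ravel: ravel flattens any array,
# silently hiding a shape bug for multi-output targets, while squeeze
# only removes size-1 axes.
pred_single = np.zeros((3, 1))  # single-target predictions as a column
pred_multi = np.zeros((3, 2))   # multi-output predictions

print(pred_single.ravel().shape)    # prints (3,)
print(pred_single.squeeze().shape)  # prints (3,)
print(pred_multi.ravel().shape)     # prints (6,)  -- shape information lost
print(pred_multi.squeeze().shape)   # prints (3, 2) -- left intact
```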
Review comment: suggestion: "transforms the target ``y`` before fitting a regression model. The predictions are mapped back to the original space via an inverse transform. It takes..."