Merge pull request #299 from rasbt/store_meta

Attribute and method store/compute meta features of StackingRegressor
rasbt · Dec 4, 2017 · a1e6cfb · a1e6cfb
2 parents fa5d1f7 + c525f36
commit a1e6cfb
Show file tree

Hide file tree

Showing 7 changed files with 191 additions and 32 deletions.
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -7,6 +7,33 @@ The CHANGELOG for the current development version is available at
 
 ---
 
+
+
+
+
+### Version 0.9.2dev
+
+##### Downloads
+
+- [Source code (zip)](https://github.com/rasbt/mlxtend/archive/v0.9.2.zip)
+- [Source code (tar.gz)](https://github.com/rasbt/mlxtend/archive/v0.9.2.tar.gz)
+
+##### New Features
+
+- New `store_train_meta_features` parameter for `StackingCVRegressor`. If True, the train meta-features are stored in `self.train_meta_features_` after calling `fit`.
+    New `predict_meta_features` method for `StackingCVRegressor`. People can get test meta-features using this method. ([#294](https://github.com/rasbt/mlxtend/pull/294)
+    via [takashioya](https://github.com/takashioya))
+- The new `store_train_meta_features` parameter and `predict_meta_features` method of the `StackingCVRegressor` were also added to the `StackingRegressor`. ([#299](https://github.com/rasbt/mlxtend/pull/299))
+
+##### Changes
+
+- -
+
+
+##### Bug Fixes
+
+- -
+
 ### Version 0.9.1 (2017-11-19)
 
 ##### Downloads
@@ -18,9 +45,6 @@ The CHANGELOG for the current development version is available at
 
 - Added `mlxtend.evaluate.bootstrap_point632_score` to evaluate the performance of estimators using the .632 bootstrap. ([#283](https://github.com/rasbt/mlxtend/pull/283))
 - New `max_len` parameter for the frequent itemset generation via the `apriori` function to allow for early stopping. ([#270](https://github.com/rasbt/mlxtend/pull/270))
-- New `store_train_meta_features` parameter for `fit` in StackingCVRegressor. if True, train meta-features are stored in `self.train_meta_features_`.
-    New `pred_meta_features` method for StackingCVRegressor. People can get test meta-features using this method. ([#294](https://github.com/rasbt/mlxtend/pull/294))
-    via [takashioya](https://github.com/takashioya))
 
 ##### Changes
 

diff --git a/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb b/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb
@@ -301,7 +301,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -337,7 +337,7 @@
       "- `meta_regressor` : object\n",
       "\n",
       "    The meta-regressor to be fitted on the ensemble of\n",
-      "    regressors\n",
+      "    regressors\n",
       "\n",
       "- `cv` : int, cross-validation generator or iterable, optional (default: 5)\n",
       "\n",
@@ -366,18 +366,19 @@
       "\n",
       "- `store_train_meta_features` : bool (default: False)\n",
       "\n",
-      "    If True, the meta-features computed from the training data used\n",
-      "    for fitting the meta-regressor stored in the\n",
-      "    `self.train_meta_features_` array, which can be\n",
+      "    If True, the meta-features computed from the training data\n",
+      "    used for fitting the\n",
+      "    meta-regressor are stored in the `self.train_meta_features_` array,\n",
+      "    which can be\n",
       "    accessed after calling `fit`.\n",
       "\n",
       "**Attributes**\n",
       "\n",
-      "- `train_meta_features` : numpy array, shape=[n_samples, len(self.regressors)]\n",
+      "- `train_meta_features_` : numpy array, shape = [n_samples, len(self.regressors)]\n",
       "\n",
-      "    meta-features for training data, where n_samples is the number of\n",
-      "    samples in training data and len(self.regressors) is\n",
-      "    the number of regressors.\n",
+      "    meta-features for training data, where n_samples is the\n",
+      "    number of samples\n",
+      "    in training data and len(self.regressors) is the number of regressors.\n",
       "\n",
       "### Methods\n",
       "\n",
@@ -459,7 +460,20 @@
       "\n",
       "*predict(X)*\n",
       "\n",
-      "None\n",
+      "Predict target values for X.\n",
+      "\n",
+      "**Parameters**\n",
+      "\n",
+      "- `X` : {array-like, sparse matrix}, shape = [n_samples, n_features]\n",
+      "\n",
+      "    Input vectors to predict on, where n_samples is the number of\n",
+      "    samples and n_features is the number of features.\n",
+      "\n",
+      "**Returns**\n",
+      "\n",
+      "- `y_target` : array-like, shape = [n_samples] or [n_samples, n_targets]\n",
+      "\n",
+      "    Predicted target values.\n",
       "\n",
       "<hr>\n",
       "\n",
@@ -479,8 +493,8 @@
       "- `meta-features` : numpy array, shape = [n_samples, len(self.regressors)]\n",
       "\n",
       "    meta-features for test data, where n_samples is the number of\n",
-      "    samples in test data and len(self.regressors) is the number of\n",
-      "    regressors.\n",
+      "    samples in test data and len(self.regressors) is the number\n",
+      "    of regressors.\n",
       "\n",
       "<hr>\n",
       "\n",
@@ -543,6 +557,15 @@
     "with open('../../api_modules/mlxtend.regressor/StackingCVRegressor.md', 'r') as f:\n",
     "    print(f.read())"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

diff --git a/docs/sources/user_guide/regressor/StackingRegressor.ipynb b/docs/sources/user_guide/regressor/StackingRegressor.ipynb
@@ -634,7 +634,7 @@
      "text": [
       "## StackingRegressor\n",
       "\n",
-      "*StackingRegressor(regressors, meta_regressor, verbose=0)*\n",
+      "*StackingRegressor(regressors, meta_regressor, verbose=0, store_train_meta_features=False)*\n",
       "\n",
       "A Stacking regressor for scikit-learn estimators for regression.\n",
       "\n",
@@ -663,6 +663,14 @@
       "    - `verbose>2`: Changes `verbose` param of the underlying regressor to\n",
       "    self.verbose - 2\n",
       "\n",
+      "- `store_train_meta_features` : bool (default: False)\n",
+      "\n",
+      "    If True, the meta-features computed from the training data\n",
+      "    used for fitting the\n",
+      "    meta-regressor are stored in the `self.train_meta_features_` array,\n",
+      "    which can be\n",
+      "    accessed after calling `fit`.\n",
+      "\n",
       "**Attributes**\n",
       "\n",
       "- `regr_` : list, shape=[n_regressors]\n",
@@ -681,6 +689,12 @@
       "\n",
       "    Intercept of the fitted meta-estimator\n",
       "\n",
+      "- `train_meta_features_` : numpy array, shape = [n_samples, len(self.regressors)]\n",
+      "\n",
+      "    meta-features for training data, where n_samples is the\n",
+      "    number of samples\n",
+      "    in training data and len(self.regressors) is the number of regressors.\n",
+      "\n",
       "### Methods\n",
       "\n",
       "<hr>\n",
@@ -758,15 +772,36 @@
       "\n",
       "<hr>\n",
       "\n",
+      "*predict_meta_features(X)*\n",
+      "\n",
+      "Get meta-features of test-data.\n",
+      "\n",
+      "**Parameters**\n",
+      "\n",
+      "- `X` : numpy array, shape = [n_samples, n_features]\n",
+      "\n",
+      "    Test vectors, where n_samples is the number of samples and\n",
+      "    n_features is the number of features.\n",
+      "\n",
+      "**Returns**\n",
+      "\n",
+      "- `meta-features` : numpy array, shape = [n_samples, len(self.regressors)]\n",
+      "\n",
+      "    meta-features for test data, where n_samples is the number of\n",
+      "    samples in test data and len(self.regressors) is the number\n",
+      "    of regressors.\n",
+      "\n",
+      "<hr>\n",
+      "\n",
       "*score(X, y, sample_weight=None)*\n",
       "\n",
       "Returns the coefficient of determination R^2 of the prediction.\n",
       "\n",
-      "The coefficient R^2 is defined as (1 - u/v), where u is the regression\n",
-      "sum of squares ((y_true - y_pred) ** 2).sum() and v is the residual\n",
+      "The coefficient R^2 is defined as (1 - u/v), where u is the residual\n",
+      "sum of squares ((y_true - y_pred) ** 2).sum() and v is the total\n",
       "sum of squares ((y_true - y_true.mean()) ** 2).sum().\n",
       "\n",
-      "Best possible score is 1.0 and it can be negative (because the\n",
+      "The best possible score is 1.0 and it can be negative (because the\n",
       "\n",
       "model can be arbitrarily worse). A constant model that always\n",
       "predicts the expected value of y, disregarding the input features,\n",

diff --git a/mlxtend/__init__.py b/mlxtend/__init__.py
@@ -4,4 +4,4 @@
 #
 # License: BSD 3 clause
 
-__version__ = '0.9.1'
+__version__ = '0.9.2dev'
diff --git a/mlxtend/regressor/stacking_cv_regression.py b/mlxtend/regressor/stacking_cv_regression.py
@@ -40,13 +40,13 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
     Parameters
     ----------
     regressors : array-like, shape = [n_regressors]
-        A list of classifiers.
+        A list of regressors.
         Invoking the `fit` method on the `StackingCVRegressor` will fit clones
         of these original regressors that will
         be stored in the class attribute `self.regr_`.
     meta_regressor : object
-        The meta-classifier to be fitted on the ensemble of
-        classifiers
+        The meta-regressor to be fitted on the ensemble of
+        regressors
     cv : int, cross-validation generator or iterable, optional (default: 5)
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -56,7 +56,7 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
         - An iterable yielding train, test splits.
         For integer/None inputs, it will use `KFold` cross-validation
     use_features_in_secondary : bool (default: False)
-        If True, the meta-classifier will be trained both on
+        If True, the meta-regressor will be trained both on
         the predictions of the original regressors and the
         original dataset.
         If False, the meta-regressor will be trained only on
@@ -67,14 +67,17 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
         argument is a specific cross validation technique, this argument is
         omitted.
     store_train_meta_features : bool (default: False)
-        If True, the meta-features computed from the training data used for fitting the
-        meta-regressor stored in the `self.train_meta_features_` array, which can be
+        If True, the meta-features computed from the training data
+        used for fitting the
+        meta-regressor are stored in the `self.train_meta_features_` array,
+        which can be
         accessed after calling `fit`.
 
     Attributes
     ----------
     train_meta_features_ : numpy array, shape = [n_samples, len(self.regressors)]
-        meta-features for training data, where n_samples is the number of samples
+        meta-features for training data, where n_samples is the
+        number of samples
         in training data and len(self.regressors) is the number of regressors.
 
     """
@@ -167,6 +170,20 @@ def fit(self, X, y, groups=None):
         return self
 
     def predict(self, X):
+        """ Predict target values for X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            Input vectors to predict on, where n_samples is the number of
+            samples and n_features is the number of features.
+
+        Returns
+        -------
+        y_target : array-like, shape = [n_samples] or [n_samples, n_targets]
+            Predicted target values.
+        """
+
         #
         # First we make predictions with the base-models then we predict with
         # the meta-model from that info.
@@ -193,7 +210,8 @@ def predict_meta_features(self, X):
         -------
         meta-features : numpy array, shape = [n_samples, len(self.regressors)]
             meta-features for test data, where n_samples is the number of
-            samples in test data and len(self.regressors) is the number of regressors.
+            samples in test data and len(self.regressors) is the number
+            of regressors.
 
         """
         return np.column_stack([regr.predict(X) for regr in self.regr_])

diff --git a/mlxtend/regressor/stacking_regression.py b/mlxtend/regressor/stacking_regression.py
@@ -40,6 +40,12 @@ class StackingRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
                        regressor being fitted
         - `verbose>2`: Changes `verbose` param of the underlying regressor to
            self.verbose - 2
+    store_train_meta_features : bool (default: False)
+        If True, the meta-features computed from the training data
+        used for fitting the
+        meta-regressor are stored in the `self.train_meta_features_` array,
+        which can be
+        accessed after calling `fit`.
 
     Attributes
     ----------
@@ -51,9 +57,14 @@ class StackingRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
         Model coefficients of the fitted meta-estimator
     intercept_ : float
         Intercept of the fitted meta-estimator
+    train_meta_features_ : numpy array, shape = [n_samples, len(self.regressors)]
+        meta-features for training data, where n_samples is the
+        number of samples
+        in training data and len(self.regressors) is the number of regressors.
 
     """
-    def __init__(self, regressors, meta_regressor, verbose=0):
+    def __init__(self, regressors, meta_regressor, verbose=0,
+                 store_train_meta_features=False):
 
         self.regressors = regressors
         self.meta_regressor = meta_regressor
@@ -64,6 +75,7 @@ def __init__(self, regressors, meta_regressor, verbose=0):
                                      key, value in
                                      _name_estimators([meta_regressor])}
         self.verbose = verbose
+        self.store_train_meta_features = store_train_meta_features
 
     def fit(self, X, y):
         """Learn weight coefficients from training data for each regressor.
@@ -102,8 +114,12 @@ def fit(self, X, y):
 
             regr.fit(X, y)
 
-        meta_features = self._predict_meta_features(X)
+        meta_features = self.predict_meta_features(X)
         self.meta_regr_.fit(meta_features, y)
+
+        # save meta-features for training data
+        if self.store_train_meta_features:
+            self.train_meta_features_ = meta_features
         return self
 
     @property
@@ -135,7 +151,23 @@ def get_params(self, deep=True):
 
             return out
 
-    def _predict_meta_features(self, X):
+    def predict_meta_features(self, X):
+        """ Get meta-features of test-data.
+
+        Parameters
+        ----------
+        X : numpy array, shape = [n_samples, n_features]
+            Test vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        meta-features : numpy array, shape = [n_samples, len(self.regressors)]
+            meta-features for test data, where n_samples is the number of
+            samples in test data and len(self.regressors) is the number
+            of regressors.
+
+        """
         return np.column_stack([r.predict(X) for r in self.regr_])
 
     def predict(self, X):
@@ -152,5 +184,5 @@ def predict(self, X):
         y_target : array-like, shape = [n_samples] or [n_samples, n_targets]
             Predicted target values.
         """
-        meta_features = self._predict_meta_features(X)
+        meta_features = self.predict_meta_features(X)
         return self.meta_regr_.predict(meta_features)