From 5efe121fcea99f1fbafbbfce2039d6f2f1946575 Mon Sep 17 00:00:00 2001
From: rasbt <mail@sebastianraschka.com>
Date: Mon, 4 Dec 2017 12:54:45 -0500
Subject: [PATCH 1/2] store meta features in stackingregressor

---
 .../regressor/StackingCVRegressor.ipynb       | 47 ++++++++++++++-----
 mlxtend/__init__.py                           |  2 +-
 mlxtend/regressor/stacking_cv_regression.py   | 34 ++++++++++----
 mlxtend/regressor/stacking_regression.py      | 40 ++++++++++++++--
 .../tests/test_stacking_regression.py         | 27 +++++++++++
 5 files changed, 125 insertions(+), 25 deletions(-)
diff --git a/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb b/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb
index 96262d08b..cc3ba7a8b 100644
--- a/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb
+++ b/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb
@@ -301,7 +301,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -337,7 +337,7 @@
       "- `meta_regressor` : object\n",
       "\n",
       "    The meta-regressor to be fitted on the ensemble of\n",
-      "    regressors\n",
+      "    regressors\n",
       "\n",
       "- `cv` : int, cross-validation generator or iterable, optional (default: 5)\n",
       "\n",
@@ -366,18 +366,19 @@
       "\n",
       "- `store_train_meta_features` : bool (default: False)\n",
       "\n",
-      "    If True, the meta-features computed from the training data used\n",
-      "    for fitting the meta-regressor stored in the\n",
-      "    `self.train_meta_features_` array, which can be\n",
+      "    If True, the meta-features computed from the training data\n",
+      "    used for fitting the\n",
+      "    meta-regressor are stored in the `self.train_meta_features_` array,\n",
+      "    which can be\n",
       "    accessed after calling `fit`.\n",
       "\n",
       "**Attributes**\n",
       "\n",
-      "- `train_meta_features` : numpy array, shape=[n_samples, len(self.regressors)]\n",
+      "- `train_meta_features` : numpy array, shape = [n_samples, len(self.regressors)]\n",
       "\n",
-      "    meta-features for training data, where n_samples is the number of\n",
-      "    samples in training data and len(self.regressors) is\n",
-      "    the number of regressors.\n",
+      "    meta-features for training data, where n_samples is the\n",
+      "    number of samples\n",
+      "    in training data and len(self.regressors) is the number of regressors.\n",
       "\n",
       "### Methods\n",
       "\n",
@@ -459,7 +460,20 @@
       "\n",
       "*predict(X)*\n",
       "\n",
-      "None\n",
+      "Predict target values for X.\n",
+      "\n",
+      "**Parameters**\n",
+      "\n",
+      "- `X` : {array-like, sparse matrix}, shape = [n_samples, n_features]\n",
+      "\n",
+      "    Training vectors, where n_samples is the number of samples and\n",
+      "    n_features is the number of features.\n",
+      "\n",
+      "**Returns**\n",
+      "\n",
+      "- `y_target` : array-like, shape = [n_samples] or [n_samples, n_targets]\n",
+      "\n",
+      "    Predicted target values.\n",
       "\n",
       "<hr>\n",
       "\n",
@@ -479,8 +493,8 @@
       "- `meta-features` : numpy array, shape = [n_samples, len(self.regressors)]\n",
       "\n",
       "    meta-features for test data, where n_samples is the number of\n",
-      "    samples in test data and len(self.regressors) is the number of\n",
-      "    regressors.\n",
+      "    samples in test data and len(self.regressors) is the number\n",
+      "    of regressors.\n",
       "\n",
       "<hr>\n",
       "\n",
@@ -543,6 +557,15 @@
     "with open('../../api_modules/mlxtend.regressor/StackingCVRegressor.md', 'r') as f:\n",
     "    print(f.read())"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/mlxtend/__init__.py b/mlxtend/__init__.py
index 63a598bc9..841e82252 100644
--- a/mlxtend/__init__.py
+++ b/mlxtend/__init__.py
@@ -4,4 +4,4 @@
 #
 # License: BSD 3 clause
 
-__version__ = '0.9.1'
+__version__ = '0.9.2dev'
diff --git a/mlxtend/regressor/stacking_cv_regression.py b/mlxtend/regressor/stacking_cv_regression.py
index 25f778545..b76b6a5bc 100644
--- a/mlxtend/regressor/stacking_cv_regression.py
+++ b/mlxtend/regressor/stacking_cv_regression.py
@@ -40,13 +40,13 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
     Parameters
     ----------
     regressors : array-like, shape = [n_regressors]
-        A list of classifiers.
+        A list of regressors.
         Invoking the `fit` method on the `StackingCVRegressor` will fit clones
         of these original regressors that will
         be stored in the class attribute `self.regr_`.
     meta_regressor : object
-        The meta-classifier to be fitted on the ensemble of
-        classifiers
+        The meta-regressor to be fitted on the ensemble of
+        regressors
     cv : int, cross-validation generator or iterable, optional (default: 5)
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -56,7 +56,7 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
         - An iterable yielding train, test splits.
         For integer/None inputs, it will use `KFold` cross-validation
     use_features_in_secondary : bool (default: False)
-        If True, the meta-classifier will be trained both on
+        If True, the meta-regressor will be trained both on
         the predictions of the original regressors and the
         original dataset.
         If False, the meta-regressor will be trained only on
@@ -67,14 +67,17 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
         argument is a specific cross validation technique, this argument is
         omitted.
     store_train_meta_features : bool (default: False)
-        If True, the meta-features computed from the training data used for fitting the
-        meta-regressor stored in the `self.train_meta_features_` array, which can be
+        If True, the meta-features computed from the training data
+        used for fitting the
+        meta-regressor are stored in the `self.train_meta_features_` array,
+        which can be
         accessed after calling `fit`.
 
     Attributes
     ----------
     train_meta_features : numpy array, shape = [n_samples, len(self.regressors)]
-        meta-features for training data, where n_samples is the number of samples
+        meta-features for training data, where n_samples is the
+        number of samples
         in training data and len(self.regressors) is the number of regressors.
 
     """
@@ -167,6 +170,20 @@ def fit(self, X, y, groups=None):
         return self
 
     def predict(self, X):
+        """ Predict target values for X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            Training vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        y_target : array-like, shape = [n_samples] or [n_samples, n_targets]
+            Predicted target values.
+        """
+
         #
         # First we make predictions with the base-models then we predict with
         # the meta-model from that info.
@@ -193,7 +210,8 @@ def predict_meta_features(self, X):
         -------
         meta-features : numpy array, shape = [n_samples, len(self.regressors)]
             meta-features for test data, where n_samples is the number of
-            samples in test data and len(self.regressors) is the number of regressors.
+            samples in test data and len(self.regressors) is the number
+            of regressors.
 
         """
         return np.column_stack([regr.predict(X) for regr in self.regr_])
diff --git a/mlxtend/regressor/stacking_regression.py b/mlxtend/regressor/stacking_regression.py
index cb5f58932..8bc788d72 100644
--- a/mlxtend/regressor/stacking_regression.py
+++ b/mlxtend/regressor/stacking_regression.py
@@ -40,6 +40,12 @@ class StackingRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
                        regressor being fitted
         - `verbose>2`: Changes `verbose` param of the underlying regressor to
            self.verbose - 2
+    store_train_meta_features : bool (default: False)
+        If True, the meta-features computed from the training data
+        used for fitting the
+        meta-regressor are stored in the `self.train_meta_features_` array,
+        which can be
+        accessed after calling `fit`.
 
     Attributes
     ----------
@@ -51,9 +57,14 @@ class StackingRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
         Model coefficients of the fitted meta-estimator
     intercept_ : float
         Intercept of the fitted meta-estimator
+    train_meta_features_ : numpy array, shape=[n_samples, len(self.regressors)]
+        meta-features for training data, where n_samples is the
+        number of samples
+        in training data and len(self.regressors) is the number of regressors.
 
     """
-    def __init__(self, regressors, meta_regressor, verbose=0):
+    def __init__(self, regressors, meta_regressor, verbose=0,
+                 store_train_meta_features=False):
 
         self.regressors = regressors
         self.meta_regressor = meta_regressor
@@ -64,6 +75,7 @@ def __init__(self, regressors, meta_regressor, verbose=0):
                                      key, value in
                                      _name_estimators([meta_regressor])}
         self.verbose = verbose
+        self.store_train_meta_features = store_train_meta_features
 
     def fit(self, X, y):
         """Learn weight coefficients from training data for each regressor.
@@ -102,8 +114,12 @@ def fit(self, X, y):
 
             regr.fit(X, y)
 
-        meta_features = self._predict_meta_features(X)
+        meta_features = self.predict_meta_features(X)
         self.meta_regr_.fit(meta_features, y)
+
+        # save meta-features for training data
+        if self.store_train_meta_features:
+            self.train_meta_features_ = meta_features
         return self
 
     @property
@@ -135,7 +151,23 @@ def get_params(self, deep=True):
 
             return out
 
-    def _predict_meta_features(self, X):
+    def predict_meta_features(self, X):
+        """ Get meta-features of test-data.
+
+        Parameters
+        ----------
+        X : numpy array, shape = [n_samples, n_features]
+            Test vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        meta-features : numpy array, shape = [n_samples, len(self.regressors)]
+            meta-features for test data, where n_samples is the number of
+            samples in test data and len(self.regressors) is the number
+            of regressors.
+
+        """
         return np.column_stack([r.predict(X) for r in self.regr_])
 
     def predict(self, X):
@@ -152,5 +184,5 @@ def predict(self, X):
         y_target : array-like, shape = [n_samples] or [n_samples, n_targets]
             Predicted target values.
         """
-        meta_features = self._predict_meta_features(X)
+        meta_features = self.predict_meta_features(X)
         return self.meta_regr_.predict(meta_features)
diff --git a/mlxtend/regressor/tests/test_stacking_regression.py b/mlxtend/regressor/tests/test_stacking_regression.py
index ee708e989..56ea85af6 100644
--- a/mlxtend/regressor/tests/test_stacking_regression.py
+++ b/mlxtend/regressor/tests/test_stacking_regression.py
@@ -12,6 +12,7 @@
 from numpy.testing import assert_almost_equal
 from nose.tools import raises
 from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import train_test_split
 
 # Generating a sample dataset
 np.random.seed(1)
@@ -158,6 +159,7 @@ def test_get_params():
               'meta_regressor',
               'regressors',
               'ridge',
+              'store_train_meta_features',
               'verbose']
     assert got == expect, got
 
@@ -178,3 +180,28 @@ def test_regressor_gridsearch():
     grid.fit(X1, y)
 
     assert len(grid.best_params_['regressors']) == 2
+
+
+def test_predict_meta_features():
+    lr = LinearRegression()
+    svr_rbf = SVR(kernel='rbf')
+    ridge = Ridge(random_state=1)
+    stregr = StackingRegressor(regressors=[lr, ridge],
+                               meta_regressor=svr_rbf)
+    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
+    stregr.fit(X_train, y_train)
+    test_meta_features = stregr.predict_meta_features(X_test)
+    assert test_meta_features.shape[0] == X_test.shape[0]
+
+
+def test_train_meta_features_():
+    lr = LinearRegression()
+    svr_rbf = SVR(kernel='rbf')
+    ridge = Ridge(random_state=1)
+    stregr = StackingRegressor(regressors=[lr, ridge],
+                               meta_regressor=svr_rbf,
+                               store_train_meta_features=True)
+    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
+    stregr.fit(X_train, y_train)
+    train_meta_features = stregr.train_meta_features_
+    assert train_meta_features.shape[0] == X_train.shape[0]

From c525f36309e014b1b492578e4a9a3ea47cdab9a3 Mon Sep 17 00:00:00 2001
From: rasbt <mail@sebastianraschka.com>
Date: Mon, 4 Dec 2017 13:00:19 -0500
Subject: [PATCH 2/2] add meta feat to stackingregressor

---
 docs/sources/CHANGELOG.md                     | 30 +++++++++++--
 .../regressor/StackingRegressor.ipynb         | 43 +++++++++++++++++--
 2 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
index 091ccf73d..3709339e2 100755
--- a/docs/sources/CHANGELOG.md
+++ b/docs/sources/CHANGELOG.md
@@ -7,6 +7,33 @@ The CHANGELOG for the current development version is available at
 
 ---
 
+
+
+
+
+### Version 0.9.2dev
+
+##### Downloads
+
+- [Source code (zip)](https://github.com/rasbt/mlxtend/archive/v0.9.2.zip)
+- [Source code (tar.gz)](https://github.com/rasbt/mlxtend/archive/v0.9.2.tar.gz)
+
+##### New Features
+
+- New `store_train_meta_features` parameter for `fit` in StackingCVRegressor. If True, train meta-features are stored in `self.train_meta_features_`.
+    New `predict_meta_features` method for `StackingCVRegressor`. People can get test meta-features using this method. ([#294](https://github.com/rasbt/mlxtend/pull/294)
+    via [takashioya](https://github.com/takashioya))
+- The new `store_train_meta_features` parameter and `predict_meta_features` method for the `StackingCVRegressor` were also added to the `StackingRegressor`. ([#299](https://github.com/rasbt/mlxtend/pull/299))
+
+##### Changes
+
+- -
+
+
+##### Bug Fixes
+
+- -
+
 ### Version 0.9.1 (2017-11-19)
 
 ##### Downloads
@@ -18,9 +45,6 @@ The CHANGELOG for the current development version is available at
 
 - Added `mlxtend.evaluate.bootstrap_point632_score` to evaluate the performance of estimators using the .632 bootstrap. ([#283](https://github.com/rasbt/mlxtend/pull/283))
 - New `max_len` parameter for the frequent itemset generation via the `apriori` function to allow for early stopping. ([#270](https://github.com/rasbt/mlxtend/pull/270))
-- New `store_train_meta_features` parameter for `fit` in StackingCVRegressor. if True, train meta-features are stored in `self.train_meta_features_`.
-    New `pred_meta_features` method for StackingCVRegressor. People can get test meta-features using this method. ([#294](https://github.com/rasbt/mlxtend/pull/294))
-    via [takashioya](https://github.com/takashioya))
 
 ##### Changes
 
diff --git a/docs/sources/user_guide/regressor/StackingRegressor.ipynb b/docs/sources/user_guide/regressor/StackingRegressor.ipynb
index 8bde45a11..bec46fcbb 100644
--- a/docs/sources/user_guide/regressor/StackingRegressor.ipynb
+++ b/docs/sources/user_guide/regressor/StackingRegressor.ipynb
@@ -634,7 +634,7 @@
      "text": [
       "## StackingRegressor\n",
       "\n",
-      "*StackingRegressor(regressors, meta_regressor, verbose=0)*\n",
+      "*StackingRegressor(regressors, meta_regressor, verbose=0, store_train_meta_features=False)*\n",
       "\n",
       "A Stacking regressor for scikit-learn estimators for regression.\n",
       "\n",
@@ -663,6 +663,14 @@
       "    - `verbose>2`: Changes `verbose` param of the underlying regressor to\n",
       "    self.verbose - 2\n",
       "\n",
+      "- `store_train_meta_features` : bool (default: False)\n",
+      "\n",
+      "    If True, the meta-features computed from the training data\n",
+      "    used for fitting the\n",
+      "    meta-regressor are stored in the `self.train_meta_features_` array,\n",
+      "    which can be\n",
+      "    accessed after calling `fit`.\n",
+      "\n",
       "**Attributes**\n",
       "\n",
       "- `regr_` : list, shape=[n_regressors]\n",
@@ -681,6 +689,12 @@
       "\n",
       "    Intercept of the fitted meta-estimator\n",
       "\n",
+      "- `train_meta_features_` : numpy array, shape = [n_samples, len(self.regressors)]\n",
+      "\n",
+      "    meta-features for training data, where n_samples is the\n",
+      "    number of samples\n",
+      "    in training data and len(self.regressors) is the number of regressors.\n",
+      "\n",
       "### Methods\n",
       "\n",
       "<hr>\n",
@@ -758,15 +772,36 @@
       "\n",
       "<hr>\n",
       "\n",
+      "*predict_meta_features(X)*\n",
+      "\n",
+      "Get meta-features of test-data.\n",
+      "\n",
+      "**Parameters**\n",
+      "\n",
+      "- `X` : numpy array, shape = [n_samples, n_features]\n",
+      "\n",
+      "    Test vectors, where n_samples is the number of samples and\n",
+      "    n_features is the number of features.\n",
+      "\n",
+      "**Returns**\n",
+      "\n",
+      "- `meta-features` : numpy array, shape = [n_samples, len(self.regressors)]\n",
+      "\n",
+      "    meta-features for test data, where n_samples is the number of\n",
+      "    samples in test data and len(self.regressors) is the number\n",
+      "    of regressors.\n",
+      "\n",
+      "<hr>\n",
+      "\n",
       "*score(X, y, sample_weight=None)*\n",
       "\n",
       "Returns the coefficient of determination R^2 of the prediction.\n",
       "\n",
-      "The coefficient R^2 is defined as (1 - u/v), where u is the regression\n",
-      "sum of squares ((y_true - y_pred) ** 2).sum() and v is the residual\n",
+      "The coefficient R^2 is defined as (1 - u/v), where u is the residual\n",
+      "sum of squares ((y_true - y_pred) ** 2).sum() and v is the total\n",
       "sum of squares ((y_true - y_true.mean()) ** 2).sum().\n",
       "\n",
-      "Best possible score is 1.0 and it can be negative (because the\n",
+      "The best possible score is 1.0 and it can be negative (because the\n",
       "\n",
       "model can be arbitrarily worse). A constant model that always\n",
       "predicts the expected value of y, disregarding the input features,\n",