From 5efe121fcea99f1fbafbbfce2039d6f2f1946575 Mon Sep 17 00:00:00 2001 From: rasbt Date: Mon, 4 Dec 2017 12:54:45 -0500 Subject: [PATCH 1/2] store meta features in stackingregressor --- .../regressor/StackingCVRegressor.ipynb | 47 ++++++++++++++----- mlxtend/__init__.py | 2 +- mlxtend/regressor/stacking_cv_regression.py | 34 ++++++++++---- mlxtend/regressor/stacking_regression.py | 40 ++++++++++++++-- .../tests/test_stacking_regression.py | 27 +++++++++++ 5 files changed, 125 insertions(+), 25 deletions(-) diff --git a/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb b/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb index 96262d08b..cc3ba7a8b 100644 --- a/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb +++ b/docs/sources/user_guide/regressor/StackingCVRegressor.ipynb @@ -301,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -337,7 +337,7 @@ "- `meta_regressor` : object\n", "\n", " The meta-regressor to be fitted on the ensemble of\n", - " regressors\n", + " regressor\n", "\n", "- `cv` : int, cross-validation generator or iterable, optional (default: 5)\n", "\n", @@ -366,18 +366,19 @@ "\n", "- `store_train_meta_features` : bool (default: False)\n", "\n", - " If True, the meta-features computed from the training data used\n", - " for fitting the meta-regressor stored in the\n", - " `self.train_meta_features_` array, which can be\n", + " If True, the meta-features computed from the training data\n", + " used for fitting the\n", + " meta-regressor stored in the `self.train_meta_features_` array,\n", + " which can be\n", " accessed after calling `fit`.\n", "\n", "**Attributes**\n", "\n", - "- `train_meta_features` : numpy array, shape=[n_samples, len(self.regressors)]\n", + "- `train_meta_features` : numpy array, shape = [n_samples, len(self.regressors)]\n", "\n", - " meta-features for training data, where n_samples is the number of\n", - " samples in training data 
and len(self.regressors) is\n", - " the number of regressors.\n", + " meta-features for training data, where n_samples is the\n", + " number of samples\n", + " in training data and len(self.regressors) is the number of regressors.\n", "\n", "### Methods\n", "\n", @@ -459,7 +460,20 @@ "\n", "*predict(X)*\n", "\n", - "None\n", + "Predict target values for X.\n", + "\n", + "**Parameters**\n", + "\n", + "- `X` : {array-like, sparse matrix}, shape = [n_samples, n_features]\n", + "\n", + " Training vectors, where n_samples is the number of samples and\n", + " n_features is the number of features.\n", + "\n", + "**Returns**\n", + "\n", + "- `y_target` : array-like, shape = [n_samples] or [n_samples, n_targets]\n", + "\n", + " Predicted target values.\n", "\n", "
\n", "\n", @@ -479,8 +493,8 @@ "- `meta-features` : numpy array, shape = [n_samples, len(self.regressors)]\n", "\n", " meta-features for test data, where n_samples is the number of\n", - " samples in test data and len(self.regressors) is the number of\n", - " regressors.\n", + " samples in test data and len(self.regressors) is the number\n", + " of regressors.\n", "\n", "
\n", "\n", @@ -543,6 +557,15 @@ "with open('../../api_modules/mlxtend.regressor/StackingCVRegressor.md', 'r') as f:\n", " print(f.read())" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/mlxtend/__init__.py b/mlxtend/__init__.py index 63a598bc9..841e82252 100644 --- a/mlxtend/__init__.py +++ b/mlxtend/__init__.py @@ -4,4 +4,4 @@ # # License: BSD 3 clause -__version__ = '0.9.1' +__version__ = '0.9.2dev' diff --git a/mlxtend/regressor/stacking_cv_regression.py b/mlxtend/regressor/stacking_cv_regression.py index 25f778545..b76b6a5bc 100644 --- a/mlxtend/regressor/stacking_cv_regression.py +++ b/mlxtend/regressor/stacking_cv_regression.py @@ -40,13 +40,13 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin): Parameters ---------- regressors : array-like, shape = [n_regressors] - A list of classifiers. + A list of regressors. Invoking the `fit` method on the `StackingCVRegressor` will fit clones of these original regressors that will be stored in the class attribute `self.regr_`. meta_regressor : object - The meta-classifier to be fitted on the ensemble of - classifiers + The meta-regressor to be fitted on the ensemble of + regressors cv : int, cross-validation generator or iterable, optional (default: 5) Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -56,7 +56,7 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin): - An iterable yielding train, test splits. For integer/None inputs, it will use `KFold` cross-validation use_features_in_secondary : bool (default: False) - If True, the meta-classifier will be trained both on + If True, the meta-regressor will be trained both on the predictions of the original regressors and the original dataset. 
If False, the meta-regressor will be trained only on @@ -67,14 +67,17 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin): argument is a specific cross validation technique, this argument is omitted. store_train_meta_features : bool (default: False) - If True, the meta-features computed from the training data used for fitting the - meta-regressor stored in the `self.train_meta_features_` array, which can be + If True, the meta-features computed from the training data + used for fitting the + meta-regressor stored in the `self.train_meta_features_` array, + which can be accessed after calling `fit`. Attributes ---------- train_meta_features : numpy array, shape = [n_samples, len(self.regressors)] - meta-features for training data, where n_samples is the number of samples + meta-features for training data, where n_samples is the + number of samples in training data and len(self.regressors) is the number of regressors. """ @@ -167,6 +170,20 @@ def fit(self, X, y, groups=None): return self def predict(self, X): + """ Predict target values for X. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of features. + + Returns + ---------- + y_target : array-like, shape = [n_samples] or [n_samples, n_targets] + Predicted target values. + """ + # # First we make predictions with the base-models then we predict with # the meta-model from that info. @@ -193,7 +210,8 @@ def predict_meta_features(self, X): ------- meta-features : numpy array, shape = [n_samples, len(self.regressors)] meta-features for test data, where n_samples is the number of - samples in test data and len(self.regressors) is the number of regressors. + samples in test data and len(self.regressors) is the number + of regressors. 
""" return np.column_stack([regr.predict(X) for regr in self.regr_]) diff --git a/mlxtend/regressor/stacking_regression.py b/mlxtend/regressor/stacking_regression.py index cb5f58932..8bc788d72 100644 --- a/mlxtend/regressor/stacking_regression.py +++ b/mlxtend/regressor/stacking_regression.py @@ -40,6 +40,12 @@ class StackingRegressor(BaseEstimator, RegressorMixin, TransformerMixin): regressor being fitted - `verbose>2`: Changes `verbose` param of the underlying regressor to self.verbose - 2 + store_train_meta_features : bool (default: False) + If True, the meta-features computed from the training data + used for fitting the + meta-regressor stored in the `self.train_meta_features_` array, + which can be + accessed after calling `fit`. Attributes ---------- @@ -51,9 +57,14 @@ class StackingRegressor(BaseEstimator, RegressorMixin, TransformerMixin): Model coefficients of the fitted meta-estimator intercept_ : float Intercept of the fitted meta-estimator + train_meta_features : numpy array, shape = [n_samples, len(self.regressors)] + meta-features for training data, where n_samples is the + number of samples + in training data and len(self.regressors) is the number of regressors. """ - def __init__(self, regressors, meta_regressor, verbose=0): + def __init__(self, regressors, meta_regressor, verbose=0, + store_train_meta_features=False): self.regressors = regressors self.meta_regressor = meta_regressor @@ -64,6 +75,7 @@ def __init__(self, regressors, meta_regressor, verbose=0): key, value in _name_estimators([meta_regressor])} self.verbose = verbose + self.store_train_meta_features = store_train_meta_features def fit(self, X, y): """Learn weight coefficients from training data for each regressor. 
@@ -102,8 +114,12 @@ def fit(self, X, y): regr.fit(X, y) - meta_features = self._predict_meta_features(X) + meta_features = self.predict_meta_features(X) self.meta_regr_.fit(meta_features, y) + + # save meta-features for training data + if self.store_train_meta_features: + self.train_meta_features_ = meta_features return self @property @@ -135,7 +151,23 @@ def get_params(self, deep=True): return out - def _predict_meta_features(self, X): + def predict_meta_features(self, X): + """ Get meta-features of test-data. + + Parameters + ---------- + X : numpy array, shape = [n_samples, n_features] + Test vectors, where n_samples is the number of samples and + n_features is the number of features. + + Returns + ------- + meta-features : numpy array, shape = [n_samples, len(self.regressors)] + meta-features for test data, where n_samples is the number of + samples in test data and len(self.regressors) is the number + of regressors. + + """ return np.column_stack([r.predict(X) for r in self.regr_]) def predict(self, X): @@ -152,5 +184,5 @@ def predict(self, X): y_target : array-like, shape = [n_samples] or [n_samples, n_targets] Predicted target values. 
""" - meta_features = self._predict_meta_features(X) + meta_features = self.predict_meta_features(X) return self.meta_regr_.predict(meta_features) diff --git a/mlxtend/regressor/tests/test_stacking_regression.py b/mlxtend/regressor/tests/test_stacking_regression.py index ee708e989..56ea85af6 100644 --- a/mlxtend/regressor/tests/test_stacking_regression.py +++ b/mlxtend/regressor/tests/test_stacking_regression.py @@ -12,6 +12,7 @@ from numpy.testing import assert_almost_equal from nose.tools import raises from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import train_test_split # Generating a sample dataset np.random.seed(1) @@ -158,6 +159,7 @@ def test_get_params(): 'meta_regressor', 'regressors', 'ridge', + 'store_train_meta_features', 'verbose'] assert got == expect, got @@ -178,3 +180,28 @@ def test_regressor_gridsearch(): grid.fit(X1, y) assert len(grid.best_params_['regressors']) == 2 + + +def test_predict_meta_features(): + lr = LinearRegression() + svr_rbf = SVR(kernel='rbf') + ridge = Ridge(random_state=1) + stregr = StackingRegressor(regressors=[lr, ridge], + meta_regressor=svr_rbf) + X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3) + stregr.fit(X_train, y_train) + test_meta_features = stregr.predict(X_test) + assert test_meta_features.shape[0] == X_test.shape[0] + + +def test_train_meta_features_(): + lr = LinearRegression() + svr_rbf = SVR(kernel='rbf') + ridge = Ridge(random_state=1) + stregr = StackingRegressor(regressors=[lr, ridge], + meta_regressor=svr_rbf, + store_train_meta_features=True) + X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3) + stregr.fit(X_train, y_train) + train_meta_features = stregr.train_meta_features_ + assert train_meta_features.shape[0] == X_train.shape[0] From c525f36309e014b1b492578e4a9a3ea47cdab9a3 Mon Sep 17 00:00:00 2001 From: rasbt Date: Mon, 4 Dec 2017 13:00:19 -0500 Subject: [PATCH 2/2] add meta feat to stackingregressor --- 
docs/sources/CHANGELOG.md | 30 +++++++++++-- .../regressor/StackingRegressor.ipynb | 43 +++++++++++++++++-- 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md index 091ccf73d..3709339e2 100755 --- a/docs/sources/CHANGELOG.md +++ b/docs/sources/CHANGELOG.md @@ -7,6 +7,33 @@ The CHANGELOG for the current development version is available at --- + + + + +### Version 0.9.2dev + +##### Downloads + +- [Source code (zip)](https://github.com/rasbt/mlxtend/archive/v0.9.2.zip) +- [Source code (tar.gz)](https://github.com/rasbt/mlxtend/archive/v0.9.2.tar.gz) + +##### New Features + +- New `store_train_meta_features` parameter for the `StackingCVRegressor`. If True, train meta-features are stored in `self.train_meta_features_`. + New `predict_meta_features` method for `StackingCVRegressor`. People can get test meta-features using this method. ([#294](https://github.com/rasbt/mlxtend/pull/294) + via [takashioya](https://github.com/takashioya)) +- The new `store_train_meta_features` parameter and `predict_meta_features` method for the `StackingCVRegressor` were also added to the `StackingRegressor` ([#299](https://github.com/rasbt/mlxtend/pull/299)) + +##### Changes + +- - + + +##### Bug Fixes + +- - + ### Version 0.9.1 (2017-11-19) ##### Downloads @@ -18,9 +45,6 @@ The CHANGELOG for the current development version is available at - Added `mlxtend.evaluate.bootstrap_point632_score` to evaluate the performance of estimators using the .632 bootstrap. ([#283](https://github.com/rasbt/mlxtend/pull/283)) - New `max_len` parameter for the frequent itemset generation via the `apriori` function to allow for early stopping. ([#270](https://github.com/rasbt/mlxtend/pull/270)) -- New `store_train_meta_features` parameter for `fit` in StackingCVRegressor. if True, train meta-features are stored in `self.train_meta_features_`. - New `pred_meta_features` method for StackingCVRegressor. 
People can get test meta-features using this method. ([#294](https://github.com/rasbt/mlxtend/pull/294)) - via [takashioya](https://github.com/takashioya)) ##### Changes diff --git a/docs/sources/user_guide/regressor/StackingRegressor.ipynb b/docs/sources/user_guide/regressor/StackingRegressor.ipynb index 8bde45a11..bec46fcbb 100644 --- a/docs/sources/user_guide/regressor/StackingRegressor.ipynb +++ b/docs/sources/user_guide/regressor/StackingRegressor.ipynb @@ -634,7 +634,7 @@ "text": [ "## StackingRegressor\n", "\n", - "*StackingRegressor(regressors, meta_regressor, verbose=0)*\n", + "*StackingRegressor(regressors, meta_regressor, verbose=0, store_train_meta_features=False)*\n", "\n", "A Stacking regressor for scikit-learn estimators for regression.\n", "\n", @@ -663,6 +663,14 @@ " - `verbose>2`: Changes `verbose` param of the underlying regressor to\n", " self.verbose - 2\n", "\n", + "- `store_train_meta_features` : bool (default: False)\n", + "\n", + " If True, the meta-features computed from the training data\n", + " used for fitting the\n", + " meta-regressor stored in the `self.train_meta_features_` array,\n", + " which can be\n", + " accessed after calling `fit`.\n", + "\n", "**Attributes**\n", "\n", "- `regr_` : list, shape=[n_regressors]\n", @@ -681,6 +689,12 @@ "\n", " Intercept of the fitted meta-estimator\n", "\n", + "- `train_meta_features` : numpy array, shape = [n_samples, len(self.regressors)]\n", + "\n", + " meta-features for training data, where n_samples is the\n", + " number of samples\n", + " in training data and len(self.regressors) is the number of regressors.\n", + "\n", "### Methods\n", "\n", "
\n", @@ -758,15 +772,36 @@ "\n", "
\n", "\n", + "*predict_meta_features(X)*\n", + "\n", + "Get meta-features of test-data.\n", + "\n", + "**Parameters**\n", + "\n", + "- `X` : numpy array, shape = [n_samples, n_features]\n", + "\n", + " Test vectors, where n_samples is the number of samples and\n", + " n_features is the number of features.\n", + "\n", + "**Returns**\n", + "\n", + "- `meta-features` : numpy array, shape = [n_samples, len(self.regressors)]\n", + "\n", + " meta-features for test data, where n_samples is the number of\n", + " samples in test data and len(self.regressors) is the number\n", + " of regressors.\n", + "\n", + "
\n", + "\n", "*score(X, y, sample_weight=None)*\n", "\n", "Returns the coefficient of determination R^2 of the prediction.\n", "\n", - "The coefficient R^2 is defined as (1 - u/v), where u is the regression\n", - "sum of squares ((y_true - y_pred) ** 2).sum() and v is the residual\n", + "The coefficient R^2 is defined as (1 - u/v), where u is the residual\n", + "sum of squares ((y_true - y_pred) ** 2).sum() and v is the total\n", "sum of squares ((y_true - y_true.mean()) ** 2).sum().\n", "\n", - "Best possible score is 1.0 and it can be negative (because the\n", + "The best possible score is 1.0 and it can be negative (because the\n", "\n", "model can be arbitrarily worse). A constant model that always\n", "predicts the expected value of y, disregarding the input features,\n",