From 248a5037f629839886d6d02c36ec6d4e1e39117a Mon Sep 17 00:00:00 2001 From: Matti Lyra Date: Fri, 7 Nov 2014 11:53:58 +0000 Subject: [PATCH] Fixed issue 3815. Discrete AdaBoostClassifier now fails early if the base classifier is worse than random. --- sklearn/ensemble/tests/test_weight_boosting.py | 7 ++++++- sklearn/ensemble/weight_boosting.py | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index f498fd524baf2..c473fdc1dcd17 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -42,7 +42,6 @@ boston.data, boston.target = shuffle(boston.data, boston.target, random_state=rng) - def test_classification_toy(): """Check classification on a toy dataset.""" for alg in ['SAMME', 'SAMME.R']: @@ -247,6 +246,12 @@ def test_base_estimator(): clf = AdaBoostRegressor(SVR(), random_state=0) clf.fit(X, y_regr) + # check that an empty discrete ensemble fails early + X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]] + y_fail = ["foo", "bar", 1, 2] + clf = AdaBoostClassifier(SVC(), algorithm="SAMME") + assert_raises(ValueError, clf.fit, X_fail, y_fail) + def test_sample_weight_missing(): from sklearn.linear_model import LinearRegression diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index 1ccc26c5ffa4d..6746d46a881a0 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -546,6 +546,10 @@ def _boost_discrete(self, iboost, X, y, sample_weight): # Stop if the error is at least as bad as random guessing if estimator_error >= 1. - (1. / n_classes): self.estimators_.pop(-1) + if len(self.estimators_) == 0: + raise ValueError('BaseClassifier in AdaBoostClassifier ' + 'ensemble is worse than random, ensemble ' + 'can not be fit.') return None, None, None # Boost weight using multi-class AdaBoost SAMME alg