From 248a5037f629839886d6d02c36ec6d4e1e39117a Mon Sep 17 00:00:00 2001
From: Matti Lyra <m.lyra@sussex.ac.uk>
Date: Fri, 7 Nov 2014 11:53:58 +0000
Subject: [PATCH] Fixed issue 3815. Discrete AdaBoostClassifier now fails early
 if the base classifier is worse than random.

---
 sklearn/ensemble/tests/test_weight_boosting.py | 7 ++++++-
 sklearn/ensemble/weight_boosting.py            | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py
index f498fd524baf2..c473fdc1dcd17 100755
--- a/sklearn/ensemble/tests/test_weight_boosting.py
+++ b/sklearn/ensemble/tests/test_weight_boosting.py
@@ -42,7 +42,6 @@
 boston.data, boston.target = shuffle(boston.data, boston.target,
                                      random_state=rng)
 
-
 def test_classification_toy():
     """Check classification on a toy dataset."""
     for alg in ['SAMME', 'SAMME.R']:
@@ -247,6 +246,12 @@ def test_base_estimator():
     clf = AdaBoostRegressor(SVR(), random_state=0)
     clf.fit(X, y_regr)
 
+    # check that an empty discrete ensemble fails early
+    X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]]
+    y_fail = ["foo", "bar", 1, 2]
+    clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
+    assert_raises(ValueError, clf.fit, X_fail, y_fail)
+
 
 def test_sample_weight_missing():
     from sklearn.linear_model import LinearRegression
diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py
index 1ccc26c5ffa4d..6746d46a881a0 100644
--- a/sklearn/ensemble/weight_boosting.py
+++ b/sklearn/ensemble/weight_boosting.py
@@ -546,6 +546,10 @@ def _boost_discrete(self, iboost, X, y, sample_weight):
         # Stop if the error is at least as bad as random guessing
         if estimator_error >= 1. - (1. / n_classes):
             self.estimators_.pop(-1)
+            if len(self.estimators_) == 0:
+                raise ValueError('BaseClassifier in AdaBoostClassifier '
+                                 'ensemble is worse than random, ensemble '
+                                 'can not be fit.')
             return None, None, None
 
         # Boost weight using multi-class AdaBoost SAMME alg