Merge branch 'master' of https://github.com/scikit-learn/scikit-learn into searchcv

thechargedneutron committed Sep 2, 2017
2 parents cb0cabc + 233a3e5 commit ee82d80
Showing 38 changed files with 307 additions and 119 deletions.
12 changes: 12 additions & 0 deletions doc/whats_new.rst
@@ -43,6 +43,18 @@ Classifiers and regressors
Enhancements
............

Classifiers and regressors

- In :class:`gaussian_process.GaussianProcessRegressor`, the ``predict``
  method is faster when using ``return_std=True``, in particular when it is
  called several times in a row (see the sketch after this list).
  :issue:`9234` by :user:`andrewww <andrewww>`
  and :user:`Minghui Liu <minghui-liu>`.

- Add a ``named_estimators_`` attribute to
  :class:`sklearn.ensemble.VotingClassifier` to access fitted
  estimators by name (see the sketch after this list).
  :issue:`9157` by :user:`Herilalaina Rakotoarison <herilalaina>`.

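A minimal sketch of the call pattern the GaussianProcessRegressor entry refers
to; the synthetic data and the default kernel below are illustrative, not taken
from the commit:

import numpy as np

from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(50, 1))
y = np.sin(X).ravel() + 0.1 * rng.randn(50)

gpr = GaussianProcessRegressor().fit(X, y)

# Repeated calls with return_std=True are the case the entry above says is
# faster, especially when made several times in a row.
for _ in range(5):
    y_mean, y_std = gpr.predict(X, return_std=True)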

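And a short illustration of the new ``named_estimators_`` access path; the
dataset and sub-estimators here are placeholders chosen for brevity:

from sklearn.datasets import load_iris
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)

voter = VotingClassifier(estimators=[
    ('lr', LogisticRegression()),
    ('tree', DecisionTreeClassifier(random_state=0)),
]).fit(X, y)

# Each fitted sub-estimator is reachable by the name given in `estimators`.
print(voter.named_estimators_['lr'].coef_.shape)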
Model evaluation and meta-estimators

- A scorer based on :func:`metrics.brier_score_loss` is also available.
15 changes: 11 additions & 4 deletions examples/ensemble/plot_bias_variance.py
@@ -88,12 +88,14 @@

n_estimators = len(estimators)


# Generate data
def f(x):
x = x.ravel()

return np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2)


def generate(n_samples, noise, n_repeat=1):
X = np.random.rand(n_samples) * 10 - 5
X = np.sort(X)
@@ -110,6 +112,7 @@ def generate(n_samples, noise, n_repeat=1):

return X, y


X_train = []
y_train = []

@@ -120,6 +123,8 @@ def generate(n_samples, noise, n_repeat=1):

X_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat)

plt.figure(figsize=(10, 8))

# Loop over estimators to compare
for n, (name, estimator) in enumerate(estimators):
# Compute predictions
@@ -166,8 +171,8 @@ def generate(n_samples, noise, n_repeat=1):
plt.xlim([-5, 5])
plt.title(name)

if n == 0:
plt.legend(loc="upper left", prop={"size": 11})
if n == n_estimators - 1:
plt.legend(loc=(1.1, .5))

plt.subplot(2, n_estimators, n_estimators + n + 1)
plt.plot(X_test, y_error, "r", label="$error(x)$")
@@ -178,7 +183,9 @@ def generate(n_samples, noise, n_repeat=1):
plt.xlim([-5, 5])
plt.ylim([0, 0.1])

if n == 0:
plt.legend(loc="upper left", prop={"size": 11})
if n == n_estimators - 1:

plt.legend(loc=(1.1, .5))

plt.subplots_adjust(right=.75)
plt.show()
@@ -65,7 +65,8 @@

print("Iteration %i %s" % (i, 70 * "_"))
print("Label Spreading model: %d labeled & %d unlabeled (%d total)"
% (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))
% (n_labeled_points, n_total_samples - n_labeled_points,
n_total_samples))

print(classification_report(true_labels, predicted_labels))

@@ -95,7 +96,7 @@
# for more than 5 iterations, visualize the gain only on the first 5
if i < 5:
sub = f.add_subplot(5, 5, index + 1 + (5 * i))
sub.imshow(image, cmap=plt.cm.gray_r)
sub.imshow(image, cmap=plt.cm.gray_r, interpolation='none')
sub.set_title("predict: %i\ntrue: %i" % (
lp_model.transduction_[image_index], y[image_index]), size=10)
sub.axis('off')
@@ -108,6 +109,7 @@
n_labeled_points += len(uncertainty_index)

f.suptitle("Active learning with Label Propagation.\nRows show 5 most "
"uncertain labels to learn with the next model.")
plt.subplots_adjust(0.12, 0.03, 0.9, 0.8, 0.2, 0.45)
"uncertain labels to learn with the next model.", y=1.15)
plt.subplots_adjust(left=0.2, bottom=0.03, right=0.9, top=0.9, wspace=0.2,
hspace=0.85)
plt.show()
4 changes: 2 additions & 2 deletions sklearn/cluster/hierarchical.py
@@ -609,7 +609,7 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin):
"manhattan", "cosine", or 'precomputed'.
If linkage is "ward", only "euclidean" is accepted.
memory : joblib.Memory-like or string, optional
memory : None, str or object with the joblib.Memory interface, optional
Used to cache the output of the computation of the tree.
By default, no caching is done. If a string is given, it is the
path to the caching directory.
@@ -769,7 +769,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
"manhattan", "cosine", or 'precomputed'.
If linkage is "ward", only "euclidean" is accepted.
memory : joblib.Memory-like or string, optional
memory : None, str or object with the joblib.Memory interface, optional
Used to cache the output of the computation of the tree.
By default, no caching is done. If a string is given, it is the
path to the caching directory.
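The reworded ``memory`` description applies to both AgglomerativeClustering and
FeatureAgglomeration. A minimal sketch of the caching behaviour it documents,
assuming a throwaway temporary directory and synthetic blobs:

from tempfile import mkdtemp

from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=200, centers=4, random_state=0)

# A string is interpreted as a caching directory; an object with the
# joblib.Memory interface works as well. The tree computation is cached,
# so re-clustering with a different n_clusters avoids recomputing it.
cache_dir = mkdtemp()
for k in (2, 3, 4):
    labels = AgglomerativeClustering(n_clusters=k, memory=cache_dir).fit_predict(X)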
18 changes: 8 additions & 10 deletions sklearn/datasets/tests/test_lfw.py
@@ -28,7 +28,7 @@
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import SkipTest
from sklearn.utils.testing import raises
from sklearn.utils.testing import assert_raises


SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_")
@@ -110,10 +110,9 @@ def teardown_module():
shutil.rmtree(SCIKIT_LEARN_EMPTY_DATA)


@raises(IOError)
def test_load_empty_lfw_people():
fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA,
download_if_missing=False)
assert_raises(IOError, fetch_lfw_people, data_home=SCIKIT_LEARN_EMPTY_DATA,
download_if_missing=False)


def test_load_fake_lfw_people():
@@ -148,16 +147,15 @@ def test_load_fake_lfw_people():
'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez'])


@raises(ValueError)
def test_load_fake_lfw_people_too_restrictive():
fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100,
download_if_missing=False)
assert_raises(ValueError, fetch_lfw_people, data_home=SCIKIT_LEARN_DATA,
min_faces_per_person=100, download_if_missing=False)


@raises(IOError)
def test_load_empty_lfw_pairs():
fetch_lfw_pairs(data_home=SCIKIT_LEARN_EMPTY_DATA,
download_if_missing=False)
assert_raises(IOError, fetch_lfw_pairs,
data_home=SCIKIT_LEARN_EMPTY_DATA,
download_if_missing=False)


def test_load_fake_lfw_pairs():
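The test changes in this file (and in test_svmlight_format.py below) all follow
the same pattern: the ``@raises`` decorator is replaced by an explicit
``assert_raises`` call, presumably to reduce reliance on nose-style helpers. A
hypothetical sketch of the before/after shape, with a stand-in function that is
not part of scikit-learn:

from sklearn.utils.testing import assert_raises


def load_config(path):
    # Stand-in for the function under test; always fails.
    raise ValueError("cannot read %r" % (path,))


# Before: the whole test would be decorated with @raises(ValueError).
# After: the expected exception is asserted on the specific call.
def test_load_config_invalid():
    assert_raises(ValueError, load_config, "does-not-exist")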
20 changes: 7 additions & 13 deletions sklearn/datasets/tests/test_svmlight_format.py
@@ -15,7 +15,6 @@
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_raises_regex
from sklearn.utils.testing import raises
from sklearn.utils.testing import assert_in
from sklearn.utils.fixes import sp_version

@@ -138,20 +137,17 @@ def test_load_compressed():
assert_array_equal(y, ybz)


@raises(ValueError)
def test_load_invalid_file():
load_svmlight_file(invalidfile)
assert_raises(ValueError, load_svmlight_file, invalidfile)


@raises(ValueError)
def test_load_invalid_order_file():
load_svmlight_file(invalidfile2)
assert_raises(ValueError, load_svmlight_file, invalidfile2)


@raises(ValueError)
def test_load_zero_based():
f = BytesIO(b("-1 4:1.\n1 0:1\n"))
load_svmlight_file(f, zero_based=False)
assert_raises(ValueError, load_svmlight_file, f, zero_based=False)


def test_load_zero_based_auto():
@@ -186,21 +182,19 @@ def test_load_with_qid():
assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]])


@raises(ValueError)
def test_load_invalid_file2():
load_svmlight_files([datafile, invalidfile, datafile])
assert_raises(ValueError, load_svmlight_files,
[datafile, invalidfile, datafile])


@raises(TypeError)
def test_not_a_filename():
# in python 3 integers are valid file opening arguments (taken as unix
# file descriptors)
load_svmlight_file(.42)
assert_raises(TypeError, load_svmlight_file, .42)


@raises(IOError)
def test_invalid_filename():
load_svmlight_file("trou pic nic douille")
assert_raises(IOError, load_svmlight_file, "trou pic nic douille")


def test_dump():
12 changes: 9 additions & 3 deletions sklearn/decomposition/dict_learning.py
@@ -927,9 +927,9 @@ def fit(self, X, y=None):
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training vector, where n_samples in the number of samples
and n_features is the number of features.
X : Ignored.
y : Ignored.
Returns
-------
@@ -1081,6 +1081,8 @@ def fit(self, X, y=None):
Training vector, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
Returns
-------
self : object
@@ -1251,6 +1253,8 @@ def fit(self, X, y=None):
Training vector, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
Returns
-------
self : object
@@ -1284,6 +1288,8 @@ def partial_fit(self, X, y=None, iter_offset=None):
Training vector, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
iter_offset : integer, optional
The number of iterations on data batches that have been
performed before this call to partial_fit. This is optional:
4 changes: 4 additions & 0 deletions sklearn/decomposition/factor_analysis.py
@@ -149,6 +149,8 @@ def fit(self, X, y=None):
X : array-like, shape (n_samples, n_features)
Training data.
y : Ignored.
Returns
-------
self
@@ -338,6 +340,8 @@ def score(self, X, y=None):
X : array, shape (n_samples, n_features)
The data
y : Ignored.
Returns
-------
ll : float
4 changes: 4 additions & 0 deletions sklearn/decomposition/fastica_.py
@@ -509,6 +509,8 @@ def fit_transform(self, X, y=None):
Training data, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
Returns
-------
X_new : array-like, shape (n_samples, n_components)
@@ -524,6 +526,8 @@ def fit(self, X, y=None):
Training data, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
Returns
-------
self
4 changes: 3 additions & 1 deletion sklearn/decomposition/incremental_pca.py
@@ -158,7 +158,7 @@ def fit(self, X, y=None):
Training data, where n_samples is the number of samples and
n_features is the number of features.
y : Passthrough for ``Pipeline`` compatibility.
y : Ignored.
Returns
-------
@@ -199,6 +199,8 @@ def partial_fit(self, X, y=None, check_input=True):
check_input : bool
Run check_array on X.
y : Ignored.
Returns
-------
self : object
4 changes: 4 additions & 0 deletions sklearn/decomposition/nmf.py
@@ -1211,6 +1211,8 @@ def fit_transform(self, X, y=None, W=None, H=None):
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Data matrix to be decomposed
y : Ignored.
W : array-like, shape (n_samples, n_components)
If init='custom', it is used as initial guess for the solution.
@@ -1249,6 +1251,8 @@ def fit(self, X, y=None, **params):
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Data matrix to be decomposed
y : Ignored.
Returns
-------
self
6 changes: 6 additions & 0 deletions sklearn/decomposition/online_lda.py
@@ -473,6 +473,8 @@ def partial_fit(self, X, y=None):
X : array-like or sparse matrix, shape=(n_samples, n_features)
Document word matrix.
y : Ignored.
Returns
-------
self
@@ -515,6 +517,8 @@ def fit(self, X, y=None):
X : array-like or sparse matrix, shape=(n_samples, n_features)
Document word matrix.
y : Ignored.
Returns
-------
self
@@ -714,6 +718,8 @@ def score(self, X, y=None):
X : array-like or sparse matrix, shape=(n_samples, n_features)
Document word matrix.
y : Ignored.
Returns
-------
score : float
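Most of the remaining hunks add a ``y : Ignored.`` line to the docstrings of
transformer ``fit``, ``partial_fit`` and ``score`` methods. A hedged sketch of
why the unused argument stays in the signature, using synthetic data and an
arbitrary choice of estimators:

import numpy as np

from sklearn.decomposition import NMF
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

rng = np.random.RandomState(0)
X = np.abs(rng.randn(40, 8))      # NMF requires non-negative input
y = rng.randint(0, 2, size=40)

# Pipeline.fit forwards y to every step, so unsupervised transformers such as
# NMF accept y purely for signature compatibility and simply ignore it.
pipe = make_pipeline(NMF(n_components=3, random_state=0), LogisticRegression())
pipe.fit(X, y)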
