From 75b044867230946b29b1b13ff8e82543f183dbd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 31 Aug 2017 01:05:48 +0200 Subject: [PATCH 01/10] DOC improve check_memory related docstrings (#9649) --- sklearn/cluster/hierarchical.py | 4 ++-- sklearn/linear_model/randomized_l1.py | 4 ++-- sklearn/pipeline.py | 4 ++-- sklearn/utils/validation.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 3a61b4f8770e4..966ed5e2cc121 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -609,7 +609,7 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin): "manhattan", "cosine", or 'precomputed'. If linkage is "ward", only "euclidean" is accepted. - memory : joblib.Memory-like or string, optional + memory : None, str or object with the joblib.Memory interface, optional Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory. @@ -769,7 +769,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform): "manhattan", "cosine", or 'precomputed'. If linkage is "ward", only "euclidean" is accepted. - memory : joblib.Memory-like or string, optional + memory : None, str or object with the joblib.Memory interface, optional Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory. diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index 8f3692dc8675b..1b8cb567b661a 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -278,7 +278,7 @@ class RandomizedLasso(BaseRandomizedLinearModel): - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' - memory : Instance of sklearn.externals.joblib.Memory or string, optional \ + memory : None, str or object with the joblib.Memory interface, optional \ (default=None) Used for internal caching. By default, no caching is done. If a string is given, it is the path to the caching directory. @@ -472,7 +472,7 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' - memory : Instance of sklearn.externals.joblib.Memory or string, optional \ + memory : None, str or object with the joblib.Memory interface, optional \ (default=None) Used for internal caching. By default, no caching is done. If a string is given, it is the path to the caching directory. diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 43a3b09e42e44..1c22210cbfb22 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -52,7 +52,7 @@ class Pipeline(_BaseComposition): chained, in the order in which they are chained, with the last object an estimator. - memory : joblib.Memory-like or string, optional + memory : None, str or object with the joblib.Memory interface, optional Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of @@ -528,7 +528,7 @@ def make_pipeline(*steps, **kwargs): ---------- *steps : list of estimators, - memory : joblib.Memory-like or string, optional + memory : None, str or object with the joblib.Memory interface, optional Used to cache the fitted transformers of the pipeline. 
By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 7f89bfc89f9da..5847b540d7b6c 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -165,7 +165,7 @@ def check_memory(memory): Parameters ---------- - memory : joblib.Memory-like or string or None + memory : None, str or object with the joblib.Memory interface Returns ------- From 4889a67942713777e0e250eda9a3e019d84d1950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 31 Aug 2017 10:28:09 +0200 Subject: [PATCH 02/10] MAINT remove unused imports --- sklearn/linear_model/tests/test_ransac.py | 2 -- sklearn/pipeline.py | 2 +- sklearn/tests/test_multioutput.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 41255f0c45fa4..7146ed1a129b2 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -1,5 +1,3 @@ -from scipy import sparse - import numpy as np from scipy import sparse diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 1c22210cbfb22..66da9dffeb066 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -15,7 +15,7 @@ from scipy import sparse from .base import clone, TransformerMixin -from .externals.joblib import Parallel, delayed, Memory +from .externals.joblib import Parallel, delayed from .externals import six from .utils.metaestimators import if_delegate_has_method from .utils import Bunch diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index da8be05f29f75..26981d20fc633 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -15,7 +15,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn import datasets from sklearn.base import clone -from sklearn.datasets import fetch_mldata from sklearn.datasets import make_classification from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier from sklearn.exceptions import NotFittedError From d6a42354145c92cf88093cbcc70b13f639319c38 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 1 Sep 2017 07:11:00 +0100 Subject: [PATCH 03/10] DOC fix a glitch in pca docstring (#9664) --- sklearn/decomposition/pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index c0f1eb77b5f56..171774321cec0 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -220,7 +220,7 @@ class PCA(_BasePCA): mean_ : array, shape (n_features,) Per-feature empirical mean, estimated from the training set. - Equal to `X.mean(axis=1)`. + Equal to `X.mean(axis=0)`. n_components_ : int The estimated number of components. When n_components is set From 9b5561148f56a3934da9882a52f1978d7aa5bc75 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 1 Sep 2017 04:29:05 -0400 Subject: [PATCH 04/10] [MRG] Figure improvements (#9648) * Example plots render poorly in dev * flake8 + bias_variance * title padding * misc ensemble variance plotting don't use rcParams to set size of a single figure, put legend outside of plot * semisupervised plotting fixes use explicit kwargs in subplots_adjust, change hspace, don't change aspect ratio of imshow. 
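As a minimal sketch of the legend-placement pattern these fixes adopt (the figsize, legend coordinates, and curve are taken from the plot_bias_variance changes below; everything else is an illustrative stand-in):

    import matplotlib.pyplot as plt
    import numpy as np

    x = np.linspace(-5, 5, 100)
    # Size this one figure explicitly instead of mutating rcParams.
    plt.figure(figsize=(10, 8))
    plt.plot(x, np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2), label="$f(x)$")
    # Anchor the legend outside the axes so it cannot cover the data,
    # then shrink the plotting area to leave room for it on the right.
    plt.legend(loc=(1.1, .5))
    plt.subplots_adjust(right=.75)
    plt.show()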
--- examples/ensemble/plot_bias_variance.py | 15 +++++++++++---- ...ot_label_propagation_digits_active_learning.py | 10 ++++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py index 8d88f99df1668..0f0a2478472c3 100644 --- a/examples/ensemble/plot_bias_variance.py +++ b/examples/ensemble/plot_bias_variance.py @@ -88,12 +88,14 @@ n_estimators = len(estimators) + # Generate data def f(x): x = x.ravel() return np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2) + def generate(n_samples, noise, n_repeat=1): X = np.random.rand(n_samples) * 10 - 5 X = np.sort(X) @@ -110,6 +112,7 @@ def generate(n_samples, noise, n_repeat=1): return X, y + X_train = [] y_train = [] @@ -120,6 +123,8 @@ def generate(n_samples, noise, n_repeat=1): X_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat) +plt.figure(figsize=(10, 8)) + # Loop over estimators to compare for n, (name, estimator) in enumerate(estimators): # Compute predictions @@ -166,8 +171,8 @@ def generate(n_samples, noise, n_repeat=1): plt.xlim([-5, 5]) plt.title(name) - if n == 0: - plt.legend(loc="upper left", prop={"size": 11}) + if n == n_estimators - 1: + plt.legend(loc=(1.1, .5)) plt.subplot(2, n_estimators, n_estimators + n + 1) plt.plot(X_test, y_error, "r", label="$error(x)$") @@ -178,7 +183,9 @@ def generate(n_samples, noise, n_repeat=1): plt.xlim([-5, 5]) plt.ylim([0, 0.1]) - if n == 0: - plt.legend(loc="upper left", prop={"size": 11}) + if n == n_estimators - 1: + + plt.legend(loc=(1.1, .5)) +plt.subplots_adjust(right=.75) plt.show() diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py index 5c8543937beba..f46b7ece7cd78 100644 --- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py +++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py @@ -65,7 +65,8 @@ print("Iteration %i %s" % (i, 70 * "_")) print("Label Spreading model: %d labeled & %d unlabeled (%d total)" - % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples)) + % (n_labeled_points, n_total_samples - n_labeled_points, + n_total_samples)) print(classification_report(true_labels, predicted_labels)) @@ -95,7 +96,7 @@ # for more than 5 iterations, visualize the gain only on the first 5 if i < 5: sub = f.add_subplot(5, 5, index + 1 + (5 * i)) - sub.imshow(image, cmap=plt.cm.gray_r) + sub.imshow(image, cmap=plt.cm.gray_r, interpolation='none') sub.set_title("predict: %i\ntrue: %i" % ( lp_model.transduction_[image_index], y[image_index]), size=10) sub.axis('off') @@ -108,6 +109,7 @@ n_labeled_points += len(uncertainty_index) f.suptitle("Active learning with Label Propagation.\nRows show 5 most " - "uncertain labels to learn with the next model.") -plt.subplots_adjust(0.12, 0.03, 0.9, 0.8, 0.2, 0.45) + "uncertain labels to learn with the next model.", y=1.15) +plt.subplots_adjust(left=0.2, bottom=0.03, right=0.9, top=0.9, wspace=0.2, + hspace=0.85) plt.show() From b1b77ffbf310a83123e643f831bde8de25ba53c3 Mon Sep 17 00:00:00 2001 From: pasbi Date: Fri, 1 Sep 2017 11:10:01 +0200 Subject: [PATCH 05/10] Improve y parameter documentation for transformers (#9578) --- sklearn/decomposition/dict_learning.py | 12 +++++++++--- sklearn/decomposition/factor_analysis.py | 4 ++++ sklearn/decomposition/fastica_.py | 4 ++++ sklearn/decomposition/incremental_pca.py | 4 +++- sklearn/decomposition/nmf.py | 4 ++++ 
sklearn/decomposition/online_lda.py | 6 ++++++ sklearn/decomposition/pca.py | 10 ++++++++++ sklearn/decomposition/sparse_pca.py | 4 ++++ sklearn/decomposition/truncated_svd.py | 4 ++++ sklearn/manifold/isomap.py | 4 ++++ sklearn/manifold/locally_linear.py | 4 ++++ sklearn/manifold/mds.py | 4 ++++ sklearn/manifold/spectral_embedding_.py | 6 ++++++ sklearn/manifold/t_sne.py | 4 ++++ 14 files changed, 70 insertions(+), 4 deletions(-) diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 62cd2cd2aa101..7510efe508202 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -927,9 +927,9 @@ def fit(self, X, y=None): Parameters ---------- - X : array-like, shape (n_samples, n_features) - Training vector, where n_samples in the number of samples - and n_features is the number of features. + X : Ignored. + + y : Ignored. Returns ------- @@ -1081,6 +1081,8 @@ def fit(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- self : object @@ -1251,6 +1253,8 @@ def fit(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- self : object @@ -1284,6 +1288,8 @@ def partial_fit(self, X, y=None, iter_offset=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored. + iter_offset : integer, optional The number of iteration on data batches that has been performed before this call to partial_fit. This is optional: diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py index 4440ee90bd84a..1619d8e4da639 100644 --- a/sklearn/decomposition/factor_analysis.py +++ b/sklearn/decomposition/factor_analysis.py @@ -149,6 +149,8 @@ def fit(self, X, y=None): X : array-like, shape (n_samples, n_features) Training data. + y : Ignored. + Returns ------- self @@ -338,6 +340,8 @@ def score(self, X, y=None): X : array, shape (n_samples, n_features) The data + y : Ignored. + Returns ------- ll : float diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index fcc11ff643a5e..4af514bc327b2 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -509,6 +509,8 @@ def fit_transform(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- X_new : array-like, shape (n_samples, n_components) @@ -524,6 +526,8 @@ def fit(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- self diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py index f0604001fab53..45828513bf95f 100644 --- a/sklearn/decomposition/incremental_pca.py +++ b/sklearn/decomposition/incremental_pca.py @@ -158,7 +158,7 @@ def fit(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. - y : Passthrough for ``Pipeline`` compatibility. + y : Ignored. Returns ------- @@ -199,6 +199,8 @@ def partial_fit(self, X, y=None, check_input=True): check_input : bool Run check_array on X. + y : Ignored. 
+ Returns ------- self : object diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py index 153731cb83651..a8a744d7ff5e1 100644 --- a/sklearn/decomposition/nmf.py +++ b/sklearn/decomposition/nmf.py @@ -1211,6 +1211,8 @@ def fit_transform(self, X, y=None, W=None, H=None): X : {array-like, sparse matrix}, shape (n_samples, n_features) Data matrix to be decomposed + y : Ignored. + W : array-like, shape (n_samples, n_components) If init='custom', it is used as initial guess for the solution. @@ -1249,6 +1251,8 @@ def fit(self, X, y=None, **params): X : {array-like, sparse matrix}, shape (n_samples, n_features) Data matrix to be decomposed + y : Ignored. + Returns ------- self diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index e9743c69422fb..84293145a1c61 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -473,6 +473,8 @@ def partial_fit(self, X, y=None): X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. + y : Ignored. + Returns ------- self @@ -515,6 +517,8 @@ def fit(self, X, y=None): X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. + y : Ignored. + Returns ------- self @@ -714,6 +718,8 @@ def score(self, X, y=None): X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. + y : Ignored. + Returns ------- score : float diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index 171774321cec0..bf167e4ae1b3c 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -319,6 +319,8 @@ def fit(self, X, y=None): Training data, where n_samples in the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- self : object @@ -336,6 +338,8 @@ def fit_transform(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- X_new : array-like, shape (n_samples, n_components) @@ -550,6 +554,8 @@ def score(self, X, y=None): X : array, shape(n_samples, n_features) The data. + y : Ignored. + Returns ------- ll : float @@ -676,6 +682,8 @@ def fit(self, X, y=None): Training data, where n_samples in the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- self : object @@ -762,6 +770,8 @@ def fit_transform(self, X, y=None): New data, where n_samples in the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- X_new : array-like, shape (n_samples, n_components) diff --git a/sklearn/decomposition/sparse_pca.py b/sklearn/decomposition/sparse_pca.py index 47c03a80278b9..e0bd0debd04b5 100644 --- a/sklearn/decomposition/sparse_pca.py +++ b/sklearn/decomposition/sparse_pca.py @@ -107,6 +107,8 @@ def fit(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored. + Returns ------- self : object @@ -275,6 +277,8 @@ def fit(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored. 
+ Returns ------- self : object diff --git a/sklearn/decomposition/truncated_svd.py b/sklearn/decomposition/truncated_svd.py index 87b8b45e1543a..14925db8e6e0e 100644 --- a/sklearn/decomposition/truncated_svd.py +++ b/sklearn/decomposition/truncated_svd.py @@ -132,6 +132,8 @@ def fit(self, X, y=None): X : {array-like, sparse matrix}, shape (n_samples, n_features) Training data. + y : Ignored. + Returns ------- self : object @@ -148,6 +150,8 @@ def fit_transform(self, X, y=None): X : {array-like, sparse matrix}, shape (n_samples, n_features) Training data. + y : Ignored. + Returns ------- X_new : array, shape (n_samples, n_components) diff --git a/sklearn/manifold/isomap.py b/sklearn/manifold/isomap.py index 1f6d0ae0dc0b1..6de1bfe7cdfb9 100644 --- a/sklearn/manifold/isomap.py +++ b/sklearn/manifold/isomap.py @@ -157,6 +157,8 @@ def fit(self, X, y=None): numpy array, precomputed tree, or NearestNeighbors object. + y: Ignored. + Returns ------- self : returns an instance of self. @@ -173,6 +175,8 @@ def fit_transform(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y: Ignored. + Returns ------- X_new : array-like, shape (n_samples, n_components) diff --git a/sklearn/manifold/locally_linear.py b/sklearn/manifold/locally_linear.py index e8705cff359a6..0cfeb04889907 100644 --- a/sklearn/manifold/locally_linear.py +++ b/sklearn/manifold/locally_linear.py @@ -652,6 +652,8 @@ def fit(self, X, y=None): X : array-like of shape [n_samples, n_features] training set. + y: Ignored. + Returns ------- self : returns an instance of self. @@ -667,6 +669,8 @@ def fit_transform(self, X, y=None): X : array-like of shape [n_samples, n_features] training set. + y: Ignored. + Returns ------- X_new : array-like, shape (n_samples, n_components) diff --git a/sklearn/manifold/mds.py b/sklearn/manifold/mds.py index 5f7327ef4dc84..c21a58689e8bc 100644 --- a/sklearn/manifold/mds.py +++ b/sklearn/manifold/mds.py @@ -379,6 +379,8 @@ def fit(self, X, y=None, init=None): Input data. If ``dissimilarity=='precomputed'``, the input should be the dissimilarity matrix. + y: Ignored. + init : ndarray, shape (n_samples,), optional, default: None Starting configuration of the embedding to initialize the SMACOF algorithm. By default, the algorithm is initialized with a randomly @@ -397,6 +399,8 @@ def fit_transform(self, X, y=None, init=None): Input data. If ``dissimilarity=='precomputed'``, the input should be the dissimilarity matrix. + y: Ignored. + init : ndarray, shape (n_samples,), optional, default: None Starting configuration of the embedding to initialize the SMACOF algorithm. By default, the algorithm is initialized with a randomly diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index a330b7da7f856..7b64870aa4906 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -428,6 +428,8 @@ def _get_affinity_matrix(self, X, Y=None): Interpret X as precomputed adjacency graph computed from samples. + Y: Ignored. + Returns ------- affinity_matrix, shape (n_samples, n_samples) @@ -474,6 +476,8 @@ def fit(self, X, y=None): Interpret X as precomputed adjacency graph computed from samples. + Y: Ignored. + Returns ------- self : object @@ -514,6 +518,8 @@ def fit_transform(self, X, y=None): Interpret X as precomputed adjacency graph computed from samples. + Y: Ignored. 
+ Returns ------- X_new : array-like, shape (n_samples, n_components) diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py index 163e8340f7b29..83c0b363fb5a7 100644 --- a/sklearn/manifold/t_sne.py +++ b/sklearn/manifold/t_sne.py @@ -851,6 +851,8 @@ def fit_transform(self, X, y=None): If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row. + y : Ignored. + Returns ------- X_new : array, shape (n_samples, n_components) @@ -870,6 +872,8 @@ def fit(self, X, y=None): matrix. Otherwise it contains a sample per row. If the method is 'exact', X may be a sparse matrix of type 'csr', 'csc' or 'coo'. + + y : Ignored. """ self.fit_transform(X) return self From ecc96be8c5e831fd0a12f3274ed4a31dabcbffe6 Mon Sep 17 00:00:00 2001 From: Pravar D Mahajan Date: Fri, 1 Sep 2017 05:26:19 -0400 Subject: [PATCH 06/10] [MRG] Raise exception on providing complex data to estimators (#9551) * Modifies model_selection.cross_validate docstring (#9534) - Fixes rendering of docstring examples - Instead of importing cross_val_score in example, cross_validate is imported * raise error on complex data input to estimators * Raise exception on providing complex data to estimators * adding checks to check_estimator for complex data * removing some unnecessary parts * autopep8 changes * removing ipdb, restoring some autopep8 fixes * removing ipdb, restoring some autopep8 fixes * adding documentation for complex data handling * adding one line explanation for each test case --- sklearn/utils/estimator_checks.py | 11 +++++++ sklearn/utils/tests/test_validation.py | 40 ++++++++++++++++++++++++++ sklearn/utils/validation.py | 28 +++++++++++++++++- 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 81f0d88e3f02b..3e7cb198a9d12 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -76,6 +76,7 @@ def _yield_non_meta_checks(name, estimator): yield check_sample_weights_pandas_series yield check_sample_weights_list yield check_estimators_fit_returns_self + yield check_complex_data # Check that all estimator yield informative messages when # trained on empty datasets @@ -458,6 +459,16 @@ def check_dtype_object(name, estimator_orig): assert_raises_regex(TypeError, msg, estimator.fit, X, y) +def check_complex_data(name, estimator_orig): + # check that estimators raise an exception on providing complex data + X = np.random.sample(10) + 1j * np.random.sample(10) + X = X.reshape(-1, 1) + y = np.random.sample(10) + 1j * np.random.sample(10) + estimator = clone(estimator_orig) + assert_raises_regex(ValueError, "Complex data not supported", + estimator.fit, X, y) + + @ignore_warnings def check_dict_unchanged(name, estimator_orig): # this estimator raises diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 6bebad884d835..dcfaa81178b79 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -437,6 +437,46 @@ def test_check_array_min_samples_and_features_messages(): assert_array_equal(y, y_checked) +def test_check_array_complex_data_error(): + # np array + X = np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]) + assert_raises_regexp( + ValueError, "Complex data not supported", check_array, X) + + # list of lists + X = [[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]] + assert_raises_regexp( + ValueError, "Complex data not supported", check_array, X) + + # tuple 
of tuples
+    X = ((1 + 2j, 3 + 4j, 5 + 7j), (2 + 3j, 4 + 5j, 6 + 7j))
+    assert_raises_regexp(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # list of np arrays
+    X = [np.array([1 + 2j, 3 + 4j, 5 + 7j]),
+         np.array([2 + 3j, 4 + 5j, 6 + 7j])]
+    assert_raises_regexp(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # tuple of np arrays
+    X = (np.array([1 + 2j, 3 + 4j, 5 + 7j]),
+         np.array([2 + 3j, 4 + 5j, 6 + 7j]))
+    assert_raises_regexp(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # dataframe
+    X = MockDataFrame(
+        np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]))
+    assert_raises_regexp(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # sparse matrix
+    X = sp.coo_matrix([[0, 1 + 2j], [0, 0]])
+    assert_raises_regexp(
+        ValueError, "Complex data not supported", check_array, X)
+
+
 def test_has_fit_parameter():
     assert_false(has_fit_parameter(KNeighborsClassifier, "sample_weight"))
     assert_true(has_fit_parameter(RandomForestRegressor, "sample_weight"))
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 5847b540d7b6c..080c30fcf9b2c 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -13,6 +13,7 @@
 
 import numpy as np
 import scipy.sparse as sp
+from numpy.core.numeric import ComplexWarning
 
 from ..externals import six
 from ..utils.fixes import signature
@@ -307,6 +308,13 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy,
     return spmatrix
 
 
+def _ensure_no_complex_data(array):
+    if hasattr(array, 'dtype') and array.dtype is not None \
+            and hasattr(array.dtype, 'kind') and array.dtype.kind == "c":
+        raise ValueError("Complex data not supported\n"
+                         "{}\n".format(array))
+
+
 def check_array(array, accept_sparse=False, dtype="numeric", order=None,
                 copy=False, force_all_finite=True, ensure_2d=True,
                 allow_nd=False, ensure_min_samples=1, ensure_min_features=1,
@@ -427,10 +435,28 @@ def check_array(array, accept_sparse=False, dtype="numeric", order=None,
     context = " by %s" % estimator_name if estimator is not None else ""
 
     if sp.issparse(array):
+        _ensure_no_complex_data(array)
         array = _ensure_sparse_format(array, accept_sparse, dtype, copy,
                                       force_all_finite)
     else:
-        array = np.array(array, dtype=dtype, order=order, copy=copy)
+        # If np.array(..) gives a ComplexWarning, then we convert the
+        # warning to an error. This is needed because specifying a
+        # non-complex dtype to the function converts complex to real dtype,
+        # thereby passing the test made in the lines following the scope
+        # of the warnings context manager.
+        with warnings.catch_warnings():
+            try:
+                warnings.simplefilter('error', ComplexWarning)
+                array = np.array(array, dtype=dtype, order=order, copy=copy)
+            except ComplexWarning:
+                raise ValueError("Complex data not supported\n"
+                                 "{}\n".format(array))
+
+        # It is possible that np.array(..) gave no warning. This happens
+        # when no dtype conversion happened, for example when dtype=None.
+        # The result is that np.array(..) produces an array of complex dtype
+        # and we need to catch and raise an exception for such cases.
+ _ensure_no_complex_data(array) if ensure_2d: if array.ndim == 1: From 846313b57009dd9f8340f5f7004bc2bf1119b709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 1 Sep 2017 12:53:59 +0200 Subject: [PATCH 07/10] [MRG+1] Deprecate sklearn.utils.testing.raises and remove it from tests (#9660) --- sklearn/datasets/tests/test_lfw.py | 18 +++-- .../datasets/tests/test_svmlight_format.py | 20 ++---- .../tests/test_gaussian_process.py | 5 +- sklearn/linear_model/tests/test_logistic.py | 5 +- sklearn/linear_model/tests/test_sgd.py | 67 ++++++++----------- sklearn/linear_model/tests/test_theil_sen.py | 18 ++--- sklearn/svm/tests/test_bounds.py | 8 +-- sklearn/tree/tests/test_tree.py | 4 +- sklearn/utils/testing.py | 11 ++- 9 files changed, 69 insertions(+), 87 deletions(-) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 3e5875a060be1..ac6395c4958be 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -28,7 +28,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_equal from sklearn.utils.testing import SkipTest -from sklearn.utils.testing import raises +from sklearn.utils.testing import assert_raises SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_") @@ -110,10 +110,9 @@ def teardown_module(): shutil.rmtree(SCIKIT_LEARN_EMPTY_DATA) -@raises(IOError) def test_load_empty_lfw_people(): - fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA, - download_if_missing=False) + assert_raises(IOError, fetch_lfw_people, data_home=SCIKIT_LEARN_EMPTY_DATA, + download_if_missing=False) def test_load_fake_lfw_people(): @@ -148,16 +147,15 @@ def test_load_fake_lfw_people(): 'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez']) -@raises(ValueError) def test_load_fake_lfw_people_too_restrictive(): - fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100, - download_if_missing=False) + assert_raises(ValueError, fetch_lfw_people, data_home=SCIKIT_LEARN_DATA, + min_faces_per_person=100, download_if_missing=False) -@raises(IOError) def test_load_empty_lfw_pairs(): - fetch_lfw_pairs(data_home=SCIKIT_LEARN_EMPTY_DATA, - download_if_missing=False) + assert_raises(IOError, fetch_lfw_pairs, + data_home=SCIKIT_LEARN_EMPTY_DATA, + download_if_missing=False) def test_load_fake_lfw_pairs(): diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index d688dc798237b..2e3b7982476b0 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -15,7 +15,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import raises from sklearn.utils.testing import assert_in from sklearn.utils.fixes import sp_version @@ -138,20 +137,17 @@ def test_load_compressed(): assert_array_equal(y, ybz) -@raises(ValueError) def test_load_invalid_file(): - load_svmlight_file(invalidfile) + assert_raises(ValueError, load_svmlight_file, invalidfile) -@raises(ValueError) def test_load_invalid_order_file(): - load_svmlight_file(invalidfile2) + assert_raises(ValueError, load_svmlight_file, invalidfile2) -@raises(ValueError) def test_load_zero_based(): f = BytesIO(b("-1 4:1.\n1 0:1\n")) - load_svmlight_file(f, zero_based=False) + assert_raises(ValueError, load_svmlight_file, f, zero_based=False) def test_load_zero_based_auto(): @@ 
-186,21 +182,19 @@ def test_load_with_qid(): assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]]) -@raises(ValueError) def test_load_invalid_file2(): - load_svmlight_files([datafile, invalidfile, datafile]) + assert_raises(ValueError, load_svmlight_files, + [datafile, invalidfile, datafile]) -@raises(TypeError) def test_not_a_filename(): # in python 3 integers are valid file opening arguments (taken as unix # file descriptors) - load_svmlight_file(.42) + assert_raises(TypeError, load_svmlight_file, .42) -@raises(IOError) def test_invalid_filename(): - load_svmlight_file("trou pic nic douille") + assert_raises(IOError, load_svmlight_file, "trou pic nic douille") def test_dump(): diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py index 860e3f290f3ea..37d872fc99fb5 100644 --- a/sklearn/gaussian_process/tests/test_gaussian_process.py +++ b/sklearn/gaussian_process/tests/test_gaussian_process.py @@ -11,7 +11,7 @@ from sklearn.gaussian_process import regression_models as regression from sklearn.gaussian_process import correlation_models as correlation from sklearn.datasets import make_regression -from sklearn.utils.testing import assert_greater, assert_true, raises +from sklearn.utils.testing import assert_greater, assert_true, assert_raises f = lambda x: x * np.sin(x) @@ -95,10 +95,9 @@ def test_2d_2d(regr=regression.constant, corr=correlation.squared_exponential, assert_true(np.allclose(y_pred, y) and np.allclose(MSE, 0.)) -@raises(ValueError) def test_wrong_number_of_outputs(): gp = GaussianProcess() - gp.fit([[1, 2, 3], [4, 5, 6]], [1, 2, 3]) + assert_raises(ValueError, gp.fit, [[1, 2, 3], [4, 5, 6]], [1, 2, 3]) def test_more_builtin_correlation_models(random_start=1): diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 94eb3ea3d2dcb..ea4300df01100 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -17,7 +17,6 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import raises from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model.logistic import ( @@ -249,13 +248,13 @@ def test_write_parameters(): assert_array_almost_equal(clf.decision_function(X), 0) -@raises(ValueError) def test_nan(): # Test proper NaN handling. # Regression test for Issue #252: fit used to go into an infinite loop. 
Xnan = np.array(X, dtype=np.float64) Xnan[0, 1] = np.nan - LogisticRegression(random_state=0).fit(Xnan, Y1) + logistic = LogisticRegression(random_state=0) + assert_raises(ValueError, logistic.fit, Xnan, Y1) def test_consistency_path(): diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index f033a4f6021b2..d4552a9934cf1 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -9,7 +9,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_less -from sklearn.utils.testing import raises from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_false, assert_true from sklearn.utils.testing import assert_equal @@ -266,11 +265,11 @@ def test_late_onset_averaging_reached(self): decimal=16) assert_almost_equal(clf1.intercept_, average_intercept, decimal=16) - @raises(ValueError) def test_sgd_bad_alpha_for_optimal_learning_rate(self): # Check whether expected ValueError on bad alpha, i.e. 0 # since alpha is used to compute the optimal learning rate - self.factory(alpha=0, learning_rate="optimal") + assert_raises(ValueError, self.factory, + alpha=0, learning_rate="optimal") class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest): @@ -287,63 +286,56 @@ def test_sgd(self): # assert_almost_equal(clf.coef_[0], clf.coef_[1], decimal=7) assert_array_equal(clf.predict(T), true_result) - @raises(ValueError) def test_sgd_bad_l1_ratio(self): # Check whether expected ValueError on bad l1_ratio - self.factory(l1_ratio=1.1) + assert_raises(ValueError, self.factory, l1_ratio=1.1) - @raises(ValueError) def test_sgd_bad_learning_rate_schedule(self): # Check whether expected ValueError on bad learning_rate - self.factory(learning_rate="") + assert_raises(ValueError, self.factory, learning_rate="") - @raises(ValueError) def test_sgd_bad_eta0(self): # Check whether expected ValueError on bad eta0 - self.factory(eta0=0, learning_rate="constant") + assert_raises(ValueError, self.factory, eta0=0, + learning_rate="constant") - @raises(ValueError) def test_sgd_bad_alpha(self): # Check whether expected ValueError on bad alpha - self.factory(alpha=-.1) + assert_raises(ValueError, self.factory, alpha=-.1) - @raises(ValueError) def test_sgd_bad_penalty(self): # Check whether expected ValueError on bad penalty - self.factory(penalty='foobar', l1_ratio=0.85) + assert_raises(ValueError, self.factory, penalty='foobar', + l1_ratio=0.85) - @raises(ValueError) def test_sgd_bad_loss(self): # Check whether expected ValueError on bad loss - self.factory(loss="foobar") + assert_raises(ValueError, self.factory, loss="foobar") - @raises(ValueError) def test_sgd_max_iter_param(self): # Test parameter validity check - self.factory(max_iter=-10000) + assert_raises(ValueError, self.factory, max_iter=-10000) - @raises(ValueError) def test_sgd_shuffle_param(self): # Test parameter validity check - self.factory(shuffle="false") + assert_raises(ValueError, self.factory, shuffle="false") - @raises(TypeError) def test_argument_coef(self): # Checks coef_init not allowed as model argument (only fit) - # Provided coef_ does not match dataset. 
- self.factory(coef_init=np.zeros((3,))).fit(X, Y) + # Provided coef_ does not match dataset + assert_raises(TypeError, self.factory, coef_init=np.zeros((3,))) - @raises(ValueError) def test_provide_coef(self): # Checks coef_init shape for the warm starts # Provided coef_ does not match dataset. - self.factory().fit(X, Y, coef_init=np.zeros((3,))) + assert_raises(ValueError, self.factory().fit, + X, Y, coef_init=np.zeros((3,))) - @raises(ValueError) def test_set_intercept(self): # Checks intercept_ shape for the warm starts # Provided intercept_ does not match dataset. - self.factory().fit(X, Y, intercept_init=np.zeros((3,))) + assert_raises(ValueError, self.factory().fit, + X, Y, intercept_init=np.zeros((3,))) def test_set_intercept_binary(self): # Checks intercept_ shape for the warm starts in binary case @@ -386,10 +378,10 @@ def test_set_intercept_to_intercept(self): clf = self.factory().fit(X, Y) self.factory().fit(X, Y, intercept_init=clf.intercept_) - @raises(ValueError) def test_sgd_at_least_two_labels(self): # Target must have at least two labels - self.factory(alpha=0.01, max_iter=20).fit(X2, np.ones(9)) + clf = self.factory(alpha=0.01, max_iter=20) + assert_raises(ValueError, clf.fit, X2, np.ones(9)) def test_partial_fit_weight_class_balanced(self): # partial_fit with class_weight='balanced' not supported""" @@ -607,17 +599,15 @@ def test_equal_class_weight(self): # should be similar up to some epsilon due to learning rate schedule assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2) - @raises(ValueError) def test_wrong_class_weight_label(self): # ValueError due to not existing class label. clf = self.factory(alpha=0.1, max_iter=1000, class_weight={0: 0.5}) - clf.fit(X, Y) + assert_raises(ValueError, clf.fit, X, Y) - @raises(ValueError) def test_wrong_class_weight_format(self): # ValueError due to wrong class_weight argument type. 
clf = self.factory(alpha=0.1, max_iter=1000, class_weight=[0.5]) - clf.fit(X, Y) + assert_raises(ValueError, clf.fit, X, Y) def test_weights_multiplied(self): # Tests that class_weight and sample_weight are multiplicative @@ -700,18 +690,16 @@ def test_sample_weights(self): # the prediction on this point should shift assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1])) - @raises(ValueError) def test_wrong_sample_weights(self): # Test if ValueError is raised if sample_weight has wrong shape clf = self.factory(alpha=0.1, max_iter=1000, fit_intercept=False) # provided sample_weight too long - clf.fit(X, Y, sample_weight=np.arange(7)) + assert_raises(ValueError, clf.fit, X, Y, sample_weight=np.arange(7)) - @raises(ValueError) def test_partial_fit_exception(self): clf = self.factory(alpha=0.01) # classes was not specified - clf.partial_fit(X3, Y3) + assert_raises(ValueError, clf.partial_fit, X3, Y3) def test_partial_fit_binary(self): third = X.shape[0] // 3 @@ -851,15 +839,14 @@ def test_sgd(self): clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2]) assert_equal(clf.coef_[0], clf.coef_[1]) - @raises(ValueError) def test_sgd_bad_penalty(self): # Check whether expected ValueError on bad penalty - self.factory(penalty='foobar', l1_ratio=0.85) + assert_raises(ValueError, self.factory, + penalty='foobar', l1_ratio=0.85) - @raises(ValueError) def test_sgd_bad_loss(self): # Check whether expected ValueError on bad loss - self.factory(loss="foobar") + assert_raises(ValueError, self.factory, loss="foobar") def test_sgd_averaged_computed_correctly(self): # Tests the average regressor matches the naive implementation diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py index 279beb8014e95..3a2b1f9dc006f 100644 --- a/sklearn/linear_model/tests/test_theil_sen.py +++ b/sklearn/linear_model/tests/test_theil_sen.py @@ -20,7 +20,7 @@ from sklearn.linear_model.theil_sen import _spatial_median, _breakdown_point from sklearn.linear_model.theil_sen import _modified_weiszfeld_step from sklearn.utils.testing import ( - assert_almost_equal, assert_greater, assert_less, raises, + assert_almost_equal, assert_greater, assert_less, assert_raises, ) @@ -202,31 +202,31 @@ def test_calc_breakdown_point(): assert_less(np.abs(bp - 1 + 1 / (np.sqrt(2))), 1.e-6) -@raises(ValueError) def test_checksubparams_negative_subpopulation(): X, y, w, c = gen_toy_problem_1d() - TheilSenRegressor(max_subpopulation=-1, random_state=0).fit(X, y) + theil_sen = TheilSenRegressor(max_subpopulation=-1, random_state=0) + assert_raises(ValueError, theil_sen.fit, X, y) -@raises(ValueError) def test_checksubparams_too_few_subsamples(): X, y, w, c = gen_toy_problem_1d() - TheilSenRegressor(n_subsamples=1, random_state=0).fit(X, y) + theil_sen = TheilSenRegressor(n_subsamples=1, random_state=0) + assert_raises(ValueError, theil_sen.fit, X, y) -@raises(ValueError) def test_checksubparams_too_many_subsamples(): X, y, w, c = gen_toy_problem_1d() - TheilSenRegressor(n_subsamples=101, random_state=0).fit(X, y) + theil_sen = TheilSenRegressor(n_subsamples=101, random_state=0) + assert_raises(ValueError, theil_sen.fit, X, y) -@raises(ValueError) def test_checksubparams_n_subsamples_if_less_samples_than_features(): random_state = np.random.RandomState(0) n_samples, n_features = 10, 20 X = random_state.normal(size=(n_samples, n_features)) y = random_state.normal(size=n_samples) - TheilSenRegressor(n_subsamples=9, random_state=0).fit(X, y) + theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0) + 
assert_raises(ValueError, theil_sen.fit, X, y) def test_subpopulation(): diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py index 583c413bc5c11..e46dbb92df44a 100644 --- a/sklearn/svm/tests/test_bounds.py +++ b/sklearn/svm/tests/test_bounds.py @@ -5,7 +5,7 @@ from sklearn.svm import LinearSVC from sklearn.linear_model.logistic import LogisticRegression -from sklearn.utils.testing import assert_true, raises +from sklearn.utils.testing import assert_true, assert_raises from sklearn.utils.testing import assert_raise_message @@ -63,13 +63,11 @@ def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=None): (np.asarray(clf.intercept_) != 0).any()) -@raises(ValueError) def test_ill_posed_min_c(): X = [[0, 0], [0, 0]] y = [0, 1] - l1_min_c(X, y) + assert_raises(ValueError, l1_min_c, X, y) -@raises(ValueError) def test_unsupported_loss(): - l1_min_c(dense_X, Y1, 'l1') + assert_raises(ValueError, l1_min_c, dense_X, Y1, 'l1') diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 97eee80ecff71..71ee8fa2bcb61 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -30,7 +30,6 @@ from sklearn.utils.testing import assert_true from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import raises from sklearn.utils.testing import ignore_warnings from sklearn.utils.validation import check_random_state @@ -394,11 +393,10 @@ def test_importances(): clf2.feature_importances_) -@raises(ValueError) def test_importances_raises(): # Check if variable importance before fit raises ValueError. clf = DecisionTreeClassifier() - clf.feature_importances_ + assert_raises(ValueError, getattr, clf, 'feature_importances_') def test_importances_gini_equal_mse(): diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 4e7f7ea3e98a3..c5467f199697f 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -45,8 +45,17 @@ import sklearn from sklearn.base import BaseEstimator from sklearn.externals import joblib +from sklearn.utils import deprecated -from nose.tools import raises +try: + from nose.tools import raises as _nose_raises + deprecation_message = ( + 'sklearn.utils.testing.raises has been deprecated in version 0.20 ' + 'and will be removed in 0.22. Please use ' + 'sklearn.utils.testing.assert_raises instead.') + raises = deprecated(deprecation_message)(_nose_raises) +except ImportError: + pass from nose import with_setup from numpy.testing import assert_almost_equal From 6e01feff14e2016253b5ada96964a1b5b6145128 Mon Sep 17 00:00:00 2001 From: Minghui Liu Date: Fri, 1 Sep 2017 13:13:12 +0200 Subject: [PATCH 08/10] OPTIM make GaussianProcessRegressor faster with return_std=True --- doc/whats_new.rst | 8 ++++++++ sklearn/gaussian_process/gpr.py | 17 ++++++++++++----- sklearn/gaussian_process/tests/test_gpr.py | 22 +++++++++++++++++++++- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 0ca707ce2cbbf..258d6acc11aa8 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -43,6 +43,14 @@ Classifiers and regressors Enhancements ............ +Classifiers and regressors + +- In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict`` + is faster when using ``return_std=True`` in particular more when called + several times in a row. :issue:`9234` by :user:`andrewww ` + and :user:`Minghui Liu `. 
+ + Model evaluation and meta-estimators - A scorer based on :func:`metrics.brier_score_loss` is also available. diff --git a/sklearn/gaussian_process/gpr.py b/sklearn/gaussian_process/gpr.py index 4f9ff9cee7911..c92ca7f68f368 100644 --- a/sklearn/gaussian_process/gpr.py +++ b/sklearn/gaussian_process/gpr.py @@ -245,6 +245,8 @@ def obj_func(theta, eval_gradient=True): K[np.diag_indices_from(K)] += self.alpha try: self.L_ = cholesky(K, lower=True) # Line 2 + # self.L_ changed, self._K_inv needs to be recomputed + self._K_inv = None except np.linalg.LinAlgError as exc: exc.args = ("The kernel, %s, is not returning a " "positive definite matrix. Try gradually " @@ -320,13 +322,18 @@ def predict(self, X, return_std=False, return_cov=False): y_cov = self.kernel_(X) - K_trans.dot(v) # Line 6 return y_mean, y_cov elif return_std: - # compute inverse K_inv of K based on its Cholesky - # decomposition L and its inverse L_inv - L_inv = solve_triangular(self.L_.T, np.eye(self.L_.shape[0])) - K_inv = L_inv.dot(L_inv.T) + # cache result of K_inv computation + if self._K_inv is None: + # compute inverse K_inv of K based on its Cholesky + # decomposition L and its inverse L_inv + L_inv = solve_triangular(self.L_.T, + np.eye(self.L_.shape[0])) + self._K_inv = L_inv.dot(L_inv.T) + # Compute variance of predictive distribution y_var = self.kernel_.diag(X) - y_var -= np.einsum("ij,ij->i", np.dot(K_trans, K_inv), K_trans) + y_var -= np.einsum("ij,ij->i", + np.dot(K_trans, self._K_inv), K_trans) # Check if any of the variances is negative because of # numerical issues. If yes: set the variance to 0. diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index b645a6be18e22..602b2b88ae9c9 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -15,11 +15,13 @@ from sklearn.utils.testing \ import (assert_true, assert_greater, assert_array_less, assert_almost_equal, assert_equal, assert_raise_message, - assert_array_almost_equal) + assert_array_almost_equal, assert_array_equal) def f(x): return x * np.sin(x) + + X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T X2 = np.atleast_2d([2., 4., 5.5, 6.5, 7.5]).T y = f(X).ravel() @@ -344,3 +346,21 @@ def test_no_fit_default_predict(): assert_array_almost_equal(y_std1, y_std2) assert_array_almost_equal(y_cov1, y_cov2) + + +def test_K_inv_reset(): + y2 = f(X2).ravel() + for kernel in kernels: + # Test that self._K_inv is reset after a new fit + gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) + assert_true(hasattr(gpr, '_K_inv')) + assert_true(gpr._K_inv is None) + gpr.predict(X, return_std=True) + assert_true(gpr._K_inv is not None) + gpr.fit(X2, y2) + assert_true(gpr._K_inv is None) + gpr.predict(X2, return_std=True) + gpr2 = GaussianProcessRegressor(kernel=kernel).fit(X2, y2) + gpr2.predict(X2, return_std=True) + # the value of K_inv should be independent of the first fit + assert_array_equal(gpr._K_inv, gpr2._K_inv) From deaa96452a981e3e54dc302fc14cb1c83cb2e399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 1 Sep 2017 14:19:19 +0200 Subject: [PATCH 09/10] Fix test_validation.py --- sklearn/utils/tests/test_validation.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index dcfaa81178b79..37a0eb859f565 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -438,42 +438,41 @@ def 
test_check_array_min_samples_and_features_messages(): def test_check_array_complex_data_error(): - # np array X = np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]) - assert_raises_regexp( + assert_raises_regex( ValueError, "Complex data not supported", check_array, X) # list of lists X = [[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]] - assert_raises_regexp( + assert_raises_regex( ValueError, "Complex data not supported", check_array, X) # tuple of tuples X = ((1 + 2j, 3 + 4j, 5 + 7j), (2 + 3j, 4 + 5j, 6 + 7j)) - assert_raises_regexp( + assert_raises_regex( ValueError, "Complex data not supported", check_array, X) # list of np arrays X = [np.array([1 + 2j, 3 + 4j, 5 + 7j]), np.array([2 + 3j, 4 + 5j, 6 + 7j])] - assert_raises_regexp( + assert_raises_regex( ValueError, "Complex data not supported", check_array, X) # tuple of np arrays X = (np.array([1 + 2j, 3 + 4j, 5 + 7j]), np.array([2 + 3j, 4 + 5j, 6 + 7j])) - assert_raises_regexp( + assert_raises_regex( ValueError, "Complex data not supported", check_array, X) # dataframe X = MockDataFrame( np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]])) - assert_raises_regexp( + assert_raises_regex( ValueError, "Complex data not supported", check_array, X) # sparse matrix X = sp.coo_matrix([[0, 1 + 2j], [0, 0]]) - assert_raises_regexp( + assert_raises_regex( ValueError, "Complex data not supported", check_array, X) From 233a3e53478aaeb4999728d19402580d6302c726 Mon Sep 17 00:00:00 2001 From: RAKOTOARISON Herilalaina Date: Sun, 3 Sep 2017 00:54:35 +0200 Subject: [PATCH 10/10] ENH Add named_estimator_ for votingClassifier (#9168) --- doc/whats_new.rst | 4 ++++ sklearn/ensemble/tests/test_voting_classifier.py | 7 +++++++ sklearn/ensemble/voting_classifier.py | 14 +++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 258d6acc11aa8..88aa6cd7c0404 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -50,6 +50,10 @@ Classifiers and regressors several times in a row. :issue:`9234` by :user:`andrewww ` and :user:`Minghui Liu `. +- Add `named_estimators_` parameter in + :class:`sklearn.ensemble.voting_classifier` to access fitted + estimators. :issue:`9157` by :user:`Herilalaina Rakotoarison `. 
+ Model evaluation and meta-estimators diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index 023be79912d12..22665384ed7ce 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -296,7 +296,14 @@ def test_set_params(): clf3 = GaussianNB() eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft', weights=[1, 2]) + assert_true('lr' in eclf1.named_estimators) + assert_true(eclf1.named_estimators.lr is eclf1.estimators[0][1]) + assert_true(eclf1.named_estimators.lr is eclf1.named_estimators['lr']) eclf1.fit(X, y) + assert_true('lr' in eclf1.named_estimators_) + assert_true(eclf1.named_estimators_.lr is eclf1.estimators_[0]) + assert_true(eclf1.named_estimators_.lr is eclf1.named_estimators_['lr']) + eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft', weights=[1, 2]) eclf2.set_params(nb=clf2).fit(X, y) diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index ad6c0125dd664..26bc8e66df01a 100644 --- a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -21,6 +21,7 @@ from ..externals.joblib import Parallel, delayed from ..utils.validation import has_fit_parameter, check_is_fitted from ..utils.metaestimators import _BaseComposition +from ..utils import Bunch def _parallel_fit_estimator(estimator, X, y, sample_weight=None): @@ -75,6 +76,11 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): The collection of fitted sub-estimators as defined in ``estimators`` that are not `None`. + named_estimators_ : Bunch object, a dictionary with attribute access + Attribute to access any fitted sub-estimators by name. + + .. versionadded:: 0.20 + classes_ : array-like, shape = [n_predictions] The classes labels. @@ -94,6 +100,9 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): >>> eclf1 = eclf1.fit(X, y) >>> print(eclf1.predict(X)) [1 1 1 2 2 2] + >>> np.array_equal(eclf1.named_estimators_.lr.predict(X), + ... eclf1.named_estimators_['lr'].predict(X)) + True >>> eclf2 = VotingClassifier(estimators=[ ... ('lr', clf1), ('rf', clf2), ('gnb', clf3)], ... voting='soft') @@ -122,7 +131,7 @@ def __init__(self, estimators, voting='hard', weights=None, n_jobs=1, @property def named_estimators(self): - return dict(self.estimators) + return Bunch(**dict(self.estimators)) def fit(self, X, y, sample_weight=None): """ Fit the estimators. @@ -188,6 +197,9 @@ def fit(self, X, y, sample_weight=None): sample_weight=sample_weight) for clf in clfs if clf is not None) + self.named_estimators_ = Bunch(**dict()) + for k, e in zip(self.estimators, self.estimators_): + self.named_estimators_[k[0]] = e return self @property
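A quick usage sketch of the new attribute (a hypothetical snippet assuming a build with this patch applied; the synthetic data and choice of sub-estimators are illustrative, mirroring the doctest above):

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB

    X, y = make_classification(random_state=0)
    eclf = VotingClassifier(estimators=[('lr', LogisticRegression()),
                                        ('gnb', GaussianNB())])
    eclf = eclf.fit(X, y)
    # named_estimators_ is a Bunch, so the *fitted* sub-estimators are
    # reachable both by attribute and by key, and line up with estimators_.
    assert eclf.named_estimators_.lr is eclf.named_estimators_['lr']
    assert eclf.named_estimators_.lr is eclf.estimators_[0]
    print(np.array_equal(eclf.named_estimators_.lr.predict(X),
                         eclf.estimators_[0].predict(X)))  # True

Returning a Bunch rather than a plain dict keeps dict-style lookup working while adding attribute access, which is why the test in this patch checks both forms.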