[MRG + 2] Add check to regression models to raise error when targets are NaN #5431

Merged Oct 22, 2015

Commits (19)
92068ef
#5322: Wrote check_supervised_y_no_nan in estimator_checks
hlin117 Oct 17, 2015
b4e746b
Resolving rebase conflicts on forest.py
hlin117 Oct 20, 2015
7745319
#5322: Added check for DecisionTreeRegressor
hlin117 Oct 17, 2015
b1a1aa8
#5322: Resolving errors for SVR and NuSVR
hlin117 Oct 18, 2015
a5860a1
#5322: Fixed build for tree failures, fixing estimator_checks.check_e…
hlin117 Oct 18, 2015
6df1fe2
#5322: Added check for MultiTaskElasticNet and MultiTaskLasso
hlin117 Oct 18, 2015
d62e316
#5322: Resolving Random Forest build errors (bad input shape)
hlin117 Oct 18, 2015
34c8399
#5322: Fixing the fix - now RFs should be okay
hlin117 Oct 18, 2015
7f82026
#5322: Fixing test in test_estimator_checks.test_check_estimator
hlin117 Oct 19, 2015
ef4faf6
#5322: check_supervised_y_no_nan raises error when error message is i…
hlin117 Oct 19, 2015
011dcd2
#5322: Fixed python3 build
hlin117 Oct 19, 2015
c657d77
#5322: Regression test for test_check_estimator
hlin117 Oct 19, 2015
eea5ba3
#5322: Made error message in check_supervised_y_no_nan more helpful
hlin117 Oct 19, 2015
3bfbe79
#5322: Making check_supervised_y_no_nan deterministic
hlin117 Oct 20, 2015
52a6acc
#5322: Addressing @MechCoder's changes
hlin117 Oct 21, 2015
7b3da77
#5322: Removing unused ConvergenceWarning import in svm/base.py
hlin117 Oct 21, 2015
290e0ea
#5322: Changed check_supervised_y_no_nan to not seed global
hlin117 Oct 21, 2015
1f48814
#5322: Using multioutput_estimator_convert_y_2d in check_supervised_y…
hlin117 Oct 21, 2015
f725485
#5322: Small change to estimator_check.py
hlin117 Oct 21, 2015
3 changes: 2 additions & 1 deletion sklearn/ensemble/forest.py
@@ -209,7 +209,8 @@ def fit(self, X, y, sample_weight=None):
Returns self.
"""
# Validate or convert input data
X = check_array(X, dtype=DTYPE, accept_sparse="csc")
X = check_array(X, accept_sparse="csc", dtype=DTYPE)
y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None)
if issparse(X):
# Pre-sort indices to avoid that each individual tree of the
# ensemble sorts the indices.
2 changes: 1 addition & 1 deletion sklearn/linear_model/coordinate_descent.py
@@ -1642,7 +1642,7 @@ def fit(self, X, y):
# X and y must be of type float64
X = check_array(X, dtype=np.float64, order='F',
copy=self.copy_X and self.fit_intercept)
y = np.asarray(y, dtype=np.float64)
y = check_array(y, dtype=np.float64, ensure_2d=False)

if hasattr(self, 'l1_ratio'):
model_str = 'ElasticNet'
7 changes: 4 additions & 3 deletions sklearn/svm/base.py
@@ -10,8 +10,8 @@
from ..base import BaseEstimator, ClassifierMixin
from ..preprocessing import LabelEncoder
from ..multiclass import _ovr_decision_function
from ..utils import check_array, check_random_state, column_or_1d
from ..utils import compute_class_weight, deprecated
from ..utils import check_array, check_random_state, column_or_1d, check_X_y
from ..utils import ConvergenceWarning, compute_class_weight, deprecated
Member: looks like ConvergenceWarning is unused.

Member: this one wasn't addressed, right?

from ..utils.extmath import safe_sparse_dot
from ..utils.validation import check_is_fitted
from ..utils.multiclass import check_classification_targets
@@ -151,7 +151,8 @@ def fit(self, X, y, sample_weight=None):
raise TypeError("Sparse precomputed kernels are not supported.")
self._sparse = sparse and not callable(self.kernel)

X = check_array(X, accept_sparse='csr', dtype=np.float64, order='C')
#X = check_array(X, accept_sparse='csr', dtype=np.float64, order='C')
Member: remove this commented line please

X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
Member: is this needed? looks like self._validate_targets does it anyway.

Member: well actually no, sorry (which makes _validate_targets a strange name).

y = self._validate_targets(y)

sample_weight = np.asarray([]
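For context, a minimal sketch (an illustration, not part of the diff) of the behaviour the check_X_y switch relies on: with the default finiteness validation, NaN targets are rejected with a ValueError before self._validate_targets is ever reached.

import numpy as np
from sklearn.utils import check_X_y

X = np.ones((10, 5))
y = np.full(10, np.nan)  # NaN targets

try:
    check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
except ValueError as exc:
    # Expected (wording may vary slightly between versions):
    # "Input contains NaN, infinity or a value too large for dtype('float64')."
    print(exc)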
3 changes: 2 additions & 1 deletion sklearn/tree/tree.py
@@ -28,7 +28,7 @@
from ..base import RegressorMixin
from ..externals import six
from ..feature_selection.from_model import _LearntSelectorMixin
from ..utils import check_array
from ..utils import check_array, check_X_y
from ..utils import check_random_state
from ..utils import compute_sample_weight
from ..utils.multiclass import check_classification_targets
@@ -151,6 +151,7 @@ def fit(self, X, y, sample_weight=None, check_input=True,
random_state = check_random_state(self.random_state)
if check_input:
X = check_array(X, dtype=DTYPE, accept_sparse="csc")
y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None)
Member: so we don't accept csr?

Contributor Author: I wrote accept_sparse='csc' because the original check for sparse matrices checked for csc matrices. Not sure whether we've actually tested whether DecisionTreeRegressor or DecisionTreeClassifier can take csr matrices for targets.

Member: It should, but it converts them into csc form. The csc format is probably what the DecisionTree code is optimized for.

if issparse(X):
X.sort_indices()

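To illustrate the csr/csc exchange above, a small sketch (not part of the diff): check_array with accept_sparse='csc' does not reject csr input, it converts it, so csr targets would be turned into csc before the tree code sees them.

import numpy as np
from scipy import sparse
from sklearn.utils import check_array

y_csr = sparse.csr_matrix(np.arange(10.0).reshape(-1, 1))
y_checked = check_array(y_csr, accept_sparse='csc', ensure_2d=False, dtype=None)
print(y_checked.format)  # 'csc' -- converted rather than rejected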
30 changes: 28 additions & 2 deletions sklearn/utils/estimator_checks.py
@@ -131,6 +131,30 @@ def _yield_classifier_checks(name, Classifier):
if 'class_weight' in Classifier().get_params().keys():
yield check_class_weight_classifiers

def check_supervised_y_no_nan(name, Estimator):
# Checks that the Estimator targets are not NaN.

warnings.simplefilter("ignore")
Member: Why are there warnings? If it's due to the division by zero, you can replace it with y1 = np.inf * np.ones(10)

Contributor Author: Ahh, smart =] I'll push this change soon.

np.random.seed(888)
Member: nit: Can you replace this with rng = np.random.RandomState(888), just to be consistent with the others.

X = np.random.randn(10, 5)
Member: You should use a local random number generator here, as in rng = np.random.RandomState(888), and not seed the global, as that creates a side effect and is not concurrency safe.

Member: I thought I commented the same thing. Maybe it disappeared in the git diffs.

Contributor Author: @MechCoder: I thought that np.random.seed(888) would provide a sufficient random number generator; I didn't know that it would seed the global, so sorry about that!

I fixed this in 290e0ea.

y1 = np.random.randn(10) / 0.
Member: you should set the seed, I guess

Contributor Author: Should it matter? y1 will end up as an array of infs and -infs anyways.

Member: It will not matter in the absolute sense. But it is better to have deterministic input for every run (both X and y1).

y2 = np.random.randn(10, 2) / 0.

errmsg = "Input contains NaN, infinity or a value too large for " \
"dtype('float64')."
try:
if "MultiTask" in name:
Estimator().fit(X, y2)
Member: You have multioutput_estimator_convert_y_2d to handle this. But I admit the name is too long.

Contributor Author: Ironically, if I hadn't added that if "MultiTask" in name statement, I wouldn't have discovered the bug fixed in a5860a1. Would you still want me to use multioutput_estimator_convert_y_2d?

Member: Whoops. Nice catch.

However, I still think code reuse would be a good idea.

Can you

  1. change if name in ([]) to if "MultiTask" in name
  2. change return y[:, np.newaxis] to return np.reshape(y, (-1, 1)) (using newaxis produces strange strides)

in multioutput_estimator_convert_y_2d?

Contributor Author: Addressed in commit 52a6acc.

Member: Actually, you would have still discovered the bug, I think, as that also checks the name.

Member: So, can you change this to use the existing function? Thanks!

Contributor Author: Check out commit 1f48814. Thanks!

else:
Estimator().fit(X, y1)
except ValueError as e:
if str(e) != errmsg:
raise ValueError("Estimator {0} raised warning as expected, but "
"does not match expected error message" \
.format(name))
else:
raise ValueError("Estimator {0} should have raised error on fitting "
Member: nan in target / y maybe?

Contributor Author: (Addressed in commit 86487ae.)

"array y with NaN value.".format(name))

def _yield_regressor_checks(name, Regressor):
# TODO: test with intercept
@@ -141,6 +165,7 @@ def _yield_regressor_checks(name, Regressor):
yield check_estimators_partial_fit_n_features
yield check_regressors_no_decision_function
yield check_supervised_y_2d
yield check_supervised_y_no_nan
if name != 'CCA':
# check that the regressor handles int input
yield check_regressors_int
@@ -207,10 +232,10 @@ def check_estimator(Estimator):
Parameters
----------
Estimator : class
Class to check.
Class to check. Estimator is a class object (not an instance).

"""
name = Estimator.__class__.__name__
name = Estimator.__name__
check_parameters_default_constructible(name, Estimator)
for check in _yield_all_checks(name, Estimator):
check(name, Estimator)
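A quick illustration of why the __name__ change above is needed (not part of the diff): check_estimator receives the class itself, so __class__.__name__ resolves to the name of the metaclass rather than the estimator's name.

class SomeEstimator(object):
    pass

print(SomeEstimator.__class__.__name__)  # 'type' -- the metaclass, not the estimator name
print(SomeEstimator.__name__)            # 'SomeEstimator'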
@@ -695,6 +720,7 @@ def check_estimators_empty_data_messages(name, Estimator):


def check_estimators_nan_inf(name, Estimator):
# Checks that Estimator X's do not contain NaN or inf.
rnd = np.random.RandomState(0)
X_train_finite = rnd.uniform(size=(10, 3))
X_train_nan = rnd.uniform(size=(10, 3))
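Taken together, the review suggestions above point toward roughly the following shape for the check: a local RandomState instead of seeding the global generator, np.inf * np.ones(...) instead of dividing by zero (so there is no RuntimeWarning to silence), and reuse of the multioutput_estimator_convert_y_2d helper for the MultiTask case. This is a sketch of where the thread is heading, not the merged code; the stand-in helper below only approximates the real one in estimator_checks.py, and its signature is assumed from the discussion.

import numpy as np


def multioutput_estimator_convert_y_2d(name, y):
    # Stand-in for the existing estimator_checks helper discussed above:
    # MultiTask* estimators expect a 2-d y.
    if "MultiTask" in name:
        return np.reshape(y, (-1, 1))
    return y


def check_supervised_y_no_nan(name, Estimator):
    # Check that the estimator refuses to fit when y contains non-finite values.
    rng = np.random.RandomState(888)   # local RNG, no global side effect
    X = rng.randn(10, 5)
    y = np.inf * np.ones(10)           # non-finite targets, no division-by-zero warning
    y = multioutput_estimator_convert_y_2d(name, y)

    errmsg = ("Input contains NaN, infinity or a value too large for "
              "dtype('float64').")
    try:
        Estimator().fit(X, y)
    except ValueError as e:
        if str(e) != errmsg:
            raise ValueError("Estimator {0} raised error as expected, but the "
                             "message does not match".format(name))
    else:
        raise ValueError("Estimator {0} should have raised an error on fitting "
                         "a y array with NaN values.".format(name))

With the tree.py change above, for example, DecisionTreeRegressor would be expected to fail fast in exactly this way.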
5 changes: 4 additions & 1 deletion sklearn/utils/tests/test_estimator_checks.py
@@ -8,6 +8,7 @@
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.estimator_checks import check_estimators_unfitted
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.utils.validation import check_X_y, check_array


@@ -75,7 +76,8 @@ def test_check_estimator():
msg = "Estimator doesn't check for NaN and inf in predict"
assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict)
# check for sparse matrix input handling
Member: sweet

msg = "Estimator type doesn't seem to fail gracefully on sparse data"
name = NoSparseClassifier.__name__
msg = "Estimator " + name + " doesn't seem to fail gracefully on sparse data"
# the check for sparse input handling prints to the stdout,
# instead of raising an error, so as not to remove the original traceback.
# that means we need to jump through some hoops to catch it.
@@ -92,6 +94,7 @@

# doesn't error on actual estimator
check_estimator(AdaBoostClassifier)
check_estimator(MultiTaskElasticNet)


def test_check_estimators_unfitted():