scikit-learn · thomasjpfan · Sep 18, 2019 · Dec 18, 2018 · Dec 18, 2018 · Dec 18, 2018
diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py
@@ -91,7 +91,8 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
     n_clusters_per_class : int, optional (default=2)
         The number of clusters per class.
 
-    weights : list of floats or None (default=None)
+    weights : array-like of shape (n_classes,) or (n_classes - 1,),\
+              (default=None)
         The proportions of samples assigned to each class. If None, then
         classes are balanced. Note that if ``len(weights) == n_classes - 1``,
         then the last class weight is automatically inferred.
@@ -160,22 +161,27 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
                          " features")
     # Use log2 to avoid overflow errors
     if n_informative < np.log2(n_classes * n_clusters_per_class):
-        raise ValueError("n_classes * n_clusters_per_class must"
-                         " be smaller or equal 2 ** n_informative")
-    if weights and len(weights) not in [n_classes, n_classes - 1]:
-        raise ValueError("Weights specified but incompatible with number "
-                         "of classes.")
+        msg = "n_classes({}) * n_clusters_per_class({}) must be"
+        msg += " smaller or equal 2**n_informative({})={}"
+        raise ValueError(msg.format(n_classes, n_clusters_per_class,
+                                    n_informative, 2**n_informative))
+
+    if weights is not None:
+        if len(weights) not in [n_classes, n_classes - 1]:
+            raise ValueError("Weights specified but incompatible with number "
+                             "of classes.")
+        if len(weights) == n_classes - 1:
+            if isinstance(weights, list):
+                weights = weights + [1.0 - sum(weights)]
+            else:
+                weights = np.resize(weights, n_classes)
+                weights[-1] = 1.0 - sum(weights[:-1])
+    else:
+        weights = [1.0 / n_classes] * n_classes
 
     n_useless = n_features - n_informative - n_redundant - n_repeated
     n_clusters = n_classes * n_clusters_per_class
 
-    if weights and len(weights) == (n_classes - 1):
-        weights = weights + [1.0 - sum(weights)]
-
-    if weights is None:
-        weights = [1.0 / n_classes] * n_classes
-        weights[-1] = 1.0 - sum(weights[:-1])
-
     # Distribute samples among clusters by weight
     n_samples_per_cluster = [
         int(n_samples * weights[k % n_classes] / n_clusters_per_class)

diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py
@@ -146,6 +146,36 @@ def test_make_classification_informative_features():
              n_clusters_per_class=2)
 
 
+@pytest.mark.parametrize(
+    'weights, err_type, err_msg',
+    [
+        (bad_weights, ValueError,
+         "Weights specified but incompatible with number of classes.")
+        for bad_weights in (
+            [],
+            [.25, .75, .1],
+            np.array([]),
+            np.array([.25, .75, .1]),
+            np.random.random(3),
+        )
+    ]
+)
+def test_make_classification_weights_type(weights, err_type, err_msg):
+    """Weights of an incompatible length must raise the expected error."""
+    with pytest.raises(err_type, match=err_msg):
+        make_classification(weights=weights)
+
+
+@pytest.mark.parametrize("kwargs", [{}, {"n_classes": 3, "n_informative": 3}])
+def test_make_classification_weights_array_or_list_ok(kwargs):
+    """Weights given as a list or as an ndarray must yield identical data."""
+    params = dict(random_state=0, **kwargs)
+    X_list, y_list = make_classification(weights=[.1, .9], **params)
+    X_arr, y_arr = make_classification(weights=np.array([.1, .9]), **params)
+    assert_almost_equal(X_list, X_arr)
+    assert_almost_equal(y_list, y_arr)
+
+
 def test_make_multilabel_classification_return_sequences():
     for allow_unlabeled, min_length in zip((True, False), (0, 1)):
         X, Y = make_multilabel_classification(n_samples=100, n_features=20,