DOC Clarify docs for make_classification (#9918)

scikit-learn · Oct 16, 2017 · 1edc385 · 1edc385
1 parent ba4f1b5
commit 1edc385
Showing 1 changed file with 9 additions and 5 deletions.
diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py
@@ -42,9 +42,10 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
     """Generate a random n-class classification problem.
 
     This initially creates clusters of points normally distributed (std=1)
-    about vertices of a `2 * class_sep`-sided hypercube, and assigns an equal
-    number of clusters to each class. It introduces interdependence between
-    these features and adds various types of further noise to the data.
+    about vertices of an `n_informative`-dimensional hypercube with sides of
+    length `2*class_sep` and assigns an equal number of clusters to each
+    class. It introduces interdependence between these features and adds 
+    various types of further noise to the data.
 
     Prior to shuffling, `X` stacks a number of these primary "informative"
     features, "redundant" linear combinations of these, "repeated" duplicates
@@ -94,10 +95,13 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
         exceeds 1.
 
     flip_y : float, optional (default=0.01)
-        The fraction of samples whose class are randomly exchanged.
+        The fraction of samples whose class are randomly exchanged. Larger
+        values introduce noise in the labels and make the classification
+        task harder.
 
     class_sep : float, optional (default=1.0)
-        The factor multiplying the hypercube dimension.
+        The factor multiplying the hypercube size.  Larger values spread
+        out the clusters/classes and make the classification task easier.
 
     hypercube : boolean, optional (default=True)
         If True, the clusters are put on the vertices of a hypercube. If