
Commit

Address alex's review
William de Vazelhes committed Feb 27, 2019
1 parent 3a78d1a commit 58d169c
Showing 6 changed files with 45 additions and 43 deletions.
6 changes: 3 additions & 3 deletions doc/modules/neighbors.rst
@@ -689,11 +689,11 @@ Implementation
--------------

This implementation follows what is explained in the original paper [1]_. For
-the optimisation method, it currently uses scipy's l-bfgs-b with a full
+the optimisation method, it currently uses scipy's L-BFGS-B with a full
gradient computation at each iteration, to avoid tuning the learning rate and
provide stable learning.
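
As a rough sketch of the kind of call this describes (a toy objective
standing in for NCA's, not the estimator's actual code), assuming scipy
is available::

    import numpy as np
    from scipy.optimize import minimize

    def loss_and_grad(x):
        # toy quadratic objective; returning (value, gradient) lets
        # L-BFGS-B use the full analytic gradient at each iteration
        return np.sum(x ** 2), 2 * x

    result = minimize(loss_and_grad, x0=np.ones(4),
                      method='L-BFGS-B', jac=True,
                      options={'maxiter': 50})
    print(result.x)  # approaches the minimiser, here the zero vector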

-See the examples below and the doc string of
+See the examples below and the docstring of
:meth:`NeighborhoodComponentsAnalysis.fit` for further information.

Complexity
@@ -705,7 +705,7 @@ NCA stores a matrix of pairwise distances, taking ``n_samples ** 2`` memory.
Time complexity depends on the number of iterations done by the optimisation
algorithm. However, one can set the maximum number of iterations with the
argument ``max_iter``. For each iteration, time complexity is
-``O(n_components x n_samples x min(n_samples, n_features)``.
+``O(n_components x n_samples x min(n_samples, n_features))``.
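
For instance, with assumed sizes ``n_samples = 1000``, ``n_features = 50``
and ``n_components = 10``, the per-iteration cost is roughly::

    n_samples, n_features, n_components = 1000, 50, 10
    cost = n_components * n_samples * min(n_samples, n_features)
    print(cost)  # 500000 elementary operations per iteration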


Transform
2 changes: 1 addition & 1 deletion doc/modules/neural_networks_supervised.rst
@@ -152,7 +152,7 @@ indices where the value is `1` represents the assigned classes of that sample::
>>> clf.predict([[0., 0.]])
array([[0, 1]])

-See the examples below and the doc string of
+See the examples below and the docstring of
:meth:`MLPClassifier.fit` for further information.
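
A fuller version of the snippet above might look as follows (a sketch:
the exact prediction depends on the solver and random seed)::

    from sklearn.neural_network import MLPClassifier

    X = [[0., 0.], [1., 1.]]
    y = [[0, 1], [1, 1]]  # two binary labels per sample (multilabel)
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=(15,), random_state=1)
    clf.fit(X, y)
    print(clf.predict([[0., 0.]]))  # e.g. [[0 1]]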

.. topic:: Examples:
2 changes: 1 addition & 1 deletion doc/modules/sgd.rst
@@ -154,7 +154,7 @@ one-vs-all classification.

:class:`SGDClassifier` supports both weighted classes and weighted
instances via the fit parameters ``class_weight`` and ``sample_weight``. See
-the examples below and the doc string of :meth:`SGDClassifier.fit` for
+the examples below and the docstring of :meth:`SGDClassifier.fit` for
further information.
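
A minimal sketch of both mechanisms, on hypothetical toy data::

    import numpy as np
    from sklearn.linear_model import SGDClassifier

    X = np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
    y = np.array([0, 0, 1, 1])

    # class_weight re-weights whole classes; sample_weight re-weights
    # individual training instances at fit time
    clf = SGDClassifier(class_weight={0: 1.0, 1: 2.0}, max_iter=1000, tol=1e-3)
    clf.fit(X, y, sample_weight=np.array([1.0, 1.0, 2.0, 0.5]))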

.. topic:: Examples:
18 changes: 9 additions & 9 deletions sklearn/neighbors/nca.py
@@ -48,12 +48,11 @@ class NeighborhoodComponentsAnalysis(BaseEstimator, TransformerMixin):
'auto'
Depending on ``n_components``, the most reasonable initialization
-will be chosen among the following ones. First, we try to use
-'lda', as it uses labels information: if ``n_components <=
-n_classes``, ``init='lda'``. If we can't, we then try 'pca', as it
-projects data in meaningful directions (those of higher variance):
-if ``n_components < min(n_features, n_samples)``, ``init = 'pca'``.
-Otherwise, we just use 'identity'.
+will be chosen. If ``n_components <= n_classes``, we use 'lda', as
+it uses label information. If not, but
+``n_components < min(n_features, n_samples)``, we use 'pca', as
+it projects data in meaningful directions (those of higher
+variance). Otherwise, we just use 'identity'.
'pca'
``n_components`` principal components of the inputs passed
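
The selection rule spelled out in the new wording can be sketched as
follows (illustrative pseudocode of the rule only, not the estimator's
internal code)::

    def choose_auto_init(n_components, n_classes, n_features, n_samples):
        if n_components <= n_classes:
            return 'lda'  # uses label information
        if n_components < min(n_features, n_samples):
            return 'pca'  # directions of highest variance
        return 'identity'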
@@ -95,9 +94,10 @@ class NeighborhoodComponentsAnalysis(BaseEstimator, TransformerMixin):
callback : callable, optional (default=None)
If not None, this function is called after every iteration of the
-optimizer, taking as arguments the current solution (transformation)
-and the number of iterations. This might be useful in case one wants
-to examine or store the transformation found after each iteration.
+optimizer, taking as arguments the current solution (flattened
+transformation matrix) and the number of iterations. This might be
+useful in case one wants to examine or store the transformation
+found after each iteration.
verbose : int, optional (default=0)
If 0, no progress messages will be printed.
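
A minimal sketch of such a callback (hypothetical monitoring code,
assuming this module's import path)::

    from sklearn.neighbors import NeighborhoodComponentsAnalysis

    def my_callback(transformation, n_iter):
        # `transformation` is the flattened transformation matrix
        print('iteration', n_iter,
              'squared norm', (transformation ** 2).sum())

    nca = NeighborhoodComponentsAnalysis(callback=my_callback)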
58 changes: 30 additions & 28 deletions sklearn/neighbors/tests/test_nca.py
@@ -26,8 +26,8 @@ def test_simple_example():
"""Test on a simple example.
Puts four points in the input space where the opposite labels points are
-next to each other. After transform the same labels points should be next
-to each other.
+next to each other. After transform, the samples from the same class
+should be next to each other.
"""
X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
@@ -36,23 +36,24 @@
random_state=42)
nca.fit(X, y)
X_t = nca.transform(X)
-np.testing.assert_equal(pairwise_distances(X_t).argsort()[:, 1],
-                        np.array([2, 3, 0, 1]))
+assert_array_equal(pairwise_distances(X_t).argsort()[:, 1],
+                   np.array([2, 3, 0, 1]))


def test_toy_example_collapse_points():
"""Test on a toy example of three points that should collapse
-Test that on this simple example, the new points are collapsed:
-Two same label points with a different label point in the middle.
-The objective is 2/(1 + exp(d/2)), with d the euclidean distance
-between the two same labels points. This is maximized for d=0
-(because d>=0), with an objective equal to 1 (loss=-1.).
+We build a simple example: two points from the same class and a point from
+a different class in the middle of them. On this simple example, the new
+(transformed) points should all collapse into one single point. Indeed, the
+objective is 2/(1 + exp(d/2)), with d the euclidean distance between the
+two samples from the same class. This is maximized for d=0 (because d>=0),
+with an objective equal to 1 (loss=-1.).
"""
-random_state = np.random.RandomState(42)
+rng = np.random.RandomState(42)
input_dim = 5
-two_points = random_state.randn(2, input_dim)
+two_points = rng.randn(2, input_dim)
X = np.vstack([two_points, two_points.mean(axis=0)[np.newaxis, :]])
y = [0, 0, 1]
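
As a quick numerical sanity check of the objective described in the
docstring::

    import numpy as np

    d = np.linspace(0, 10, 5)
    print(2 / (1 + np.exp(d / 2)))  # decreasing in d, maximal (== 1) at d = 0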

@@ -90,10 +91,10 @@ def test_finite_differences():
approximation.
"""
# Initialize the transformation `M`, as well as `X` and `y` and `NCA`
-random_state = np.random.RandomState(42)
+rng = np.random.RandomState(42)
X, y = make_classification()
-M = random_state.randn(random_state.randint(1, X.shape[1] + 1),
-                       X.shape[1])
+M = rng.randn(rng.randint(1, X.shape[1] + 1),
+              X.shape[1])
nca = NeighborhoodComponentsAnalysis()
nca.n_iter_ = 0
mask = y[:, np.newaxis] == y[np.newaxis, :]
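
scipy ships a helper for the same kind of comparison; a minimal sketch on
a toy function (not the test's actual code)::

    import numpy as np
    from scipy.optimize import check_grad

    # check_grad returns the norm of the difference between the analytic
    # gradient and a finite-difference approximation; small means they agree
    err = check_grad(lambda x: np.sum(x ** 2), lambda x: 2 * x, np.ones(3))
    print(err)  # close to 0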
@@ -114,7 +115,7 @@ def test_params_validation():
X = np.arange(12).reshape(4, 3)
y = [1, 1, 2, 2]
NCA = NeighborhoodComponentsAnalysis
-random_state = np.random.RandomState(42)
+rng = np.random.RandomState(42)

# TypeError
assert_raises(TypeError, NCA(max_iter='21').fit, X, y)
@@ -133,7 +134,7 @@
'`max_iter`= -1, must be >= 1.',
NCA(max_iter=-1).fit, X, y)

-init = random_state.rand(5, 3)
+init = rng.rand(5, 3)
assert_raise_message(ValueError,
'The output dimensionality ({}) of the given linear '
'transformation `init` cannot be greater than its '
@@ -175,11 +176,11 @@ def test_transformation_dimensions():


def test_n_components():
-random_state = np.random.RandomState(42)
+rng = np.random.RandomState(42)
X = np.arange(12).reshape(4, 3)
y = [1, 1, 2, 2]

-init = random_state.rand(X.shape[1] - 1, 3)
+init = rng.rand(X.shape[1] - 1, 3)

# n_components = X.shape[1] != transformation.shape[0]
n_components = X.shape[1]
@@ -209,7 +210,7 @@ def test_n_components():


def test_init_transformation():
-random_state = np.random.RandomState(42)
+rng = np.random.RandomState(42)
X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)

# Start learning from scratch
@@ -232,12 +233,12 @@
nca_lda = NeighborhoodComponentsAnalysis(init='lda')
nca_lda.fit(X, y)

-init = random_state.rand(X.shape[1], X.shape[1])
+init = rng.rand(X.shape[1], X.shape[1])
nca = NeighborhoodComponentsAnalysis(init=init)
nca.fit(X, y)

# init.shape[1] must match X.shape[1]
-init = random_state.rand(X.shape[1], X.shape[1] + 1)
+init = rng.rand(X.shape[1], X.shape[1] + 1)
nca = NeighborhoodComponentsAnalysis(init=init)
assert_raise_message(ValueError,
'The input dimensionality ({}) of the given '
@@ -247,7 +248,7 @@
nca.fit, X, y)

# init.shape[0] must be <= init.shape[1]
-init = random_state.rand(X.shape[1] + 1, X.shape[1])
+init = rng.rand(X.shape[1] + 1, X.shape[1])
nca = NeighborhoodComponentsAnalysis(init=init)
assert_raise_message(ValueError,
'The output dimensionality ({}) of the given '
@@ -257,7 +258,7 @@
nca.fit, X, y)

# init.shape[0] must match n_components
-init = random_state.rand(X.shape[1], X.shape[1])
+init = rng.rand(X.shape[1], X.shape[1])
n_components = X.shape[1] - 2
nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
assert_raise_message(ValueError,
@@ -276,17 +277,17 @@
def test_auto_init(n_samples, n_features, n_classes, n_components):
# Test that auto choose the init as expected with every configuration
# of order of n_samples, n_features, n_classes and n_components.
-random_state = np.random.RandomState(42)
+rng = np.random.RandomState(42)
nca_base = NeighborhoodComponentsAnalysis(init='auto',
n_components=n_components,
max_iter=1,
-                              random_state=random_state)
+                              random_state=rng)
if n_classes >= n_samples:
pass
# n_classes > n_samples is impossible, and n_classes == n_samples
# throws an error from lda but is an absurd case
else:
-X = random_state.randn(n_samples, n_features)
+X = rng.randn(n_samples, n_features)
y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
if n_components > n_features:
# this would return a ValueError, which is already tested in
@@ -359,13 +360,13 @@ def test_verbose(init_name, capsys):
def test_verbose(init_name, capsys):
# assert there is proper output when verbose = 1, for every initialization
# except auto because auto will call one of the others
-random_state = np.random.RandomState(42)
+rng = np.random.RandomState(42)
X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)
regexp_init = r'... done in \ *\d+\.\d{2}s'
msgs = {'pca': "Finding principal components" + regexp_init,
'lda': "Finding most discriminative components" + regexp_init}
if init_name == 'precomputed':
-init = random_state.randn(X.shape[1], X.shape[1])
+init = rng.randn(X.shape[1], X.shape[1])
else:
init = init_name
nca = NeighborhoodComponentsAnalysis(verbose=1, init=init)
@@ -461,6 +462,7 @@ def test_callback(capsys):
max_iter = 10

def my_cb(transformation, n_iter):
+    assert transformation.shape == (iris_data.shape[1]**2,)
rem_iter = max_iter - n_iter
print('{} iterations remaining...'.format(rem_iter))

2 changes: 1 addition & 1 deletion sklearn/utils/validation.py
@@ -953,7 +953,7 @@ def check_scalar(x, name, target_type, min_val=None, max_val=None):
Acceptable data types for the parameter.
min_val : float or int, optional (default=None)
-The minimum value value the parameter can take. If None (default) it
+The minimum valid value the parameter can take. If None (default) it
is implied that the parameter does not have a lower bound.
max_val : float or int, optional (default=None)
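
A minimal usage sketch of this helper::

    from sklearn.utils.validation import check_scalar

    check_scalar(21, name='max_iter', target_type=int, min_val=1)  # passes
    check_scalar(0, name='max_iter', target_type=int, min_val=1)   # ValueError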

