[MRG+1] Threshold for pairs learners #168

Merged 43 commits on Apr 15, 2019
Commits (the file changes further below are shown from 1 commit)
676ab86 (Feb 4, 2019): add some tests for testing that different scores work using the scori…
cc1c3e6 (Feb 5, 2019): ENH: Add tests and basic threshold implementation
f95c456 (Feb 6, 2019): Add support for LSML and more generally quadruplets
9ffe8f7 (Feb 6, 2019): Make CalibratedClassifierCV work (for preprocessor case) thanks to cl…
3354fb1 (Feb 7, 2019): Fix some tests and PEP8 errors
12cb5f1 (Feb 19, 2019): change the sign in decision function
dd8113e (Feb 19, 2019): Add docstring for threshold_ and classes_ in the base _PairsClassifie…
1c8cd29 (Feb 19, 2019): remove quadruplets from the test with scikit learn custom scorings
d12729a (Feb 20, 2019): Remove argument y in quadruplets learners and lsml
dc9e21d (Feb 20, 2019): FIX fix docstrings of decision functions
402729f (Feb 20, 2019): FIX the threshold by taking the opposite (to be adapted to the decisi…
aaac3de (Feb 21, 2019): Fix tests to have no y for quadruplets' estimator fit
e5b1e47 (Feb 21, 2019): Remove isin to be compatible with old numpy versions
a0cb3ca (Feb 21, 2019): Fix threshold so that it has a positive value and add small test
8d5fc50 (Feb 21, 2019): Fix threshold for itml
0f14b25 (Mar 4, 2019): FEAT: Add calibrate_threshold and tests
a6458a2 (Mar 5, 2019): MAINT: remove starred syntax for compatibility with older versions of…
fada5cc (Mar 5, 2019): Remove debugging prints and make tests for ITML pass, while waiting f…
32a4889 (Mar 5, 2019): FIX: from __future__ import division to pass tests for python 2.7
5cf71b9 (Mar 11, 2019): Add some documentation for calibration
c2bc693 (Mar 11, 2019): DOC: fix style
e96ee00 (Mar 21, 2019): Merge branch 'master' into feat/add_threshold
3ed3430 (Mar 21, 2019): Address most comments from aurelien's reviews
69c6945 (Mar 21, 2019): Remove classes_ attribute and test for CalibratedClassifierCV
bc39392 (Mar 21, 2019): Rename make_args_inc_quadruplets into remove_y_quadruplets
facc546 (Mar 21, 2019): TST: Fix remaining threshold into min_rate
f0ca65e (Mar 21, 2019): Remove default_threshold and put calibrate_threshold instead
a6ec283 (Mar 21, 2019): Use calibrate_threshold for ITML, and remove description
49fbbd7 (Mar 21, 2019): ENH: use calibrate_threshold by default and display its parameters fr…
960b174 (Mar 21, 2019): Add a small test to test automatic calibration
c91acf7 (Mar 21, 2019): Update documentation of the default threshold
a742186 (Mar 21, 2019): Inverse sense for threshold comparison to be more intuitive
9ec1ead (Mar 21, 2019): Address remaining review comments
986fed3 (Mar 26, 2019): MAINT: Rename threshold_params into calibration_params
3f5d6d1 (Mar 26, 2019): TST: Add test for extreme cases
7b5e4dd (Mar 26, 2019): MAINT: rename threshold_params into calibration_params
a3ec02c (Mar 26, 2019): MAINT: rename threshold_params into calibration_params
ccc66eb (Mar 27, 2019): FIX: Make tests work, and add the right threshold (mean between lowes…
6dff15b (Mar 27, 2019): Merge branch 'master' into feat/add_threshold
719d018 (Apr 2, 2019): Go back to previous version of finding the threshold
551d161 (Apr 2, 2019): Extract method for validating calibration parameters
594c485 (Apr 2, 2019): Validate calibration params before fit
14713c6 (Apr 2, 2019): Address https://github.com/metric-learn/metric-learn/pull/168#discuss…
20 changes: 16 additions & 4 deletions metric_learn/base_metric.py
@@ -2,7 +2,7 @@
from scipy.spatial.distance import euclidean
from sklearn.base import BaseEstimator
from sklearn.utils.validation import _is_arraylike, check_is_fitted
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_auc_score, accuracy_score
import numpy as np
from abc import ABCMeta, abstractmethod
import six
@@ -317,7 +317,7 @@ def predict(self, pairs):
y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,)
The predicted learned metric value between samples in every pair.
"""
check_is_fitted(self, 'threshold_')
check_is_fitted(self, ['threshold_', 'transformer_'])
return - 2 * (self.decision_function(pairs) > self.threshold_) + 1

def decision_function(self, pairs):
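As an aside on the expression above: `- 2 * (decision > threshold) + 1` turns a boolean mask into ±1 labels, so pairs whose decision value (a learned metric value here) exceeds `threshold_` are labeled -1 (dissimilar) and the rest +1 (similar). A minimal standalone sketch with made-up numbers:

```python
import numpy as np

# Made-up decision values (learned distances) for four pairs, plus a threshold.
decision_values = np.array([0.2, 1.5, 0.9, 3.0])
threshold = 1.0

# True -> -1 (distance above threshold: dissimilar), False -> +1 (similar).
predictions = - 2 * (decision_values > threshold) + 1
print(predictions)  # [ 1 -1  1 -1]
```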
@@ -401,6 +401,7 @@ def predict(self, quadruplets):
prediction : `numpy.ndarray` of floats, shape=(n_constraints,)
Predictions of the ordering of pairs, for each quadruplet.
"""
check_is_fitted(self, 'transformer_')
quadruplets = check_input(quadruplets, type_of_inputs='tuples',
preprocessor=self.preprocessor_,
estimator=self, tuple_size=self._tuple_size)
@@ -443,11 +444,22 @@ def score(self, quadruplets, y=None):
points, or 2D array of indices of quadruplets if the metric learner
uses a preprocessor.

y : Ignored, for scikit-learn compatibility.
y : array-like, shape=(n_constraints,) or `None`
Labels of constraints. y[i] should be 1 if we want
d(quadruplets[i, 0], quadruplets[i, 1]) to be smaller than
d(quadruplets[i, 2], quadruplets[i, 3]), and -1 otherwise. If None,
`y` will be set to `np.ones(quadruplets.shape[0])`, i.e. we expect the
first two points to be closer than the last two points in each
quadruplet.

Returns
-------
score : float
The quadruplets score.
"""
return -np.mean(self.predict(quadruplets))
quadruplets = check_input(quadruplets, y, type_of_inputs='tuples',
preprocessor=self.preprocessor_,
estimator=self, tuple_size=self._tuple_size)
if y is None:
y = np.ones(quadruplets.shape[0])
return accuracy_score(y, self.predict(quadruplets))

Review comment from the PR author, on the check_input call above: note that here quadruplets will be checked twice (once here, once in predict). This is because when I do y = np.ones(quadruplets.shape[0]), I want to be sure that I can do quadruplets.shape[0]; otherwise an error message would be raised earlier (by the check_input method). I think it's fine, isn't it? I don't see any other way to do this. Note that I also check y at the same time because I like the fact that the column_or_1d check will be called on y (since it is not done in accuracy_score).
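To make the new default-`y` behaviour concrete, here is a small standalone sketch (plain numpy + scikit-learn; the prediction values are made up) of how `score` reduces to the fraction of quadruplets predicted in the expected order when `y` is left as `None`:

```python
import numpy as np
from sklearn.metrics import accuracy_score

# Pretend predict() returned these labels for four quadruplets
# (+1: first pair predicted closer, -1: last pair predicted closer).
predictions = np.array([1, 1, -1, 1])

# With y=None, score falls back to all-ones labels, i.e. every quadruplet
# is expected to have its first two points closer than its last two.
y = np.ones(predictions.shape[0])
print(accuracy_score(y, predictions))  # 0.75
```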
24 changes: 17 additions & 7 deletions metric_learn/lsml.py
@@ -46,9 +46,15 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False,
super(_BaseLSML, self).__init__(preprocessor)

def _fit(self, quadruplets, y=None, weights=None):
quadruplets = self._prepare_inputs(quadruplets,
quadruplets = self._prepare_inputs(quadruplets, y,
type_of_inputs='tuples')

if y is None:
y = np.ones(quadruplets.shape[0])
# we swap the quadruplets where the label is -1 since they are not in
# the right order
quadruplets_to_swap = quadruplets[y == -1]
quadruplets[y == -1] = np.column_stack([quadruplets_to_swap[:, 2:],
quadruplets_to_swap[:, :2]])
# check to make sure that no two constrained vectors are identical
vab = quadruplets[:, 0, :] - quadruplets[:, 1, :]
vcd = quadruplets[:, 2, :] - quadruplets[:, 3, :]
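A quick standalone illustration of the swap above, on a made-up 2D array of quadruplet indices (the same `np.column_stack` trick works on the 3D `(n_constraints, 4, n_features)` form, since for 2D+ inputs it concatenates along axis 1): rows labeled -1 get their first and last pairs exchanged, so every row ends up in the "first two closer" order LSML trains on.

```python
import numpy as np

quadruplets = np.array([[0, 1, 2, 3],
                        [4, 5, 6, 7]])
y = np.array([1, -1])

# Swap the first and last pair of every quadruplet labeled -1.
to_swap = quadruplets[y == -1]
quadruplets[y == -1] = np.column_stack([to_swap[:, 2:], to_swap[:, :2]])
print(quadruplets)
# [[0 1 2 3]
#  [6 7 4 5]]
```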
@@ -144,18 +150,22 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin):
metric (See function `transformer_from_metric`.)
"""

def fit(self, quadruplets, weights=None):
def fit(self, quadruplets, y=None, weights=None):
"""Learn the LSML model.

Parameters
----------
quadruplets : array-like, shape=(n_constraints, 4, n_features) or
(n_constraints, 4)
3D array-like of quadruplets of points or 2D array of quadruplets of
indicators. In order to supervise the algorithm in the right way, we
should have the four samples ordered in a way such that:
d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i <
n_constraints.
indicators.
y : array-like, shape=(n_constraints,) or `None`
Labels of constraints. y[i] should be 1 if we want
d(quadruplets[i, 0], quadruplets[i, 1]) to be smaller than
d(quadruplets[i, 2], quadruplets[i, 3]), and -1 otherwise. If None,
`y` will be set to `np.ones(quadruplets.shape[0])`, i.e. we want the
first two points to be closer than the last two points in each
quadruplet.
weights : (n_constraints,) array of floats, optional
scale factor for each constraint
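A hedged usage sketch of the signature documented above; the toy points and quadruplets are invented for illustration, and only the `fit(quadruplets, y)` call itself reflects this diff:

```python
import numpy as np
from metric_learn import LSML

rng = np.random.RandomState(42)
X = rng.randn(12, 2)  # 12 toy points in 2D

# Four quadruplets built by indexing into X; shape (4, 4, 2).
quadruplets = X[np.array([[0, 1, 2, 3],
                          [4, 5, 6, 7],
                          [8, 9, 10, 11],
                          [2, 3, 0, 1]])]
# The first three quadruplets are already in the "right" order,
# the last one is reversed.
y = np.array([1, 1, 1, -1])

lsml = LSML()
lsml.fit(quadruplets, y)  # y is optional: fit(quadruplets) treats every row as +1
```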

8 changes: 4 additions & 4 deletions test/test_pairs_classifiers.py
@@ -11,8 +11,8 @@
@pytest.mark.parametrize('with_preprocessor', [True, False])
@pytest.mark.parametrize('estimator, build_dataset', pairs_learners,
ids=ids_pairs_learners)
def test_predict_monotonous(estimator, build_dataset,
with_preprocessor):
def test_predict_only_one_or_minus_one(estimator, build_dataset,
with_preprocessor):
"""Test that all predicted values are either +1 or -1"""
input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
estimator = clone(estimator)
@@ -21,15 +21,15 @@ def test_predict_monotonous(estimator, build_dataset,
pairs_train, pairs_test, y_train, y_test = train_test_split(input_data,
labels)
estimator.fit(pairs_train, y_train)
predictions = estimator.predict(pairs_test, y_test)
predictions = estimator.predict(pairs_test)
assert np.isin(predictions, [-1, 1]).all()


@pytest.mark.parametrize('with_preprocessor', [True, False])
@pytest.mark.parametrize('estimator, build_dataset', pairs_learners,
ids=ids_pairs_learners)
def test_predict_monotonous(estimator, build_dataset,
with_preprocessor):
with_preprocessor):
"""Test that there is a threshold distance separating points labeled as
similar and points labeled as dissimilar """
input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
65 changes: 65 additions & 0 deletions test/test_quadruplets_classifiers.py
@@ -0,0 +1,65 @@
import pytest
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import train_test_split

from test.test_utils import quadruplets_learners, ids_quadruplets_learners
from sklearn.utils.testing import set_random_state
from sklearn import clone
import numpy as np


@pytest.mark.parametrize('with_preprocessor', [True, False])
@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners,
ids=ids_quadruplets_learners)
def test_predict_only_one_or_minus_one(estimator, build_dataset,
with_preprocessor):
"""Test that all predicted values are either +1 or -1"""
input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
estimator = clone(estimator)
estimator.set_params(preprocessor=preprocessor)
set_random_state(estimator)
(quadruplets_train,
quadruplets_test, y_train, y_test) = train_test_split(input_data, labels)
estimator.fit(quadruplets_train, y_train)
predictions = estimator.predict(quadruplets_test)
assert np.isin(predictions, [-1, 1]).all()


@pytest.mark.parametrize('with_preprocessor', [True, False])
@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners,
ids=ids_quadruplets_learners)
def test_predict_monotonous(estimator, build_dataset,
with_preprocessor):
"""Test that there is a threshold distance separating points labeled as
similar and points labeled as dissimilar """
input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
estimator = clone(estimator)
estimator.set_params(preprocessor=preprocessor)
set_random_state(estimator)
(quadruplets_train,
quadruplets_test, y_train, y_test) = train_test_split(input_data, labels)
estimator.fit(quadruplets_train, y_train)
distances = estimator.score_quadruplets(quadruplets_test)
predictions = estimator.predict(quadruplets_test)
min_dissimilar = np.min(distances[predictions == -1])
max_similar = np.max(distances[predictions == 1])
assert max_similar <= min_dissimilar
separator = np.mean([min_dissimilar, max_similar])
assert (predictions[distances > separator] == -1).all()
assert (predictions[distances < separator] == 1).all()


@pytest.mark.parametrize('with_preprocessor', [True, False])
@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners,
ids=ids_quadruplets_learners)
def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset,
with_preprocessor):
"""Test that a NotFittedError is raised if someone tries to predict and
the metric learner has not been fitted."""
input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
estimator = clone(estimator)
estimator.set_params(preprocessor=preprocessor)
set_random_state(estimator)
with pytest.raises(NotFittedError):
estimator.predict(input_data)