FIX limit warnings for recall_score, precision_score, f1_score, #2592

Merged
merged 3 commits on Dec 5, 2013
35 changes: 26 additions & 9 deletions sklearn/metrics/metrics.py
@@ -1315,11 +1315,12 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1,
beta=beta,
labels=labels,
pos_label=pos_label,
average=average)
average=average,
warn_for=('f-score',))
return f


def _prf_divide(numerator, denominator, metric, modifier, average):
def _prf_divide(numerator, denominator, metric, modifier, average, warn_for):
"""Performs division and handles divide-by-zero.

On zero-division, sets the corresponding result elements to zero
@@ -1344,8 +1345,17 @@ def _prf_divide(numerator, denominator, metric, modifier, average):
if average == 'samples':
axis0, axis1 = axis1, axis0

msg = ('{0} and F-score are ill-defined and being set to 0.0 {{0}} '
'no {1} {2}s.'.format(metric.title(), modifier, axis0))
if metric in warn_for and 'f-score' in warn_for:
msg_start = '{0} and F-score are'.format(metric.title())
elif metric in warn_for:
msg_start = '{0} is'.format(metric.title())
elif 'f-score' in warn_for:
msg_start = 'F-score is'
else:
return result
Member:
Why doesn't it raise a ValueError here if metric is not in warn_for?

Member (author):
That's what we're handling: 'precision' won't be in warn_for when we're calling recall_score. The output of this division will be ignored anyway (and indeed, we could avoid that further upstream).

Member:

Thanks!

Member:

> Why doesn't it raise a ValueError here if metric is not in warn_for?

This is the way to disable warnings as you requested earlier, isn't it?

Member:

I was thinking about what happens when somebody puts a garbage string in warn_for and thought that it might generate an error. But the current behaviour is also fine.
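To make the behaviour discussed in this thread concrete, here is a small standalone reproduction of just the branching (adapted from the diff above; the helper name _warning_prefix is only for this illustration and is not part of scikit-learn):

def _warning_prefix(metric, warn_for):
    # Return the start of the warning message, or None when the caller
    # did not ask for this metric (the silent case discussed above).
    if metric in warn_for and 'f-score' in warn_for:
        return '{0} and F-score are'.format(metric.title())
    elif metric in warn_for:
        return '{0} is'.format(metric.title())
    elif 'f-score' in warn_for:
        return 'F-score is'
    return None

print(_warning_prefix('precision', ('recall',)))         # None: recall_score discards this division
print(_warning_prefix('precision', ('garbage',)))        # None: an unrecognised string just silences
print(_warning_prefix('recall', ('recall', 'f-score')))  # 'Recall and F-score are'

In other words, a typo in warn_for silences warnings rather than raising, which is the trade-off acknowledged in the last comment.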


msg = ('{0} ill-defined and being set to 0.0 {{0}} '
'no {1} {2}s.'.format(msg_start, modifier, axis0))
if len(mask) == 1:
msg = msg.format('due to')
else:
@@ -1355,7 +1365,9 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,


def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
pos_label=1, average=None):
pos_label=1, average=None,
warn_for=('precision', 'recall',
'f-score')):
"""Compute precision, recall, F-measure and support for each class

The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
@@ -1419,6 +1431,9 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
meaningful for multilabel classification where this differs from
:func:`accuracy_score`).

warn_for : tuple or set, for internal use
This determines which warnings will be made in the case that this
function is being used to return only one of its metrics.

Returns
-------
@@ -1547,9 +1562,9 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
# Oddly, we may get an "invalid" rather than a "divide" error
# here.
precision = _prf_divide(tp_sum, pred_sum,
'precision', 'predicted', average)
'precision', 'predicted', average, warn_for)
recall = _prf_divide(tp_sum, true_sum,
'recall', 'true', average)
'recall', 'true', average, warn_for)
# Don't need to warn for F: either P or R warned, or tp == 0 where pos
# and true are nonzero, in which case, F is well-defined and zero
f_score = ((1 + beta2) * precision * recall /
@@ -1654,7 +1669,8 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1,
p, _, _, _ = precision_recall_fscore_support(y_true, y_pred,
labels=labels,
pos_label=pos_label,
average=average)
average=average,
warn_for=('precision',))
return p


@@ -1729,7 +1745,8 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'):
_, r, _, _ = precision_recall_fscore_support(y_true, y_pred,
labels=labels,
pos_label=pos_label,
average=average)
average=average,
warn_for=('recall',))
return r


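With the wrapper changes above, the user-visible effect (assuming a scikit-learn build that includes this patch) is that a single-metric call only warns about the metric it actually returns. A rough check:

import warnings
import numpy as np
from sklearn.metrics import recall_score

y_true = np.array([[0, 0], [0, 0]])  # no true samples for any label
y_pred = np.array([[1, 1], [1, 1]])

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    recall_score(y_true, y_pred, average='micro')

# Expect a single message about recall being ill-defined; the precision
# division performed internally no longer emits its own warning.
for w in caught:
    print(w.message)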
53 changes: 53 additions & 0 deletions sklearn/metrics/tests/test_metrics.py
@@ -23,6 +23,7 @@
assert_array_equal,
assert_array_almost_equal,
assert_warns,
assert_no_warnings,
assert_greater,
ignore_warnings)

@@ -1886,6 +1887,58 @@ def test_prf_warnings():
'being set to 0.0 due to no true samples.')


def test_recall_warnings():
assert_no_warnings(recall_score,
np.array([[1, 1], [1, 1]]),
np.array([[0, 0], [0, 0]]),
average='micro')

with warnings.catch_warnings(record=True) as record:
warnings.simplefilter('always')
recall_score(np.array([[0, 0], [0, 0]]),
np.array([[1, 1], [1, 1]]),
average='micro')
assert_equal(str(record.pop().message),
'Recall is ill-defined and '
'being set to 0.0 due to no true samples.')


def test_precision_warnings():
with warnings.catch_warnings(record=True) as record:
warnings.simplefilter('always')

precision_score(np.array([[1, 1], [1, 1]]),
np.array([[0, 0], [0, 0]]),
average='micro')
assert_equal(str(record.pop().message),
'Precision is ill-defined and '
'being set to 0.0 due to no predicted samples.')

assert_no_warnings(precision_score,
np.array([[0, 0], [0, 0]]),
np.array([[1, 1], [1, 1]]),
average='micro')


def test_fscore_warnings():
with warnings.catch_warnings(record=True) as record:
warnings.simplefilter('always')

for score in [f1_score, partial(fbeta_score, beta=2)]:
score(np.array([[1, 1], [1, 1]]),
np.array([[0, 0], [0, 0]]),
average='micro')
assert_equal(str(record.pop().message),
'F-score is ill-defined and '
'being set to 0.0 due to no predicted samples.')
score(np.array([[0, 0], [0, 0]]),
np.array([[1, 1], [1, 1]]),
average='micro')
assert_equal(str(record.pop().message),
'F-score is ill-defined and '
'being set to 0.0 due to no true samples.')


def test__check_clf_targets():
"""Check that _check_clf_targets correctly merges target types, squeezes
output and fails if input lengths differ."""
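For completeness, the new warn_for argument can also be passed to precision_recall_fscore_support directly, although the docstring marks it as being for internal use; a sketch of that usage, with the behaviour inferred from the diff above:

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

y_true = np.array([[0, 0], [0, 0]])
y_pred = np.array([[1, 1], [1, 1]])

# warn_for=('recall',) means only the recall warning may fire; warnings
# about precision or F-score are suppressed even where those divisions
# are ill-defined.
p, r, f, _ = precision_recall_fscore_support(
    y_true, y_pred, average='micro', warn_for=('recall',))
print(p, r, f)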