scikit-learn · jnothman · Mar 1, 2020 · Jan 17, 2020 · Jan 17, 2020 · Jan 17, 2020
diff --git a/sklearn/metrics/_plot/confusion_matrix.py b/sklearn/metrics/_plot/confusion_matrix.py
@@ -61,7 +61,7 @@ def plot(self, include_values=True, cmap='viridis',
 
         values_format : str, default=None
             Format specification for values in confusion matrix. If `None`,
-            the format specification is '.2g'.
+            the format specification is 'd' or '.2g', whichever is shorter.
 
         ax : matplotlib axes, default=None
             Axes object to plot on. If `None`, a new figure and axes is
@@ -83,22 +83,30 @@ def plot(self, include_values=True, cmap='viridis',
         n_classes = cm.shape[0]
         self.im_ = ax.imshow(cm, interpolation='nearest', cmap=cmap)
         self.text_ = None
-
         cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(256)
 
         if include_values:
             self.text_ = np.empty_like(cm, dtype=object)
-            if values_format is None:
-                values_format = '.2g'
 
             # print text with appropriate color depending on background
             thresh = (cm.max() + cm.min()) / 2.0
+
             for i, j in product(range(n_classes), range(n_classes)):
                 color = cmap_max if cm[i, j] < thresh else cmap_min
-                self.text_[i, j] = ax.text(j, i,
-                                           format(cm[i, j], values_format),
-                                           ha="center", va="center",
-                                           color=color)
+
+                if values_format is None:
+                    text_cm = format(cm[i, j], '.2g')
+                    if cm.dtype.kind != 'f':
+                        text_d = format(cm[i, j], 'd')
+                        if len(text_d) < len(text_cm):
+                            text_cm = text_d
+                else:
+                    text_cm = format(cm[i, j], values_format)
+
+                self.text_[i, j] = ax.text(
+                    j, i, text_cm,
+                    ha="center", va="center",
+                    color=color)
 
         fig.colorbar(self.im_, ax=ax)
         ax.set(xticks=np.arange(n_classes),
@@ -164,7 +172,7 @@ def plot_confusion_matrix(estimator, X, y_true, labels=None,
 
     values_format : str, default=None
         Format specification for values in confusion matrix. If `None`,
-        the format specification is '.2g'.
+        the format specification is 'd' or '.2g', whichever is shorter.
 
     cmap : str or matplotlib Colormap, default='viridis'
         Colormap recognized by matplotlib.

diff --git a/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py b/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py
@@ -21,6 +21,7 @@
     "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
     "matplotlib.*")
 
+
 @pytest.fixture(scope="module")
 def n_classes():
     return 5
@@ -226,8 +227,6 @@ def test_confusion_matrix_contrast(pyplot):
     assert_allclose(disp.text_[1, 1].get_color(), min_color)
 
 
-
-
 @pytest.mark.parametrize(
     "clf", [LogisticRegression(),
             make_pipeline(StandardScaler(), LogisticRegression()),
@@ -264,3 +263,20 @@ def test_confusion_matrix_text_format(pyplot, data, y_pred, n_classes,
     text_text = np.array([
         t.get_text() for t in disp.text_.ravel()])
     assert_array_equal(expected_text, text_text)
+
+
+def test_confusion_matrix_standard_format(pyplot):
+    cm = np.array([[10000000, 0], [123456, 12345678]])
+    plotted_text = ConfusionMatrixDisplay(cm, [False, True]).plot().text_
+    # Values should be shown as whole numbers ('d' format),
+    # except the first number, which is shown as 1e+07 ('d' would be longer),
+    # and the last number, which is shown as 1.2e+07 ('d' would be longer).
+    test = [t.get_text() for t in plotted_text.ravel()]
+    assert test == ['1e+07', '0', '123456', '1.2e+07']
+
+    cm = np.array([[0.1, 10], [100, 0.525]])
+    plotted_text = ConfusionMatrixDisplay(cm, [False, True]).plot().text_
+    # Values should now be formatted as '.2g', since the matrix holds floats.
+    # Values have at most two significant digits (e.g. 100 becomes 1e+02).
+    test = [t.get_text() for t in plotted_text.ravel()]
+    assert test == ['0.1', '10', '1e+02', '0.53']