scikit-learn · qinhanmin2014 · Aug 8, 2019 · Jul 5, 2019 · Jul 10, 2019 · Jul 10, 2019
diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst
@@ -1647,3 +1647,53 @@ make this task easier and faster (in no particular order).
     <https://git-scm.com/docs/git-grep#_examples>`_) is also extremely
     useful to see every occurrence of a pattern (e.g. a function call or a
     variable) in the code base.
+
+
+.. _plotting_api:
+
+Plotting API
+============
+
+Scikit-learn defines a simple API for creating visualizations for machine
+learning. The key features of this API are to run calculations once and to have
+the flexibility to adjust the visualizations after the fact. This logic is
+encapsulated into a visualizer object where the computed data is stored and
+the plotting is done in a `plot` method. The visualizer object's `__init__`
+method contains only the data needed to create the visualization. The `plot`
+method takes in parameters that only have to do with visualization, such as a
+matplotlib axes. The `plot` method will store the matplotlib artists as
+attributes allowing for style adjustments through the visualizer object. A
+`plot_*` helper function accepts parameters to do the computation and the
+parameters used for plotting. After the function creates the visualizer with
+the computed values, it calls the visualizer's plot method. Note that the
+`plot` method defines attributes related to matplotlib, such as the line
+artist. This allows for customizations after calling the `plot` method.
+
+For example, the `RocCurveVisualizer` defines the following methods and
+attributes:
+
+.. code-block:: python
+
+   class RocCurveVisualizer:
+       def __init__(self, fpr, tpr, auc_roc, estimator_name):
+           ...
+           self.fpr = ...
+           self.tpr = ...
+           self.auc_roc = ...
+           self.estimator_name = estimator_name
+
+       def plot(self, ax=None, name=None, **kwargs):
+           ...
+           self.line_ = ...
+           self.ax_ = ax
+           self.figure_ = ax.figure
+
+   def plot_roc_curve(estimator, X, y, pos_label=None, sample_weight=None,
+                      drop_intermediate=True, response_method="auto",
+                      name=None, ax=None, **kwargs):
+       # do computation
+       ...
+       viz = RocCurveVisualizer(fpr, tpr, auc_roc,
+                                estimator.__class__.__name__)
+       return viz.plot(ax=ax, name=name, **kwargs)
+
diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
@@ -1007,6 +1007,28 @@ See the :ref:`metrics` section of the user guide for further details.
    metrics.pairwise_distances_chunked
 
 
+Plotting tools
+--------------
+
+.. automodule:: sklearn.metrics.plot
+   :no-members:
+   :no-inherited-members:
+
+.. currentmodule:: sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   metrics.plot_roc_curve
+
+.. autosummary::
+   :toctree: generated/
+   :template: class_without_init.rst
+
+   metrics.plot.RocCurveVisualizer
+
+
 .. _mixture_ref:
 
 :mod:`sklearn.mixture`: Gaussian Mixture Models

diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py
@@ -36,7 +36,8 @@
 import matplotlib.pyplot as plt
 
 from sklearn import svm, datasets
-from sklearn.metrics import roc_curve, auc
+from sklearn.metrics import auc
+from sklearn.metrics import plot_roc_curve
 from sklearn.model_selection import StratifiedKFold
 
 # #############################################################################
@@ -65,40 +66,35 @@
 aucs = []
 mean_fpr = np.linspace(0, 1, 100)
 
-i = 0
-for train, test in cv.split(X, y):
-    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
-    # Compute ROC curve and area the curve
-    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
-    tprs.append(interp(mean_fpr, fpr, tpr))
-    tprs[-1][0] = 0.0
-    roc_auc = auc(fpr, tpr)
-    aucs.append(roc_auc)
-    plt.plot(fpr, tpr, lw=1, alpha=0.3,
-             label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))
-
-    i += 1
-plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
-         label='Chance', alpha=.8)
+fig, ax = plt.subplots()
+for i, (train, test) in enumerate(cv.split(X, y)):
+    classifier.fit(X[train], y[train])
+    viz = plot_roc_curve(classifier, X[test], y[test],
+                         name='ROC fold {}'.format(i),
+                         alpha=0.3, lw=1, ax=ax)
+    interp_tpr = interp(mean_fpr, viz.fpr, viz.tpr)
+    interp_tpr[0] = 0.0
+    tprs.append(interp_tpr)
+    aucs.append(viz.auc_roc)
+
+ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
+        label='Chance', alpha=.8)
 
 mean_tpr = np.mean(tprs, axis=0)
 mean_tpr[-1] = 1.0
 mean_auc = auc(mean_fpr, mean_tpr)
 std_auc = np.std(aucs)
-plt.plot(mean_fpr, mean_tpr, color='b',
-         label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
-         lw=2, alpha=.8)
+ax.plot(mean_fpr, mean_tpr, color='b',
+        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
+        lw=2, alpha=.8)
 
 std_tpr = np.std(tprs, axis=0)
 tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
 tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
-plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
-                 label=r'$\pm$ 1 std. dev.')
-
-plt.xlim([-0.05, 1.05])
-plt.ylim([-0.05, 1.05])
-plt.xlabel('False Positive Rate')
-plt.ylabel('True Positive Rate')
-plt.title('Receiver operating characteristic example')
-plt.legend(loc="lower right")
+ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
+                label=r'$\pm$ 1 std. dev.')
+
+ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
+       title="Receiver operating characteristic example")
+ax.legend(loc="lower right")
 plt.show()
diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
@@ -74,6 +74,9 @@
 from .scorer import SCORERS
 from .scorer import get_scorer
 
+from .plot import plot_roc_curve
+
+
 __all__ = [
     'accuracy_score',
     'adjusted_mutual_info_score',
@@ -125,6 +128,7 @@
     'pairwise_distances_argmin_min',
     'pairwise_distances_chunked',
     'pairwise_kernels',
+    'plot_roc_curve',
     'precision_recall_curve',
     'precision_recall_fscore_support',
     'precision_score',

diff --git a/sklearn/metrics/plot/__init__.py b/sklearn/metrics/plot/__init__.py
@@ -0,0 +1,6 @@
+from .roc_curve import plot_roc_curve, RocCurveVisualizer
+
+__all__ = [
+    'plot_roc_curve',
+    'RocCurveVisualizer'
+]
diff --git a/sklearn/metrics/plot/roc_curve.py b/sklearn/metrics/plot/roc_curve.py
@@ -0,0 +1,144 @@
+from .. import auc
+from .. import roc_curve
+
+from ...utils import check_matplotlib_support
+
+
+class RocCurveVisualizer:
+    """ROC Curve visualization.
+
+    Parameters
+    ----------
+    fpr : ndarray
+        False positive rate.
+    tpr : ndarray
+        True positive rate.
+    auc_roc : float
+        Area under ROC curve.
+    estimator_name : str
+        Name of estimator.
+
+    Attributes
+    ----------
+    line_ : matplotlib Artist
+        ROC Curve.
+    ax_ : matplotlib Axes
+        Axes with ROC Curve.
+    figure_ : matplotlib Figure
+        Figure containing the curve.
+    """
+
+    def __init__(self, fpr, tpr, auc_roc, estimator_name):
+        self.fpr = fpr
+        self.tpr = tpr
+        self.auc_roc = auc_roc
+        self.estimator_name = estimator_name
+
+    def plot(self, ax=None, name=None, **kwargs):
+        """Plot visualization.
+
+        Extra keyword arguments will be passed to matplotlib's ``plot``.
+
+        Parameters
+        ----------
+        ax : Matplotlib Axes or None, default=None
+            Axes object to plot on; if `None`, a new figure and axes are made.
+        name : str or None, default=None
+            Name of ROC Curve for labeling. If `None`, use the name of the
+            estimator.
+
+        Returns
+        -------
+        self : RocCurveVisualizer
+            This object, with `line_`, `ax_` and `figure_` set.
+        """
+        # Report this method as the caller: plot() can be invoked directly.
+        check_matplotlib_support('RocCurveVisualizer.plot')
+        import matplotlib.pyplot as plt
+        if ax is None:
+            _, ax = plt.subplots()
+        name = self.estimator_name if name is None else name
+        if 'label' not in kwargs:
+            kwargs['label'] = "{} (AUC = {:0.2f})".format(name, self.auc_roc)
+        self.line_ = ax.plot(self.fpr, self.tpr, **kwargs)[0]
+        ax.set_xlabel("False Positive Rate")
+        ax.set_ylabel("True Positive Rate")
+        ax.legend()
+        self.ax_ = ax
+        self.figure_ = ax.figure
+        return self
+
+
+def plot_roc_curve(estimator, X, y, pos_label=None, sample_weight=None,
+                   drop_intermediate=True, response_method="auto",
+                   name=None, ax=None, **kwargs):
+    """Plot Receiver operating characteristic (ROC) curve.
+
+    Extra keyword arguments will be passed to matplotlib's `plot`.
+
+    Parameters
+    ----------
+    estimator : estimator instance
+        Trained classifier.
+    X : {array-like, sparse matrix}, shape (n_samples, n_features)
+        Input values.
+    y : array-like, shape (n_samples, )
+        Target values.
+    pos_label : int or str, default=None
+        The label of the positive class.
+        When `pos_label=None`, if y is in {-1, 1} or {0, 1},
+        `pos_label` is set to 1, otherwise an error will be raised.
+    sample_weight : array-like, shape (n_samples, ) or None, default=None
+        Sample weights, forwarded to `roc_curve`.
+    drop_intermediate : boolean, default=True
+        Whether to drop some suboptimal thresholds which would not appear
+        on a plotted ROC curve. This is useful in order to create lighter
+        ROC curves.
+    response_method : 'predict_proba', 'decision_function', or 'auto' \
+    default='auto'
+        Specifies whether to use `predict_proba` or `decision_function` as the
+        target response. If set to 'auto', `predict_proba` is tried first
+        and if it does not exist `decision_function` is tried next.
+    name : str or None, default=None
+        Name of ROC Curve for labeling. If `None`, use the name of the
+        estimator.
+    ax : matplotlib axes, default=None
+        Axes object to plot on.
+
+    Returns
+    -------
+    viz : :class:`sklearn.metrics.plot.RocCurveVisualizer`
+        Object that stores computed values.
+
+    Raises
+    ------
+    ValueError
+        If `estimator` lacks the requested response method, or if its 2-D
+        prediction does not have exactly two columns.
+    """
+    # Check matplotlib support up front, before any prediction is computed.
+    check_matplotlib_support('plot_roc_curve')
+    if response_method != "auto":
+        prediction_method = getattr(estimator, response_method, None)
+        if prediction_method is None:
+            raise ValueError(
+                "response method {} is not defined".format(response_method))
+    else:
+        predict_proba = getattr(estimator, 'predict_proba', None)
+        decision_function = getattr(estimator, 'decision_function', None)
+        prediction_method = predict_proba or decision_function
+        if prediction_method is None:
+            raise ValueError('response methods not defined')
+
+    y_pred = prediction_method(X)
+    if y_pred.ndim != 1:
+        # 2-D output must have exactly two columns; column 1 is the score.
+        if y_pred.shape[1] != 2:
+            raise ValueError("Estimator must be a binary classifier")
+        y_pred = y_pred[:, 1]
+    fpr, tpr, _ = roc_curve(y, y_pred, pos_label=pos_label,
+                            sample_weight=sample_weight,
+                            drop_intermediate=drop_intermediate)
+    auc_roc = auc(fpr, tpr)
+    viz = RocCurveVisualizer(fpr, tpr, auc_roc, estimator.__class__.__name__)
+    return viz.plot(ax=ax, name=name, **kwargs)
diff --git a/sklearn/metrics/plot/tests/__init__.py b/sklearn/metrics/plot/tests/__init__.py