add scikit-learn style balanced acc for comparison
rasbt committed Jan 9, 2021
1 parent e8e7c09 commit e180b3f
Showing 6 changed files with 47 additions and 13 deletions.
7 changes: 2 additions & 5 deletions docs/sources/CHANGELOG.md
@@ -15,18 +15,15 @@ The CHANGELOG for the current development version is available at

##### New Features

- -

- Adds a second "balanced accuracy" interpretation, `method='balanced'`, to `accuracy_score` for computing the scikit-learn-style balanced accuracy, in addition to the existing `'average'` option. ([#764](https://github.com/rasbt/mlxtend/pull/764))

##### Changes

- -

##### Bug Fixes

- Fixes a problem with binary vectors when `mlxtend.evaluate.accuracy_score` is used with the `'average'` setting. ([#763](https://github.com/rasbt/mlxtend/pull/763))


- -

### Version 0.18.0 (11/25/2020)

7 changes: 6 additions & 1 deletion docs/sources/user_guide/evaluate/accuracy_score.ipynb
@@ -197,7 +197,7 @@
"text": [
"Standard accuracy: 33.33%\n",
"Class 1 accuracy: 55.56%\n",
"Average per-class accuracy: 24.44%\n"
"Average per-class accuracy: 55.56%\n"
]
}
],
@@ -256,22 +256,27 @@
"\n",
" True class labels or target values.\n",
"\n",
"\n",
"- `y_predicted` : array-like, shape=[n_values]\n",
"\n",
" Predicted class labels or target values.\n",
"\n",
"\n",
"- `method` : str, 'standard' by default.\n",
"\n",
" The chosen method for accuracy computation.\n",
" If set to 'standard', computes overall accuracy.\n",
" If set to 'binary', computes accuracy for class pos_label.\n",
" If set to 'average', computes average per-class (balanced) accuracy.\n",
" If set to 'balanced', computes the scikit-learn-style balanced accuracy.\n",
"\n",
"\n",
"- `pos_label` : str or int, 1 by default.\n",
"\n",
" The class whose accuracy score is to be reported.\n",
" Used only when `method` is set to 'binary'\n",
"\n",
"\n",
"- `normalize` : bool, True by default.\n",
"\n",
" If True, returns fraction of correctly classified samples.\n",
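As a point of reference, a minimal usage sketch of the four `method` options documented above (assuming the public import path `mlxtend.evaluate.accuracy_score`; the example arrays are the ones used in the updated tests further down, not taken from this notebook):

import numpy as np
from mlxtend.evaluate import accuracy_score

y_targ = np.array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2])
y_pred = np.array([0, 1, 1, 0, 1, 1, 2, 2, 2, 2])

# overall fraction of correct predictions
print(accuracy_score(y_targ, y_pred, method='standard'))

# accuracy of the binarized "class 1 vs. rest" problem
print(accuracy_score(y_targ, y_pred, method='binary', pos_label=1))

# existing average per-class (one-vs-rest) accuracy, ~0.667 per test_average()
print(accuracy_score(y_targ, y_pred, method='average'))

# new in this commit: scikit-learn-style balanced accuracy, ~0.578 per test_balanced_multiclass()
print(accuracy_score(y_targ, y_pred, method='balanced'))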
14 changes: 12 additions & 2 deletions mlxtend/evaluate/accuracy.py
@@ -21,16 +21,21 @@ def accuracy_score(y_target, y_predicted, method="standard",
------------
y_target : array-like, shape=[n_values]
True class labels or target values.
y_predicted : array-like, shape=[n_values]
Predicted class labels or target values.
method : str, 'standard' by default.
The chosen method for accuracy computation.
If set to 'standard', computes overall accuracy.
If set to 'binary', computes accuracy for class pos_label.
If set to 'average', computes average per-class accuracy.
If set to 'balanced', computes the scikit-learn-style balanced accuracy.
pos_label : str or int, 1 by default.
The class whose accuracy score is to be reported.
Used only when `method` is set to 'binary'.
normalize : bool, True by default.
If True, returns fraction of correctly classified samples.
If False, returns number of correctly classified samples.
@@ -65,6 +70,11 @@
        return _compute_metric(target_temp, predicted_temp, normalize)

    elif method == "average":
        return sum([_compute_metric(np.where(target_temp != lab, 1, 0),
                                    np.where(predicted_temp != lab, 1, 0))
                    for lab in unique_labels]) / float(unique_labels.shape[0])

    elif method == "balanced":
        all_class_acc = []
        for c in np.unique(y_target):
            positive_labels = (y_target == c)
@@ -74,5 +84,5 @@
        return np.mean(all_class_acc)

    else:
        raise ValueError('`method` must be "standard", "average"'
                         'or "binary". Got "%s".' % method)
        raise ValueError('`method` must be "standard", "average",'
                         ' "balanced", or "binary". Got "%s".' % method)
5 changes: 5 additions & 0 deletions mlxtend/evaluate/scoring.py
@@ -38,6 +38,8 @@ def scoring(y_target, y_predicted, metric='error',
'accuracy': (TP + TN)/(FP + FN + TP + TN) = 1-ERR\n
'average per-class accuracy': Average per-class accuracy\n
'average per-class error': Average per-class error\n
'balanced accuracy': Scikit-learn-style balanced accuracy (average per-class recall)\n
'error': (FP + FN)/(FP + FN + TP + TN) = 1-ACC\n
'false_positive_rate': FP/N = FP/(FP + TN)\n
'true_positive_rate': TP/P = TP/(FN + TP)\n
@@ -73,6 +75,7 @@
'accuracy',
'average per-class accuracy',
'average per-class error',
'balanced accuracy',
'false_positive_rate',
'true_positive_rate',
'true_negative_rate',
@@ -114,6 +117,8 @@
                                  pred_tmp,
                                  func=_error,
                                  unique_labels=unique_labels)
    elif metric == 'balanced accuracy':
        res = accuracy_score(targ_tmp, pred_tmp, method='balanced')

    # binary classification metrics
    else:
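A short usage sketch of the new metric string in `scoring`, mirroring the test added below (assuming the public import path `mlxtend.evaluate.scoring`):

import numpy as np
from mlxtend.evaluate import scoring

y_targ = np.array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2])
y_pred = np.array([0, 1, 1, 0, 1, 1, 2, 2, 2, 2])

# new: scikit-learn-style balanced accuracy (~0.578)
print(scoring(y_target=y_targ, y_predicted=y_pred, metric='balanced accuracy'))

# pre-existing average per-class accuracy for comparison (~0.667)
print(scoring(y_target=y_targ, y_predicted=y_pred, metric='average per-class accuracy'))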
16 changes: 12 additions & 4 deletions mlxtend/evaluate/tests/test_accuracy.py
@@ -32,17 +32,25 @@ def test_standard():
    assert y_1 == 3


def test_average_multiclass():
def test_balanced_multiclass():
    y_targ = np.array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2])
    y_pred = np.array([0, 1, 1, 0, 1, 1, 2, 2, 2, 2])

    y = accuracy_score(y_targ, y_pred, method='average')
    y = accuracy_score(y_targ, y_pred, method='balanced')
    assert_almost_equal(y, 0.578, decimal=3)


def test_average_binary():
def test_balanced_binary():
    y_targ = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
    y_pred = np.array([0, 1, 1, 0, 1, 1, 1, 1, 1, 1])

    y = accuracy_score(y_targ, y_pred, method='average')
    y = accuracy_score(y_targ, y_pred, method='balanced')
    assert_almost_equal(y, 0.542, decimal=3)


def test_average():
    y_targ = np.array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2])
    y_pred = np.array([0, 1, 1, 0, 1, 1, 2, 2, 2, 2])

    y = accuracy_score(y_targ, y_pred, method='average')
    assert_almost_equal(y, float(2) / 3, decimal=4)
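As a sanity check on the 0.542 asserted in test_balanced_binary above, the binary case is just the mean of the two per-class recalls (a hand calculation, not part of the commit):

y_targ = [0, 0, 0, 1, 1, 1, 1, 1, 1, 0]
y_pred = [0, 1, 1, 0, 1, 1, 1, 1, 1, 1]

recalls = []
for c in (0, 1):
    idx = [i for i, t in enumerate(y_targ) if t == c]
    recalls.append(sum(y_pred[i] == c for i in idx) / len(idx))

# class 0: 1/4 of the true 0s recovered; class 1: 5/6 of the true 1s recovered
print(round(sum(recalls) / len(recalls), 3))  # 0.542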
11 changes: 10 additions & 1 deletion mlxtend/evaluate/tests/test_scoring.py
@@ -115,13 +115,22 @@ def test_matthews_corr_coef():
    assert round(res, 3) == 0.258, res


def test_balanced_accuracy():
    y_targ = np.array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2])
    y_pred = np.array([0, 1, 1, 0, 1, 1, 2, 2, 2, 2])
    res = scoring(y_target=y_targ,
                  y_predicted=y_pred,
                  metric='balanced accuracy')
    assert round(res, 3) == 0.578, res


def test_avg_perclass_accuracy():
    y_targ = np.array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2])
    y_pred = np.array([0, 1, 1, 0, 1, 1, 2, 2, 2, 2])
    res = scoring(y_target=y_targ,
                  y_predicted=y_pred,
                  metric='average per-class accuracy')
    assert round(res, 3) == 0.578, res
    assert round(res, 3) == 0.667, res


def test_avg_perclass_error():