Added metrics rank and nrank to benchmark pipeline
PauBadiaM committed Nov 23, 2023
1 parent 9da10c9 commit 17f6f9b
Showing 7 changed files with 103 additions and 56 deletions.
2 changes: 1 addition & 1 deletion decoupler/__init__.py
@@ -29,4 +29,4 @@
from .plotting import plot_associations, plot_network # noqa: F401
from .benchmark import benchmark, format_benchmark_inputs, get_performances # noqa: F401
from .utils_benchmark import get_toy_benchmark_data, show_metrics # noqa: F401
from .metrics import metric_auroc, metric_auprc, metric_mcauroc, metric_mcauprc # noqa: F401
from .metrics import metric_auroc, metric_auprc, metric_mcauroc, metric_mcauprc, metric_rank, metric_nrank # noqa: F401
5 changes: 2 additions & 3 deletions decoupler/benchmark.py
@@ -139,7 +139,7 @@ def _benchmark(mat, obs, net, perturb, sign, metrics=['auroc', 'auprc'], groupby
return df


def benchmark(mat, obs, net, perturb, sign, metrics=['auroc', 'auprc', 'mcauroc', 'mcauprc'], groupby=None,
def benchmark(mat, obs, net, perturb, sign, metrics=['auroc', 'auprc', 'mcauroc', 'mcauprc', 'rank', 'nrank'], groupby=None,
by='experiment', f_expr=True, f_srcs=False, min_exp=5, pi0=0.5, n_iter=1000, seed=42,
verbose=True, use_raw=True, decouple_kws={}):
"""
@@ -160,8 +160,7 @@ def benchmark(mat, obs, net, perturb, sign, metrics=['auroc', 'auprc', 'mcauroc'
Column name in obs with sign of the perturbation. Can be set to 1 or -1 if all experiments are overexpression or
knockouts, respectively.
metrics : list, str
Performance metric(s) to compute. See the description of get_performance for more details. Defaults
to ['roc', 'calprc'].
Performance metric(s) to compute. See the description of get_performance for more details.
groupby : list, str, None
Performance metrics(s) can be computed per groups if enough experiments are available.
by : str
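For orientation, here is a minimal usage sketch of the updated `benchmark` call with the two new metrics included. It relies on the toy data generator already exported by the package; the unpacking order (mat, net, obs) and the 'perturb'/'sign' column names are assumptions about the toy generator, not something stated in this diff.

```python
import decoupler as dc

# Toy mat/net/obs bundled with decoupler for testing the benchmark pipeline
# (assumed return order: mat, net, obs)
mat, net, obs = dc.get_toy_benchmark_data(n_samples=24, seed=42)

# Run the benchmark with the two new rank-based metrics alongside the existing ones.
# 'perturb' and 'sign' are assumed column names in the toy obs DataFrame.
df = dc.benchmark(
    mat, obs, net,
    perturb='perturb', sign='sign',
    metrics=['auroc', 'auprc', 'mcauroc', 'mcauprc', 'rank', 'nrank'],
    verbose=False,
)
print(df.head())
```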
25 changes: 25 additions & 0 deletions decoupler/metrics.py
@@ -5,6 +5,7 @@

import numpy as np
import numba as nb
from scipy.stats import rankdata


@nb.njit(nb.types.UniTuple(nb.f4[:], 3)(nb.f4[:], nb.f4[:]), cache=True)
@@ -129,6 +130,30 @@ def check_m_inputs(y_true, y_score):
assert y_true.size == y_score.size, 'y_true and y_score must have the same length.'


def metric_rank(y_true, y_score):
"""
Rank (from 1 to N)
"""

check_m_inputs(y_true, y_score)

return rankdata(-y_score, axis=1, nan_policy='omit')[y_true.astype(bool)]


def metric_nrank(y_true, y_score):
"""
Min-max normalized rank (from 0 to 1)
"""

check_m_inputs(y_true, y_score)

rnks = rankdata(-y_score, axis=1, nan_policy='omit')
mins = np.min(rnks, axis=1)
maxs = np.max(rnks, axis=1)
nrnks = (rnks - mins.reshape(-1, 1)) / (maxs - mins).reshape(-1, 1)
return nrnks[y_true.astype(bool)]


def metric_auroc(y_true, y_score):
"""
Area Under the Receiver Operating characteristic Curve (AUROC)
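To make the two new functions concrete, here is a small sketch using the same toy arrays as the tests added below: `rankdata(-y_score)` ranks sources from best (1) to worst (N) within each experiment, and the boolean ground-truth mask then selects the rank of every perturbed source; `metric_nrank` additionally min-max normalizes those ranks per experiment.

```python
import numpy as np
from decoupler.metrics import metric_rank, metric_nrank

# Three experiments (rows) scored across three sources (columns)
y_score = np.array([[7., 6., 5.],
                    [5., 4., 3.],
                    [2., 1., 0.]])
# Binary ground truth marking the perturbed source(s) of each experiment
y_true = np.array([[1., 0., 0.],
                   [0., 1., 1.],
                   [0., 0., 1.]])

print(metric_rank(y_true, y_score))   # [1. 2. 3. 3.] -> raw rank of each true source
print(metric_nrank(y_true, y_score))  # [0. 0.5 1. 1.] -> min-max normalized per experiment
```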
4 changes: 2 additions & 2 deletions decoupler/plotting.py
@@ -811,7 +811,7 @@ def plot_metrics_boxplot(df, metric, groupby=None, figsize=(5, 5), dpi=100, ax=N
df : DataFrame
Performance metrics per method, obtained by running run_benchmark.
metric : str
Name of metric to plot, must be either "mcauroc" or "mcauprc".
Name of metric to plot, must be either "mcauroc", "mcauprc", "rank" or "nrank".
groupby : str
Metrics can be grouped by an extra categorical column.
figsize : tuple
@@ -843,7 +843,7 @@ def plot_metrics_boxplot(df, metric, groupby=None, figsize=(5, 5), dpi=100, ax=N
sns = check_if_seaborn()
plt = check_if_matplotlib()

if metric not in ['mcauroc', 'mcauprc']:
if metric not in ['mcauroc', 'mcauprc', 'rank', 'nrank']:
raise ValueError('Argument metric must be either "mcauroc", "mcauprc", "rank" or "nrank".')

# Subset metric
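A brief, hedged sketch of pointing the existing boxplot helper at the new metrics; it reuses the same toy-data assumptions as the benchmark sketch above ('perturb'/'sign' column names, mat/net/obs return order).

```python
import decoupler as dc

# Build a small benchmark result containing the new metrics (toy data, defaults otherwise)
mat, net, obs = dc.get_toy_benchmark_data()
df = dc.benchmark(mat, obs, net, perturb='perturb', sign='sign',
                  metrics=['mcauroc', 'rank', 'nrank'], verbose=False)

# Boxplots of the per-experiment rank distributions per method
dc.plot_metrics_boxplot(df, metric='rank')
dc.plot_metrics_boxplot(df, metric='nrank', figsize=(5, 5), dpi=100)
```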
19 changes: 19 additions & 0 deletions decoupler/tests/test_metrics.py
@@ -3,6 +3,7 @@
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.metrics._ranking import _binary_clf_curve
from ..metrics import check_m_inputs, binary_clf_curve, mc_perm, metric_auroc, metric_auprc, metric_mcauroc, metric_mcauprc
from ..metrics import metric_rank, metric_nrank


def test_check_m_inputs():
@@ -92,3 +93,21 @@ def test_metric_mcauprc():
a = metric_auprc(y_true=grt, y_score=act, pi0=0.5)
b = metric_mcauprc(y_true=grt, y_score=act)
assert np.isclose(a, np.mean(b), rtol=1e-01)


def test_metric_rank():
act = np.array([[7, 6, 5], [5, 4, 3], [2, 1, 0]])
grt = np.array([[1, 0, 0], [0, 1, 1], [0, 0, 1]])

a = metric_rank(y_true=grt, y_score=act)
assert a.size == 4
assert np.all(a == np.array([1., 2., 3., 3.]))


def test_metric_nrank():
act = np.array([[7, 6, 5], [5, 4, 3], [2, 1, 0]])
grt = np.array([[1, 0, 0], [0, 1, 1], [0, 0, 1]])

a = metric_nrank(y_true=grt, y_score=act)
assert a.size == 4
assert np.all(a == np.array([0., 0.5, 1., 1.]))
20 changes: 13 additions & 7 deletions decoupler/tests/test_utilsbenchmark.py
@@ -34,27 +34,33 @@ def test_show_metrics():
def test_validate_metrics():
metrics = 'auroc'
validate_metrics(metrics)
metrics = ['auroc', 'auprc', 'mcauroc', 'mcauprc']
metrics = ['auroc', 'auprc', 'mcauroc', 'mcauprc', 'rank', 'nrank']
validate_metrics(metrics)
metrics = ['auroc', 'asd', 'mcauroc', 'mcauprc']
with pytest.raises(ValueError):
validate_metrics(metrics)


def test_compute_metric():
act = [6., 5., 4., 3., 2., 1., 0.]
grt = [1., 0., 1., 1., 0., 0., 0.]
act = np.array([[6., 5., 4., 0.], [3., 2., 1., 0.]])
grt = np.array([[0., 1., 0., 0.], [1., 0., 0., 0.]])
metric = 'auroc'
res = compute_metric(act, grt, metric)
res, ci = compute_metric(act, grt, metric)
assert type(res) is np.ndarray
metric = 'auprc'
res = compute_metric(act, grt, metric)
res, ci = compute_metric(act, grt, metric)
assert type(res) is np.ndarray
metric = 'mcauroc'
res = compute_metric(act, grt, metric)
res, ci = compute_metric(act, grt, metric)
assert type(res) is np.ndarray
metric = 'mcauprc'
res = compute_metric(act, grt, metric)
res, ci = compute_metric(act, grt, metric)
assert type(res) is np.ndarray
metric = 'rank'
res, ci = compute_metric(act, grt, metric)
assert type(res) is np.ndarray
metric = 'nrank'
res, ci = compute_metric(act, grt, metric)
assert type(res) is np.ndarray


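As the updated tests above indicate, `compute_metric` now returns a `(scores, ci)` tuple: `ci` is the class imbalance (fraction of positives) of the flattened ground truth for the AUC-style metrics, and `np.nan` for the new rank-based metrics. A minimal sketch calling the internal helper directly, reusing the arrays from the test:

```python
import numpy as np
from decoupler.utils_benchmark import compute_metric

act = np.array([[6., 5., 4., 0.], [3., 2., 1., 0.]])
grt = np.array([[0., 1., 0., 0.], [1., 0., 0., 0.]])

scores, ci = compute_metric(act, grt, metric='auroc')
print(scores, ci)  # ci = 2 / 8 = 0.25, the fraction of positives after flattening

scores, ci = compute_metric(act, grt, metric='rank')
print(scores, ci)  # scores = [2. 1.] (rank of the true source per experiment), ci is nan
```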
84 changes: 41 additions & 43 deletions decoupler/utils_benchmark.py
@@ -10,7 +10,7 @@

from .utils import get_toy_data
from .pre import match
from .metrics import metric_auroc, metric_auprc, metric_mcauroc, metric_mcauprc
from .metrics import metric_auroc, metric_auprc, metric_mcauroc, metric_mcauprc, metric_rank, metric_nrank


def get_toy_benchmark_data(n_samples=24, seed=42, shuffle_perc=0.25):
@@ -101,77 +101,75 @@ def validate_metrics(metrics):

def compute_metric(act, grt, metric, pi0=0.5, n_iter=1000, seed=42):

if metric == 'auroc':
scores = metric_auroc(grt, act)
elif metric == 'auprc':
scores = metric_auprc(grt, act, pi0=pi0)
elif metric == 'mcauroc':
scores = metric_mcauroc(grt, act, n_iter=n_iter, seed=seed)
elif metric == 'mcauprc':
scores = metric_mcauprc(grt, act, n_iter=n_iter, seed=seed)
if metric == 'rank':
scores = metric_rank(grt, act)
ci = np.nan
elif metric == 'nrank':
scores = metric_nrank(grt, act)
ci = np.nan
else:
# Flatten across obs
act = act.ravel()
grt = grt.ravel()
# Identify activity scores with NAs
nan_mask = np.isnan(act)
# Remove NAs from activity matrix and ground truth
act = act[~nan_mask]
grt = grt[~nan_mask]
# Compute Class Imbalance
ci = np.sum(grt) / len(grt)
if metric == 'auroc':
scores = metric_auroc(grt, act)
elif metric == 'auprc':
scores = metric_auprc(grt, act, pi0=pi0)
elif metric == 'mcauroc':
scores = metric_mcauroc(grt, act, n_iter=n_iter, seed=seed)
elif metric == 'mcauprc':
scores = metric_mcauprc(grt, act, n_iter=n_iter, seed=seed)

# Output must be list
if type(scores) is not np.ndarray:
scores = np.array([scores])

return scores
return scores, ci


def append_by_experiment(df, grpby_i, grp, act, grt, srcs, mthds, metrics, min_exp=5, pi0=0.5,
n_iter=1000, seed=42):

# Flatten act by method
act, grt = act.reshape(-1, act.shape[-1]).T, grt.flatten()

# Compute per method and metric
for m in range(len(mthds)):
mth = mthds[m]
act_i = act[:, :, m]
# Compute metrics
for metric in metrics:
# identify activity scores with NAs in each method
act_i = act[m]
nan_mask = np.isnan(act_i)
# Remove NAs from activity matrix and ground truth
act_i = act_i[~nan_mask]
grt_i = grt[~nan_mask]
# Compute Class Imbalance
ci = np.sum(grt_i) / len(grt_i)
# Compute metrics
scores = compute_metric(act_i, grt_i, metric, pi0=pi0, n_iter=n_iter, seed=seed)
scores, ci = compute_metric(act_i, grt, metric, pi0=pi0, n_iter=n_iter, seed=seed)
for score in scores:
row = [grpby_i, grp, None, mth, metric, score, ci]
df.append(row)


def append_by_source(df, grpby_i, grp, act, grt, srcs, mthds, metrics, min_exp=5, pi0=0.5,
n_iter=1000, seed=42):

for m in range(len(mthds)):
# Extract per method
mth = mthds[m]
act_i = act[:, :, m]
nan_mask = np.isnan(act_i)

grt_i = grt.copy()
grt_i[nan_mask] = np.nan

# Remove sources with less than min_exp
src_msk = np.sum(grt_i > 0., axis=0) >= min_exp
act_i, grt_i = act[:, src_msk, :], grt_i[:, src_msk]
src_msk = np.sum(grt > 0., axis=0) >= min_exp
act_i, grt_i = act[:, src_msk, :], grt[:, src_msk]
srcs_method = srcs[src_msk]

# Compute per source, method and metric
for s in range(len(srcs_method)):
src = srcs_method[s]
tmp_grt = grt_i[:, s]
nan_mask = np.isnan(tmp_grt)

grt_source = tmp_grt[~nan_mask]
act_source = act_i[:, s, m][~nan_mask]

# Compute Class Imbalance
ci = np.sum(grt_source) / len(grt_source)
if ci != 0. and ci != 1.:
grt_source = grt_i[:, s]
act_source = act_i[:, s, m]
# Check that grt is not all the same
unq_grt = np.unique(grt_source[~np.isnan(act_source)])
# Convert from vector to arr
grt_source, act_source = grt_source[np.newaxis], act_source[np.newaxis]
if unq_grt.size > 1:
for metric in metrics:
scores = compute_metric(act_source, grt_source, metric, pi0=pi0, n_iter=n_iter, seed=seed)
scores, ci = compute_metric(act_source, grt_source, metric, pi0=pi0, n_iter=n_iter, seed=seed)
for score in scores:
row = [grpby_i, grp, src, mth, metric, score, ci]
df.append(row)
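In `append_by_source`, the per-source filter now operates on the raw ground truth: only sources perturbed in at least `min_exp` experiments are scored, and a source is skipped when its ground truth is constant over the experiments where the method produced a score. Below is a small sketch of just the filtering step, with a made-up ground-truth matrix for illustration.

```python
import numpy as np

# Hypothetical ground truth: 5 experiments (rows) x 2 sources (columns)
grt = np.array([[1., 0.],
                [1., 0.],
                [1., 1.],
                [0., 1.],
                [1., 0.]])
min_exp = 3

# Keep only sources perturbed in at least min_exp experiments
src_msk = np.sum(grt > 0., axis=0) >= min_exp
print(src_msk)          # [ True False] -> only the first source reaches min_exp = 3
print(grt[:, src_msk])  # ground truth restricted to the sources that will be evaluated
```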
