In [1]:
# Mount Google Drive at /content/drive (via the google.colab helper module) so
# the project folder used by the following cells is reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Project folder on the mounted Drive.
# NOTE(review): not referenced again in the cells shown here; the %cd cell
# repeats the same literal path instead of using this variable.
fpath = '/content/drive/MyDrive/Kd_Meshari/'

In [4]:
# Make the project folder the working directory so the relative paths used
# later ("Davis_results.xlsx", "figures/...") resolve inside it.
%cd '/content/drive/MyDrive/Kd_Meshari/'

/content/drive/MyDrive/Kd_Meshari


In [5]:
# Sanity check: print the current working directory.
!pwd

/content/drive/MyDrive/Kd_Meshari


In [9]:
# -*- coding: utf-8 -*-
"""
Grouped metrics plot from Excel (no seaborn).

Excel columns (case-insensitive):
  Method, MSE, CI, Rm2 (or RM2 / R_m2), AUPR
Optional error columns:
  MSE_err / _se / _std / _stderr (same for CI, Rm2, AUPR)

Output: figures/benchmark_from_Davis.png (set by `out_path` in USER CONFIG)
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe

# ========= USER CONFIG =========
# Input workbook; a relative path is resolved against the current working directory.
excel_path = "Davis_results.xlsx"   # <-- change to your file path
sheet_name = 0                # sheet index or name (passed to pd.read_excel as sheet_name)
out_path   = "figures/benchmark_from_Davis.png"  # missing parent folder is created on save

# Labeling mode (read by plot_grouped):
#   "stagger"   (default) value above every bar, vertically staggered per method
#   "top2_only" value only on the best and second-best method of each metric
#   "inside"    value drawn inside each bar, just below its top
ANNOTATE_MODE = "stagger"
FONT_SIZE = 9  # font size of the value labels

# y-axis options
UNIFY_YLIM_0_TO_1 = True  # True: fix the y-axis to [0, 1]; set False to auto-scale to data range

# Ranking direction per display label (True = smaller value is better).
# Lower-is-better only for MSE
lower_is_better = {"MSE": True, "CI": False, "RM2": False, "AUPR": False}

# ========= LOAD DATA =========
def load_from_excel(path, sheet=0):
    """Load a method-by-metric score table (plus optional error bars) from Excel.

    Column headers are stripped of surrounding whitespace and matched
    case-insensitively.

    Required columns: ``Method``, ``MSE``, ``CI``, ``AUPR`` and one of
    ``Rm2`` / ``R_m2``.
    Optional error columns: ``<metric>_err``, ``<metric>_se``, ``<metric>_std``
    or ``<metric>_stderr`` (first match in that order). For Rm2 both the
    ``rm2_*`` and ``r_m2_*`` spellings are tried.

    Parameters
    ----------
    path : str or path-like
        Workbook location, forwarded to ``pd.read_excel``.
    sheet : int or str, default 0
        Sheet index or name, forwarded as ``sheet_name``.

    Returns
    -------
    methods : list of str
        Method names, one per kept row.
    labels : list of str
        Fixed display labels ``["MSE", "CI", "RM2", "AUPR"]``.
    scores : numpy.ndarray
        Float array of shape ``(n_methods, 4)``; cells that cannot be parsed
        as numbers are NaN. Rows where all four metrics are missing are dropped.
    errs : numpy.ndarray
        Float array with the same shape as ``scores``; 0.0 wherever no error
        column exists or the error cell is missing / non-numeric.

    Raises
    ------
    ValueError
        If a required column is absent.
    """
    df = pd.read_excel(path, sheet_name=sheet)
    df.columns = [str(c).strip() for c in df.columns]
    # Lower-cased header -> header exactly as spelled in the file.
    colmap = {c.lower(): c for c in df.columns}

    def require(name):
        key = name.lower()
        if key not in colmap:
            raise ValueError(f"Missing required column '{name}'. Found: {list(df.columns)}")
        return colmap[key]

    method_col = require("Method")
    # Accept Rm2 or RM2 or R_m2
    rm2_col = colmap.get("rm2") or colmap.get("r_m2")
    if rm2_col is None:
        rm2_col = require("Rm2")  # always raises here, with the standard message

    metric_cols = [require("MSE"), require("CI"), rm2_col, require("AUPR")]
    labels = ["MSE", "CI", "RM2", "AUPR"]  # display labels

    # Optional errors
    def find_err_col(*bases):
        """Return the first header matching ``<base><suffix>`` (case-insensitive), or None."""
        for base in bases:
            for suffix in ("_err", "_se", "_std", "_stderr"):
                wanted = f"{base}{suffix}"
                # Prefer an exact lower-case header, then any casing.
                if wanted in df.columns:
                    return wanted
                for c in df.columns:
                    if c.lower() == wanted:
                        return c
        return None

    # Rm2 may be spelled "R_m2" in the file, so its error column may be too.
    err_cols = [
        find_err_col("mse"),
        find_err_col("ci"),
        find_err_col("rm2", "r_m2"),
        find_err_col("aupr"),
    ]
    present_err_cols = [c for c in err_cols if c is not None]

    df = df[[method_col] + metric_cols + present_err_cols].copy()

    # Coerce to numeric (unparsable cells become NaN)
    for c in metric_cols + present_err_cols:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # Drop rows with all metrics missing
    df = df.dropna(subset=metric_cols, how="all")

    methods = df[method_col].astype(str).tolist()
    scores = df[metric_cols].to_numpy(dtype=float)

    # Error matrix: zeros by default; a missing error value means "no error bar",
    # so it is stored as 0.0 rather than NaN (NaN would poison label positions).
    errs = np.zeros_like(scores, dtype=float)
    for j, ec in enumerate(err_cols):
        if ec is not None:
            errs[:, j] = df[ec].fillna(0.0).to_numpy(dtype=float)

    return methods, labels, scores, errs

# Read the workbook once; the four unpacked values are the inputs of plot_grouped.
methods, metrics_labels, scores, errs = load_from_excel(excel_path, sheet_name)

# ========= PLOTTING =========
def _rank_top_two(scores, metrics_labels):
    """Return (best, second_best) row indices for every metric column.

    Direction comes from the module-level ``lower_is_better`` mapping; labels
    missing from it are treated as higher-is-better. NaN scores rank last.
    """
    best, second = [], []
    for j, label in enumerate(metrics_labels):
        col = scores[:, j]
        key = col if lower_is_better.get(label, False) else -col
        order = np.argsort(key, kind="stable")  # stable: ties keep file order
        best.append(order[0])
        second.append(order[1] if len(order) > 1 else order[0])
    return best, second


def plot_grouped(methods, metrics_labels, scores, errs, out_path):
    """Draw a grouped bar chart (one group per metric, one bar per method) and save it.

    Value labels follow the module-level ``ANNOTATE_MODE`` / ``FONT_SIZE``;
    the best method per metric is labelled in red and the runner-up in purple
    (in "inside" mode all labels are white on the bar). The y-axis is fixed to
    [0, 1] when ``UNIFY_YLIM_0_TO_1`` is true, otherwise scaled to the data.

    Parameters
    ----------
    methods : sequence of str
        Legend entry for each row of ``scores``.
    metrics_labels : sequence of str
        Tick label (and ranking key) for each column of ``scores``.
    scores : array-like, shape (n_methods, n_metrics)
        Bar heights; NaN entries are drawn without a value label.
    errs : array-like of the same shape, or None
        Error-bar half-heights; None or non-finite entries mean no error bar.
    out_path : str
        Image path; its parent directory is created if needed.

    Raises
    ------
    ValueError
        If ``scores`` is not a non-empty 2-D matrix.
    """
    scores = np.asarray(scores, dtype=float)
    if scores.ndim != 2 or scores.shape[0] == 0:
        raise ValueError("plot_grouped needs a non-empty (n_methods, n_metrics) score matrix")
    n_methods, n_metrics = scores.shape

    if errs is None:
        errs = np.zeros_like(scores)
    else:
        errs = np.asarray(errs, dtype=float)
        # A non-finite error cannot be drawn and would make label positions NaN.
        errs = np.where(np.isfinite(errs), errs, 0.0)

    x = np.arange(n_metrics)

    # Figure width scales with number of methods. dpi is passed here because
    # changing rcParams after creation would not affect this figure and would
    # leak into every later figure of the session.
    fig_w = max(12, 2.8 + 1.2 * n_methods)
    fig, ax = plt.subplots(figsize=(fig_w, 6), dpi=120)

    try:
        # Narrower bars if many methods
        bar_w = min(0.12, 0.9 / max(n_methods, 1))
        offsets = (np.arange(n_methods) - (n_methods - 1) / 2.0) * bar_w

        # Colors (cycled if there are more methods than palette entries)
        palette = ["#4472C4", "#ED7D31", "#A5A5A5", "#5B9BD5", "#70AD47", "#996633",
                   "#1f77b4", "#8c564b", "#2ca02c", "#ff7f0e"]

        # Best / second-best per metric
        top1_idx, top2_idx = _rank_top_two(scores, metrics_labels)

        # Draw bars
        for i in range(n_methods):
            ax.bar(x + offsets[i], scores[i, :], yerr=errs[i, :], width=bar_w, capsize=3,
                   label=methods[i], color=palette[i % len(palette)], edgecolor="none")

        # Label helpers
        white_outline = [pe.withStroke(linewidth=3, foreground="white")]
        # White text needs a dark halo; a white one would turn it into a blob.
        dark_outline = [pe.withStroke(linewidth=2, foreground="black")]

        def label_color(i, j):
            return "red" if i == top1_idx[j] else ("purple" if i == top2_idx[j] else "black")

        def stagger_offset(i):
            # Adjust spacing if needed
            return 0.010 + 0.006 * (i - (n_methods - 1) / 2.0)

        # Annotate values
        for j in range(n_metrics):
            for i in range(n_methods):
                val = float(scores[i, j])
                if not np.isfinite(val):
                    continue  # no bar was drawn, so there is nothing to label
                txt = f"{val:.3f}"

                if ANNOTATE_MODE == "inside":
                    ax.text(x[j] + offsets[i], val - 0.015, txt, ha="center", va="top",
                            fontsize=FONT_SIZE, color="white",
                            path_effects=dark_outline, clip_on=False)
                elif ANNOTATE_MODE == "top2_only" and i not in (top1_idx[j], top2_idx[j]):
                    continue
                else:  # stagger (default); also used for the two labels kept by top2_only
                    y = val + float(errs[i, j]) + stagger_offset(i)
                    ax.text(x[j] + offsets[i], y, txt, ha="center", va="bottom",
                            fontsize=FONT_SIZE, color=label_color(i, j),
                            path_effects=white_outline, clip_on=False)

        # Axes / grid / legend
        ax.set_xticks(x)
        ax.set_xticklabels(metrics_labels, fontsize=12)
        ax.set_ylabel("Score", fontsize=12)
        ax.yaxis.grid(True, linestyle="--", alpha=0.35)
        ax.set_axisbelow(True)

        if UNIFY_YLIM_0_TO_1:
            ax.set_ylim(0, 1.0)
        else:
            tops = scores + errs
            finite_tops = tops[np.isfinite(tops)]
            ymax = float(finite_tops.max()) if finite_tops.size else 1.0
            if ymax <= 0:
                ymax = 1.0  # avoid a degenerate or inverted axis
            ax.set_ylim(0, ymax * 1.12)

        ax.legend(frameon=False, bbox_to_anchor=(1.02, 1), loc="upper left")
        fig.tight_layout()

        os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
        fig.savefig(out_path, dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    print(f"Saved: {out_path}")

# Render the grouped bar chart and write it to out_path (the path is printed once saved).
plot_grouped(methods, metrics_labels, scores, errs, out_path)


Saved: figures/benchmark_from_Davis.png
