In [None]:
%matplotlib widget

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import mplcursors

In [None]:
import json
import re

def norm_min_max(df: pd.DataFrame, col: str):
    """Min-max scale ``df[col]`` to the range [0, 1].

    Returns a new Series; ``df`` is not modified. A constant column yields
    NaN for every row, because the range in the denominator is zero.
    """
    column = df[col]
    lowest, highest = column.min(), column.max()
    return (column - lowest) / (highest - lowest)


def load_task_and_preprocess(results_file, task_name):
    """Load one task's rows from a JSONL results file and derive the plotting columns.

    Args:
        results_file: path to a JSON-lines file (one JSON object per line).
        task_name: value of the ``task_name`` field to keep; it must be a key
            of ``acc_keys`` below whenever at least one row matches.

    Returns:
        A DataFrame with the columns ``model``, ``params``, ``task_name``,
        ``acc_values``, ``perf``, ``energy_consumed`` and ``ene_eff``, where
        ``perf`` is the min-max normalised accuracy and ``ene_eff`` is one
        minus the min-max normalised ``energy_consumed``.

    Raises:
        IndexError: if a model name contains no size token such as ``7B``.
    """
    # Blank lines (e.g. a trailing newline) are skipped instead of crashing json.loads.
    with open(results_file, "r") as f:
        records = [json.loads(line) for line in f if line.strip()]
    df = pd.DataFrame(records)

    # Which entry of the per-row ``acc_values`` dict is the headline metric for each task.
    acc_keys = {"livecodebench": "acc", "code2text_python": "smoothed_bleu_4,create_output"}

    # Work on an explicit copy so the column assignments below never write
    # into a view of the unfiltered frame (SettingWithCopyWarning).
    df = df[df["task_name"] == task_name].copy()
    # The first size token in the upper-cased model name, e.g. "7B" or "1.5B".
    df["params"] = df["model"].apply(lambda x: re.findall(r"(\d+(?:\.\d+)?[bBmM])", x.upper())[0])
    df["acc_values"] = df["acc_values"].apply(lambda x: x[acc_keys[task_name]])
    # drop=True: the old index is not needed and was discarded by the column selection anyway.
    df = df.reset_index(drop=True)
    df["energy_norm"] = norm_min_max(df, "energy_consumed")
    df["ene_eff"] = 1 - df["energy_norm"]
    df["perf"] = norm_min_max(df, "acc_values")
    return df[["model", "params", "task_name", "acc_values", "perf", "energy_consumed", "ene_eff"]]

# Build one preprocessed frame per task; both tasks are read from the same JSONL results file.
df_lcb = load_task_and_preprocess("../../lm_eval/results/final_results.jsonl", "livecodebench")
df_c2t = load_task_and_preprocess("../../lm_eval/results/final_results.jsonl", "code2text_python")

In [None]:
import numpy as np
from matplotlib.colors import ListedColormap, BoundaryNorm, LinearSegmentedColormap

def point_creation():
    """Return the ``(X, Y)`` meshgrid used as the background raster of the plots.

    Both axes span [-0.1, 1.1] with 1600 samples, so each returned array has
    shape (1600, 1600).
    """
    resolution = 1600
    axis = np.linspace(-0.1, 1.1, resolution)
    grid_x, grid_y = np.meshgrid(axis, axis)
    return grid_x, grid_y

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, BoundaryNorm
from mpl_toolkits.axes_grid1.inset_locator import inset_axes  # optional
from matplotlib import cm
import matplotlib.patheffects as pe




def gradient_labeling_two_side(classes_left, classes_right, df_left, df_right,
                               filename, title_left="LiveCodeBench", title_right="CodeXGLUE", curve_plot=None, plot_title=None):
    """Draw two side-by-side class maps with the models on top and save the figure as a PDF.

    Args:
        classes_left, classes_right: 2-D arrays of class values shown as the
            panel backgrounds over the extent [-0.1, 1.1] x [-0.1, 1.1]; they
            are coloured with a 5-level colormap whose bins are centred on 0..4.
        df_left, df_right: frames providing ``ene_eff`` (x), ``perf`` (y) and
            ``params`` (text label next to each point).
        filename: output path without extension; ``.pdf`` is appended.
        title_left, title_right: panel titles.
        curve_plot: optional ``(x_left, y_left, x_right, y_right)``; a curve
            whose x is ``None`` is skipped.
        plot_title: optional figure-level title.

    Returns:
        ``(left_scatter, right_scatter)``: the "core" marker collections of
        the two panels (useful for attaching hover cursors).
    """
    # Discrete, print-friendly colormap. plt.get_cmap is used because
    # matplotlib.cm.get_cmap no longer exists in Matplotlib >= 3.9.
    cmap = plt.get_cmap("YlGn", 5)
    norm = BoundaryNorm(np.arange(-0.5, 5.5, 1), cmap.N)

    # Side-by-side panels with a shared y axis; bottom margin leaves room for the colorbar.
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(11.2, 7.8))
    fig.subplots_adjust(left=0.09, right=0.99, bottom=0.28, wspace=0.08)
    panels = ((ax1, classes_left, df_left), (ax2, classes_right, df_right))

    if curve_plot is not None:
        x_left, y_left, x_right, y_right = curve_plot
        for ax, curve_x, curve_y in ((ax1, x_left, y_left), (ax2, x_right, y_right)):
            if curve_x is None:
                continue
            ax.plot(curve_x, curve_y, c="#123455", linewidth=2.4, linestyle="--", label="Fitted Curve")
            ax.legend()

    # Background class fields (same colormap and norm on both panels).
    images = [
        ax.imshow(classes, origin="lower", extent=[-0.1, 1.1, -0.1, 1.1],
                  cmap=cmap, norm=norm, interpolation="nearest", rasterized=True)
        for ax, classes, _ in panels
    ]

    def halo_scatter(ax, x, y):
        """Plot each point as a larger blue disc with a white-edged dark core on top."""
        ax.scatter(x, y, s=60, c="#0B2578", marker="o", linewidths=0, zorder=4)     # halo
        return ax.scatter(x, y, s=24, c="#1a1a1a", marker="o",
                          edgecolors="white", linewidths=0.7, zorder=5)             # core

    first_scatter, second_scatter = [
        halo_scatter(ax, frame["ene_eff"], frame["perf"]) for ax, _, frame in panels
    ]

    # Labels, limits, titles.
    for ax in (ax1, ax2):
        ax.set_xlim(-0.05, 1.05)
        ax.set_ylim(-0.05, 1.05)
        ax.set_xlabel("Energy Efficiency")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
    ax1.set_ylabel("Accuracy")
    ax1.set_title(title_left)
    ax2.set_title(title_right)

    # Short horizontal colorbar under both plots, one tick per class.
    labels = ["Very Weak", "Weak", "Moderate", "Strong", "Very Strong"]
    cbar = fig.colorbar(images[0], ax=[ax1, ax2], orientation="horizontal",
                        ticks=range(5), pad=0.14, shrink=0.7, fraction=0.18)
    cbar.ax.set_xticklabels(labels, fontsize=9)

    def annotate_params(ax, x, y, params):
        """Write the parameter-count label next to each point with a white outline."""
        for xi, yi, pi in zip(x, y, params):
            ax.annotate(pi, (xi, yi),
                        xytext=(5, 4), textcoords="offset points",
                        fontsize=5, color="black", zorder=7,
                        path_effects=[pe.withStroke(linewidth=2.2, foreground="white")])

    for ax, _, frame in panels:
        annotate_params(ax, frame["ene_eff"], frame["perf"], frame["params"])

    if plot_title is not None:
        fig.suptitle(plot_title, fontsize=16, y=0.98)
    # Save this figure explicitly rather than whatever pyplot considers "current".
    fig.savefig(f"{filename}.pdf", bbox_inches="tight", dpi=400)
    return first_scatter, second_scatter




In [None]:
import numpy as np
from matplotlib.colors import ListedColormap, BoundaryNorm, LinearSegmentedColormap


def calculate_euc_formula(df):
    """Add the Euclidean distance to the point (1, 1) to ``df`` in place.

    Writes two columns: ``distance`` (computed from ``perf`` and ``ene_eff``)
    and ``distance_rank`` (initialised to 0). Returns ``None``.
    """
    perf_gap = 1 - df["perf"]
    efficiency_gap = 1 - df["ene_eff"]
    df["distance"] = (perf_gap ** 2 + efficiency_gap ** 2) ** 0.5
    df["distance_rank"] = 0

def fill_distance_ranking(df):
    """Assign ``distance_rank`` (5 = closest ring, 1 = farthest) in place.

    The interval [0, sqrt(2)] is split into five equally wide rings. A row
    gets the rank of the innermost ring whose outer radius is >= its
    ``distance``. The comparison is inclusive, so a point at exactly sqrt(2)
    gets rank 1 instead of keeping its initial value; this matches the
    ``np.ceil`` binning used for the background classes. Rows whose distance
    exceeds sqrt(2) are left untouched.

    Args:
        df: frame with a ``distance`` column (and normally a pre-initialised
            ``distance_rank`` column); modified in place.
    """
    # Outer radii of the five rings; the leading 0 from linspace is not a ring.
    ring_radii = np.linspace(0, np.sqrt(2), 6)[1:]
    distances = df["distance"].to_numpy()
    unranked = np.ones(len(df), dtype=bool)

    for rank, radius in zip(range(5, 0, -1), ring_radii):
        newly_covered = unranked & (distances <= radius)
        if newly_covered.any():
            df.loc[newly_covered, "distance_rank"] = rank
        unranked &= ~newly_covered

def distance_base_class_calc():
    """Return the distance-based class of every background grid point.

    The distance of each grid point to the module-level ``center`` is binned
    into rings of width sqrt(2)/5, limited to five rings, and then inverted
    so that the innermost ring has class 4 and the outermost class 0.
    """
    grid_x, grid_y = point_creation()
    radius = ((grid_x - center[0]) ** 2 + (grid_y - center[1]) ** 2) ** 0.5
    ring_width = np.sqrt(2) / 5
    # Ring number 1..5 (1 = innermost); everything beyond the fifth ring counts as 5.
    ring = np.clip(np.ceil(radius / ring_width), 1, 5)
    # Flip so that closer to the centre means a higher class value.
    return np.abs(ring - 5)



def distance_based_computation(df_lcb, df_c2t):
    """Rank both frames by distance and plot them over the distance class map.

    Both frames are modified in place (``distance`` and ``distance_rank``
    columns); the figure is saved as ``distance_based.pdf``.
    """
    for frame in (df_lcb, df_c2t):
        calculate_euc_formula(frame)
    for frame in (df_lcb, df_c2t):
        fill_distance_ranking(frame)

    # The same background is used on both panels.
    background = distance_base_class_calc()
    gradient_labeling_two_side(background, background, df_lcb, df_c2t, filename="distance_based")


# Reference point read as a module-level global by the distance helpers above;
# it must be defined before distance_based_computation runs.
center = (1, 1)
distance_based_computation(df_lcb, df_c2t)


In [None]:
from sklearn.covariance import MinCovDet
from scipy.stats import chi2

def remove_outliers(df, random_state=0):
    """Drop points that are robust-Mahalanobis outliers in (ene_eff, perf).

    A Minimum Covariance Determinant estimate is fitted to the two columns
    and every point whose squared Mahalanobis distance exceeds the 95 %
    chi-square quantile (2 degrees of freedom) is discarded. A small figure
    with the kept points (default markers) and all points ("x") is drawn.

    Args:
        df: frame with ``ene_eff`` and ``perf`` columns.
        random_state: seed forwarded to ``MinCovDet`` so the selected inliers
            are reproducible between runs; pass ``None`` for the previous
            unseeded behaviour.

    Returns:
        An ``(n_inliers, 2)`` array with columns (ene_eff, perf).
    """
    X = df[["ene_eff", "perf"]].to_numpy()
    mcd = MinCovDet(random_state=random_state).fit(X)
    squared_distance = mcd.mahalanobis(X)
    cut = chi2.ppf(0.95, df=2)

    X_clean = X[squared_distance <= cut]

    _, ax = plt.subplots(figsize=(5, 2))
    ax.scatter(X_clean[:, 0], X_clean[:, 1])
    ax.scatter(X[:, 0], X[:, 1], marker="x", s=12)
    return X_clean

# Outlier-free (ene_eff, perf) arrays, one per task.
X_clean_lcb = remove_outliers(df_lcb)
X_clean_c2t = remove_outliers(df_c2t)

In [None]:
def create_all_possible_derivatives(ene_eff, acc):
    """Collect the slopes of all point pairs that form a trade-off.

    A pair is kept when one point has the higher ``ene_eff`` and the other
    the higher ``acc``. For each such pair the (always negative) slope
    ``-|dy/dx|`` is recorded, in pair order (i, j) with i < j.

    Args:
        ene_eff: x values; any sequence or Series, read by position.
        acc: y values aligned with ``ene_eff`` by position.

    Returns:
        A list of negative floats (empty when no pair is a trade-off).
    """
    # Materialise by position so a Series with a non-default index
    # (e.g. after filtering) is not indexed by label.
    xs = list(ene_eff)
    ys = list(acc)

    derivatives = []
    for i in range(len(xs)):
        i_x, i_y = xs[i], ys[i]
        for j in range(i + 1, len(xs)):
            new_x, new_y = xs[j], ys[j]
            if (new_x < i_x and new_y > i_y) or (new_x > i_x and new_y < i_y):
                derivatives.append(-abs((new_y - i_y) / (new_x - i_x)))
    return derivatives
# Pairwise trade-off slopes for each task, computed on all (not outlier-filtered) points.
all_possible_derivates_lcb = create_all_possible_derivatives(df_lcb["ene_eff"], df_lcb["perf"])
all_possible_derivates_c2t = create_all_possible_derivatives(df_c2t["ene_eff"], df_c2t["perf"])

import numpy as np
from sklearn.covariance import MinCovDet
from scipy.stats import chi2

def remove_derivative_outliers(all_possible_derivates):
    """Return the slopes that are not robust outliers.

    A Minimum Covariance Determinant estimate is fitted to the 1-D slope
    sample; slopes whose squared Mahalanobis distance exceeds the 95 %
    chi-square quantile (1 degree of freedom) are dropped.

    Args:
        all_possible_derivates: sequence of slope values.

    Returns:
        1-D array of the retained slopes, in their original order.
    """
    deriv = np.array(all_possible_derivates)
    # scikit-learn estimators expect a 2-D (n_samples, n_features) array.
    deriv_column = deriv.reshape(-1, 1)

    mcd = MinCovDet().fit(deriv_column)
    # Squared Mahalanobis distances under the robust location/covariance.
    d2 = mcd.mahalanobis(deriv_column)

    thr = chi2.ppf(0.95, df=1)
    return deriv[d2 <= thr]

# Slope samples with outliers removed, one per task.
deriv_inliers_all_lcb = remove_derivative_outliers(all_possible_derivates_lcb)
deriv_inliers_all_c2t = remove_derivative_outliers(all_possible_derivates_c2t)

In [None]:
import numpy as np
import cvxpy as cp
import matplotlib.pyplot as plt

def approximate_regression_function(df, X_clean, task_name, deriv_inliers_all):
    """Fit a slope-constrained polynomial ``perf = f(ene_eff)`` by least squares.

    The polynomial is fitted to the outlier-free points ``X_clean`` under two
    constraints: on a 50-point grid over [0, 1] its derivative must not
    exceed the 75th percentile of ``deriv_inliers_all``, and the coefficient
    sum (the value of the polynomial at x = 1) must be at least 0.01. A
    diagnostic figure with the fit, the clean points and all points of
    ``df`` is shown.

    Args:
        df: full frame; only ``ene_eff`` / ``perf`` are read, for plotting.
        X_clean: ``(n, 2)`` array with columns (ene_eff, perf) used for fitting.
        task_name: ``"livecodebench"`` or ``"code2text_python"``; selects the
            polynomial degree and slope percentile.
        deriv_inliers_all: slope sample that defines the derivative bound.

    Returns:
        ``(b, d)``: the solved ``cvxpy`` variable holding the coefficients in
        increasing-power order, and the polynomial degree.

    Raises:
        KeyError: for an unknown ``task_name``.
        RuntimeError: if the solver does not return a solution.
    """
    x_raw, y = X_clean[:, 0], X_clean[:, 1]

    # Per-task tuning knobs (currently identical for both tasks).
    degree_dict = {"livecodebench": 5, "code2text_python": 5}
    slope_percentile = {"livecodebench": 75, "code2text_python": 75}

    d = degree_dict[task_name]
    b = cp.Variable(d + 1)

    # Least-squares objective on the Vandermonde design matrix (columns x^0 .. x^d).
    x_transformed = np.vander(x_raw, N=d + 1, increasing=True)
    objective = cp.Minimize(cp.sum_squares(x_transformed @ b - y))

    # Derivative operator on a grid: D[j, k] = k * z_j**(k - 1), so D @ b = f'(z).
    z = np.linspace(0, 1, 50)
    D = np.zeros((len(z), d + 1))
    D[:, 1:] = np.arange(1, d + 1) * np.vander(z, N=d, increasing=True)

    max_slope = np.percentile(deriv_inliers_all, slope_percentile[task_name])
    constraints = [
        # f'(z) <= max_slope on the grid (max_slope is negative for a slope
        # sample that is entirely negative, which forces a decreasing curve).
        D @ b <= max_slope,
        # sum(b) == f(1): keeps the right-hand end of the curve above zero.
        cp.sum(b) >= 1e-2,
    ]
    prob = cp.Problem(objective, constraints)
    prob.solve()
    if b.value is None:
        # Fail here with the solver status instead of a TypeError further down.
        raise RuntimeError(
            f"Constrained regression for task '{task_name}' has no solution (solver status: {prob.status})"
        )

    # Everything below is only for the diagnostic plot.
    X_plot = np.linspace(-0.1, 1.1, 100).reshape(-1, 1)
    X_grid = np.vander(X_plot.flatten(), N=d + 1, increasing=True)
    y_grid = X_grid @ b.value

    _, ax = plt.subplots(figsize=(5, 3))
    ax.scatter(X_clean[:, 0], X_clean[:, 1], s=30)
    ax.scatter(df["ene_eff"], df["perf"], c="red", marker="x", s=8)
    ax.plot(X_plot, y_grid, label="monotone ↓")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_title("Smooth Monotonic-Decreasing Regression")
    ax.legend()
    plt.show()
    return b, d

# Fit one constrained polynomial per task; the coefficients are kept as cvxpy variables (read via .value).
coefficients_lcb, degree_lcb = approximate_regression_function(df_lcb, X_clean_lcb, "livecodebench", deriv_inliers_all_lcb)
coefficients_c2t, degree_c2t = approximate_regression_function(df_c2t, X_clean_c2t, "code2text_python", deriv_inliers_all_c2t)

In [None]:
def regression_rank(df, b, d):
    """Score each row against the fitted curve and bin the scores into ranks 1..5.

    Adds three columns to ``df`` in place: ``predicted_perf`` (the polynomial
    evaluated at ``ene_eff``), ``score`` (``perf / predicted_perf``) and
    ``regression_rank`` (equal-width bins over the observed score range,
    1 = lowest scores, 5 = highest).

    Args:
        df: frame with ``ene_eff`` and ``perf`` columns.
        b: object whose ``.value`` holds the polynomial coefficients in
            increasing-power order (length ``d + 1``).
        d: polynomial degree.

    Returns:
        ``(min_score, five_intervals)``: the lowest score and the bin width.

    Raises:
        ValueError: if the score range is zero or undefined, so no bins exist.
    """
    design = np.vander(df["ene_eff"].to_numpy(), N=d + 1, increasing=True)
    df["predicted_perf"] = design @ b.value
    df["score"] = df["perf"] / df["predicted_perf"]

    min_score, max_score = df["score"].min(), df["score"].max()
    five_intervals = (max_score - min_score) / 5
    if not five_intervals > 0:
        raise ValueError("Cannot bin scores into five ranks: the score range is zero or undefined")

    raw_rank = np.ceil((df["score"] - min_score) / five_intervals)
    # The minimum maps to 0 and rounding can push the maximum just above 5,
    # so clamp to the valid 1..5 range (same limits as the background classes).
    df["regression_rank"] = raw_rank.clip(lower=1, upper=5).astype(int)
    return min_score, five_intervals


# Rank both frames in place; the returned minimum score and bin width are reused for the background classes.
min_score_lcb, five_interval_lcb = regression_rank(df_lcb, coefficients_lcb, degree_lcb)
min_score_c2t, five_interval_c2t = regression_rank(df_c2t, coefficients_c2t, degree_c2t)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm, LinearSegmentedColormap

def draw_curve(d, b):
    """Sample the fitted polynomial on 100 points over [-0.1, 1.1].

    ``b.value`` holds the ``d + 1`` coefficients in increasing-power order.
    Returns ``(x, y)`` where ``x`` has shape (100, 1) and ``y`` shape (100,).
    """
    samples = np.linspace(-0.1, 1.1, 100)
    design = np.vander(samples, N=d + 1, increasing=True)
    return samples.reshape(-1, 1), design @ b.value

def regression_class_computation(d, b, min_score, five_intervals):
    """Return the regression-based class (0..4) of every background grid point.

    Each grid point is scored as ``y / f(x)``, with ``f`` the polynomial whose
    coefficients are ``b.value`` (increasing powers, degree ``d``). The score
    is then binned with the same ``min_score`` / ``five_intervals`` used for
    the data and limited to five classes.
    """
    grid_x, grid_y = point_creation()
    exponents = np.arange(d + 1)  # 0..d
    # Stack x**0 .. x**d along a trailing axis and contract it with the coefficients.
    predicted = (grid_x[..., None] ** exponents) @ b.value
    score = grid_y / predicted
    bins = np.ceil((score - min_score) / five_intervals)
    # Limit to bins 1..5, then shift to the 0-based class ids.
    return np.clip(bins, 1, 5) - 1


def regression_computation(
    df_lcb,
    coefficients_lcb,
    min_score_lcb,
    five_interval_lcb,
    degree_lcb,
    df_c2t,
    coefficients_c2t,
    min_score_c2t,
    five_interval_c2t,
    degree_c2t,
):
    """Plot both tasks over their regression class maps with the fitted curves.

    The figure is saved as ``regression_curve.pdf``. Returns the two scatter
    collections (LiveCodeBench, code2text) from the plotting helper.
    """
    backgrounds = []
    curves = []
    for degree, coefficients, min_score, interval in (
        (degree_lcb, coefficients_lcb, min_score_lcb, five_interval_lcb),
        (degree_c2t, coefficients_c2t, min_score_c2t, five_interval_c2t),
    ):
        backgrounds.append(regression_class_computation(degree, coefficients, min_score, interval))
    for degree, coefficients in ((degree_lcb, coefficients_lcb), (degree_c2t, coefficients_c2t)):
        curves.extend(draw_curve(degree, coefficients))

    left_scatter, right_scatter = gradient_labeling_two_side(
        backgrounds[0],
        backgrounds[1],
        df_lcb,
        df_c2t,
        "regression_curve",
        curve_plot=tuple(curves),
    )
    return left_scatter, right_scatter


# Draw the regression figure; the returned scatter collections are used for the hover cursors below.
sc1, sc2 = regression_computation(df_lcb,
    coefficients_lcb,
    min_score_lcb,
    five_interval_lcb,
    degree_lcb,
    df_c2t,
    coefficients_c2t,
    min_score_c2t,
    five_interval_c2t,
    degree_c2t)


def regression_on_hover(df):
    """Build an mplcursors "add" callback that describes the hovered model.

    Args:
        df: frame whose row positions match the scatter's point order and
            which has the columns ``model``, ``perf``, ``predicted_perf``,
            ``score`` and ``regression_rank``.

    Returns:
        A function ``on_add(sel)`` that fills and styles ``sel.annotation``.
    """
    def short_model_name(full_name):
        """Return the part after the first '/', or the whole name if there is no '/'."""
        parts = full_name.split('/')
        return parts[1] if len(parts) > 1 else parts[0]

    def on_add(sel):
        sample = df.iloc[sel.index]  # sel.index is the position of the hovered point
        annotation_text = (
            f"model name: {short_model_name(sample['model'])}\n"
            f"actual performance: {sample['perf']:.2f}\n"
            f"expected performance: {sample['predicted_perf']:.2f}\n"
            f"score: {(sample['score']):.2f}\n"
            f"regression_rank: {sample['regression_rank']}"
        )
        sel.annotation.set_text(annotation_text)

        # Opaque blue rounded box with small white text.
        bbox = sel.annotation.get_bbox_patch()
        bbox.set_facecolor("blue")
        bbox.set_alpha(0.9)
        bbox.set_boxstyle("round,pad=0.5")
        sel.annotation.set_color("white")
        sel.annotation.set_fontsize(8)
    return on_add

# Attach hover tooltips to the two scatter collections; each cursor gets the
# callback built from the frame that was plotted on its panel.
cursor1 = mplcursors.cursor(sc1, hover=True)
cursor2 = mplcursors.cursor(sc2, hover=True)
cursor1.connect("add", regression_on_hover(df_lcb))
cursor2.connect("add", regression_on_hover(df_c2t))

# plt.show()

In [None]:
def metric_comparison(degree, coefficients, min_score, five_interval, df, file_name, plot_title):
    """Plot one task under both rating schemes side by side.

    The left panel ("OTE") uses the regression-based class map together with
    the fitted curve; the right panel ("COC") uses the distance-based class
    map. The same ``df`` is scattered on both panels and the figure is saved
    as ``<file_name>.pdf``. Returns the two scatter collections.
    """
    curve_x, curve_y = draw_curve(degree, coefficients)
    ote_background = regression_class_computation(degree, coefficients, min_score, five_interval)
    coc_background = distance_base_class_calc()

    left_scatter, right_scatter = gradient_labeling_two_side(
        ote_background,
        coc_background,
        df,
        df,
        file_name,
        # Only the left panel gets a fitted curve.
        curve_plot=(curve_x, curve_y, None, None),
        title_left="OTE",
        title_right="COC",
        plot_title=plot_title,
    )
    return left_scatter, right_scatter

# LiveCodeBench: regression-based vs. distance-based class maps, saved as lcb_metric_comparison.pdf.
metric_comparison(degree_lcb, coefficients_lcb, min_score_lcb, five_interval_lcb, df_lcb, "lcb_metric_comparison", "LiveCodeBench")

In [None]:
# code2text: regression-based vs. distance-based class maps, saved as c2t_metric_comparison.pdf.
metric_comparison(degree_c2t, coefficients_c2t, min_score_c2t, five_interval_c2t, df_c2t, "c2t_metric_comparison", "CodeXGLUE")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import BoundaryNorm
import matplotlib.patheffects as pe  # ensure this import exists for annotations

def gradient_labeling_2x2(classes_tl, classes_tr, classes_bl, classes_br,
                          df_tl, df_tr, df_bl, df_br,
                          filename,
                          title_tl="LiveCodeBench", title_tr="CodeXGLUE",
                          title_bl="Dataset C", title_br="Dataset D",
                          curve_plot=None, plot_title=None):
    """Draw a 2x2 grid of class maps with the models on top and save it as a PDF.

    Args:
        classes_tl, classes_tr, classes_bl, classes_br: 2-D class arrays used
            as panel backgrounds (top-left, top-right, bottom-left,
            bottom-right) over the extent [-0.1, 1.1] x [-0.1, 1.1].
        df_tl, df_tr, df_bl, df_br: frames providing ``ene_eff`` (x) and
            ``perf`` (y) for the corresponding panel. Points are labelled
            with their 1-based row position in that frame.
        filename: output path without extension; ``.pdf`` is appended.
        title_tl, title_tr, title_bl, title_br: panel titles.
        curve_plot: optional flat tuple of x/y pairs, either
            ``(x_tl, y_tl, x_tr, y_tr)`` (top row only) or all eight values
            in the order tl, tr, bl, br.
        plot_title: optional figure-level title.

    Returns:
        The four "core" scatter collections in the order tl, tr, bl, br.

    Raises:
        ValueError: if ``curve_plot`` has neither 4 nor 8 entries.
    """
    fontsize_annotation = 9
    fontsize_label = 16

    # plt.get_cmap is used because matplotlib.cm.get_cmap no longer exists in Matplotlib >= 3.9.
    cmap = plt.get_cmap("YlGn", 5)
    norm = BoundaryNorm(np.arange(-0.5, 5.5, 1), cmap.N)

    # 2x2 grid with shared axes.
    fig, axs = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(9, 9), layout="compressed")
    ax_tl, ax_tr = axs[0, 0], axs[0, 1]
    ax_bl, ax_br = axs[1, 0], axs[1, 1]
    axes = (ax_tl, ax_tr, ax_bl, ax_br)
    backgrounds = (classes_tl, classes_tr, classes_bl, classes_br)
    frames = (df_tl, df_tr, df_bl, df_br)

    # Optional fitted curves. The length is checked up front so that an
    # unrelated ValueError raised while plotting is not mistaken for
    # "only the top row was provided".
    if curve_plot is not None:
        curve_values = tuple(curve_plot)
        if len(curve_values) not in (4, 8):
            raise ValueError(
                f"curve_plot must contain 4 or 8 values (x/y pairs), got {len(curve_values)}"
            )
        curve_pairs = zip(curve_values[0::2], curve_values[1::2])
        for ax, (x, y) in zip(axes, curve_pairs):
            ax.plot(x, y, c="#123455", linewidth=2.4, linestyle="--", label="Fitted Curve")
            ax.legend()

    # Backgrounds (all share the same cmap/norm so one colorbar describes them all).
    images = [
        ax.imshow(classes, origin="lower", extent=[-0.1, 1.1, -0.1, 1.1],
                  cmap=cmap, norm=norm, interpolation="nearest", rasterized=True)
        for ax, classes in zip(axes, backgrounds)
    ]

    def halo_scatter(ax, x, y):
        """Plot each point as a larger blue disc with a white-edged dark core on top."""
        ax.scatter(x, y, s=60, c="#0B2578", marker="o", linewidths=0, zorder=4)     # halo
        return ax.scatter(x, y, s=24, c="#1a1a1a", marker="o",
                          edgecolors="white", linewidths=0.7, zorder=5)

    sc_tl, sc_tr, sc_bl, sc_br = [
        halo_scatter(ax, frame["ene_eff"], frame["perf"]) for ax, frame in zip(axes, frames)
    ]

    # Limits, spines, ticks; labels are cleared here and set selectively below.
    for ax in axes:
        ax.set_xlim(-0.05, 1.05)
        ax.set_ylim(-0.05, 1.05)
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.xaxis.set_ticks(np.linspace(0, 1, 6))  # 6 evenly spaced ticks from 0 to 1
        ax.yaxis.set_ticks(np.linspace(0, 1, 6))
        ax.tick_params(axis='both', labelsize=15)

    for ax, panel_title in zip(axes, (title_tl, title_tr, title_bl, title_br)):
        ax.set_title(panel_title, fontsize=fontsize_label, fontweight="bold")

    # x labels only on the bottom row, y labels only on the left column.
    ax_bl.set_xlabel("Energy Efficiency", fontsize=fontsize_label)
    ax_bl.set_ylabel("Accuracy", fontsize=fontsize_label)
    ax_br.set_xlabel("Energy Efficiency", fontsize=fontsize_label)
    ax_tl.set_ylabel("Accuracy", fontsize=fontsize_label)

    def annotate_params(ax, x, y, params):
        """Write a text label next to each point with a white outline."""
        for xi, yi, pi in zip(x, y, params):
            ax.annotate(pi, (xi, yi),
                        xytext=(5, 4), textcoords="offset points",
                        fontsize=fontsize_annotation, color="black", zorder=7,
                        path_effects=[pe.withStroke(linewidth=2.2, foreground="white")])

    # Number the points per panel, using that panel's own frame length so no
    # point is left unlabelled when the frames differ in size.
    for ax, frame in zip(axes, frames):
        point_labels = [str(position + 1) for position in range(len(frame))]
        annotate_params(ax, frame["ene_eff"], frame["perf"], point_labels)

    # Single shared horizontal colorbar, one tick per class.
    labels = ["Weakest (1)", "Weak (2)", "Moderate (3)", "Strong (4)", "Strongest (5)"]
    cbar = fig.colorbar(images[0], ax=axs.ravel(), orientation="horizontal",
                        ticks=range(5), pad=0.03)
    cbar.ax.set_xticklabels(labels, fontsize=13.5, fontweight="bold")

    if plot_title is not None:
        fig.suptitle(plot_title, fontsize=16, y=0.98)

    # Save this figure explicitly rather than whatever pyplot considers "current".
    fig.savefig(f"{filename}.pdf", bbox_inches="tight", dpi=400)
    return sc_tl, sc_tr, sc_bl, sc_br


# NOTE(review): this repeats, unchanged in behaviour, the metric_comparison
# definition from an earlier cell; consider keeping only one of them.
def metric_comparison(degree, coefficients, min_score, five_interval, df, file_name, plot_title):
    """Plot one task under both rating schemes side by side.

    The left panel ("OTE") uses the regression-based class map together with
    the fitted curve; the right panel ("COC") uses the distance-based class
    map. The same ``df`` is scattered on both panels and the figure is saved
    as ``<file_name>.pdf``. Returns the two scatter collections.
    """
    curve_x, curve_y = draw_curve(degree, coefficients)
    ote_background = regression_class_computation(degree, coefficients, min_score, five_interval)
    coc_background = distance_base_class_calc()

    left_scatter, right_scatter = gradient_labeling_two_side(
        ote_background,
        coc_background,
        df,
        df,
        file_name,
        # Only the left panel gets a fitted curve.
        curve_plot=(curve_x, curve_y, None, None),
        title_left="OTE",
        title_right="COC",
        plot_title=plot_title,
    )
    return left_scatter, right_scatter
# Inputs for the combined 2x2 figure: per-task regression class maps and fitted
# curves (top row) and the shared distance class map (bottom row).
regression_classes_lcb = regression_class_computation(degree_lcb, coefficients_lcb, min_score_lcb, five_interval_lcb)
x_lcb, y_lcb = draw_curve(degree_lcb, coefficients_lcb)
regression_classes_c2t = regression_class_computation(degree_c2t, coefficients_c2t, min_score_c2t, five_interval_c2t)
x_c2t, y_c2t = draw_curve(degree_c2t, coefficients_c2t)
distance_classes = distance_base_class_calc()

# Four-value curve_plot: curves are drawn on the top row only. Saved as all_in_one.pdf.
gradient_labeling_2x2(regression_classes_lcb, regression_classes_c2t, distance_classes, distance_classes, df_lcb, df_c2t, df_lcb, df_c2t, "all_in_one", curve_plot=(x_lcb, y_lcb, x_c2t, y_c2t), title_bl="LiveCodeBench-CIRC", title_br="CodeXGLUE-CIRC", title_tl="LiveCodeBench-OTER", title_tr="CodeXGLUE-OTER")

In [None]:
# Export the LiveCodeBench ratings without the intermediate predicted_perf column.
df_lcb.drop(columns=["predicted_perf"]).to_excel("livecodebench_rating.xlsx")

In [None]:
# Export the code2text ratings without the intermediate predicted_perf column.
df_c2t.drop(columns=["predicted_perf"]).to_excel("code2text_python_rating.xlsx")

In [None]:
# Join the two task tables on the model name. The task suffixes are applied by
# merge itself, so no column name is rewritten by string replacement (which
# would also alter any name that merely contains "_x" / "_y").
merged_df = pd.merge(df_lcb, df_c2t, on="model", how="inner", suffixes=("_lcb", "_c2t"))
# Drop the per-task bookkeeping columns and keep a single "params" column.
merged_df = merged_df.drop(
    columns=["task_name_lcb", "task_name_c2t", "params_c2t", "predicted_perf_lcb", "predicted_perf_c2t"]
).rename(columns={"params_lcb": "params"})
merged_df.to_excel("all_results.xlsx")

In [None]:


import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe
from matplotlib.ticker import MultipleLocator

def plot_two_score_bars(
    df,
    model_col="model",
    score_cols=("distance_rank_lcb", "regression_rank_lcb"),
    titles=None,
    sort_by="mean",                     # "mean", score_cols[0], or score_cols[1]
    title="Model Scores",
    filename="scores_pretty.pdf",
):
    """Save a grouped bar chart comparing two score columns per model.

    Args:
        df: frame with ``model_col`` and both ``score_cols``.
        model_col: column holding the model identifiers; the part after the
            first "/" is used as the short name.
        score_cols: the two score columns to plot side by side.
        titles: legend labels for the two columns; defaults to ``score_cols``.
        sort_by: ``"mean"`` (descending mean of both scores), one of
            ``score_cols`` (descending by that column), or anything else to
            keep the row order of ``df``.
        title: accepted for backward compatibility; not drawn on the figure.
        filename: output file path.

    The x tick labels are ``M<k>``, where ``k`` is the 1-based row position
    of the model in ``df`` (before sorting).
    """
    a_col, b_col = score_cols
    if titles is None:
        # Fall back to the column names instead of failing on titles[0].
        titles = score_cols

    def short_name(full_name):
        """Return the part after the first '/', or the whole name if there is no '/'."""
        parts = str(full_name).split("/")
        return parts[1] if len(parts) > 1 else parts[0]

    # Short model name -> original row position (used for the "M<k>" tick labels).
    m2id = {short_name(name): position for position, name in enumerate(df[model_col])}

    # ---- order rows (optional) ----
    if sort_by == "mean":
        order = np.argsort(df[[a_col, b_col]].mean(axis=1).to_numpy())[::-1]
    elif sort_by in score_cols:
        order = np.argsort(df[sort_by].to_numpy())[::-1]
    else:
        order = np.arange(len(df))
    d = df.iloc[order].reset_index(drop=True)
    d[model_col] = d[model_col].apply(short_name)
    models = d[model_col].astype(str).values
    A = d[a_col].astype(float).values
    B = d[b_col].astype(float).values

    # ---- layout ----
    x = np.arange(len(models))
    bar_width = 0.38
    fig, ax = plt.subplots(figsize=(10, 4))

    # ---- colors ----
    col_a = "#065C48"
    col_b = "#A7D500"

    # ---- bars (grouped, vertical) ----
    ax.bar(x - bar_width / 2, A, width=bar_width, color=col_a, edgecolor="#2b2b2b",
           linewidth=0.6, label=titles[0])
    ax.bar(x + bar_width / 2, B, width=bar_width, color=col_b, edgecolor="#2b2b2b",
           linewidth=0.6, label=titles[1], hatch="//")

    # ---- axes, ticks, grid ----
    ax.set_xticks(x)
    ax.set_xticklabels(["M" + str(m2id[m] + 1) for m in models], ha="center", rotation=45, fontsize=14.5)
    ax.set_ylabel("Rate", fontsize=20)
    y_max = max(A.max(), B.max())
    ax.set_ylim(0, y_max * 1.10)
    ax.yaxis.set_major_locator(MultipleLocator(1))
    # Size the tick labels without replacing them: fixed label lists are
    # matched to ticks by position and go wrong when the tick set changes.
    ax.tick_params(axis="y", labelsize=17)
    ax.grid(axis="y", linestyle="--", alpha=0.25)

    # clean frame
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    ax.legend(fontsize=17)

    fig.tight_layout()
    fig.savefig(filename, bbox_inches="tight", dpi=400)


# LiveCodeBench: distance-based vs. regression-based rank per model.
plot_two_score_bars(merged_df, model_col="model", score_cols=("distance_rank_lcb","regression_rank_lcb"),
                    titles=("LiveCodeBench-CIRC", "LiveCodeBench-OTER"), title="Model Scores", sort_by="mean", filename="lcb_scores_comparison.pdf")


In [None]:
# Preview of mathtext tick labels. The raw-string prefix keeps "\t" in
# "\text" from being interpreted as a TAB character.
[r"$\text{M}_{" + str(i+1) + "}$" for i in range(10)]

In [None]:
# Distance-based rank: LiveCodeBench vs. code2text per model.
plot_two_score_bars(merged_df, model_col="model", score_cols=("distance_rank_lcb","distance_rank_c2t"),
                    titles=("LiveCodeBench-CIRC", "CodeXGLUE-CIRC"), title="Model Scores", sort_by="mean", filename="scores_distance_comparison.pdf")

In [None]:
# Regression-based rank: LiveCodeBench vs. code2text per model.
plot_two_score_bars(merged_df, model_col="model", score_cols=("regression_rank_lcb","regression_rank_c2t"),
                    titles=("LiveCodeBench-OTER", "CodeXGLUE-OTER"), title="Model Scores", sort_by="mean", filename="scores_regression_comparison.pdf")

In [None]:
# code2text: distance-based vs. regression-based rank per model.
plot_two_score_bars(merged_df, model_col="model", score_cols=("distance_rank_c2t","regression_rank_c2t"),
                    titles=("CodeXGLUE-CIRC", "CodeXGLUE-OTER"), title="Model Scores", sort_by="mean", filename="c2t_scores_comparison.pdf")

### End of the main analysis. The cells below are exploratory experiments.

# ----------------------------------------------------