## Rank Correlation
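This notebook computes and visualizes rank correlation scores. Cross-correlation scores are computed between the studies of different datasets within the first group of studies. To compute self-correlation scores (the diagonal of each correlation matrix), we compare each dataset against a repeated set of experiments, i.e., the second group of studies.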

In [None]:
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Override the class-level `tb_highlight` style string on IPython's VerboseTB
# traceback formatter with a dark purple background.
# NOTE(review): presumably chosen for readability on a dark theme — confirm.
ultratb.VerboseTB.tb_highlight = "bg:#3e0054"

In [None]:
from syftr.optuna_helper import get_study_names

# --- Notebook configuration -------------------------------------------------

# Success-rate threshold.
# NOTE(review): presumably meant for the Pareto-front lookup — confirm.
SUCCESS_RATE = 0.9
# When True, only rows whose `user_attrs_is_seeding` is True are analysed.
ONLY_SEEDING = True
# Dataset whose points are highlighted in the rank-distribution scatter plot.
PARETO_DATASET = "financebench_hf"
# Column names of the trials frame.
# NOTE(review): presumably the flow identifier and the objective used for
# ranking (accuracy, judging by the plot titles) — confirm.
GROUP_BY = "user_attrs_flow"
RANK_BY = "values_0"

# Study whose Pareto front supplies the reference flows.
PARETO_STUDY_NAME = "rank0--rag-and-agents--financebench_hf"

# Human-readable plot titles, keyed by correlation method. The same keys are
# used to index the result of `get_rank_correlation`.
METHOD_NAMES = {
    "kendall": "Kendall's tau",
    "spearman": "Spearman's rho",
    "pearson": "Pearson's r",
}

# Short symbols used in the per-cell annotations of the correlation heatmaps.
SYMBOLS = {
    "kendall": "τ",
    "spearman": "ρ",
    "pearson": "r",
}

# Regex filters selecting the first group of studies.
INCLUDE_REGEX_1 = ["rank1--rag-and-agents.*"]
EXCLUDE_REGEX_1 = []

# Regex filters selecting the second (repeated) group of studies.
INCLUDE_REGEX_2 = ["rank2--rag-and-agents.*"]
EXCLUDE_REGEX_2 = []

# Resolve both regex groups to study names.
study_names_1 = get_study_names(
    include_regex=INCLUDE_REGEX_1,
    exclude_regex=EXCLUDE_REGEX_1,
)

study_names_2 = get_study_names(
    include_regex=INCLUDE_REGEX_2,
    exclude_regex=EXCLUDE_REGEX_2,
)

In [None]:
import pandas as pd

from syftr.analytics import get_rank_correlation


def evaluate(method, df1, df2):
    """Build dataset-by-dataset rank-correlation matrices.

    Off-diagonal cells compare two different datasets taken from ``df1`` and
    are written symmetrically. Diagonal cells compare the same dataset between
    ``df1`` and ``df2``; they stay unset when either frame has no rows for
    that dataset (or when ``df2`` is empty).

    Args:
        method: Key selecting one entry of the mapping returned by
            ``get_rank_correlation``.
        df1: Trials frame with a ``user_attrs_dataset`` column; its unique
            values define the rows and columns of the matrices.
        df2: Second trials frame used only for the diagonal; may be empty.

    Returns:
        A tuple ``(correlation, p_value, support)`` of DataFrames indexed and
        columned by dataset. All three are empty when ``df1`` is empty.
    """
    if df1.empty:
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    dataset_col = "user_attrs_dataset"
    names = df1[dataset_col].unique()
    corr = pd.DataFrame(columns=names, index=names, dtype=float)
    pval = pd.DataFrame(columns=names, index=names, dtype=float)
    supp = pd.DataFrame(columns=names, index=names, dtype=int)

    def store(row, col, stats):
        # Write one cell of each of the three result matrices.
        corr.at[row, col] = stats["correlation"]
        pval.at[row, col] = stats["p_value"]
        supp.at[row, col] = stats["support"]

    # Cross-correlation: visit every unordered pair of distinct datasets once
    # and mirror the result across the diagonal.
    for pos, first in enumerate(names):
        rows_first = df1[df1[dataset_col] == first]
        for second in names[pos + 1:]:
            rows_second = df1[df1[dataset_col] == second]
            stats = get_rank_correlation(rows_first, rows_second)[method]
            store(first, second, stats)
            store(second, first, stats)

    if df2.empty:
        return corr, pval, supp

    # Self-correlation: same dataset, first frame versus second frame.
    for name in names:
        rows_a = df1[df1[dataset_col] == name]
        rows_b = df2[df2[dataset_col] == name]
        if rows_a.empty or rows_b.empty:
            continue
        store(name, name, get_rank_correlation(rows_a, rows_b)[method])

    return corr, pval, supp

In [None]:
import matplotlib.pyplot as plt
from syftr.configuration import cfg
from syftr.optuna_helper import get_completed_trials

# Load the completed trials of both study groups.
df1 = get_completed_trials(study_names_1)
df2 = get_completed_trials(study_names_2)

if ONLY_SEEDING:
    print("Filtering for Pareto flows")

    # `.eq(True)` is the explicit spelling of `== True`: rows where the flag is
    # missing or not exactly True are dropped.
    df1 = df1[df1["user_attrs_is_seeding"].eq(True)]
    df2 = df2[df2["user_attrs_is_seeding"].eq(True)]

print(f"Study 1: {len(study_names_1)} studies, {len(df1)} trials")
print(f"Study 2: {len(study_names_2)} studies, {len(df2)} trials")

print("The total number of trials is:", len(df1) + len(df2))

# Same ordering that `evaluate` uses for the rows/columns of its matrices.
datasets = df1["user_attrs_dataset"].unique()

# One annotated heatmap per correlation method.
for method in METHOD_NAMES:

    df_results, df_p_value, df_support = evaluate(method, df1, df2)

    if df_results.empty:
        print("Not enough data to plot")
        continue

    n_datasets = len(datasets)
    symbol = SYMBOLS[method]

    plt.figure(figsize=(2 * n_datasets, n_datasets))
    plt.imshow(df_results, cmap="coolwarm", interpolation="nearest", vmin=-1, vmax=1, alpha=0.8)
    plt.colorbar()
    plt.xticks(range(n_datasets), datasets, rotation=90)
    plt.yticks(range(n_datasets), datasets)
    plt.title(f"{METHOD_NAMES[method]} for Flow Accuracy")

    # Walk the upper triangle (diagonal included) so that every dataset pair
    # contributes to the support total exactly once.
    total_support = 0
    for i in range(n_datasets):
        for j in range(i, n_datasets):
            value = df_results.iloc[i, j]
            if pd.isna(value):
                continue
            support = int(df_support.iloc[i, j])
            p_value = df_p_value.iloc[i, j]
            total_support += support
            annotation = f"{symbol}={value:.2f}\np={p_value:.2f}\ns={support}"
            # Text coordinates are (x, y) = (column, row).
            plt.text(j - 0.4, i, annotation, ha="left", va="center", color="black")
            if i < j:
                # Mirror the annotation into the lower triangle.
                plt.text(i - 0.4, j, annotation, ha="left", va="center", color="black")

    print("Total support:", total_support)
    plt.tight_layout()
    plt.savefig(cfg.paths.results_dir / f"rank_correlation_{method}.pdf", dpi=300)
    plt.show()

In [None]:
import json
from syftr.configuration import UNSUPPORTED_PARAMS
from syftr.optuna_helper import get_flows_from_trials, get_pareto_df


def _with_relative_rank(df_flows, n_pareto_flows):
    """Return a copy of ``df_flows`` with ``rank`` and ``relative rank`` columns.

    ``rank`` is the dense, descending rank of ``RANK_BY`` within each
    ``user_attrs_dataset`` group. ``relative rank`` rescales that rank per
    dataset linearly onto ``[1, n_pareto_flows]``.

    NOTE: a dataset whose rows all share a single rank has a maximum zero-based
    rank of 0, so the rescaling divides 0 by 0 and yields NaN for that dataset.
    """
    df_ranked = df_flows.copy()
    df_ranked["rank"] = df_ranked.groupby("user_attrs_dataset")[RANK_BY].rank(
        method="dense", ascending=False
    )
    # Zero-based rank, normalised by the per-dataset maximum ...
    df_ranked["relative rank"] = df_ranked["rank"] - 1
    df_ranked["relative rank"] /= df_ranked.groupby("user_attrs_dataset")[
        "relative rank"
    ].transform("max")
    # ... then stretched onto 1..n_pareto_flows.
    df_ranked["relative rank"] *= n_pareto_flows - 1
    df_ranked["relative rank"] += 1
    return df_ranked


# Reference Pareto front, best objective value first.
df_pareto = get_pareto_df(PARETO_STUDY_NAME, SUCCESS_RATE)
df_pareto = df_pareto.sort_values(by=RANK_BY, ascending=False)
df_pareto = df_pareto.reset_index(drop=True)

# Drop the parameters listed in UNSUPPORTED_PARAMS from every Pareto flow.
pareto_flows = []
for flow in get_flows_from_trials(df_pareto):
    trimmed = flow.copy()
    for key in flow:
        if key in UNSUPPORTED_PARAMS:
            del trimmed[key]
    pareto_flows.append(trimmed)

# Attach the flow of every trial exactly once per frame. Copy first because
# df1/df2 may be filtered selections of the loaded trials, and assigning a
# column to such a selection triggers pandas' chained-assignment warning.
df1 = df1.copy()
df1["flow"] = get_flows_from_trials(df1)

df2 = df2.copy()
df2["flow"] = get_flows_from_trials(df2)

# Keep only the trials whose flow is one of the Pareto flows.
df1_pareto = df1[df1["flow"].isin(pareto_flows)]
print(f"Pareto flows in study 1: {len(df1_pareto)}")

df2_pareto = df2[df2["flow"].isin(pareto_flows)]
print(f"Pareto flows in study 2: {len(df2_pareto)}")

df1_pareto = _with_relative_rank(df1_pareto, len(df_pareto))
df2_pareto = _with_relative_rank(df2_pareto, len(df_pareto))

In [None]:
# Violin plot of the relative ranks each Pareto flow reaches across both study
# groups, with the points of PARETO_DATASET overlaid in red.
plt.figure(figsize=(10, 6))
has_pareto_dataset = False  # becomes True once the legend label has been used
for x, flow in enumerate(pareto_flows, start=1):
    # Fallback label so the error report below always has something to show,
    # even if the flow cannot be serialised to JSON.
    flow_str = repr(flow)
    try:
        flow_str = json.dumps(flow, sort_keys=True)
        rows_1 = df1_pareto[df1_pareto["flow"] == flow]
        rows_2 = df2_pareto[df2_pareto["flow"] == flow]
        ranks = [*rows_1["relative rank"].values, *rows_2["relative rank"].values]
        if not ranks:
            continue
        parts = plt.violinplot(ranks, positions=[x], widths=0.5, showextrema=False, showmedians=False)
        for body in parts["bodies"]:
            body.set_facecolor("0.7")
            body.set_edgecolor("0.7")
            body.set_linewidth(0.5)
            body.set_alpha(0.5)

        for dataset in datasets:
            # Only the dataset of the reference Pareto front is highlighted.
            if dataset != PARETO_DATASET:
                continue
            for rows in (rows_1, rows_2):
                y = rows[rows["user_attrs_dataset"] == dataset]["relative rank"]
                if y.empty:
                    continue
                # Attach the legend label to the first scatter that really
                # draws points, so the entry is neither lost nor duplicated.
                label = None if has_pareto_dataset else dataset
                has_pareto_dataset = True
                plt.scatter([x] * len(y), y, alpha=0.4, color="red", label=label, marker="o")
    except Exception as exc:
        # Best effort: one flow that cannot be drawn must not abort the whole
        # figure, but report the real cause instead of hiding it.
        print(f"Could not plot flow {flow_str}: {type(exc).__name__}: {exc}")

plt.grid(axis="y", linestyle="--", alpha=0.5)
plt.xticks(range(1, len(pareto_flows) + 1))
plt.yticks(range(1, len(pareto_flows) + 1))
plt.xlabel("Flows of Given Pareto-frontier Sorted by Accuracy Rank")
plt.ylabel("Rank of Flows in other Studies")
plt.title("Rank Distribution of Pareto-Flows")
if has_pareto_dataset:
    # Without a labelled artist, plt.legend only emits a warning.
    plt.legend(loc="upper left", fontsize=8)
plt.tight_layout()
plt.show()

In [None]:
# Box plot of the relative ranks each Pareto flow reaches across both study
# groups; one box per flow, positioned by the flow's order in pareto_flows.
plt.figure(figsize=(10, 6))
for x, flow in enumerate(pareto_flows, start=1):
    # Fallback label so the error report below always has something to show,
    # even if the flow cannot be serialised to JSON.
    flow_str = repr(flow)
    try:
        flow_str = json.dumps(flow, sort_keys=True)
        ranks = [
            *df1_pareto[df1_pareto["flow"] == flow]["relative rank"].values,
            *df2_pareto[df2_pareto["flow"] == flow]["relative rank"].values,
        ]
        if not ranks:
            continue
        plt.boxplot(ranks, positions=[x], widths=0.5)
    except Exception as exc:
        # Best effort: one flow that cannot be drawn must not abort the whole
        # figure, but report the real cause instead of hiding it.
        print(f"Could not plot flow {flow_str}: {type(exc).__name__}: {exc}")

plt.grid(axis="y", linestyle="--", alpha=0.5)
plt.xticks(range(1, len(pareto_flows) + 1))
plt.yticks(range(1, len(pareto_flows) + 1))
plt.xlabel("Flows of Given Pareto-frontier Sorted by Accuracy Rank")
plt.ylabel("Rank of Flows in other Studies")
plt.title("Rank Distribution of Pareto-Flows")
plt.tight_layout()
plt.show()