In [1]:
import os, sys

# Make the parent of the directory the kernel was started in importable.
dir2 = os.path.abspath(os.curdir)
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

# NOTE: this is a *relative* chdir, so re-running the cell climbs two more
# levels each time; every relative path below is resolved from the new cwd.
os.chdir("../..")
os.environ['NUMEXPR_NUM_THREADS'] = '10'

In [None]:
from typing import List
from pathlib import Path
from itertools import combinations

from tqdm.auto import tqdm

import numpy as np
import pandas as pd

from scipy import mean
from scipy import stats

import matplotlib.pyplot as plt

from src.utils.aggregation import filter_data, aggregate_methods
from src.compare_methods import run_CD, run_dolan_more, bayes_scores
from src.compare_methods.votenrank import run_printtable_votenrank

%load_ext autoreload
%autoreload 2

In [10]:
# Directory that holds the aggregated results; the figures are written here too.
PATH_TO_RES = os.path.join("results", "aggregation")
metrics_csv = os.path.join(PATH_TO_RES, "metrics_ndcg_10.csv")
new_res = pd.read_csv(metrics_csv, index_col=0)

# NOTE(review): judging by the file name the table is presumably already
# restricted to metric == "ndcg" and k == 10, so no Metric/k filtering is
# applied here - confirm against the script that produced the CSV.
# The CSV index is turned back into an ordinary column.
metrics_df = new_res.reset_index()

In [22]:
# Global figure typography shared by every plot in the notebook.
plt.rc("font", family="DejaVu Sans")
fontsize = 14

# Line colours, addressed by integer position.
color_map = dict(
    enumerate(
        [
            "#1f77b4",
            "#ff7f0e",
            "#2ca02c",
            "#d62728",
            "#9467bd",
            "#8c564b",
            "#e377c2",
            "#7f7f7f",
            "#bcbd22",
            "#17becf",
        ]
    )
)

# Result-column name -> label shown in figure legends.
name_map = dict(
    dm_value="DM AUC",
    cd_score="Mean ranks",
    gm_value="Geom. mean",
    mr_value="MA",
    hm_value="Harm. mean",
    Copeland_value="Copeland",
    Minimax_value="Minimax",
    dm_lbo_value="DM LBO",
)

# Result-column name -> position in ``color_map`` (follows the order of ``name_map``).
name2idx = {column: position for position, column in enumerate(name_map)}

# Same numbering as ``name2idx`` but without an entry for "dm_lbo_value".
name2marker = {
    column: position
    for column, position in name2idx.items()
    if column != "dm_lbo_value"
}



In [19]:
def flatten(l):
    """Concatenate the items of every iterable in ``l`` into one flat list (one level deep)."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat


def calculate_metrics(data: pd.DataFrame, votenrank_list=None, drop_cd: bool = False) -> pd.DataFrame:
    """Rank the methods in ``data`` with several aggregation schemes.

    Parameters
    ----------
    data
        Long-format table; the columns "Method", "Dataset" and "Value" are
        read here. It is also passed unchanged to ``run_dolan_more``,
        ``run_CD`` and ``run_printtable_votenrank``.
    votenrank_list
        Optional iterable of column names of the table returned by
        ``run_printtable_votenrank`` (e.g. ``["Copeland", "Minimax"]``).
        ``None`` skips the votenrank step.
    drop_cd
        When true, ``run_CD`` is not called and no ``cd_*`` columns are added.

    Returns
    -------
    pd.DataFrame
        One row per model, indexed by model name, with the columns

        * ``dm_score`` / ``dm_value`` - "score" / "ranks" of ``run_dolan_more``;
        * ``cd_score`` / ``cd_ranks`` - "score" / "ranks" of ``run_CD``
          (unless ``drop_cd``);
        * ``dm_lbo_value`` - ``run_dolan_more(mode="leave_best_out")`` looked
          up by model name;
        * ``gm_score``, ``mr_score``, ``hm_score`` - geometric, arithmetic and
          harmonic mean of "Value" per method, and ``gm_value``, ``mr_value``,
          ``hm_value`` - their ranks (1 = highest mean, ties averaged);
        * for every ``x`` in ``votenrank_list``: ``{x}_value``, the raw ``x``
          cell and ``{x}_score``.
    """
    # ``rename`` returns a new frame, so the helpers' return values are not mutated.
    results_DM = run_dolan_more(data, save_image=False).rename(
        columns={"score": "dm_score", "ranks": "dm_value"}
    )
    if drop_cd:
        q = results_DM
    else:
        results_CD = run_CD(data).rename(
            columns={"score": "cd_score", "ranks": "cd_ranks"}
        )
        q = pd.merge(results_DM, results_CD, on=["Model_name"])

    # In "leave_best_out" mode the result is used as a Model_name -> value lookup.
    results_new_DM = run_dolan_more(data, save_image=False, mode="leave_best_out")
    q["dm_lbo_value"] = q["Model_name"].map(results_new_DM)
    q = q.set_index("Model_name")

    # Per-method means, broadcast back onto every row of the method.
    metrics_MR = data.copy()
    values_by_method = data.groupby("Method")["Value"]
    metrics_MR["gm_score"] = values_by_method.transform(stats.gmean)
    # "mean" is pandas' built-in aggregation. It replaces ``scipy.mean``, a
    # deprecated re-export of ``numpy.mean`` (pandas already dispatched
    # ``np.mean`` to this same aggregation, so the numbers are unchanged).
    metrics_MR["mr_score"] = values_by_method.transform("mean")
    metrics_MR["hm_score"] = values_by_method.transform(stats.hmean)

    # Collapse to one row per remaining column combination (Dataset/Value removed).
    metrics_MR = metrics_MR.drop(columns=["Dataset", "Value"]).drop_duplicates()

    for prefix in ("gm", "mr", "hm"):
        metrics_MR[f"{prefix}_value"] = metrics_MR[f"{prefix}_score"].rank(
            method="average", ascending=False
        )
    q = pd.merge(
        q, metrics_MR.set_index("Method"), right_index=True, left_index=True
    )

    if votenrank_list is not None:
        metrics_votenrank = run_printtable_votenrank(data)
        for x in votenrank_list:
            metrics_method = (
                metrics_votenrank[x]
                .reset_index()
                .rename(columns={"Ranking position": f"{x}_value"})
            )
            # The parsing below expects each cell to read
            # "<score>: <model name>" optionally followed by "\n...".
            cells = metrics_method[x].map(str)
            metrics_method["Model_name"] = cells.map(
                lambda cell: cell.split(": ")[1].split("\n")[0]
            )
            metrics_method[x + "_score"] = cells.map(
                lambda cell: float(cell.split(": ")[0])
            )
            q = pd.merge(
                q,
                metrics_method.set_index("Model_name"),
                right_index=True,
                left_index=True,
            )
    return q

# Plot drop data case

In [None]:
# How stable is each ranking when i = 1 .. n_data - 1 datasets are removed?
datasets_list = metrics_df["Dataset"].unique()

sampling = 100  # maximum number of dataset subsets evaluated per step
n_data = metrics_df["Dataset"].nunique()
votenrank_list = ["Copeland", "Minimax"]
current_res = calculate_metrics(metrics_df, votenrank_list=votenrank_list)

ranks_columns = [x for x in current_res.columns if "value" in x or x == "cd_score"]
name2idx = {x: i for i, x in enumerate(ranks_columns)}

res_sp_mean = {x: [] for x in ranks_columns}
res_sp_std = {x: [] for x in ranks_columns}

# Number of subsets actually evaluated at each step (at most ``sampling``).
sample_size = []

# fix seed
np.random.seed(0)

for i in tqdm(range(1, n_data)):
    drop_list = list(combinations(datasets_list, i))
    if len(drop_list) < sampling:
        # Few enough combinations: evaluate all of them.
        drop_case = drop_list
    else:
        data_idx = np.random.choice(
            np.arange(len(drop_list)), size=sampling, replace=False
        )
        drop_case = [drop_list[x] for x in data_idx]
    sample_size.append(len(drop_case))

    # One record (column -> Spearman correlation) per evaluated subset.
    records = []
    for drop_group in drop_case:
        tmp_df = metrics_df[~metrics_df["Dataset"].isin(drop_group)].copy()
        output_df = calculate_metrics(tmp_df, votenrank_list=votenrank_list)
        # NOTE(review): spearmanr pairs the values by position, not by model
        # name - this presumes calculate_metrics returns the rows in the same
        # order for the full and the reduced data. TODO confirm.
        records.append(
            {
                x: stats.spearmanr(current_res[x], output_df[x])[0]
                for x in ranks_columns
            }
        )

    sp_df = pd.DataFrame(records, columns=ranks_columns)
    sp_mean, sp_std = sp_df.mean(), sp_df.std()
    for x in ranks_columns:
        res_sp_mean[x].append(sp_mean[x])
        res_sp_std[x].append(sp_std[x])

In [23]:
# Plotting
fig, ax = plt.subplots(figsize=(7, 5), dpi=200)

# x axis: datasets left after dropping 1 .. n_data - 1 of them.
n_remaining = n_data - np.arange(1, n_data)
# Correlations averaged at each step; smaller than ``sampling`` whenever
# fewer combinations exist, so the recorded counts are used for the CI.
n_samples = np.array(sample_size)

for r_method in res_sp_mean:
    mean_curve = np.array(res_sp_mean[r_method])
    line_color = color_map[name2idx[r_method]]
    ax.plot(
        n_remaining,
        mean_curve,
        label=name_map[r_method],
        color=line_color,
    )
    # Band of +/- 1.96 standard errors around the mean.
    ci = 1.96 * np.array(res_sp_std[r_method]) / np.sqrt(n_samples)
    ax.fill_between(
        n_remaining,
        mean_curve - ci,
        mean_curve + ci,
        color=line_color,
        alpha=0.1,
    )

ax.legend(loc="lower right", fontsize=fontsize - 2)
ax.tick_params(axis="both", labelsize=fontsize)
ax.set_xlabel(r"Number of datasets", fontsize=fontsize)
ax.set_ylabel(r"Spearman's $\rho$", fontsize=fontsize)

# Save *before* plt.show(): once the figure has been shown (and closed by the
# backend) a later savefig would write an empty canvas.
fig.savefig(os.path.join(PATH_TO_RES, "sp_drop_data_with_ci_test.pdf"))
plt.show()

# Hyperparam tuning for DM

In [24]:
# Sensitivity of both Dolan-More rankings to the ``max_beta`` argument.
beta_list = np.linspace(1.5, 10, 40)


def _dm_rank_table(**dm_kwargs):
    """Run ``run_dolan_more`` in default and "leave_best_out" mode on ``metrics_df``.

    Returns the default-mode table indexed by "Model_name" with the
    leave-best-out result attached as column "dm_lbo_value".
    """
    table = run_dolan_more(metrics_df, save_image=False, **dm_kwargs)
    leave_best_out = run_dolan_more(
        metrics_df, save_image=False, mode="leave_best_out", **dm_kwargs
    )
    table["dm_lbo_value"] = table["Model_name"].map(leave_best_out)
    return table.set_index("Model_name")


# Reference rankings obtained with the function's own default ``max_beta``.
results_DM = _dm_rank_table()

dm_corr, dm_new_corr = [], []
for beta in tqdm(beta_list):
    results_DM_os = _dm_rank_table(max_beta=beta)
    # NOTE(review): values are paired by row position - presumes both tables
    # list the models in the same order. TODO confirm.
    rho_dm = stats.spearmanr(results_DM_os["ranks"], results_DM["ranks"])[0]
    rho_lbo = stats.spearmanr(
        results_DM_os["dm_lbo_value"], results_DM["dm_lbo_value"]
    )[0]
    dm_corr.append(rho_dm)
    dm_new_corr.append(rho_lbo)


100%|██████████| 40/40 [00:09<00:00,  4.36it/s]


In [25]:
# Plotting
beta_default = 3.0  # position of the dashed "default" marker

fig, ax = plt.subplots(figsize=(7, 6), dpi=200)
ax.plot(
    beta_list,
    dm_corr,
    label=name_map["dm_value"],
    color=color_map[name2idx["dm_value"]],
)
ax.plot(
    beta_list,
    dm_new_corr,
    label=name_map["dm_lbo_value"],
    color=color_map[name2idx["dm_lbo_value"]],
)
ax.axvline(
    x=beta_default,
    color=color_map[4],
    linestyle="--",
    label=r"Default $\hat{\beta}$ DM",
)

ax.legend(loc="lower right", fontsize=fontsize)
ax.tick_params(axis="both", labelsize=fontsize)
ax.set_xlabel(r"$\hat{\beta}$", fontsize=fontsize)
# The plotted values come from stats.spearmanr, i.e. Spearman's rho (not tau).
ax.set_ylabel(r"Spearman's $\rho$", fontsize=fontsize)

# Save *before* plt.show(): once the figure has been shown (and closed by the
# backend) a later savefig would write an empty canvas.
fig.savefig(os.path.join(PATH_TO_RES, "test_hp_changing_test.pdf"))
plt.show()

# Drop methods case

In [None]:
# How stable is each ranking when 1 .. n_data - 1 methods are removed?
drop_cd = False
n_data = 8  # the loop below drops up to n_data - 1 methods
votenrank_methods = ["Minimax", "Copeland"]

current_res = (
    calculate_metrics(metrics_df, votenrank_list=votenrank_methods, drop_cd=drop_cd)
    .rename_axis("Model_name")
    .reset_index()
)
ranks_columns = [
    col for col in current_res.columns if "value" in col or col == "cd_score"
]
name2idx = {col: pos for pos, col in enumerate(ranks_columns)}
# Despite the "ken" in the names these hold Spearman correlations.
res_ken_mean = {col: [] for col in ranks_columns}
res_ken_std = {col: [] for col in ranks_columns}

# Number of method subsets evaluated for each subset size.
sampling = []

for n_dropped in tqdm(range(1, n_data)):
    drop_groups = list(combinations(metrics_df["Method"].unique(), n_dropped))
    correlations = {col: [] for col in ranks_columns}
    for dropped in drop_groups:
        reduced_data = metrics_df[~metrics_df["Method"].isin(dropped)].copy()
        reduced_res = calculate_metrics(
            reduced_data, votenrank_list=votenrank_methods, drop_cd=drop_cd
        )
        # Full-data rankings restricted to the methods that were kept.
        reference_res = current_res[~current_res["Model_name"].isin(dropped)]
        # NOTE(review): values are paired by row position, not by model name -
        # presumes both frames list the surviving models in the same order.
        for r_method in ranks_columns:
            rho = stats.spearmanr(reduced_res[r_method], reference_res[r_method])[0]
            correlations[r_method].append(rho)
    for r_method in ranks_columns:
        res_ken_mean[r_method].append(np.mean(correlations[r_method]))
        res_ken_std[r_method].append(np.std(correlations[r_method]))
    sampling.append(len(drop_groups))


In [27]:
# Plotting

fig, ax = plt.subplots(figsize=(7, 5), dpi=200)

# NOTE(review): presumably ``n_data + 2`` is the total number of methods, so
# that the axis shows how many methods remain - TODO confirm / derive from data.
n_remaining = n_data + 2 - np.arange(1, n_data)
n_samples = np.array(sampling)

for r_method in res_ken_mean:
    mean_curve = np.array(res_ken_mean[r_method])
    line_color = color_map[name2idx[r_method]]
    ax.plot(
        n_remaining,
        mean_curve,
        label=name_map[r_method],
        color=line_color,
    )
    # Band of +/- 1.96 standard errors around the mean.
    ci = 1.96 * np.array(res_ken_std[r_method]) / np.sqrt(n_samples)
    ax.fill_between(
        n_remaining,
        mean_curve - ci,
        mean_curve + ci,
        color=line_color,
        alpha=0.1,
    )

ax.legend(loc="lower right", fontsize=fontsize)
ax.tick_params(axis="both", labelsize=fontsize)
ax.set_xlabel(r"Number of methods", fontsize=fontsize)
ax.set_ylabel(r"Spearman's $\rho$", fontsize=fontsize)

# Save *before* plt.show(): once the figure has been shown (and closed by the
# backend) a later savefig would write an empty canvas.
fig.savefig(os.path.join(PATH_TO_RES, "test_sp_drop_methods_ci_wo_cd_test.pdf"))
plt.show()

# Adding better method case

In [None]:
def increase_column(value, alpha):
    """Return ``value * alpha``, capped from above at 1.0."""
    scaled = value * alpha
    return 1.0 if scaled > 1.0 else scaled


# Add one synthetic method whose score on every dataset is alpha times the best
# observed score (capped at 1.0) and check how the ranking of the original
# methods reacts.
alpha_space = np.linspace(1.0, 4.0, 40)

drop_cd = False  # True
votenrank_methods = ["Minimax", "Copeland"]

current_res = calculate_metrics(
    metrics_df, votenrank_list=votenrank_methods, drop_cd=drop_cd
).reset_index()
ranks_columns = [x for x in current_res.columns if "value" in x or x == "cd_score"]
name2idx = {x: i for i, x in enumerate(ranks_columns)}
# Despite the "ken" in the names these hold Spearman correlations.
res_ken_mean = {x: [] for x in ranks_columns}
res_ken_std = {x: [] for x in ranks_columns}

sampling = []

new_method = "Method A"
# Best observed value per dataset; independent of alpha, so built only once.
best_res_df = metrics_df.groupby(by="Dataset")["Value"].max().reset_index()
best_res_df["Method"] = new_method

for alpha in tqdm(alpha_space):
    boosted_df = best_res_df.copy()
    boosted_df["Value"] = boosted_df["Value"].apply(increase_column, args=(alpha,))
    tmp_df = pd.concat([metrics_df, boosted_df], ignore_index=True)

    # Rank with the synthetic method present, then look only at the originals.
    tmp_res = calculate_metrics(
        tmp_df, votenrank_list=votenrank_methods, drop_cd=drop_cd
    ).drop(index=new_method)

    # NOTE(review): values are paired by row position, not by model name -
    # presumes calculate_metrics keeps the original methods in the same order.
    for r_method in ranks_columns:
        res_ken_mean[r_method].append(
            stats.spearmanr(tmp_res[r_method], current_res[r_method])[0]
        )


In [29]:
# Plotting
fig, ax = plt.subplots(figsize=(7, 5), dpi=200)

for r_method in res_ken_mean:
    ax.plot(
        alpha_space,
        res_ken_mean[r_method],
        label=name_map[r_method],
        color=color_map[name2idx[r_method]],
    )

ax.legend(loc="lower right", fontsize=fontsize)
ax.tick_params(axis="both", labelsize=fontsize)
ax.set_xlabel(r"$\alpha$", fontsize=fontsize)
# The plotted values come from stats.spearmanr, i.e. Spearman's rho (not tau).
ax.set_ylabel(r"Spearman's $\rho$", fontsize=fontsize)

# Save *before* plt.show(): once the figure has been shown (and closed by the
# backend) a later savefig would write an empty canvas.
fig.savefig(os.path.join(PATH_TO_RES, "add_better_method_case_test.pdf"))
plt.show()

# Add new similar method

In [30]:
def increase_column(value, alpha):
    """Return ``value * alpha``, capped from above at 1.0."""
    scaled = value * alpha
    return 1.0 if scaled > 1.0 else scaled

In [31]:
# For every existing method add a near-copy "<method>*" whose scores are alpha
# times the original ones (capped at 1.0) and measure how much the ranking of
# the original methods changes.
alpha_space = np.linspace(1.01, 1.15, 40)

drop_cd = False  # True
methods_list = metrics_df["Method"].unique()
votenrank_methods = ["Minimax", "Copeland"]

current_res = calculate_metrics(
    metrics_df, votenrank_list=votenrank_methods, drop_cd=drop_cd
).reset_index()
ranks_columns = [x for x in current_res.columns if "value" in x or "cd_score" == x]
name2idx = {x: i for i, x in enumerate(ranks_columns)}
res_sp_mean = {x: [] for x in ranks_columns}
res_sp_std = {x: [] for x in ranks_columns}

# Number of correlations averaged for each alpha (one per cloned method).
sampling = []

# Per-dataset scores of every method; independent of alpha, so built only once.
method_scores = {
    method: (
        metrics_df[metrics_df["Method"] == method]
        .groupby(by="Dataset")["Value"]
        .max()
        .reset_index()
    )
    for method in methods_list
}

for alpha in tqdm(alpha_space):
    tmp_sp_res = {x: [] for x in ranks_columns}
    sampling.append(len(methods_list))
    for method in methods_list:
        new_method = method + "*"
        clone_df = method_scores[method].copy()
        clone_df["Method"] = new_method
        clone_df["Value"] = clone_df["Value"].apply(increase_column, args=(alpha,))
        tmp_df = pd.concat([metrics_df, clone_df], ignore_index=True)

        # Rank with the clone present, then look only at the original methods.
        tmp_res = calculate_metrics(
            tmp_df, votenrank_list=votenrank_methods, drop_cd=drop_cd
        ).drop(index=new_method)

        # NOTE(review): values are paired by row position, not by model name -
        # presumes calculate_metrics keeps the original methods in the same order.
        for r_method in ranks_columns:
            tmp_sp_res[r_method].append(
                stats.spearmanr(tmp_res[r_method], current_res[r_method])[0]
            )
    for r_method in ranks_columns:
        res_sp_mean[r_method].append(np.mean(tmp_sp_res[r_method]))
        res_sp_std[r_method].append(np.std(tmp_sp_res[r_method]))

100%|██████████| 40/40 [07:39<00:00, 11.48s/it]


In [32]:
# Plotting

fig, ax = plt.subplots(figsize=(9, 5), dpi=200)
n_samples = np.array(sampling)

for r_method in res_sp_mean:
    # Minimax is left out of this figure.
    if r_method == "Minimax_value":
        continue
    mean_curve = np.array(res_sp_mean[r_method])
    line_color = color_map[name2idx[r_method]]
    ax.plot(
        alpha_space,
        mean_curve,
        label=name_map[r_method],
        color=line_color,
    )
    # Band of +/- 1.96 standard errors around the mean.
    ci = 1.96 * np.array(res_sp_std[r_method]) / np.sqrt(n_samples)
    ax.fill_between(
        alpha_space,
        mean_curve - ci,
        mean_curve + ci,
        color=line_color,
        alpha=0.1,
    )

ax.legend(loc="lower left", fontsize=fontsize)
ax.tick_params(axis="both", labelsize=fontsize)
ax.set_xlabel(r"$\alpha$", fontsize=fontsize)
ax.set_ylabel(r"Spearman's $\rho$", fontsize=fontsize)

# Save *before* plt.show(): once the figure has been shown (and closed by the
# backend) a later savefig would write an empty canvas.
fig.savefig(os.path.join(PATH_TO_RES, "test_sp_add_sim_method_ci.pdf"))
plt.show()

In [33]:
# Same experiment as the previous one, but the near-copy "<method>*" may also
# be slightly worse than the original (alpha below 1).
alpha_space = np.linspace(0.85, 1.15, 80)
# alpha == 1.0 is not evaluated. Removing it here (instead of skipping it
# inside the loop) keeps ``alpha_space`` the same length as the result lists,
# which the plotting cell draws against ``alpha_space``.
alpha_space = alpha_space[alpha_space != 1.0]

drop_cd = False
methods_list = metrics_df["Method"].unique()
votenrank_methods = ["Minimax", "Copeland"]

current_res = calculate_metrics(
    metrics_df, votenrank_list=votenrank_methods, drop_cd=drop_cd
).reset_index()
ranks_columns = [x for x in current_res.columns if "value" in x or "cd_score" == x]
name2idx = {x: i for i, x in enumerate(ranks_columns)}
res_sp_mean = {x: [] for x in ranks_columns}
res_sp_std = {x: [] for x in ranks_columns}

# Number of correlations averaged for each alpha (one per cloned method).
sampling = []

# Per-dataset scores of every method; independent of alpha, so built only once.
method_scores = {
    method: (
        metrics_df[metrics_df["Method"] == method]
        .groupby(by="Dataset")["Value"]
        .max()
        .reset_index()
    )
    for method in methods_list
}

for alpha in tqdm(alpha_space):
    tmp_sp_res = {x: [] for x in ranks_columns}
    sampling.append(len(methods_list))
    for method in methods_list:
        new_method = method + "*"
        clone_df = method_scores[method].copy()
        clone_df["Method"] = new_method
        clone_df["Value"] = clone_df["Value"].apply(increase_column, args=(alpha,))
        tmp_df = pd.concat([metrics_df, clone_df], ignore_index=True)

        # Rank with the clone present, then look only at the original methods.
        tmp_res = calculate_metrics(
            tmp_df, votenrank_list=votenrank_methods, drop_cd=drop_cd
        ).drop(index=new_method)

        # NOTE(review): values are paired by row position, not by model name -
        # presumes calculate_metrics keeps the original methods in the same order.
        for r_method in ranks_columns:
            tmp_sp_res[r_method].append(
                stats.spearmanr(tmp_res[r_method], current_res[r_method])[0]
            )
    for r_method in ranks_columns:
        res_sp_mean[r_method].append(np.mean(tmp_sp_res[r_method]))
        res_sp_std[r_method].append(np.std(tmp_sp_res[r_method]))

100%|██████████| 80/80 [15:15<00:00, 11.45s/it]


In [34]:
# Plotting

fig, ax = plt.subplots(figsize=(9, 5), dpi=200)
n_samples = np.array(sampling)

for r_method in res_sp_mean:
    # Minimax is left out of this figure.
    if r_method == "Minimax_value":
        continue
    mean_curve = np.array(res_sp_mean[r_method])
    line_color = color_map[name2idx[r_method]]
    ax.plot(
        alpha_space,
        mean_curve,
        label=name_map[r_method],
        color=line_color,
    )
    # Band of +/- 1.96 standard errors around the mean.
    ci = 1.96 * np.array(res_sp_std[r_method]) / np.sqrt(n_samples)
    ax.fill_between(
        alpha_space,
        mean_curve - ci,
        mean_curve + ci,
        color=line_color,
        alpha=0.1,
    )

ax.legend(loc="lower left", fontsize=fontsize)
ax.tick_params(axis="both", labelsize=fontsize)
ax.set_xlabel(r"$\alpha$", fontsize=fontsize)
ax.set_ylabel(r"Spearman's $\rho$", fontsize=fontsize)

# Save *before* plt.show(): once the figure has been shown (and closed by the
# backend) a later savefig would write an empty canvas.
fig.savefig(os.path.join(PATH_TO_RES, "test_sp_add_sim_method_ci_test.pdf"))
plt.show()