In [1]:
import glob
import os
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import seaborn as sns

warnings.filterwarnings('ignore')

In [2]:
# Figure resolution: inline rendering vs. files written by savefig.
plt.rcParams.update({'figure.dpi': 200, 'savefig.dpi': 600})

# Directory that is globbed for *.pickle result files and that receives the saved figure.
path = "nonlocal"
# Regex alternation used to keep only the measures of interest (IBO is deliberately absent).
nonlocal_measure_pat = "IAA|div|HD|IWO|II-F|AI-F"

# Name of the column plotted on the x-axis; also used as the axis label.
x_axis_label = "# additional relevant items"

In [3]:
def clean_measure_name(df_col):
    """Normalise raw measure names held in a string Series.

    The rewrite rules are applied in order:
      1. drop every "true_" occurrence,
      2. names ending in "IFD_div" / "IFD_mul" get an "_ori" suffix,
      3. every "HD" becomes "HD_ori",
      4. every "_div" becomes the mathtext fragment "$_\\div$".

    Returns a new Series; the input is not modified.
    """
    # (pattern, replacement, pattern-is-a-regex)
    rewrite_rules = [
        ("true_", "", False),
        ("IFD_div$", "IFD_div_ori", True),
        ("IFD_mul$", "IFD_mul_ori", True),
        ("HD", "HD_ori", False),
        ("_div", "$_\\div$", False),
    ]

    cleaned = df_col
    for pattern, replacement, is_regex in rewrite_rules:
        cleaned = cleaned.str.replace(pattern, replacement, regex=is_regex)
    return cleaned

def separate_measure_version(df_col):
    """Split each string at its last underscore.

    Returns a DataFrame with columns 0 (text before the last "_") and
    1 (text after it), one row per element of ``df_col``.
    """
    name_and_version = df_col.str.rsplit(pat="_", n=1, expand=True)
    return name_and_version

In [4]:
# Load the original scores (without any additional relevant item).
# Only the rows without re-ranking are kept; the columns of the other models and the
# now-constant "reranking" column are dropped, then rows with missing values are removed.
combined_base = (
    pd.read_csv("combined_base/csv_combined_result_2025-01-04_015916.csv", index_col=0)
    .query("reranking=='-'")
    .drop(columns=["BPR", "ItemKNN", "MultiVAE", "reranking"])
    .dropna()
)

# .copy() gives an independent frame, so the column assignments below do not write
# into a slice of combined_base (which pandas reports as SettingWithCopyWarning).
nonlocal_combined_base = combined_base[combined_base.measures.str.contains(nonlocal_measure_pat)].copy()
nonlocal_combined_base["measures"] = clean_measure_name(nonlocal_combined_base["measures"])
nonlocal_combined_base[["measures","version"]] = separate_measure_version(nonlocal_combined_base["measures"])

# reset_index returns a new frame; its result has to be kept for the reset to take effect.
nonlocal_combined_base = (
    nonlocal_combined_base
    .rename(columns={"measures":"measure", "NCL":"score"})
    .reset_index(drop=True)
)

# The baseline corresponds to zero additional relevant items.
nonlocal_combined_base[x_axis_label] = 0

df_all = nonlocal_combined_base.copy()
df_all

Unnamed: 0,dataset,measure,score,version,# additional relevant items
28,Lastfm,IWO,0.758481,our,0
30,Lastfm,IAA,0.003691,ori,0
32,Lastfm,IAA,0.748816,our,0
34,Lastfm,IFD$_\div$,0.075941,ori,0
36,Lastfm,IFD$_\div$,0.464325,our,0
42,Lastfm,HD,0.091756,ori,0
46,Lastfm,II-F,0.001333,ori,0
48,Lastfm,II-F,0.738996,our,0
50,Lastfm,AI-F,1.7e-05,ori,0
80,Amazon-lb,IWO,0.961722,our,0


In [5]:
# load nonlocal result

def load_result(exp_type):
    """Collect the pickled result files under ``path`` into one long-format frame.

    Each file name is parsed as follows (after removing the directory part):
    the last "_"-separated token of the stem becomes the value of the
    ``x_axis_label`` column, and the second "_"-separated token of the name
    with "back_" removed becomes the ``dataset``.

    Parameters
    ----------
    exp_type : str
        "front" keeps the files whose name does not contain "back";
        "back" keeps the files whose name contains "back";
        any other value keeps every ``*.pickle`` file.

    Returns
    -------
    pandas.DataFrame
        Columns ``measure``, ``score``, ``x_axis_label``, ``dataset`` and
        ``version``; only measures matching ``nonlocal_measure_pat``, no row
        with a missing value, fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If no result file is selected for ``exp_type``.
    """
    # Sorted so that the row order does not depend on the file-system listing order.
    list_files = sorted(glob.glob(f"{path}/*.pickle"))
    # Match on the file name only, so the directory name cannot influence the selection.
    if exp_type == "front":
        list_files = [f for f in list_files if "back" not in os.path.basename(f)]
    elif exp_type == "back":
        list_files = [f for f in list_files if exp_type in os.path.basename(f)]

    if not list_files:
        raise FileNotFoundError(
            f"no '{exp_type}' result files (*.pickle) found in '{path}'"
        )

    frames = []
    for file in list_files:
        file_name = os.path.basename(file)

        # splitext removes exactly the extension; str.rstrip(".pickle") would strip any
        # trailing run of the characters ".pickle" and could eat into the stem.
        stem = os.path.splitext(file_name)[0]
        count_token = stem.rsplit("_", maxsplit=1)[1]
        # The baseline rows use the integer 0 for this column, so keep it numeric here
        # too; a non-numeric token is kept unchanged.
        try:
            num_missing = int(count_token)
        except ValueError:
            num_missing = count_token

        dataset = file_name.replace("back_", "").split("_", maxsplit=2)[1]

        # NOTE: unpickling can execute arbitrary code - only load result files you trust.
        result_dict = pd.read_pickle(file)
        cols = pd.Series(result_dict)

        df_for_file = pd.DataFrame({"measure": cols.index, "score": cols.values})
        df_for_file[x_axis_label] = num_missing
        df_for_file["dataset"] = dataset

        df_for_file["measure"] = clean_measure_name(df_for_file["measure"])
        df_for_file[["measure","version"]] = separate_measure_version(df_for_file["measure"])

        # Remove a trailing "@10"; str.strip("@10") would remove any leading or
        # trailing '@', '1' and '0' characters instead of that suffix.
        df_for_file["version"] = df_for_file["version"].str.replace("@10$", "", regex=True)

        # na=False: a missing version is treated as "no match"; such rows are removed
        # by the dropna() below instead of breaking the boolean negation.
        is_excluded = df_for_file.version.str.contains("att|cut", na=False)
        frames.append(df_for_file[~is_excluded])

    # One concat at the end instead of growing the frame file by file.
    df_all = pd.concat(frames, ignore_index=True)

    is_selected = df_all.measure.str.contains(nonlocal_measure_pat, na=False)
    df_all = df_all[is_selected].dropna().reset_index(drop=True)

    return df_all

def combine_results(df_all, df_result):
    """Stack ``df_result`` below ``df_all`` and order the rows by "version".

    The concatenation renumbers the index from 0; the sort is stable, so rows
    sharing a version keep their concatenation order. Returns a new frame.
    """
    stacked = pd.concat([df_all, df_result], ignore_index=True)
    return stacked.sort_values(by="version", kind="stable")

# For each experiment variant: load its results, put the baseline rows (df_all) in front,
# and tag the rows with the strategy name used in the figure.
non_local_front = combine_results(df_all, load_result("front")).assign(strategy="top")
non_local_back = combine_results(df_all, load_result("back")).assign(strategy="bottom")

# Single long-format frame holding both strategies.
df_all_non_local = pd.concat([non_local_front, non_local_back], ignore_index=True)

In [6]:
# Append the version to the measure name as a mathtext subscript,
# e.g. measure "IAA" with version "our" becomes "IAA$_{our}$".
version_suffix = "$_{" + df_all_non_local["version"] + "}$"
labelled_measure = df_all_non_local["measure"] + version_suffix

# Names that already end with their own suffix are left alone, so running this cell
# a second time does not append the subscript twice.
has_suffix = pd.Series(
    [str(name).endswith(str(suffix)) for name, suffix in zip(df_all_non_local["measure"], version_suffix)],
    index=df_all_non_local.index,
    dtype=bool,
)
df_all_non_local["measure"] = df_all_non_local["measure"].where(has_suffix, labelled_measure)

In [7]:
# Measure labels to keep, listed in the order in which they are arranged downstream.
selected_index = [
    'IWO$_{our}$',
    'IAA$_{ori}$',
    'IAA$_{our}$',
    'IFD$_\\div$$_{ori}$',
    'IFD$_\\div$$_{our}$',
    'HD$_{ori}$',
    'II-F$_{ori}$',
    'II-F$_{our}$',
    'AI-F$_{ori}$',
]

In [8]:

# Keep only the measures listed in selected_index and arrange the rows in that order:
# move the identifying columns into a MultiIndex, reindex its first level ("measure"),
# then turn the index levels back into ordinary columns.
key_columns = ["measure","version", x_axis_label,"strategy","dataset"]
df_all_non_local = (
    df_all_non_local
    .set_index(key_columns)
    .reindex(selected_index, level=0)
    .reset_index()
)

In [None]:
# Figure: one small line panel per (measure, dataset) pair, exported as a PDF.
markers = [11,10]  # matplotlib integer marker codes, one per "strategy" level
plotting_kws = dict(x=x_axis_label, y="score", hue="measure", kind="line", row="measure", col="dataset", style="strategy", 
                    palette="colorblind",
                    markers=markers, dashes=True, aspect=1.05, height=1.025)
facet_kws = {'sharey': "row", 'sharex': True,  "margin_titles":True, "legend_out": True}
grid = sns.relplot(df_all_non_local, facet_kws=facet_kws, **plotting_kws)

# Per-panel cosmetics: scientific notation on y, small fonts, few y ticks, full frame.
for ax in grid.axes.flatten():
    ax.ticklabel_format(axis='y', style='sci', scilimits=(-1.5,0), useMathText=True) 
    ax.yaxis.get_offset_text().set_fontsize(5.5)
    ax.xaxis.label.set_size(fontsize=5.5)
    ax.tick_params(axis='y', which='major', labelsize=6, rotation=0)
    ax.tick_params(axis='x', which='major', labelsize=6, rotation=0)
    ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=4))
    for _, spine in ax.spines.items():
        spine.set_visible(True) 

# Put the row (measure) name on the y-axis of the first panel of each row.
# grid.row_names holds the row facet values in grid order, so the private list of
# margin-title texts does not have to be read.
for row_axes, row_name in zip(grid.axes, grid.row_names):
    row_axes[0].set_ylabel('$\\downarrow${row_name}'.format(row_name=row_name))
    row_axes[0].yaxis.label.set_size(fontsize=7)

# Column titles show the dataset name; the empty row template blanks the margin titles.
grid.set_titles(col_template = '{col_name}', row_template = "",size=7)

# Legend.legendHandles was renamed to legend_handles in matplotlib 3.7 and later
# removed; read whichever attribute the installed version provides.
legend = grid.legend
handles = getattr(legend, "legend_handles", None)
if handles is None:
    handles = legend.legendHandles

# Only the last three legend entries are kept (presumably the "strategy" header and
# its two levels - TODO confirm) and the legend is moved above the grid.
sns.move_legend(grid, handles=handles[-3:], loc="upper center", ncols=3, bbox_to_anchor=(0.525, 1.015), frameon=True,fontsize=7, title=None, markerscale=0.8)

grid.figure.tight_layout(w_pad=0.05, h_pad=0.12)

# A timestamp in the file name keeps earlier exports from being overwritten.
timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
grid.savefig(f"{path}/nonlocalisation_{timestamp}.pdf",bbox_inches="tight")   

# Check values

In [None]:
# Original-version II-F scores for the "top" strategy, ordered by dataset and by the
# number of additional relevant items.
# The column is addressed through x_axis_label and the strategy through its stored
# value ("top"); the previous column name and the value 'front' do not occur in the frame.
is_iif_ori = df_all_non_local.measure.str.contains("II-F") & df_all_non_local.measure.str.contains("ori")
(
    df_all_non_local[is_iif_ori]
    .sort_values(
        ["dataset", x_axis_label],
        # compare the x-axis column numerically even if it holds a mix of ints and strings
        key=lambda col: pd.to_numeric(col, errors="coerce") if col.name == x_axis_label else col,
    )
    .query("strategy=='top'")
)

In [None]:
# For each measure pattern, show per dataset how far the score moves (max - min)
# across the rows of the "top" strategy.
list_pat = ["II-F.*our", "II-F.*ori" ,"IAA.*our", "IAA.*ori"]
for pat in list_pat:

    # The strategy is stored as "top"/"bottom", so "top" is the value to filter on.
    # No sorting is needed: max and min do not depend on the row order.
    score_range = (
        df_all_non_local[df_all_non_local.measure.str.contains(pat)]
        .query("strategy=='top'")
        .groupby("dataset")["score"]
        .agg(["max", "min"])
    )
    max_min_diff = score_range["max"] - score_range["min"]
    print(pat)
    display(max_min_diff)