In [1]:
#Import data from combined_result

import os
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import warnings
import re
import matplotlib

# Use seaborn's "darkgrid" theme for the figures created below.
sns.set_style("darkgrid")
# Ignore every warning from here on; note that this also hides pandas deprecation notices.
warnings.simplefilter("ignore")

# Identifier of the combined-result CSV; load_data interpolates it into
# "combined_base/csv_combined_result_{newest_file}.csv".
# TODO: insert newest file. The previous bare placeholder (`newest_file = #...`) was a
# SyntaxError that stopped this whole cell from running; None keeps the cell valid
# until a real value is filled in.
newest_file = None

# Datasets analysed by the notebook, in the order used for tables and figures.
list_dataset = [
                "Lastfm",
                "Amazon-lb",
                "QK-video",
                "Jester",
                "ML-10M",
                "ML-20M"
                ]


# Resolution of figures shown inline (dpi); the plotting cell further down lowers this to 100.
plt.rcParams['figure.dpi'] = 400
# Resolution of figures written with plt.savefig (dpi).
plt.rcParams['savefig.dpi'] = 600

# Re-ranker names and the colour of each one (colours come from the colorblind palette).
list_rerank = ["none", "BC", "CM", "GS"]
list_color = ['#029e73', '#d55e00', '#cc78bc', '#ca9161']
dict_color = {rerank: color for rerank, color in zip(list_rerank, list_color)}

# Base recommender models and the scatter marker paired with each one.
list_models = ["BPR", "ItemKNN", "MultiVAE", "NCL"]
list_markers = ["x", "1", "+", "2"]

In [2]:
def load_data(file_path, est=None):
    """Load the Pareto pickles and the base-model scores and stack them in one frame.

    Parameters
    ----------
    file_path : str
        Sub-directory of ``../pareto`` that contains the per-dataset pickle files.
    est : optional
        When truthy, only files whose name contains ``with{est}.`` and does not
        contain ``pareto`` are kept, on top of the default file-name filters.

    Returns
    -------
    tuple
        ``(combined_df, model_scores, rel_measures, fair_measures)``.
        ``combined_df`` holds the pickle rows (``source == "pareto"``) followed by
        the model rows; ``model_scores`` is the reshaped CSV; ``rel_measures`` and
        ``fair_measures`` are lists of column names found in the pickles.

    Raises
    ------
    AssertionError
        If the number of matching pickle files differs from ``len(list_dataset)``.

    Notes
    -----
    Reads the module-level names ``list_dataset`` and ``newest_file``.
    """
    pareto_dir = f"../pareto/{file_path}"

    def _is_wanted(fname):
        # Filters shared by both modes.
        keep = (
            "pickle" in fname
            and "dftest" not in fname
            and "10" in fname
            and "temp" not in fname
            and "time" not in fname
            and "oraclefair" in fname
        )
        if est:
            # the dot is important to distinguish with1.pickle vs with10.pickle
            keep = keep and "pareto" not in fname and f"with{est}." in fname
        return keep

    list_files = [f for f in os.listdir(pareto_dir) if _is_wanted(f)]

    # The previous `assert(cond, print(...))` asserted a non-empty tuple and could never fail.
    assert len(list_files) == len(list_dataset), (
        f"expected {len(list_dataset)} pickle files in {pareto_dir}, found {len(list_files)}"
    )

    # Collect the frames first and concatenate once instead of growing a frame per file.
    frames = []
    for f in list_files:
        df = pd.read_pickle(f"{pareto_dir}/{f}")
        # The dataset name is the third underscore-separated field of the file name.
        df["dataset"] = f.split("_")[2]
        frames.append(df)
    big_df = pd.concat(frames)

    rel_measures = big_df.columns[(~big_df.columns.str.contains("our|ori|dataset"))&(big_df.columns.str.contains("@", regex=False))].to_list()
    fair_measures = big_df.columns[(big_df.columns.str.contains("Jain|QF|Ent|Gini|FSat"))&(big_df.columns.str.contains("our"))].to_list()

    big_df["source"] = "pareto"

    model_scores = pd.read_csv(f"combined_base/csv_combined_result_{newest_file}.csv", index_col=0)
    model_scores = model_scores.sort_values(["dataset","reranking"])
    model_scores = model_scores.melt(id_vars=["dataset", "measures", "reranking"]).set_index("measures")
    model_scores.index = model_scores.index + "@10"
    # "model-reranker" when a re-ranker was applied, otherwise just the model name.
    model_scores["source"] = model_scores.apply(lambda x: x.variable + "-" + x.reranking if x.reranking != "-" else x.variable, axis=1)
    model_scores = model_scores.drop(columns=["reranking", "variable"])

    # One row per (dataset, source), one column per measure.
    model_scores = model_scores\
                    .reset_index()\
                    .pivot_table(index=["measures"], columns=["dataset","source"])\
                    .T\
                    .reset_index()

    model_scores = model_scores.drop(columns="level_0")

    # Dataset names in the CSV are wrapped in a LaTeX \rotatebox command; unwrap them.
    model_scores["dataset"] = model_scores.dataset.str.replace("\\rotatebox[origin=r]{90}{","", regex=False).str.rstrip("}")

    selected_cols = ['HR@10', 'MRR@10', 'P@10', 'MAP@10', 'R@10', 'NDCG@10', 'Jain_our@10','QF_our@10', 'Ent_our@10', 'Gini_our@10', 'FSat_our@10',
                     'dataset', 'source']

    # DataFrame.append was removed in pandas 2.0; pd.concat gives the same stacking.
    combined_df = pd.concat([big_df, model_scores.loc[:, selected_cols]])

    return combined_df, model_scores, rel_measures, fair_measures

Helpers for annotating the best model by relevance, by fairness, and by distance to the Pareto frontier

In [3]:
def get_best_rel_best_fair(this_data):
    """Map every measure column to the list of models that achieve its best value.

    Rows whose ``source`` is ``"pareto"`` are ignored. Columns whose name contains
    "Gini" are minimised; every other column except ``dataset`` and ``source`` is
    maximised. Ties yield several model names in the list.
    """
    models_only = this_data.loc[this_data["source"] != "pareto"]
    column_names = models_only.columns

    best_high = models_only.loc[:, ~column_names.str.contains("Gini|dataset|source")].max()
    best_low = models_only.loc[:, column_names.str.contains("Gini")].min()

    winners = {}
    # Maximised measures first, then minimised ones, so dictionary order is unchanged.
    for best_values in (best_high, best_low):
        for measure, val in best_values.items():
            winners[measure] = models_only.loc[models_only[measure] == val, "source"].to_list()

    return winners

# Pareto gradient

In [4]:
def get_pareto_gradient(combined_df, dataset):
    """Slope between the two end points of a dataset's Pareto rows, per measure pair.

    Parameters
    ----------
    combined_df : pandas.DataFrame
        Frame with ``dataset`` and ``source`` columns plus the measure columns.
    dataset : str
        Dataset whose rows with ``source == "pareto"`` are used.

    Returns
    -------
    pandas.Series
        Float series indexed by ``"{rel}-{fair}"`` (with "@10" removed, sorted),
        holding ``difference[fair] / difference[rel]`` between the first and last
        row. A zero relevance difference gives ``inf`` or ``nan``.

    Raises
    ------
    AssertionError
        If the first or last row is not a column-wise minimum or maximum.

    Notes
    -----
    Reads the module-level ``rel_measures`` and ``fair_measures`` lists.
    """
    the_data = combined_df.query("dataset==@dataset & source=='pareto'").drop(columns=["source", "dataset"])
    the_data = the_data.loc[:, rel_measures + fair_measures]

    # check if the first and last rows are really the end points
    col_min = the_data.min()
    col_max = the_data.max()
    for position in (0, -1):
        endpoint = the_data.iloc[position]
        is_extreme = (endpoint == col_min) | (endpoint == col_max)
        assert is_extreme.all(), (
            f"row {position} of {dataset} is not an end point "
            f"(shape={the_data.shape}, offending columns={is_extreme[~is_extreme].index.to_list()})"
        )

    start_point = the_data.iloc[0] #most ideal relevance
    end_point = the_data.iloc[-1] #most fair
    differences = start_point - end_point

    gradients = {
        f"{rm}-{fm}": differences[fm] / differences[rm]
        for fm in fair_measures
        for rm in rel_measures
    }
    # An explicit float dtype keeps the later .round(2) effective on every pandas version.
    series_gradient = pd.Series(gradients, dtype=float)

    series_gradient.index = series_gradient.index.str.replace("@10","")
    series_gradient = series_gradient.sort_index()

    return series_gradient

def detect_good(el):
    """Return True when ``el`` is a finite, non-zero number, otherwise False."""
    unusable = np.isinf(el) or np.isnan(el) or el == 0
    return not unusable
    
def conclude(number):
    """Turn a count of good datasets into a verdict string.

    The count is compared with ``len(list_dataset)`` first, then with zero.
    """
    if number == len(list_dataset):
        verdict = "always good"
    elif number == 0:
        verdict = "always bad"
    else:
        verdict = "inconsistent"
    return verdict

# Distance

In [5]:
def euclidean_dist(df1, df2, cols=['x_coord','y_coord']):
    """Row-wise Euclidean distance between ``df1[cols]`` and ``df2[cols]``."""
    offsets = df1[cols].values - df2[cols].values
    return np.linalg.norm(offsets, axis=1)

def find_nearest(array, value):
    """Return ``(element, position)`` of the entry of ``array`` closest to ``value``.

    On ties the first closest entry wins.
    """
    candidates = np.asarray(array)
    closest = np.argmin(np.abs(candidates - value))
    return candidates[closest], closest

def find_nearest_all(df, alpha=0.5):
    """Find, for every measure pair, the frontier point at a fraction of the path length.

    Parameters
    ----------
    df : pandas.DataFrame
        Pareto rows. Columns whose name contains "our" or "ori" are treated as
        fairness measures; columns containing none of "our", "dataset", "ori" are
        treated as relevance measures. A ``source`` column must be dropped by the
        caller, because "source" contains "our" and would count as a fairness column.
    alpha : float, default 0.5
        Fraction of the cumulative path length to travel, starting from the point
        with the highest relevance.

    Returns
    -------
    dict
        ``result[rel_measure][fair_measure]`` is a length-2 array ``[rel, fair]``.
    """
    result_dict = {}
    rel_measures = df.columns[~df.columns.str.contains("our|dataset|ori")]
    fair_measures = df.columns[df.columns.str.contains("our|ori")]

    for rel_measure in rel_measures:
        result_dict[rel_measure] = {}
        for fair_measure in fair_measures:

            df_copy = df[[rel_measure, fair_measure]]\
                                                    .copy()\
                                                    .sort_values(rel_measure, ascending=False, kind="stable")\
                                                    .drop_duplicates(rel_measure, keep="last")
            #we keep last because fairness would be higher at the last index

            #if both measures have 0 (or undefined) std, get their value instead
            # `not` replaces `~`, which only negates correctly on numpy booleans.
            if not df_copy.describe().loc["std"].any():
                val_rel = df_copy[rel_measure].unique()
                val_fair = df_copy[fair_measure].unique()
                assert val_rel.shape[0] == 1
                assert val_fair.shape[0] == 1

                # np.array(a, b) would read b as the dtype and raise; build a 2-element array.
                result_dict[rel_measure][fair_measure] = np.array([val_rel[0], val_fair[0]])
                print(f"Allocating the only values for {rel_measure}, {fair_measure}")

            else:
                # Consecutive point pairs along the frontier.
                df1 = df_copy.iloc[:-1]
                df2 = df_copy.shift(-1).iloc[:-1]

                dist = euclidean_dist(df1, df2, cols=df1.columns)
                cumsumdist = np.cumsum(dist) #this distance is from the right side (oracle), because we start from the point with highest rel

                alpha_distance = alpha*cumsumdist[-1]

                _, idx = find_nearest(cumsumdist, alpha_distance)
                #this df2.iloc[idx].values is the point *in* the PF that is closest to alpha*distance
                result_dict[rel_measure][fair_measure] = df2.iloc[idx].values

    return result_dict

def get_model_distance(df, path_integral_point, return_all_dist=False):
    """Distance of every model to the reference point of each measure pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Model rows with a ``source`` column and the measure columns.
    path_integral_point : pandas.DataFrame
        Columns are relevance measures, index is fairness measures, and each cell
        is the ``[rel, fair]`` reference point for that pair.
    return_all_dist : bool, default False
        If False each cell is ``(closest source, its [rel, fair] values)``.
        If True each cell is ``(sources sorted by distance, sorted distances)``.

    Returns
    -------
    pandas.DataFrame
        Same layout as ``path_integral_point``. Pairs without any complete row
        hold ``("-", nan)``.
    """
    best_model = {}
    for rel_measure in path_integral_point.columns:
        best_model[rel_measure] = {}
        for fair_measure in path_integral_point.index:
            # Keep only rows where both measures are present, and filter the source
            # names with the same mask so positions in `dist` map to the right model.
            # (Previously names were read from the unfiltered frame.)
            complete = df[[rel_measure, fair_measure]].notna().all(axis=1).to_numpy()
            df1 = df.loc[complete, [rel_measure, fair_measure]]
            sources = df.loc[complete, "source"]
            df2 = path_integral_point.loc[fair_measure, rel_measure]

            if len(df1)==0:
                best_model[rel_measure][fair_measure] = ("-", np.nan)
                continue

            dist = np.linalg.norm(df1-df2, axis=1)
            if not return_all_dist:
                idx = dist.argmin()
                best_model[rel_measure][fair_measure] = (sources.iloc[idx], df1.iloc[idx].values)
            else:
                # A stable sort keeps the original row order among equal distances.
                order = np.argsort(dist, kind="stable")
                best_model[rel_measure][fair_measure] = (sources.iloc[order].values, dist[order])

    best_model = pd.DataFrame(best_model)
    return best_model

def get_model_distance_dict(combined_df):
    """Compute reference points and model distances for every dataset.

    Returns ``(model_distance_dict, path_integral_point_dict)``, both keyed by
    dataset name. Datasets without any ``source == "pareto"`` row are skipped.
    The name of each visited dataset is printed.
    """
    pareto_rows = combined_df.query("source=='pareto'").drop(columns="source")

    distances_by_dataset = {}
    points_by_dataset = {}

    for data in combined_df.dataset.unique():
        if data == "pareto":
            continue

        print(data)
        frontier = pareto_rows.query("dataset==@data")
        if len(frontier) == 0:
            continue

        path_integral_point = pd.DataFrame(find_nearest_all(frontier))
        model_rows = combined_df.query("dataset==@data & source!='pareto'")

        distances_by_dataset[data] = get_model_distance(
            model_rows, path_integral_point, return_all_dist=True
        )
        points_by_dataset[data] = path_integral_point

    return distances_by_dataset, points_by_dataset

# Combine distance and pareto

In [6]:
def _cellwise(frame, func):
    """Apply ``func`` to every cell: ``DataFrame.map`` if it exists, else ``applymap``."""
    return frame.map(func) if hasattr(frame, "map") else frame.applymap(func)


def get_closest_model_to_pareto(dataset: str, model_distance_dict):
    """Return the closest model for every measure pair of ``dataset``.

    Parameters
    ----------
    dataset : str
        Key into ``model_distance_dict``.
    model_distance_dict : dict
        Maps dataset to a frame whose cells are
        ``(models sorted by distance, sorted distances)``.

    Returns
    -------
    pandas.DataFrame
        One column, indexed by ``(relevance measure, fairness measure)``, holding
        the name of the closest model. Rows of the input that contain a cell with
        at most one model name are left out.

    Raises
    ------
    AssertionError
        If two models are at exactly the same distance for some pair.
    """
    per_pair = model_distance_dict[dataset]

    distances = _cellwise(per_pair, lambda x: x[1]).dropna()
    no_ties = _cellwise(distances, lambda x: np.unique(x).shape == x.shape)
    # `all(DataFrame)` iterates the column labels and is always true; test the cell values.
    assert bool(no_ties.to_numpy().all()), f"tied distances found for dataset {dataset}"

    ranked_models = _cellwise(per_pair, lambda x: x[0])

    df_of_closest_models = _cellwise(
        ranked_models[_cellwise(ranked_models, len) > 1].dropna(),
        lambda x: x[0],
    )

    df_of_closest_models = pd.DataFrame(df_of_closest_models.unstack())

    return df_of_closest_models

In [7]:
def get_best_avg(this_data):
    """Find, per measure pair, the model with the highest mean of relevance and fairness.

    Columns whose name contains "Gini" are replaced by ``1 - Gini`` before averaging.

    Parameters
    ----------
    this_data : pandas.DataFrame
        Rows of one dataset; rows with ``source == "pareto"`` are ignored.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(rel, fair)`` with columns ``score`` (best average) and
        ``source`` (the model reaching it, NaN if no row matches).

    Raises
    ------
    AssertionError
        If more than one model reaches the best score of a pair.

    Notes
    -----
    Reads the module-level ``rel_measures`` and ``fair_measures`` lists.
    """
    for_val = this_data.loc[this_data.source!="pareto"]
    for_val_rel = for_val[rel_measures]
    # Explicit copy: the Gini columns are rewritten below and must not touch the caller's data.
    for_val_fair = for_val[fair_measures].copy()
    gini_cols = for_val_fair.columns.str.contains("Gini")
    for_val_fair.loc[:, gini_cols] = 1 - for_val_fair.loc[:, gini_cols]

    average_cols = ["rel", "fair", "score", "source"]
    pieces = []

    for col in for_val_fair.columns:
        # Broadcast one fairness column against all relevance columns.
        avg_val_for_col = (for_val_rel.values + for_val_fair[col].values.reshape(-1,1))/2
        df_avg_col = pd.DataFrame(avg_val_for_col, columns=rel_measures)
        df_avg_col["source"] = for_val.source.values
        df_avg_col["fair"] = col
        pieces.append(df_avg_col.melt(["fair", "source"], var_name="rel", value_name="score"))

    # One concat at the end keeps `score` numeric instead of object-typed.
    df_average = pd.concat(pieces)[average_cols] if pieces else pd.DataFrame(columns=average_cols)

    df_best_avg = pd.DataFrame(df_average.groupby(["rel","fair"])["score"].max())
    df_best_avg["source"] = pd.Series(dtype=object)

    for x, row in df_best_avg.iterrows():
        rel_now = x[0]
        fair_now = x[1]
        score_now = row.values[0]

        model_list = df_average.query("rel==@rel_now & fair==@fair_now & score==@score_now").source.values

        assert len(model_list) <= 1, f"several models share the best average for {x}"

        if len(model_list) == 1:
            df_best_avg.at[x, "source"] = model_list[0]

    return df_best_avg

In [8]:
def get_annotations_from_plot(g):
    """Collect the annotation texts of every axes in ``g.axes``.

    Returns a dict keyed by ``"{xlabel}-{ylabel}"`` of the axes; each value is
    the list of texts of the ``matplotlib.text.Annotation`` children.
    """
    texts_by_axes = {}
    for axes in g.axes.flatten():
        xlabel = axes.get_xlabel()
        ylabel = axes.get_ylabel()
        texts_by_axes[f"{xlabel}-{ylabel}"] = [
            artist.get_text()
            for artist in axes.get_children()
            if isinstance(artist, matplotlib.text.Annotation)
        ]

    return texts_by_axes

In [9]:
def forward(x):
    """Axis transform: square root of strictly positive entries, others unchanged."""
    is_positive = x > 0
    return np.where(is_positive, x ** (1 / 2), x)

def inverse(x):
    """Axis transform: signed square of strictly positive entries, others unchanged."""
    is_positive = x > 0
    signed_square = x ** 2 * np.sign(x)
    return np.where(is_positive, signed_square, x)

In [10]:
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker

# Proxy legend handle for the Pareto-optimal points: a '.' marker without a connecting line,
# filled with the same colour (#0173b2) that plot_pareto_for_paper uses for those points.
legend_elements = [Line2D([0], [0], marker='.', mec='none',label='Pareto-optimal solution',
                          markerfacecolor="#0173b2",markersize=13, linestyle='None')]



def annotate_model_for_paper(the_model, xlabel, ylabel, thelabel, this_data, each_ax):
    """Circle one model on ``each_ax`` and write a label next to it.

    The model's coordinates are read from the ``xlabel`` and ``ylabel`` columns of
    the row of ``this_data`` whose ``source`` equals ``the_model``. Before drawing,
    "Pareto", "Average", "Relevance" and "Fairness" in ``thelabel`` are replaced
    by their short LaTeX forms.
    """
    coords = this_data.query("source==@the_model")[[xlabel, ylabel]].values.flatten()
    x_coord, y_coord = coords

    # Open black circle around the model's marker.
    each_ax.plot(x_coord, y_coord, 'o', ms=13, mec='black', mfc='none', mew=1)

    # Vertical text offset (in points), chosen from the unabbreviated label.
    if "Rel" in thelabel or thelabel == "Pareto":
        offset = -5
    elif "Fair" in thelabel:
        offset = 8
    else:
        offset = 0.1

    substitutions = (
        ("Pareto", "DPFR"),
        ("Average", "Avg"),
        ("Relevance", "\\textsc{Rel}"),
        ("Fairness", "\\textsc{Fair}"),
    )
    for long_form, short_form in substitutions:
        thelabel = thelabel.replace(long_form, short_form)

    each_ax.annotate(
        thelabel,
        xy=(x_coord, y_coord),
        xytext=(8, offset),
        textcoords='offset points',
        ha="left",
        va='center',
        fontsize="14",
    )

def _relabel_source_as_reranker(model_rows):
    """Return a copy of ``model_rows`` whose ``source`` holds only the re-ranker name.

    ``"BPR-GS"`` becomes ``"GS"``; a source without "-" (a bare model) becomes ``"none"``.
    """
    relabelled = model_rows.copy()  # never write into a slice of the caller's frame
    relabelled["source"] = relabelled\
                                .apply(lambda x: x.source+"-none" if "-" not in x.source else x.source, axis=1)\
                                .apply(lambda x: x.split("-")[1])
    return relabelled


def plot_pareto_for_paper(combined_df, model_distance_dict):
    """Draw, save and show one relevance-vs-fairness pair grid per dataset.

    For every dataset in ``list_dataset`` the Pareto rows and the four models
    (with their re-rankers) are plotted for P/R/MAP/NDCG against Jain/Ent/Gini,
    and the best model by relevance, fairness, simple average and distance to
    the Pareto frontier is annotated. Each figure is written to
    ``pairplot/pairplot_<dataset>.png``.

    Parameters
    ----------
    combined_df : pandas.DataFrame
        Pareto and model rows with ``dataset`` and ``source`` columns.
    model_distance_dict : dict
        Per-dataset distance frames, passed to ``get_closest_model_to_pareto``.

    Returns
    -------
    dict
        Maps dataset name to the annotation texts of its figure, as returned by
        ``get_annotations_from_plot``.

    Notes
    -----
    Reads the module-level ``list_dataset``, ``list_models``, ``list_markers``,
    ``list_rerank``, ``list_color``, ``dict_color``, ``legend_elements``,
    ``rel_measures`` and ``fair_measures``.
    """
    plt.rcParams['text.latex.preamble'] = r'\usepackage{sfmath} \boldmath'
    best_model_annotated = {}

    for dataset in list_dataset:

        this_data = combined_df.query("dataset==@dataset")

        # Throw-away figure, drawn only to harvest legend handles and closed right after.
        # Presumably both scatterplots land on the same current axes, so the handles of
        # the second call also include the re-ranker entries of the first.
        select_data_now = _relabel_source_as_reranker(this_data.loc[this_data.source.str.contains("BPR")])
        sns.scatterplot(data=select_data_now, x=rel_measures[0], y=fair_measures[1], palette=list_color, hue="source", hue_order=list_rerank)

        data_fake_plot = this_data.loc[~this_data.source.str.contains("-|pareto")]
        ax2 = sns.scatterplot(data=data_fake_plot, x=rel_measures[0], y=fair_measures[1], style="source", markers=list_markers)
        handles, labels = ax2.get_legend_handles_labels()
        plt.close()

        # Remove the "@10" suffix explicitly; str.strip("@10") strips a character set instead.
        filtered_rel_measures = [x for x in rel_measures if x.replace("@10", "") in ["NDCG", "MAP", "R", "P"]]
        filtered_fair_measures =  [x for x in fair_measures if "our" in x]
        filtered_fair_measures = [x for x in filtered_fair_measures if "Ent" in x or "Gini" in x or "Jain" in x]

        assert len(filtered_rel_measures) == 4
        assert len(filtered_fair_measures) == 3

        g = sns.PairGrid(
                        data=this_data,
                        x_vars=filtered_rel_measures,
                        y_vars=filtered_fair_measures,
                        palette=dict_color,
                        aspect=1.05
                        )

        #PLOT PARETO
        pareto_data = this_data.query("source=='pareto'")
        g.data = pareto_data
        g.map(sns.scatterplot, marker=".", linewidth=0.01, ec='none', color="#0173b2")

        #PLOT THE 4 MODELS SEPARATELY
        for model, marker in zip(list_models, list_markers):
            select_data_now = _relabel_source_as_reranker(this_data.loc[this_data.source.str.contains(model)])
            g.data = select_data_now
            g.map(sns.scatterplot, marker=marker, s=75, alpha=1, palette=dict_color, hue=select_data_now.source, hue_order=list_rerank, linewidth=2)

        #ANNOTATE BEST MODEL based on fairness/relevance/pareto/simple average
        best_rel_best_fair = get_best_rel_best_fair(this_data)
        best_avg = get_best_avg(this_data)
        best_pareto =  get_closest_model_to_pareto(dataset, model_distance_dict)

        g.figure.suptitle(f"{dataset}", y=1, fontsize=13)

        for each_ax in g.axes.flatten():
            xlabel = each_ax.get_xlabel()
            ylabel = each_ax.get_ylabel()

            # Skip axes whose measure pair has no closest-model entry.
            if xlabel not in best_pareto.index.get_level_values(0) or ylabel not in best_pareto.index.get_level_values(1):
                continue

            best_rel_model = best_rel_best_fair[xlabel]
            best_fair_model = best_rel_best_fair[ylabel]
            best_avg_model = best_avg.loc[(xlabel, ylabel)].source
            best_pareto_model = best_pareto.loc[(xlabel, ylabel)].values[0]

            # Track whether the average / Pareto winner has already been labelled,
            # so that each winner is annotated exactly once.
            avg_flag = False
            pareto_flag = False

            annotate_model_kws = dict(xlabel=xlabel, ylabel=ylabel, this_data=this_data, each_ax=each_ax)

            # Labels use "\\&" (a literal backslash + ampersand for LaTeX); the previous
            # "\&" produced the same text through an invalid escape sequence.
            if len(best_rel_model) == 1:
                the_model = best_rel_model[0]
                label = "Relevance"
                if best_avg_model == the_model:
                    label += " \\& Average"
                    avg_flag = True
                if best_pareto_model == the_model:
                    label += " \\& Pareto"
                    pareto_flag = True

                annotate_model_for_paper(the_model, thelabel=label, **annotate_model_kws)

            if len(best_fair_model) == 1:
                the_model = best_fair_model[0]
                label = "Fairness"
                if best_avg_model == the_model:
                    label += " \\& Average"
                    avg_flag = True
                if best_pareto_model == the_model:
                    label += " \\& Pareto"
                    pareto_flag = True
                annotate_model_for_paper(the_model, thelabel=label, **annotate_model_kws)

            if best_avg_model == best_pareto_model and not pareto_flag:
                label = "Average \\& Pareto"
                annotate_model_for_paper(best_avg_model, thelabel=label,  **annotate_model_kws)
                avg_flag = True
                pareto_flag = True

            if not avg_flag:
                label = "Average"
                annotate_model_for_paper(best_avg_model, thelabel=label, **annotate_model_kws)

            if not pareto_flag:
                label = "Pareto"
                annotate_model_for_paper(best_pareto_model, thelabel=label, **annotate_model_kws)

            each_ax.set_xscale('function', functions=(forward, inverse))

            ylabel = ylabel.replace("_our","")

            # inverted Gini
            if "Gini" not in ylabel:
                each_ax.set_yscale('function', functions=(forward, inverse))
                each_ax.set_ylabel("\\textbf{$\\uparrow$" + ylabel+"}", fontsize=13)
            else:
                each_ax.set_yscale('function', functions=(inverse, forward))
                each_ax.set_ylabel("\\textbf{$\\downarrow$" + ylabel+"}", fontsize=13)
                each_ax.yaxis.set_inverted(True)

            each_ax.set_xlabel("\\textbf{$\\uparrow$" + xlabel+"}", fontsize=13)
            each_ax.tick_params(axis='both', which='major', labelsize=9)
            each_ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=5))
            each_ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=5))

        best_model_annotated[dataset] = get_annotations_from_plot(g)

        argspareto = dict( loc="upper center",handletextpad=0.01, fontsize=14, labelspacing=0.2)

        # Legends are only drawn on the figures of these two datasets.
        if dataset in ["Lastfm", "Amazon-lb"]:
            # The last four handles are used as the model legend, the four before them as the re-ranker legend.
            g.figure.legend(handles[-4:], labels[-4:], title="Model", ncol=4, bbox_to_anchor=(0.15, 1.075), columnspacing=0.15, markerscale=1.5, **argspareto
                       )
            g.figure.legend(handles[-8:-4], labels[-8:-4], title="Re-ranker",  ncol=4, bbox_to_anchor=(0.488,1.075), columnspacing=0.15, **argspareto
                       )
            g.figure.legend(handles=legend_elements, title="Pareto Frontier",  ncol=1, bbox_to_anchor=(0.85,1.075), **argspareto)

        plt.subplots_adjust(hspace = 0.05, wspace=0.11)

        plt.savefig(f'pairplot/pairplot_{dataset.replace("-", "").lower()}.png', bbox_inches="tight")
        plt.show()
        plt.close()
    return best_model_annotated

# Full Pareto

In [None]:
# Load everything once; rel_measures / fair_measures become module-level names
# that several helper functions read.
combined_df_full, model_scores, rel_measures, fair_measures = load_data("result_combined")
df_gradient = pd.DataFrame(columns=list_dataset)


for data in list_dataset:
    print(f"Doing {data}")
    gradient_for_data = get_pareto_gradient(combined_df_full, data)
    df_gradient[data] = gradient_for_data

df_gradient = df_gradient.round(2)
df_gradient = df_gradient[df_gradient.index.str.contains("our")]
# Literal replacement: the previous pattern "\_our" only matched when str.replace
# defaulted to regex (pandas < 2), and silently removed nothing on pandas 2.
df_gradient.index = df_gradient.index.str.replace("_our", "", regex=False)
# "\\# good" is the LaTeX-escaped column header "\# good".
df_gradient["\\# good"] = df_gradient.applymap(detect_good).sum(axis=1)
df_gradient["conclusion"] = df_gradient["\\# good"].apply(conclude)
df_gradient = df_gradient.fillna("-").replace(np.inf,"-").replace(-np.inf,"-")
print(
    df_gradient.to_latex(escape=False)
 )


In [None]:
# Measure pairs whose gradient was judged "always good" across the datasets.
good_pairs = df_gradient.query("conclusion=='always good'").index.to_list()

In [None]:
# Keep only the columns that have a value in every row.
combined_df_full = combined_df_full.dropna(axis="columns")

In [None]:
# Per dataset: the distance of every model to the reference point of each measure pair,
# and the reference points themselves.
model_distance_dict_full, path_integral_point_full = get_model_distance_dict(combined_df_full)

In [15]:
import pickle

# Persist the per-dataset reference points.
with open("efficiency/path_integral_point_full.pickle", "wb") as out_file:
    pickle.dump(path_integral_point_full, out_file, pickle.HIGHEST_PROTOCOL)


In [29]:
# Persist the per-dataset model distances.
with open("efficiency/model_distance_dict_full.pickle", "wb") as out_file:
    pickle.dump(model_distance_dict_full, out_file, pickle.HIGHEST_PROTOCOL)

## Plot Pareto

In [None]:
# Render text with LaTeX in Helvetica and lower the on-screen resolution for the pair grids.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.family"] = "Helvetica"
plt.rcParams["figure.dpi"] = 100

best_model_annotated_full = plot_pareto_for_paper(combined_df_full, model_distance_dict_full)

# Get best model stats

In [16]:
def get_df_best_model(best_model_annotated):
    """Tabulate the Avg / DPFR annotations per measure pair and dataset.

    Parameters
    ----------
    best_model_annotated : dict
        ``{dataset: {"<xlabel>-<ylabel>": [annotation texts]}}`` as produced by
        ``plot_pareto_for_paper``.

    Returns
    -------
    pandas.DataFrame
        Index is the measure pair with plot markup removed, columns are datasets.
        Each cell is the list of annotation texts that mention "Avg" or "DPFR",
        with any "Best " prefix removed.
    """
    # Axis labels are stored as "\textbf{$\uparrow$NDCG@10}"; all of this markup must go,
    # otherwise the keys can never match plain pair names such as "NDCG-Gini".
    markup = ("$\\uparrow$", "$\\downarrow$", "\\textbf{", "}")

    cleaned = {}
    for dataset, annotations in best_model_annotated.items():
        cleaned[dataset] = {}
        for axes_key, labels in annotations.items():
            pair = axes_key
            for token in markup:
                pair = pair.replace(token, "")
            cleaned[dataset][pair] = [
                label.replace("Best ", "")
                for label in labels
                if "Avg" in label or "DPFR" in label
            ]

    return pd.DataFrame(cleaned)


def create_comparison(df_best_model):
    """Print a LaTeX table of how often the average and DPFR winners differ.

    Parameters
    ----------
    df_best_model : pandas.DataFrame
        Index is measure pairs, columns are datasets, and each cell is a list of
        annotation texts. A pair counts as agreement when one text contains both
        "Avg" and "DPFR".

    The printed table has one row per dataset plus an "All datasets" row, and the
    columns "Set-based" (pairs without MAP/NDCG), "Rank-based" (pairs with
    MAP/NDCG) and "All", each giving the percentage of disagreeing pairs.
    """
    same_average_and_pareto = df_best_model.applymap(lambda x: [el for el in x if "Avg" in el and "DPFR" in el]).applymap(len)

    set_based = same_average_and_pareto.index[~same_average_and_pareto.index.str.contains("MAP|NDCG")]
    rank_based = same_average_and_pareto.index[same_average_and_pareto.index.str.contains("MAP|NDCG")]

    # Number of agreeing pairs per dataset.
    same_set_based = same_average_and_pareto.loc[set_based].sum()
    same_rank_based = same_average_and_pareto.loc[rank_based].sum()

    num_set_based = set_based.shape[0]
    num_rank_based = rank_based.shape[0]
    num_pairs = len(same_average_and_pareto)

    disagreement_best = pd.DataFrame([same_set_based, same_rank_based]).T
    disagreement_best.columns = ["Set-based", "Rank-based"]

    # "All" must be computed first, while the two columns still hold raw agreement counts.
    disagreement_best["All"] = 100 * (num_pairs - disagreement_best["Set-based"] - disagreement_best["Rank-based"]) / num_pairs
    disagreement_best["Set-based"] = 100 * (num_set_based - disagreement_best["Set-based"]) / num_set_based
    disagreement_best["Rank-based"] = 100 * (num_rank_based - disagreement_best["Rank-based"]) / num_rank_based

    num_data = disagreement_best.shape[0]

    set_based_all = 100* (num_data*num_set_based - same_set_based.sum()) / (num_data*num_set_based )
    rank_based_all = 100*(num_data*num_rank_based - same_rank_based.sum()) / (num_data*num_rank_based)
    all_measures_all_dataset = 100*(num_data*num_pairs - same_set_based.sum() - same_rank_based.sum() ) / (num_data*num_pairs)

    disagreement_best.loc["All datasets"] = [set_based_all, rank_based_all, all_measures_all_dataset]
    disagreement_best = disagreement_best.round(2)

    print(
        disagreement_best.to_latex(
            escape=False,
            label="tab:best_model_agreement",
            caption="")
    )


def get_best_model_stat(best_model_annotated):
    """Build the best-model table for the good measure pairs and print the comparison.

    Drops "@10" from the pair names, keeps the rows listed in the module-level
    ``good_pairs``, prints the table from ``create_comparison`` and returns the
    filtered frame.
    """
    table = get_df_best_model(best_model_annotated)
    table = table.rename(index=lambda pair: pair.replace("@10", ""))
    selected = table.loc[good_pairs]

    create_comparison(selected)
    return selected

In [None]:
# Prints the agreement table as a side effect and keeps the per-pair annotation table.
df_best_model_full = get_best_model_stat(best_model_annotated_full)

# Correlation

In [26]:
# Module-level accumulators filled by plot_corr_heatmap, keyed by dataset name.
all_rank_based_on_measure = {}
all_rank_based_on_distance = {} 


def get_avg(this_data):
    """Average every relevance measure with every fairness measure, per model.

    Columns whose name contains "Gini" are replaced by ``1 - Gini`` before averaging.

    Parameters
    ----------
    this_data : pandas.DataFrame
        Rows of one dataset; rows with ``source == "pareto"`` are ignored.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns ``rel``, ``fair``, ``score``, ``source``.

    Notes
    -----
    Reads the module-level ``rel_measures`` and ``fair_measures`` lists.
    """
    for_val = this_data.loc[this_data.source!="pareto"]
    for_val_rel = for_val[rel_measures]
    # Explicit copy: the Gini columns are rewritten below and must not touch the caller's data.
    for_val_fair = for_val[fair_measures].copy()
    gini_cols = for_val_fair.columns.str.contains("Gini")
    for_val_fair.loc[:, gini_cols] = 1 - for_val_fair.loc[:, gini_cols]

    average_cols = ["rel", "fair", "score", "source"]
    pieces = []

    for col in for_val_fair.columns:
        # Broadcast one fairness column against all relevance columns.
        avg_val_for_col = (for_val_rel.values + for_val_fair[col].values.reshape(-1,1))/2
        df_avg_col = pd.DataFrame(avg_val_for_col, columns=rel_measures)
        df_avg_col["source"] = for_val.source.values
        df_avg_col["fair"] = col
        pieces.append(df_avg_col.melt(["fair", "source"], var_name="rel", value_name="score"))

    # One concat at the end keeps `score` numeric instead of object-typed.
    df_average = pd.concat(pieces)[average_cols] if pieces else pd.DataFrame(columns=average_cols)

    return df_average


def distance_based_rank_for_corr(model_distance_dict, data):
    """Turn the distance results of one dataset into correlation-ready scores.

    Parameters
    ----------
    model_distance_dict : dict
        Maps dataset to a frame whose cells are
        ``(models sorted by distance, sorted distances)``.
    data : str
        Dataset to process.

    Returns
    -------
    tuple
        ``(for_corr, dict_rank_based_on_distance)``. The dict maps
        ``"{rel}-{fair}"`` to ``{model: distance}``; ``for_corr`` is the same data
        as a frame (rows are pairs, columns are models) with negated distances.
        Only P/R/NDCG/MAP against the "our" versions of Jain/Gini/Ent are kept.
    """
    pairs = model_distance_dict[data].unstack().reset_index()
    pairs.columns = ["rel","fair","models"]
    pairs = pairs.loc[pairs.rel.str.contains("^P|^R|NDCG|MAP")]
    pairs = pairs.loc[pairs.fair.str.contains("Jain|Gini|Ent")]
    pairs = pairs.loc[pairs.fair.str.contains("our")]
    # Drop pairs without distances (their second tuple element is NaN).
    pairs = pairs.loc[pairs.models.apply(lambda x: x[1]).dropna().index]

    # Read the tuples directly; the previous transpose + `item[0]` relied on
    # positional access to a label-indexed Series, which pandas deprecates.
    dict_rank_based_on_distance = {}
    for rel, fair, (model_names, scores) in zip(pairs["rel"], pairs["fair"], pairs["models"]):
        dict_rank_based_on_distance[f"{rel}-{fair}"] = dict(zip(model_names, scores))

    # Negate so that, as with the other scores, higher means better.
    for_corr = -pd.DataFrame(dict_rank_based_on_distance).T

    return for_corr, dict_rank_based_on_distance


def plot_corr_heatmap(combined_df, model_distance_dict):
    """Plot, per dataset, Kendall correlations between joint measures and distance scores.

    For every dataset the joint measures from ``model_scores`` (columns matching
    AI/II/IAA/IBO/MME), the negated distance scores and the simple averages are
    correlated across models. One heatmap per dataset is drawn and the figure is
    saved to ``corr/corr_heatmap_all.pdf``.

    Parameters
    ----------
    combined_df : pandas.DataFrame
        Pareto and model rows with ``dataset`` and ``source`` columns.
    model_distance_dict : dict
        Per-dataset distance frames, passed to ``distance_based_rank_for_corr``.

    Notes
    -----
    Reads the module-level ``model_scores`` and ``list_dataset`` and fills
    ``all_rank_based_on_measure`` and ``all_rank_based_on_distance``.
    """
    fig, ax = plt.subplots(nrows=len(list_dataset),figsize=(8,16))

    # np.atleast_1d keeps this working when there is a single dataset (a lone Axes).
    for ax_id, data in zip(np.atleast_1d(ax).ravel(), list_dataset):

        #rank based on measure
        rank_based_on_measure = {}
        model_scores_for_data = model_scores.query("dataset==@data")

        for col in model_scores_for_data.columns:
            if col in ["dataset", "source"]:
                continue
            if bool(re.search('AI|II|IAA|MME', col)):
                ranked = model_scores_for_data.sort_values(col, ascending=True, kind="stable")
                print(f"sorting {col} ascendingly")
            elif bool(re.search('IBO', col)):
                ranked = model_scores_for_data.sort_values(col, ascending=False, kind="stable")
                print(f"sorting {col} descendingly")
            else:
                continue
            rank_based_on_measure[col] = ranked.source.values

        rank_based_on_measure = pd.DataFrame(rank_based_on_measure)
        all_rank_based_on_measure[data] = rank_based_on_measure

        for_corr = model_scores_for_data.loc[:,model_scores_for_data.columns.str.contains("AI|II|IAA|IBO|MME|source")]
        for_corr = for_corr.loc[:, ~for_corr.columns.str.contains("IBO_ori|IWO_ori")]
        for_corr = for_corr.T
        for_corr.columns = for_corr.loc["source"]
        for_corr = for_corr.drop(index=["source"])

        # These measures are sorted ascendingly above, so negate them to make higher mean better.
        lower_is_better = for_corr.index.str.contains("AI|IAA|II|MME")
        for_corr.loc[lower_is_better] = for_corr.loc[lower_is_better].apply(lambda x: -x)

        to_append, dict_rank_based_on_distance = distance_based_rank_for_corr(model_distance_dict, data)
        # DataFrame.append was removed in pandas 2.0; pd.concat gives the same stacking.
        for_corr_appended = pd.concat([for_corr, to_append])
        all_rank_based_on_distance[data] = dict_rank_based_on_distance

        #avg = higher score is better, so no need to invert
        this_data = combined_df.query("dataset==@data")
        avg = get_avg(this_data)
        avg["rel_fair"] = avg["rel"] + "-" + avg["fair"]
        avg = avg.drop(columns=["rel", "fair"])
        avg = avg.set_index("rel_fair")

        avg.index = avg.index.str.replace("@10","") + "-avg"
        for_corr_appended = pd.concat([for_corr_appended, avg.pivot(columns="source", values="score")])

        #plotting
        to_plot = for_corr_appended.T.reset_index(drop=True).applymap(float).corr(method="kendall").round(2)
        to_plot = to_plot.dropna(how="all", axis=1)
        to_plot = to_plot.dropna(how="all", axis=0)
        to_plot.columns = to_plot.columns.str.replace("@10", "")
        to_plot.index = to_plot.index.str.replace("@10","")

        #do the indexing automatically
        # Everything up to and including "MME_ori" is a joint measure; the rest are measure pairs.
        idx_index = to_plot.columns.tolist().index("MME_ori") +1
        idx_col = to_plot.index.tolist().index("MME_ori") +1

        filtered = to_plot.iloc[idx_index:, :idx_col]
        # Explicit copy and float dtype: a column is added to this slice below.
        wo_avg = filtered[~filtered.index.str.contains("avg")].copy()
        wo_avg["avg"] = pd.Series(dtype=float)
        only_avg = to_plot.loc[~to_plot.index.str.contains("AI|IAA|II|IBO|MME"),to_plot.columns.str.contains("avg")]

        # Correlation of each pair's distance score with that same pair's simple average.
        for pair in wo_avg.index:
            wo_avg.loc[pair, "avg"] = only_avg.loc[pair, pair+"-avg"]

        wo_avg = wo_avg.T
        wo_avg.index = wo_avg.index\
                                .str.replace("_ori","")\
                                .str.replace("_our","")\
                                .str.replace("_true","")
        wo_avg.columns = wo_avg.columns\
                                .str.replace("_ori","")\
                                .str.replace("_our","")

        wo_avg = wo_avg.loc[["IBO", "MME", "IAA", "II-F", "AI-F", "avg"]]

        sns.heatmap(wo_avg,
                    annot=True,
                    cmap="coolwarm_r",
                    vmin = -1,
                    vmax = 1,
                    square = True,
                    ax = ax_id,
                    cbar = True,
                    annot_kws={"size": 10}
                    )
        ax_id.set_yticklabels(ax_id.get_yticklabels(), rotation=0)
        # Only the last heatmap keeps its x tick labels.
        if data != list_dataset[-1]:
           ax_id.set_xticklabels([])
        ax_id.set_title(f"{data}")

    plt.tight_layout()
    plt.savefig('corr/corr_heatmap_all.pdf', bbox_inches="tight")
    plt.show()
    plt.close()


In [None]:
# Draws the heatmaps and, as a side effect, fills all_rank_based_on_measure / all_rank_based_on_distance.
plot_corr_heatmap(combined_df_full, model_distance_dict_full)

# Table for DPFR scores

In [None]:
# One column per dataset, one row per measure pair; every cell is a {model: distance} dict.
df_all_rank_based_on_distance = pd.DataFrame.from_dict(all_rank_based_on_distance)
df_all_rank_based_on_distance.to_pickle("combined_base/rank_based_distance.pickle")

In [None]:
# Reload the saved distances and reshape them into one row per (dataset, measure pair, model).
pareto = pd.read_pickle("combined_base/rank_based_distance.pickle")
pareto.index = pareto.index.str.replace("@10","").str.replace("_our","")

pareto = pareto\
            .reset_index(names=["measure pair"])\
            .melt(id_vars="measure pair", var_name="dataset")

# Expand every {model: distance} cell into one column per model.
pareto = pd.concat([pareto, pareto.value.apply(pd.Series)], axis=1)\
            .drop(columns="value")\
            .melt(id_vars=["dataset","measure pair"], var_name=["model"])

# "BPR-GS" -> model "BPR", reranking "GS"; a bare model name gets reranking "-".
name_parts = pareto.model.str.split("-")
pareto["reranking"] = name_parts.apply(lambda x: x[1] if len(x)==2 else "-")
pareto.model = name_parts.apply(lambda x: x[0])


In [None]:
# Wide table: rows are (dataset, measure pair), columns are (model, reranking).
df_table = (
    pareto
    .set_index(["dataset", "measure pair", "model", "reranking"])
    .unstack([2,3])
    .droplevel(0, axis=1)
    [["ItemKNN","BPR", "MultiVAE", "NCL"]]
    .round(3)
    .reindex(["-", "BC", "CM", "GS"], level=1, axis=1)
    .reindex(["Lastfm", "Amazon-lb", "QK-video","Jester", "ML-10M", "ML-20M"], level=0)
)

In [88]:
# Row order of the measure pairs: grouped by relevance measure, then by fairness measure.
order = [
    f"{rel}-{fair}"
    for rel in ("P", "MAP", "R", "NDCG")
    for fair in ("Jain", "Ent", "Gini")
]

In [90]:
# Put the measure pairs (index level 1) into the order defined in `order`.
df_table = df_table.reindex(order, level=1)

In [None]:
# Styler used below for highlighting and for the LaTeX export.
styler = df_table.style

def highlight_min(x):
    """Return a bold-font CSS string for the smallest non-NaN entries of ``x``, None elsewhere."""
    smallest = np.nanmin(x.to_numpy())
    return np.where(x == smallest, "font-weight: bold;", None)

# Bold the smallest value of each row (axis=1) and show three decimals.
styler.apply(highlight_min, axis=1)\
    .format(formatter="{:.3f}")

In [97]:
# Export the styled table as a LaTeX `table*`; convert_css turns the bold CSS into LaTeX commands.
latex_code = styler.to_latex(
    hrules=True, 
    clines="skip-last;data",
    convert_css=True, 
    label="",
    caption= "",
    environment = "table*",
    # three left-aligned index columns, then four groups of four right-aligned value columns
    column_format = "lll*{4}{r}|*{4}{r}|*{4}{r}|*{4}{r}",
    multicol_align = "c|"
    )

#erase last cline
# Remove the \cline command(s) that sit directly before \bottomrule. The previous
# fixed 100-character window made str.find return -1 when no \cline was inside it,
# and the slice then corrupted the output; without a match the text is now untouched.
bottomrule_starts = latex_code.find("\\bottomrule")
if bottomrule_starts != -1:
    before_bottomrule = re.sub(r"(?:\\cline\{[^}]*\}\s*)+$", "", latex_code[:bottomrule_starts])
    latex_code = before_bottomrule + latex_code[bottomrule_starts:]

In [None]:
# print (rather than a bare expression) so the line breaks of the LaTeX source are rendered.
print(latex_code)