In [66]:
import os
import pandas as pd
import numpy as np

import re
from datetime import datetime
import warnings 

# Render floats with three decimals wherever pandas displays a frame.
pd.set_option("display.precision", 3)
pd.options.display.float_format = '{:.3f}'.format

# NOTE(review): this silences every warning category for the whole session,
# including deprecation warnings raised by pandas — consider narrowing it.
warnings.filterwarnings('ignore')

In [67]:
def get_df_base_eval(path):
    """Gather the pickled base-run results found in ``path`` into one frame.

    Directory entries whose name contains "pickle" and does not contain
    "time" are unpickled. Each becomes one column, keyed by the file name
    with ".pickle" and "base_" removed. Only rows whose index label contains
    "@10" are kept, and every column name gets the suffix "_-_x_x".

    Parameters
    ----------
    path : str
        Directory holding the pickle files (joined to file names with "/").

    Returns
    -------
    pandas.DataFrame
        Rows are the "@10" entries, columns are the suffixed run names.
    """
    loaded = {}
    for fname in os.listdir(path):
        # Skip anything that is not a result pickle, and the timing pickles.
        if "pickle" not in fname or "time" in fname:
            continue
        run_name = fname.replace(".pickle", "").replace("base_", "")
        loaded[run_name] = pd.read_pickle(f"{path}/{fname}")

    table = pd.DataFrame(loaded)
    at_ten = table[table.index.str.contains("@10")]
    at_ten.columns = at_ten.columns + "_-_x_x"

    return at_ten


def get_df_rerank_eval(path):
    """Gather the pickled re-ranking results found in ``path`` into one frame.

    A directory entry is used when its name contains "pickle", "at10" and
    "rerank25" and does not contain "greedy". Each selected file becomes one
    column, keyed by the file name with ".pickle" and "base_" removed.

    Parameters
    ----------
    path : str
        Directory holding the pickle files. A trailing path separator is
        accepted but no longer required.

    Returns
    -------
    pandas.DataFrame
        One column per selected file.
    """
    reranking_result_file = [
        f for f in os.listdir(path)
        if "pickle" in f and "at10" in f and "rerank25" in f and "greedy" not in f
    ]
    reranking_results = {}

    for f in reranking_result_file:
        # os.path.join works with and without a trailing separator on `path`;
        # plain string concatenation silently produced a wrong file name.
        # NOTE(review): read_pickle executes arbitrary code from the file —
        # only point this at result files you produced yourself.
        result = pd.read_pickle(os.path.join(path, f))
        key = f.replace(".pickle", "").replace("base_", "")
        reranking_results[key] = result

    rerank = pd.DataFrame(reranking_results)

    return rerank

In [68]:
# Load the two sets of base-run results (one frame per directory) with the
# base loader, which keeps only the "@10" rows.
base = get_df_base_eval("../eval/base")
MME = get_df_base_eval("../eval/MME")

In [69]:
# Load the two sets of re-ranking results; both directory paths are given
# with a trailing slash.
rerank = get_df_rerank_eval("../reranking/result/")
rerank_MME = get_df_rerank_eval("../reranking/MME/")

In [None]:
# Put the four frames side by side (axis=1), so every run is one column.
combined_result = pd.concat([base, MME, rerank, rerank_MME], axis=1)
# Go from wide to long: stack() moves the column labels into a second index
# level and reset_index() turns both levels into ordinary columns (the later
# cells refer to them as `level_0` and `level_1`).
combined_result = pd.DataFrame(combined_result.stack()).reset_index()
combined_result

In [None]:
# Rearrange this subset separately, then write it back into the full dataset.
# Selected rows: run label mentions "20M" and does not carry the "x_x" marker.
# .copy() makes `ml20m` an independent frame, so assigning to its `level_1`
# column afterwards is not a chained assignment on a slice of combined_result.
ml20m = combined_result[
    combined_result.level_1.str.contains("20M")
    & ~combined_result.level_1.str.contains("x_x")
].copy()
ml20m.head()

In [None]:
# Move the "_at10_rerank25" tag to the end of each label: remove it wherever
# it occurs (literal match, not a regex), then append it again.
ml20m["level_1"] = ml20m.level_1.str.replace("_at10_rerank25","",regex=False) + "_at10_rerank25"
# Write the rewritten labels back into the same rows of the full table.
combined_result.loc[ml20m.index, "level_1"] = ml20m["level_1"]
# Show the affected rows for a visual check.
combined_result.loc[ml20m.index]

In [None]:
# Renumber the rows 0..n-1, discarding the previous index.
combined_result = combined_result.reset_index(drop=True)
# Split the `level_0` text on "@" into its two parts: the part before "@"
# goes to `measures`, the part after it to `k` (still a string at this point).
combined_result[["measures","k"]] = combined_result["level_0"].str.split("@", expand=True)
combined_result

In [74]:
def rotate_index(indices):
    """Wrap ``indices`` in a LaTeX rotatebox command (90 degrees, origin=r).

    ``indices`` is concatenated between the opening and closing fragments,
    so anything that supports ``str + indices + str`` is accepted.
    """
    opening = "\\rotatebox[origin=r]{90}{"
    closing = "}"
    return opening + indices + closing

# Split each run label on "_" into five pieces; the last two are throw-away
# columns. The assignment expects exactly five pieces per label.
combined_result[["dataset", "model", "reranking", "dummy", "dummy2"]] = combined_result.level_1.str.split("_", expand=True)
# Reassign instead of inplace=True so the cell produces a new frame.
combined_result = combined_result.drop(columns=["level_0", "level_1", "dummy", "dummy2"])
combined_result["k"] = combined_result["k"].astype(int)
# regex=False pins literal matching: the default of `regex` differs between
# pandas versions, and as a regex the "." in "-subset-0.05" is a wildcard.
combined_result["reranking"] = (
    combined_result["reranking"]
    .str.replace("-subset-0.05", "", regex=False)
    .str.replace("borda", "BC", regex=False)
    .str.replace("combmnz", "CM", regex=False)
)
# Dataset names become rotated LaTeX row labels.
combined_result["dataset"] = combined_result["dataset"].apply(rotate_index)


In [None]:
# Index the long table by (dataset, measures, model, reranking), drop `k`,
# move index level 2 ("model") into the columns, and remove the outer column
# level that unstack() leaves above the model names.
combined_result = combined_result.set_index(["dataset","measures","model", "reranking"]).drop(columns="k").unstack(2).droplevel(0, axis=1)

# Measures to keep, in the order they should appear in the table.
selected_index = pd.Index([
    'HR', 'MRR', 'P', 'MAP', 'R', 'NDCG', 
    'Jain_our', 'QF_our', 'Ent_our', 'FSat_our', 
    'Gini_our',
    'IBO_our',
    'MME_ori', 'IAA_true_ori',
    'II-F_ori', 'AI-F_ori'
])
# Reindex on index level 1 (the measure names) against the list above.
combined_result = combined_result.reindex(selected_index, level=1)
combined_result


In [76]:
# Timestamp the export so separate runs write to separate files.
now = datetime.now()
time = now.strftime("%Y-%m-%d_%H%M%S")
# to_csv does not create missing directories; make sure the target exists so
# the cell also works on a fresh checkout.
os.makedirs("combined_base", exist_ok=True)
combined_result.reset_index().to_csv(f"combined_base/csv_combined_result_{time}.csv")

In [None]:
def add_arrows_and_sort(big_table):
    """Replace index level 1 with LaTeX measure labels carrying a direction arrow.

    A measure whose name matches ``Gini|MME|II|AI|IAA`` is marked as
    lower-is-better (down arrow); every other measure gets an up arrow. The
    name itself is wrapped in a LaTeX text command, a trailing "_ori" or
    "_our" becomes a text subscript, and "_true" becomes "-true". Row order
    is left as it is.

    Parameters
    ----------
    big_table : pandas.DataFrame
        Frame with a two-level row index; level 1 holds the measure names.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by (original level 0, formatted label named
        "measure"), with the columns' name cleared.
    """
    lower_is_better = "Gini|MME|II|AI|IAA"
    stripped_measures = big_table.index.get_level_values(1)
    mask_lower = stripped_measures.str.contains(lower_is_better)

    # Prefix each measure in place. This yields the same labels as splitting
    # the measures into two groups, appending them and sorting back into
    # position, without relying on Series.append (removed in pandas 2.0).
    measure = pd.Series(
        [
            ("$\\downarrow$ " if is_lower else "$\\uparrow$ ") + name
            for is_lower, name in zip(mask_lower, stripped_measures)
        ],
        name="measure",
        dtype=object,
    )

    # regex=False keeps the replacement text literal. As a regex replacement
    # the "\t" of "\text" would be emitted as a TAB character.
    measure = (
        measure
        .str.replace("_ori", "}_{\\text{ori}", regex=False)
        .str.replace("_our", "}_{\\text{our}", regex=False)
        .str.replace("_true", "-true", regex=False)
        .str.replace(" ", " $\\text{", regex=False)
    )
    measure += "}$"

    big_table = big_table.set_index([big_table.index.get_level_values(0), measure])
    big_table.columns.name = None

    return big_table


def get_measure_type(index):
    """Map each measure label to the LaTeX tag of its measure family.

    The rules are applied in order and each one overwrites the labels it
    matches, so an earlier rule wins over a later one:

    1. label matches ``IBO|MME|IAA|II|AI``      -> Fair+Rel tag
    2. label matches ``Jain|QF|FSat|Ent|Gini``  -> Fair tag
    3. label still contains "arrow"             -> Rel tag

    Labels matching none of the rules are returned unchanged.

    Parameters
    ----------
    index : pandas.Series
        Measure labels (strings). The input is not modified.

    Returns
    -------
    The ``.values`` of the tagged Series, one entry per input label.
    """
    # The backslash is doubled on purpose: a bare "\t" in a normal string
    # literal is a TAB, which would corrupt the LaTeX command name.
    rules = (
        ("IBO|MME|IAA|II|AI", "\\textsc{Fair+Rel}"),
        ("Jain|QF|FSat|Ent|Gini", "\\textsc{Fair}"),
        ("arrow", "\\textsc{Rel}"),
    )

    # Work on a copy so the caller's Series keeps its original labels.
    tagged = index.copy()
    for pattern, tag in rules:
        tagged[tagged.str.contains(pattern)] = tag

    return tagged.values

def add_measure_type_sort_col(big_table):
    """Insert a measure-type level into the row index and order the model columns.

    The measure type is derived from index level 1 with ``get_measure_type``
    and wrapped with ``rotate_index``. It is appended to the index and the
    levels are reordered to (level 0, measure type, level 1). All index names
    are cleared, and the columns are reindexed on their level 0 to the order
    ItemKNN, BPR, MultiVAE, NCL.

    Note that the ``measure_type`` column is first assigned on the frame that
    was passed in, before ``set_index`` produces the returned frame.
    """

    # Level-1 labels as a Series so they can be rewritten element-wise.
    index_series = big_table.index\
                                .get_level_values(1)\
                                .to_series()
                            
    big_table["measure_type"] = get_measure_type(index_series)
    big_table["measure_type"] = big_table["measure_type"].apply(rotate_index)
    # Append the new column as the last index level, then move it to the middle.
    big_table = big_table.set_index(["measure_type"], append=True).reorder_levels([0, 2, 1])
    big_table.index.names = [None, None, None]
    # Fix the left-to-right order of the model column groups.
    big_table = big_table.reindex(["ItemKNN", "BPR", "MultiVAE", "NCL"], axis=1, level=0)

    return big_table



# Move index level 2 into the columns, then decorate the measure labels and
# add the measure-type index level.
combined_result = combined_result.unstack(2)
combined_result = add_arrows_and_sort(combined_result)
combined_result = add_measure_type_sort_col(combined_result)

In [79]:
# Dataset names grouped into lists of three; the final cell renders one
# LaTeX table per inner list.
pairings = [
    ["Lastfm", "Amazon-lb", "QK-video"], 
     ["Jester", "ML-10M", "ML-20M"]
]

In [80]:
def highlight_max(x):
    """Return one CSS entry per element of ``x``: bold for elements equal to
    the NaN-ignoring maximum of ``x``, ``None`` for all others."""
    peak = np.nanmax(x.to_numpy())
    is_peak = x == peak
    return np.where(is_peak, "font-weight: bold;", None)

def highlight_min(x):
    """Return one CSS entry per element of ``x``: bold for elements equal to
    the NaN-ignoring minimum of ``x``, ``None`` for all others."""
    trough = np.nanmin(x.to_numpy())
    is_trough = x == trough
    return np.where(is_trough, "font-weight: bold;", None)

def add_cline(measure, latex_code, df):
    """Insert a cline rule after every line of ``latex_code`` matching ``measure``.

    ``measure`` is used as a regular expression; a match runs from the
    pattern to the end of its line, newline included. The rule spans
    columns 2 through ``df.shape[1] + 2`` and is placed on a line of its
    own directly after each match. The text is returned unchanged when
    nothing matches.
    """
    cut_points = [hit.end() for hit in re.finditer(f"{measure}.*\\n", latex_code)]
    rule = "\\cline{2-" + str(df.shape[1] + 2) + "}\n"

    # Cut the text at every match end and glue the pieces back together
    # with the rule in between.
    pieces = []
    start = 0
    for cut in cut_points:
        pieces.append(latex_code[start:cut])
        start = cut
    pieces.append(latex_code[start:])

    return rule.join(pieces)

def format_row_wise(styler, measures, row_formatter):
    """Register ``row_formatter`` for every cell of the rows named in ``measures``.

    Each measure is looked up in level 2 of ``styler.index``. The resulting
    row position is paired with every column position and stored as a key
    in ``styler._display_funcs``. The same ``styler`` object is returned.
    """
    for measure in measures:
        row_num = styler.index.get_level_values(2).get_loc(measure)
        styler._display_funcs.update(
            ((row_num, col_num), row_formatter)
            for col_num in range(len(styler.columns))
        )
    return styler



def nicetable(df):
    """Render ``df`` as a LaTeX ``table*``, print it, and append it to a .tex file.

    In each row, the best value is set in bold: the maximum for rows whose
    level-2 label contains "uparrow", the minimum for rows whose label
    contains "downarrow". Values are formatted with three decimals. The
    generated LaTeX is then post-processed (resize box, mid rule, removal of
    redundant clines and of the our/ori/-true label decorations).

    The column format and the cline fix-up are written for a table with
    three index columns and four groups of four value columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose row index has (at least) three levels, level 2 holding
        the arrow-decorated measure labels.

    Returns
    -------
    None. The LaTeX code is printed and appended to
    ``combined_base/table/base_<timestamp>.tex``.
    """
    the_index = df.index.levels[2]

    row_with_up = the_index[the_index.str.contains("uparrow")]
    row_with_down = the_index[the_index.str.contains("downarrow")]

    idx = pd.IndexSlice
    slice_max = idx[:, :, row_with_up]
    slice_min = idx[:, :, row_with_down]

    # The Styler methods modify `styler` in place and return it.
    styler = df.style
    styler.apply(highlight_max, axis=1, subset=slice_max)\
        .apply(highlight_min, axis=1, subset=slice_min)\
        .format(formatter="{:.3f}")

    latex_code = styler.to_latex(
        hrules=True,
        clines="skip-last;data",
        convert_css=True,
        environment="table*",
        column_format="lll*{4}{r}|*{4}{r}|*{4}{r}|*{4}{r}",
        multicol_align="c|"
    )

    # Erase the last cline (the one directly above the bottom rule). Both
    # markers must be found, in that order; str.find returns -1 on a miss
    # and slicing with -1 would otherwise mangle the table.
    last_cline_starts = latex_code.find("\\cline", -100, -1)
    last_cline_ends = latex_code.find("\\bottomrule")
    if 0 <= last_cline_starts < last_cline_ends:
        latex_code = latex_code[:last_cline_starts] + latex_code[last_cline_ends:]

    # Wrap the tabular in a resize box (opened here, closed after \end{tabular}).
    latex_code = latex_code.replace("\\begin{tabular}", "\\resizebox{0.90\\textwidth}{!}{\n\\begin{tabular}")
    # Last model header: drop the trailing column rule and add a mid rule below it.
    latex_code = latex_code.replace("\\multicolumn{4}{c|}{NCL} \\\\", "\\multicolumn{4}{c}{NCL} \\\\ \n\\midrule")
    latex_code = latex_code.replace("\\end{tabular}", "\\end{tabular}}")
    # Labels built elsewhere may still contain a TAB where "\t" was meant.
    latex_code = latex_code.replace("\t", "\\t")
    latex_code = latex_code.replace("\\cline{1-19} \\cline{2-19}", "\\cline{1-19}")

    # Strip the our/ori subscripts and the "-true" marker from the labels.
    latex_code = latex_code.replace("_{\\text{our}}", "")
    latex_code = latex_code.replace("_{\\text{ori}}", "")
    latex_code = latex_code.replace("-true", "")

    timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")

    # Create and write to the same directory. Previously one directory was
    # created while the file was opened in a different one.
    out_dir = "combined_base/table"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)
        print("Creating directory for result tables")

    print(latex_code)

    # Append mode: tables rendered within the same second share one file.
    with open(f"{out_dir}/base_{timestamp}.tex", 'a+') as f:
        print(latex_code, file=f)

In [None]:
import pandas as pd

# Render one LaTeX table per group of datasets.
for pair in pairings:
    # Row labels in combined_result are the rotated dataset names, so the
    # group is converted before it is used as a row selector. `pair` is
    # rebound deliberately, as it stays visible at notebook (module) level.
    pair = [rotate_index(dataset) for dataset in pair]
    selected_rows = combined_result.loc[pair]
    nicetable(selected_rows)