# Summarize results for Marek's thesis

This notebook generates the LaTeX tables for Marek's thesis.

In [None]:
import json
import os
import numpy as np
import pandas as pd

In [None]:
def generate_full_vs_top_h_table(
    experiments,
    base_method = "optimal-instance-precision-keep-scores",
    table_header = "",
    multiplier = 100,
    per_page = 9,
    top_k = 5,
):
    basic_methods = {
        "optimal-instance-precision": "\\InfTopK",
        "optimal-instance-ps-precision": "\\InfPSK",
    }

    metrics = {
        "instance-precision": "Precision",
        "ps-precision": "Propensity-scored Prec.",
        "instance-recall": "Recall",
        "ndcg": "nDCG",
        "midrule": "\\midrule",
        "macro-precision": "Macro-Precision",
        "macro-recall": "Macro-Recall",
        #"macro-balanced-accuracy": "BA",
        "macro-f1": "Macro-F$_1$",
        #"macro-jaccard-score": "JS",
        # "macro-gmean": "GM",
        # "macro-hmean": "HM",
        "coverage": "Coverage"
    }

    table = ""
    for e, (experiment, experiment_label) in enumerate(experiments.items()):
        all_results = {}
        for top_k in [1, 3, 5]:
            filename = f"{experiment}/{base_method}_k={top_k}_v=0.0_s=13_results.json"
            if os.path.exists(filename):
                with open(filename, "r") as f:
                    result_file_data = json.load(f)
                for metric, metric_label in metrics.items():
                    metric = f"{metric}@{top_k}"
                    if metric in result_file_data:
                        all_results[f"{metric}_all"] = result_file_data[metric] * multiplier

            filename = filename.replace("_k=", "_top_labels=0.2_k=")
            filename = filename.replace("org2", "org4")
            if os.path.exists(filename):
                with open(filename, "r") as f:
                    result_file_data = json.load(f)
                for metric, metric_label in metrics.items():
                    metric = f"{metric}@{top_k}"
                    if metric in result_file_data:
                        all_results[f"{metric}_top"] = result_file_data[metric] * multiplier
        
        print(all_results)
        results = []
        for metric, metric_label in metrics.items():
            _result = {"Metric": metric_label}
            for top_k in [1, 3, 5]:
                _metric = f"{metric}@{top_k}"
                _result[f"{top_k}_all"] = all_results.get(f"{_metric}_all", "-")
            for top_k in [1, 3, 5]:
                _metric = f"{metric}@{top_k}"
                _ref_value = all_results.get(f"{_metric}_all", "-")
                _value = all_results.get(f"{_metric}_top", "-")
                _diff = None
                if _ref_value != "-" and _value != "-":
                    _diff = (_value - _ref_value) / _ref_value * 100
                if _value != "-":
                    _value = f"{_value:.2f}"
                if _diff is not None:
                    color = r"\color{green}"
                    if _diff < -5:
                        color = r"\color{orange}"
                    if _diff < -30:
                        color = r"\color{red}"
                    _diff = f" {{\\scriptsize {color} ({_diff:.2f}\\%)}}"
                _result[f"{top_k}_top"] = _value
                _result[f"{top_k}_top_diff"] = _diff
                
                
            results.append(_result)

        df = pd.DataFrame(results)
        
        table += "\n"
        if e % per_page == 0:
            if e == 0:
                table += "\\begin{table}\n"
                table += table_header
                table += "\\small\n"
            table += """
\\centering
\\resizebox{\\linewidth}{!}{
\\begin{tabular}{l|rrr|rlrlrl}
\\toprule
    Metric & \\multicolumn{3}{c|}{With all labels} & \\multicolumn{6}{c}{With top 20\\% labels} \\\\
    & \\multicolumn{1}{c}{$@1$} & \\multicolumn{1}{c}{$@3$} & \\multicolumn{1}{c|}{$@5$} 
    & \\multicolumn{2}{c}{$@1$} & \\multicolumn{2}{c}{$@3$} & \\multicolumn{2}{c}{$@5$} \\\\
"""
            table += """
\\centering
\\resizebox{\\linewidth}{!}{
\\begin{tabular}{l|rrr|rlrlrl}
\\toprule
    Metric & \\multicolumn{3}{c|}{With all labels} & \\multicolumn{6}{c}{With top 20\\% labels} \\\\
    & \\multicolumn{1}{c}{$@1$} & \\multicolumn{1}{c}{$@3$} & \\multicolumn{1}{c|}{$@5$} 
    & $@1$ & {\\scriptsize (diff.)} & $@3$ & {\\scriptsize (diff.)} & $@5$ & {\\scriptsize (diff.)} \\\\
"""
# Metric & \\multicolumn{3}{c|}{Class. with all labels} & \\multicolumn{6}{c}{Classifier with top 20\\% labels} \\\\
        table += f"\\midrule\n"
        table += f"\\multicolumn{{10}}{{c}}{{{experiment_label}}} \\\\\n"

        # Print the results as a latex table
        latex_table = df.to_latex(index=False, float_format="{:0.2f}".format)
        #latex_table = df.to_latex(index=False)
        table += "\n    ".join(latex_table.split("\n")[3:-3]).replace("\midrule & - & - & - & - & NaN & - & NaN & - & NaN \\\\", "\midrule")

        if e % per_page == per_page - 1 or e == len(experiments) - 1:
            table += """
\\bottomrule
\\end{tabular}
}
\\end{table}
"""

    return table

In [None]:
org_exp_dir = "results_thesis_all_vs_top"

# Experiment result directory -> dataset label printed in the table.
# Commented-out entries are datasets that are not part of this table.
experiments = {
    f"../{org_exp_dir}/rcv1x_100_plt": "RCV1x-2K",
    #f"../{org_exp_dir}/eurlex_100_plt": "Eurlex-4K",
    f"../{org_exp_dir}/EURLex-4.3K_100_plt": "Eurlex-4.3K",
    #f"../{org_exp_dir}/amazonCat_100_plt": "AmazonCat-13K",
    f"../{org_exp_dir}/amazonCat-14K_100_plt": "AmazonCat-14K",
    #f"../{org_exp_dir}/wiki10_100_plt": "Wiki10-31K",
    #f"../{org_exp_dir}/deliciousLarge_100_plt": "DeliciousLarge-200K",
    f"../{org_exp_dir}/wikiLSHTC_100_plt": "WikiLSHTC-325K",
    f"../{org_exp_dir}/WikipediaLarge-500K_100_plt": "WikipediaLarge-500K",
    f"../{org_exp_dir}/amazon_100_plt": "Amazon-670K",
}

# Caption and label inserted right after \begin{table}.
header_main = r"""
\caption{Results (\%) of a classifier trained on the full set of labels
and a classifier trained with only the top 20\% of labels (most frequent labels) 
on different metrics budgeted at $k$ ($@k$).
}
\label{tab:all-vs-top-h}
"""

# The output directory is otherwise only created by a later cell; create it
# here as well so that this cell does not fail with FileNotFoundError when the
# notebook is run top to bottom on a fresh checkout.
os.makedirs("tables_thesis", exist_ok=True)
with open("tables_thesis/results-all-vs-top-h.tex", "w") as f:
    f.write(generate_full_vs_top_h_table(experiments, table_header=header_main, per_page=9))

## Main results

In [None]:
# Epsilon values used to build the block-coordinate / Frank-Wolfe method names.
# The literal intentionally keeps the overlapping per-run grids as they were
# collected; the set literal removes the duplicates and `sorted` fixes the order.
EPSS = sorted({
    1e-8, 5e-8, 1e-7, 5e-7, 9e-7, 5e-6, 2e-6, 1e-6, 5e-5, 1e-5, 1e-4,
    1e-8, 1e-7, 1e-6, 1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5,
    1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 6e-4, 0.00015,
    1e-8, 1e-7, 1e-6, 5e-6, 9e-6, 1e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5,
    1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 6e-4, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2,
    3e-5, 5e-5, 1e-4, 3e-4, 5e-4, 1e-3, 3e-3, 5e-3, 7e-3, 1e-2,
    3e-3, 5e-3, 6e-3, 7e-3, 8e-3, 9e-3, 1e-2, 2e-2,
    1e-8, 1e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4, 2e-4, 3e-4, 4e-4,
    1e-3, 2e-3, 3e-3, 5e-3,
    1e-8, 1e-7, 1e-6, 5e-6, 1e-5, 3e-5, 5e-5, 7e-5, 9e-5,
    1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 6e-4, 7e-4, 8e-4, 9e-4,
    1e-3, 2e-3, 0.004, 0.0004,
})
# Tolerance value that is part of the block-coordinate method names.
TOL = 1e-7
# Seeds of the repeated runs; one result file is looked up per seed.
seeds = [13, 1988, 1993, 2023, 2024]
# Validation split value that is part of the result file names.
val_split = 0.0

# Metric key (as stored in the result files, without the "@k" suffix)
# -> short column label.
metrics = dict(
    [
        ("instance-precision", "P"),
        ("ps-precision", "PS"),
        ("instance-recall", "R"),
        ("macro-precision", "P"),
        ("macro-recall", "R"),
        ("macro-balanced-accuracy", "BA"),
        ("macro-f1", "F"),
        ("macro-jaccard-score", "JS"),
        # ("macro-gmean", "GM"),
        # ("macro-hmean", "HM"),
        ("coverage", "C"),
    ]
)


def generate_table_with_results(
    experiments,
    table_header = "",
    multiplier = 100,
    per_page = 3,
    per_first_page = 0,
    continue_prev_tabular = False,
    continue_tabular = False,
    add_std = False,
    top_k = 5,
    sampled = False,
    add_count = True,
    force_h = False,
):
    """Build the LaTeX results table (methods x metrics) for a set of experiments.

    Uses the module-level ``EPSS``, ``TOL``, ``seeds``, ``val_split`` and
    ``metrics``. For every experiment directory, method and seed one result
    JSON file is read if it exists (falling back to the same path with
    ``org8`` replaced by ``org7``); values of ``<metric>@<top_k>`` are scaled
    by ``multiplier`` and averaged over the seeds that were found.

    Consecutive methods that share the first name component and the same
    target metric (i.e. the epsilon variants of one algorithm) are collapsed
    into a single row: the variant with the highest value of its target metric.

    Cell decoration:

    * the best distinct value per column is bold, the second best italic;
    * values below the worst value of the methods targeting that metric are gray;
    * cells where the method targets the column's metric get a colored
      background; ``optimal-instance-precision`` is additionally marked with
      ``*`` in the instance-recall column (blue unless ``"syn"`` is in the
      experiment path).

    Args:
        experiments: Mapping of experiment directory -> label printed above
            the rows of that experiment.
        table_header: LaTeX inserted after ``\\begin{table}`` of the first page.
        multiplier: Factor applied to every raw metric value.
        per_page: Number of experiments per ``table`` environment.
        per_first_page: Offset added to the experiment index when deciding
            where pages start and end.
        continue_prev_tabular: If true, the first experiment does not open a
            new ``table``/``tabular`` (the output continues a previous one).
        continue_tabular: If true, the last page ends with ``\\\\`` instead of
            closing ``tabular``/``table``.
        add_std: Add the standard deviation over seeds below each mean. It is
            skipped for the basic (non-iterative) methods when ``"org"`` is in
            the experiment path.
        top_k: Budget k; selects the result files and the ``@k`` metrics.
        sampled: Read the ``_sample_test_labels_`` result files, where the
            seed appears as ``sample_s`` instead of ``s``.
        add_count: Append the number of seeds found to each cell and print a
            note for cells with fewer results than seeds.
        force_h: Add the ``[H]`` placement specifier to every ``table``.

    Returns:
        The LaTeX source as a string.
    """
    # NOTE: the "_proba_threshold", gmean and hmean method variants are
    # intentionally not listed here.
    basic_methods = {
        "optimal-instance-precision": f"\\InfTopK{{{top_k}}}",
        "optimal-instance-ps-precision": f"\\InfPSK{{{top_k}}}",
        "power-law-with-beta=0.25": f"\\InfPowerK{{{top_k}}}$_{{\\beta=0.25}}$",
        "power-law-with-beta=0.5": f"\\InfPowerK{{{top_k}}}$_{{\\beta=0.5}}$",
        "log": f"\\InfLogK{{{top_k}}}",
        "optimal-macro-recall": f"\\InfMacR{{{top_k}}}",
        "optimal-macro-balanced-accuracy": f"\\InfMacBA{{{top_k}}}",
        "midrule1": "\\midrule",
    }

    # Insertion order defines the row order: basic methods, block-coordinate
    # variants (all eps per target), coverage, then Frank-Wolfe variants.
    macro_targets = {
        "macro-precision": "MacP",
        "macro-recall": "MacR",
        "macro-balanced-accuracy": "MacBA",
        "macro-f1": "MacF",
        "macro-jaccard-score": "MacJS",
    }
    methods = basic_methods.copy()
    for target, short in macro_targets.items():
        for eps in EPSS:
            methods[f"block-coord-{target}-tol={TOL}-eps={eps}"] = (
                f"\\InfBCA{short}{{{top_k}}}$_{{\\epsilon={eps}}}$"
            )
    methods[f"block-coord-coverage-tol={TOL}"] = f"\\InfBCACov{{{top_k}}}"
    methods["midrule2"] = "\\midrule"
    for target, short in macro_targets.items():
        for eps in EPSS:
            methods[f"frank-wolfe-{target}-eps={eps}"] = (
                f"\\InfFW{short}{{{top_k}}}$_{{\\epsilon={eps}}}$"
            )

    # Wrappers for the best and the second best distinct value of a column.
    formats = ["\\textbf{{{}}}", "\\textit{{{}}}"]

    table = ""
    for e, (experiment, experiment_label) in enumerate(experiments.items()):
        results = []
        prev_method = ""
        for method, method_label in methods.items():
            method_results = {
                "_method": method,
                "method": method_label
            }
            if "midrule" in method:
                # Placeholder row, turned into a plain \midrule further below.
                results.append(method_results)
                continue

            for seed in seeds:
                if sampled:
                    filename = f"{experiment}/{method}_sample_test_labels_k={top_k}_v={val_split}_s=13_sample_s={seed}_results.json"
                else:
                    filename = f"{experiment}/{method}_k={top_k}_v={val_split}_s={seed}_results.json"

                if not os.path.exists(filename):
                    filename = filename.replace("org8", "org7")

                # Missing files are skipped silently; use add_count to see
                # how many seeds contributed to a cell.
                if os.path.exists(filename):
                    with open(filename, "r") as f:
                        result_file_data = json.load(f)
                    for metric in metrics:
                        metric_at_k = f"{metric}@{top_k}"
                        if metric_at_k in result_file_data:
                            method_results.setdefault(metric_at_k, []).append(result_file_data[metric_at_k] * multiplier)

            # Reduce the per-seed lists to mean (and optionally std / count).
            for key, values in list(method_results.items()):
                if isinstance(values, list) and isinstance(values[0], float):
                    method_results[key] = np.mean(values)
                    if add_std:
                        if "org" in experiment and method in basic_methods:
                            # No std (and, as before, no count) for these cells.
                            continue
                        method_results[f"{key}_std"] = np.std(values)
                    if add_count:
                        method_results[f"{key}_count"] = len(values)

            # Collapse the epsilon variants of one algorithm/target into the
            # best row; anything else becomes a new row (for/else).
            for metric in metrics.keys():
                if metric in method and metric in prev_method and method.split("-")[0] == prev_method.split("-")[0]:
                    metric_at_k = f"{metric}@{top_k}"
                    new_val = method_results.get(metric_at_k, 0)
                    prev_val = results[-1].get(metric_at_k, 0)
                    if new_val > prev_val and isinstance(new_val, float):
                        results[-1] = method_results
                    break
            else:
                results.append(method_results)
            prev_method = method

        for r in results:
            for key, value in r.items():
                if isinstance(value, float):
                    r[key] = np.round(value, 2)

        # Worst value per metric among the methods that target that metric.
        specialized_method_worst_results = {}
        for r in results:
            method = r["_method"]
            for metric in metrics.keys():
                if "_std" in metric:
                    continue

                metric_at_k = f"{metric}@{top_k}"
                if (metric in method or ("instance-recall" in metric and method == "optimal-instance-precision")) and metric_at_k in r:
                    if isinstance(r[metric_at_k], float):
                        specialized_method_worst_results[metric_at_k] = min(specialized_method_worst_results.get(metric_at_k, 1000), r[metric_at_k])

        # Format every cell and mark the best / second best value per column.
        for metric in list(metrics.keys()):
            if "_std" in metric:
                continue

            metric_at_k = f"{metric}@{top_k}"
            # Numeric snapshot of the column, taken before cells become strings.
            column = np.array([result.get(metric_at_k, 0) for result in results])
            argsort = np.flip(np.argsort(column))
            vals = column[argsort]
            for result in results:
                if metric_at_k not in result:
                    continue

                formated_result = f"{result[metric_at_k]:.2f}"
                if add_count and metric_at_k + "_count" in result:
                    formated_result = f"{formated_result} ({result[metric_at_k + '_count']})"
                    if result[metric_at_k + '_count'] < len(seeds):
                        print(f"For {experiment_label} - {result['_method']}, {metric_at_k}: {result[metric_at_k + '_count']}")
                    del result[metric_at_k + "_count"]

                if add_std and metric_at_k + "_std" in result:
                    formated_result = f"\\makecell{{{formated_result} \\\\[-2.5pt] {{\\tiny $\\pm$ {result[metric_at_k+'_std']:.2f}}}}}"
                    del result[metric_at_k + "_std"]

                if result[metric_at_k] < specialized_method_worst_results.get(metric_at_k, 0):
                    result[metric_at_k] = f"{{\\color{{gray!75}} {formated_result}}}"
                else:
                    result[metric_at_k] = formated_result

            rank = -1
            prev_val = -1
            for idx, val in zip(argsort, vals):
                if prev_val != val:
                    rank += 1
                    if rank == len(formats):
                        break
                # Rows without this metric (midrule rows, methods without
                # result files) count as 0 in `column` but have no cell to
                # decorate; indexing them used to raise KeyError.
                if metric_at_k in results[idx]:
                    results[idx][metric_at_k] = formats[rank].format(results[idx][metric_at_k])
                prev_val = val

        # Color the cells where the method targets the column's metric.
        color = "green!25"
        for r in results:
            method = r["_method"]
            del r["_method"]
            if "epsilon" in r["method"]:
                # Drop the "$_{\epsilon=...}$" suffix from the printed label.
                r["method"] = r["method"].split("$")[0]
            for metric in metrics.keys():
                metric_at_k = f"{metric}@{top_k}"
                if (metric in method or ("instance-recall" in metric and method == "optimal-instance-precision")) and metric_at_k in r:
                    mark = ""
                    if "instance-recall" in metric:
                        if "syn" not in experiment:
                            color = "blue!25"
                        else:
                            color = "green!25"
                    else:
                        color = "green!25"
                    if "instance-recall" in metric:
                        mark = "*"
                    if mark != "" and isinstance(r[metric_at_k], str) and "\\makecell" in r[metric_at_k]:
                        r[metric_at_k] = r[metric_at_k].replace(f"\\makecell{{", f"\\makecell{{ \\textnormal{{{mark}}} ")
                    elif mark != "":
                        r[metric_at_k] = f"{mark} {r[metric_at_k]}"

                    if isinstance(r[metric_at_k], str):
                        r[metric_at_k] = f"\\cellcolor{{{color}}} {r[metric_at_k]}"
                    else:
                        r[metric_at_k] = f"\\cellcolor{{{color}}} {r[metric_at_k]:.2f}"

        df = pd.DataFrame(results)

        table += "\n"
        # Position of this experiment for pagination. The first experiment is
        # treated as position 0 while deciding about page openings and is
        # shifted by per_first_page only for the page-end check below.
        pos = e
        if pos != 0:
            pos += per_first_page
        if pos % per_page == 0 and (pos != 0 or not continue_prev_tabular):
            table += "\\begin{table}"
            if force_h:
                table += "[H]"
            table += "\n"
            if pos == 0:
                table += table_header
            if add_std:
                table += "\\scriptsize\n"
            else:
                table += "\\small\n"
            table += """
\\centering
\\resizebox{\\linewidth}{!}{
\\begin{tabular}{l|rrr|rrrrrr}
"""

        if pos % per_page == 0:
            table += """
\\toprule
    Method & \\multicolumn{3}{c|}{Instance $@__K__$} & \\multicolumn{6}{c}{Macro $@__K__$} \\\\
    & \\multicolumn{1}{c}{P} & \\multicolumn{1}{c}{PS} & \\multicolumn{1}{c|}{R} 
    & \\multicolumn{1}{c}{P} & \\multicolumn{1}{c}{R} & \\multicolumn{1}{c}{BA} & 
    \\multicolumn{1}{c}{F$_1$} & \\multicolumn{1}{c}{JS} & \\multicolumn{1}{c}{Cov} \\\\
""".replace("__K__", str(top_k))

        table += "\\midrule\n"
        table += f"\\multicolumn{{10}}{{c}}{{{experiment_label}}} \\\\\n"

        # Keep only the body of the pandas output (drop the tabular preamble,
        # the column header and the closing lines) and turn the placeholder
        # "midrule" rows into plain \midrule lines.
        latex_table = df.to_latex(index=False, float_format="{:0.2f}".format)
        table += "\n    ".join(latex_table.split("\n")[3:-3]).replace(
            "\\midrule & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\\\", "\\midrule"
        )

        if pos == 0:
            pos += per_first_page

        last_pos = len(experiments) - 1 + per_first_page
        if pos % per_page == (per_page - 1) or pos == last_pos:
            table += """
\\bottomrule"""
            if pos != last_pos or not continue_tabular:
                table += """
\\end{tabular}
}
\\end{table}
"""
            else:
                table += "\\\\"

    return table

In [None]:
# All generated .tex files go into this directory.
os.makedirs("tables_thesis", exist_ok=True)
org_exp_dir = "results_thesis_org8"

# Experiment result directory -> dataset label, in table order.
experiments = {
    f"../{org_exp_dir}/{directory}": label
    for directory, label in (
        ("rcv1x_100_plt", "RCV1x-2K"),
        ("eurlex_100_plt", "EURLex-4K"),
        ("EURLex-4.3K_100_plt", "EURLex-4.3K"),
        ("amazonCat_100_plt", "AmazonCat-13K"),
        ("amazonCat-14K_100_plt", "AmazonCat-14K"),
        ("wiki10_100_plt", "Wiki10-31K"),
        # ("deliciousLarge_100_plt", "DeliciousLarge-200K"),
        ("wikiLSHTC_100_plt", "WikiLSHTC-325K"),
        ("WikipediaLarge-500K_100_plt", "WikipediaLarge-500K"),
        ("amazon_100_plt", "Amazon-670K"),
    )
}

# Caption/label of the main-text table (k = 5 only).
header_main = r"""
\caption{Results (\%) for $k = 5$ on \emph{original} XMLC datasets with marginal conditional probabilities coming from PLT model.
The \smash{\colorbox{green!25}{green background}} indicates cells in which the inference algorithm matches the metric it optimizes and 
the {\color{gray!75} gray text} indicates results worse than those results for a given metric.
%than those with \smash{\colorbox{green!25}{green background}} for a given metric. 
The best results are in \textbf{bold}, and the second best are in \textit{italic}.
* -- while \InfTopK{k} in general is not optimal for \RecallAtK{}, we expect it to be the closest to the optimal solution, and we mark it \smash{\colorbox{blue!25}{blue background}}.
}
\label{tab:main-plt-results}
"""

with open("tables_thesis/results-main-plt-org.tex", "w") as f:
    f.write(
        generate_table_with_results(
            experiments,
            table_header=header_main,
            per_page=3,
            add_std=False,
            top_k=5,
            add_count=False,
        )
    )

# Caption/label of the appendix table (k in {1, 3, 5}, with std).
header_app = r"""
\caption{Results (\%) for $k \in \{1, 3, 5\}$ on \emph{original} XMLC datasets with marginal conditional probabilities coming from PLT model.
Each experiments was repeated 5 times with mean and standard deviation reported after the $\pm$ sign.
The \smash{\colorbox{green!25}{green background}} indicates cells in which the inference algorithm matches the metric it optimizes and 
the {\color{gray!75} gray text} indicates results worse than those results for a given metric.
%than those with \smash{\colorbox{green!25}{green background}} for a given metric. 
The best results are in \textbf{bold}, and the second best are in \textit{italic}.
* -- while \InfTopK{k} in general is not optimal for \RecallAtK{}, we expect it to be the closest to the optimal solution, and we mark it \smash{\colorbox{blue!25}{blue background}}.
}
\label{tab:app-plt-results}
"""

ADD_STD = True
ADD_COUNT = False
# Options shared by the three appendix parts (k = 1, 3, 5).
app_options = {"per_page": 2, "add_std": ADD_STD, "add_count": ADD_COUNT, "force_h": True}
with open("tables_thesis/results-app-plt-org.tex", "w") as f:
    # The k = 3 part leaves its last tabular open (continue_tabular) and the
    # k = 5 part continues it (continue_prev_tabular).
    parts = [
        generate_table_with_results(experiments, table_header=header_app, per_first_page=1, top_k=1, **app_options),
        generate_table_with_results(experiments, per_first_page=0, top_k=3, continue_tabular=True, **app_options),
        generate_table_with_results(experiments, per_first_page=1, top_k=5, continue_prev_tabular=True, **app_options),
    ]
    table = "".join(parts)
    f.write(table)

In [None]:
syn_exp_dir = "results_thesis_syn4"
# Experiment result directory -> dataset label, in table order.
experiments = {
    f"../{syn_exp_dir}/{directory}": label
    for directory, label in (
        ("rcv1x_100_plt", "Synthetic RCV1x-2K"),
        ("eurlex_100_plt", "Synthetic Eurlex-4K"),
        ("EURLex-4.3K_100_plt", "Synthetic Eurlex-4.3K"),
        ("amazonCat_100_plt", "Synthetic AmazonCat-13K"),
        ("amazonCat-14K_100_plt", "Synthetic AmazonCat-14K"),
        ("wiki10_100_plt", "Synthetic Wiki10-31K"),
        ("wikiLSHTC_100_plt", "Synthetic WikiLSHTC-325K"),
        ("WikipediaLarge-500K_100_plt", "Synthetic WikipediaLarge-500K"),
        ("amazon_100_plt", "Synthetic Amazon-670K"),
    )
}

# Caption/label of the main-text table (k = 5 only).
header_main = r"""
\caption{Results (\%) for $k = 5$ on \emph{synthetic versions} of XMLC datasets with ideal estimates of marginal conditional probabilities $\Marginals(\Instance) = \PredMarginals(\Instance)$. 
The \smash{\colorbox{green!25}{green background}} indicates cells in which the inference algorithm matches the metric it optimizes and 
the {\color{gray!75} gray text} indicates results worse than those results for a given metric.
%than those with \smash{\colorbox{green!25}{green background}} for a given metric. 
The best results are in \textbf{bold}, and the second best are in \textit{italic}.
* -- because in this experiment we sample labels independently, \InfTopK{k} becomes the optimal strategy for \RecallAtK{} as showed in \cref{thm:prec-recall-equ-with-independence}.
}
\label{tab:main-plt-syn-results}
"""

with open("tables_thesis/results-main-plt-syn.tex", "w") as f:
    f.write(
        generate_table_with_results(
            experiments,
            table_header=header_main,
            per_page=3,
            add_std=False,
            top_k=5,
            add_count=False,
            sampled=True,
        )
    )

# Caption/label of the appendix table (k in {1, 3, 5}, with std).
header_app = r"""
\caption{Results (\%) for $k \in \{1, 3, 5\}$ on \emph{synthetic versions} of XMLC datasets with ideal estimates of marginal conditional probabilities $\Marginals(\Instance) = \PredMarginals(\Instance)$.
Each experiments was repeated 5 times with mean and standard deviation reported after the $\pm$ sign.
The \smash{\colorbox{green!25}{green background}} indicates cells in which the inference algorithm matches the metric it optimizes and 
the {\color{gray!75} gray text} indicates results worse than those results for a given metric.
%than those with \smash{\colorbox{green!25}{green background}} for a given metric. 
The {\color{gray!75} gray text} indicates results worse than those with \smash{\colorbox{green!25}{green background}} for a given metric. 
The best results are in \textbf{bold}, and the second best are in \textit{italic}.
* -- because in this experiment we sample labels independently, \InfTopK{k} becomes the optimal strategy for \RecallAtK{} as showed in \cref{thm:prec-recall-equ-with-independence}.
}
\label{tab:app-plt-syn-results}
"""

ADD_STD = True
ADD_COUNT = False
# Options shared by the three appendix parts (k = 1, 3, 5).
app_options = {"per_page": 2, "add_std": ADD_STD, "sampled": True, "add_count": ADD_COUNT, "force_h": True}
with open("tables_thesis/results-app-plt-syn.tex", "w") as f:
    # The k = 3 part leaves its last tabular open (continue_tabular) and the
    # k = 5 part continues it (continue_prev_tabular).
    parts = [
        generate_table_with_results(experiments, table_header=header_app, per_first_page=1, top_k=1, **app_options),
        generate_table_with_results(experiments, per_first_page=0, top_k=3, continue_tabular=True, **app_options),
        generate_table_with_results(experiments, per_first_page=1, top_k=5, continue_prev_tabular=True, **app_options),
    ]
    table = "".join(parts)
    f.write(table)


In [None]:
# Imported once for the whole cell instead of on every loop iteration.
import matplotlib.pyplot as plt

bf_star_exp_dir = "results_thesis_bf_star"
# Experiment result directory -> dataset label.
experiments = {
    f"../{bf_star_exp_dir}/rcv1x_100_plt": "RCV1x-2K",
    f"../{bf_star_exp_dir}/eurlex_100_plt": "EURLex-4K",
    f"../{bf_star_exp_dir}/EURLex-4.3K_100_plt": "EURLex-4.3K",
    f"../{bf_star_exp_dir}/amazonCat_100_plt": "AmazonCat-13K",
    f"../{bf_star_exp_dir}/amazonCat-14K_100_plt": "AmazonCat-14K",
    f"../{bf_star_exp_dir}/wiki10_100_plt": "Wiki10-31K",
    f"../{bf_star_exp_dir}/wikiLSHTC_100_plt": "WikiLSHTC-325K",
    f"../{bf_star_exp_dir}/WikipediaLarge-500K_100_plt": "WikipediaLarge-500K",
    f"../{bf_star_exp_dir}/amazon_100_plt": "Amazon-670K",
}

# Budget k that selects the weight files. The file names below depend on `k`,
# which was not defined anywhere before this cell (NameError on a fresh run).
# NOTE(review): 5 is assumed because the main tables use k = 5 -- confirm.
k = 5

# For every dataset, plot the distribution of the per-line ratio between the
# "optimal-macro-recall" weights and the "frank-wolfe-macro-recall" weights.
for experiment in experiments.keys():
    org = f"{experiment}/bf-star-optimal-macro-recall_k={k}_v=0.0_s=13_results_weights.txt"
    fw = f"{experiment}/bf-star-frank-wolfe-macro-recall-eps=1e-8_k={k}_v=0.0_s=13_results_weights.txt"

    # Both files are read as one float per line.
    with open(org, "r") as f:
        org_weights = np.array([float(w.strip()) for w in f])
    with open(fw, "r") as f:
        fw_weights = np.array([float(w.strip()) for w in f])

    ratio = org_weights / fw_weights

    # Plot histogram
    plt.hist(ratio, bins=100, color='blue', alpha=0.7)
    plt.title("Histogram of org_weights / fw_weights")
    plt.xlabel("Ratio")
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.show()

In [None]:
import re
import random

# Fixed seed: the jitter applied to the parsed times below is then identical
# on every run of this cell.
random.seed(13)
# Number of datasets typeset inside one LaTeX ``table`` environment.
per_page = 3


# Caption/label emitted once, inside the first table environment only.
# The speed-up computed below is time(\InfTopK{100}) / time(\BFStar), so the
# caption states the ratio in that direction.
table_header = r"""
\caption{Comparison of average inference times per instance (ms) of predicting the top 100 labels first and doing reweighting (\InfTopK{100}) with \BFStar$(\cdot)$ algorithm applied to predicting with different weights and $k \in \{1, 3, 5\}$.
The time is reported in milliseconds (ms) and the speed-up is reported as a ratio of the time of \InfTopK{100} to the time taken by \BFStar$(\cdot)$ algorithm.
}
\label{tab:exp-inference}
"""

bf_star_exp_dir = "results_thesis_bf_star"
# Result directory -> dataset name printed in the table.
experiments = {
    f"../{bf_star_exp_dir}/rcv1x_100_plt": "RCV1x-2K",
    f"../{bf_star_exp_dir}/eurlex_100_plt": "EURLex-4K",
    f"../{bf_star_exp_dir}/EURLex-4.3K_100_plt": "EURLex-4.3K",
    f"../{bf_star_exp_dir}/amazonCat_100_plt": "AmazonCat-13K",
    f"../{bf_star_exp_dir}/amazonCat-14K_100_plt": "AmazonCat-14K",
    f"../{bf_star_exp_dir}/wiki10_100_plt": "Wiki10-31K",
    f"../{bf_star_exp_dir}/wikiLSHTC_100_plt": "WikiLSHTC-325K",
    f"../{bf_star_exp_dir}/WikipediaLarge-500K_100_plt": "WikipediaLarge-500K",
    f"../{bf_star_exp_dir}/amazon_100_plt": "Amazon-670K",
}

table = ""
for e, (experiment, experiment_label) in enumerate(experiments.items()):
    results = []
    # Placeholder that is replaced by the actual k when a file name is built.
    # It is reset for every dataset because the loops below rebind `k`.
    k = "__K__"
    # Result file (with the k placeholder) -> LaTeX row label. Keys containing
    # "midrule" are not files; they only mark where a rule is inserted.
    # NOTE(review): the three macro-F1 keys end in ".txt_" (trailing
    # underscore) unlike the other entries - confirm this is intentional.
    methods = {
        f"{experiment}/test_pred_results_--topK_100_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000": r"\InfTopK{100}",
        "midrule": "\\midrule",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000": r"\InfTopK{k}",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-optimal-instance-ps-precision_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfPSK{k})",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-power-law-with-beta=0.25_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfPowerK{k}$_{\beta=0.25}$)",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-power-law-with-beta=0.5_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfPowerK{k}$_{\beta=0.5}$)",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-log_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfLogK{k})",
        #f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-optimal-macro-recall_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfMacR{k})",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-optimal-macro-recall-eps=1e-9_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfMacR{k})",
        "midrule2": "\\midrule",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-frank-wolfe-mixed-precision-macro-recall-alpha=0.1-eps=1e-8_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfFW{$0.9\text{P}@k\! + \! 0.1 \text{Macro-R}@k$})",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-frank-wolfe-mixed-precision-macro-recall-alpha=0.3-eps=1e-8_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfFW{$0.7\text{P}@k\! + \! 0.3 \text{Macro-R}@k$})",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-frank-wolfe-macro-recall-eps=1e-8_k={k}_v=0.0_s=13_results_weights.txt": r"\BFStar(\InfFWMacR{k})",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-frank-wolfe-mixed-precision-macro-f1-alpha=0.1-eps=1e-8_k={k}_v=0.0_s=13_results_weights.txt_": r"\BFStar(\InfFW{$0.9\text{P}@k\! + \! 0.1\text{Macro-F}_1@k$})",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-frank-wolfe-mixed-precision-macro-f1-alpha=0.3-eps=1e-8_k={k}_v=0.0_s=13_results_weights.txt_": r"\BFStar(\InfFW{$0.7\text{P}@k\! + \! 0.3\text{Macro-F}_1@k$})",
        f"{experiment}/test_pred_results_--topK_{k}_--loadAs_map_--ensemble_1_--threads_8_--endRow_100000_--labelWeights_bf-star-frank-wolfe-macro-f1-eps=1e-8_k={k}_v=0.0_s=13_results_weights.txt_": r"\BFStar(\InfFWMacF{k})",
    }

    # Baseline time of the top-100 run; -1 until the baseline file is parsed.
    top_100_time = -1
    for method, method_label in methods.items():
        result = {"method": method_label}

        if "midrule" in method:
            # Rule marker: a row holding only the label; its NaN cells are
            # stripped from the LaTeX output further down.
            results.append(result)
            continue

        if "topK_100" in method:
            # Baseline row: one measurement reused for every k.
            if not os.path.exists(method):
                # Fall back to the file name without the --endRow suffix.
                method = method.replace("_--endRow_100000", "")
            if not os.path.exists(method):
                print(f"File {method} not found")
                # Without the baseline no speed-up can be computed, so the
                # remaining methods of this dataset are not processed.
                break
            with open(method) as f:
                file_data = f.read()
            match = re.search(r"Test CPU time / data point \(ms\): ([\d.]+)", file_data)
            if match is None:
                break
            # NOTE(review): the parsed time is multiplied by a random factor
            # in [3.0, 3.1) here and in (2.9, 3.0] for the other methods -
            # confirm that this scaling and jitter are intended.
            top_100_time = float(match.group(1)) * (3.0 + random.random() * 0.1)
            for k in [1, 3, 5]:
                result[f"time@{k}"] = top_100_time
                result[f"speedup@{k}"] = 1.0
        else:
            for k in [1, 3, 5]:
                file = method.replace("__K__", str(k))
                if not os.path.exists(file):
                    file = file.replace("_--endRow_100000", "")
                if not os.path.exists(file):
                    # Missing measurement: the cells for this k stay empty.
                    continue
                with open(file) as f:
                    file_data = f.read()
                match = re.search(r"Test CPU time / data point \(ms\): ([\d.]+)", file_data)
                if match:
                    result[f"time@{k}"] = float(match.group(1)) * (3.0 - random.random() * 0.1)
                    speedup = top_100_time / result[f"time@{k}"]
                    # Slower than the baseline is typeset in red, otherwise green.
                    if speedup < 1:
                        result[f"speedup@{k}"] = f"{{\\scriptsize\\color{{red}} ({speedup:.2f}x)}}"
                    else:
                        result[f"speedup@{k}"] = f"{{\\scriptsize \\color{{green}} ({speedup:.2f}x)}}"
        results.append(result)

    # One row per method; columns are "method", "time@k" and "speedup@k".
    df = pd.DataFrame(results)

    if e % per_page == 0:
        # First dataset of a page: open a new table environment.
        table += "\\begin{table}\n"
        if e == 0:
            table += table_header
        else:
            table += "\\small\n"
        table += r"""
\centering
\resizebox{\linewidth}{!}{
\begin{tabular}{l|rl|rl|rl}
\toprule
Method & \multicolumn{2}{c|}{$k = 1$} & \multicolumn{2}{c|}{$k = 3$} & \multicolumn{2}{c}{$k = 5$} \\
& $T/\NumInstances_{\text{test}}$ & {\scriptsize (speed-up)} & $T/\NumInstances_{\text{test}}$ & {\scriptsize (speed-up)} & $T/\NumInstances_{\text{test}}$ & {\scriptsize (speed-up)} \\"""

    table += "\n\\midrule"
    table += f"\n\\multicolumn{{7}}{{c}}{{{experiment_label}}} \\\\\n"

    # Keep only the body rows of the pandas output: the first three and last
    # three lines are dropped (presumably pandas' own tabular header/footer -
    # this depends on the pandas version). Rule-marker rows are reduced to a
    # bare \midrule.
    latex_table = df.to_latex(index=False, float_format="{:0.2f}".format)
    table += "\n    ".join(latex_table.split("\n")[3:-3]).replace("\\midrule & NaN & NaN & NaN & NaN & NaN & NaN \\\\", "\\midrule")

    if e % per_page == per_page - 1 or e == len(experiments) - 1:
        # Last dataset of a page (or of the whole run): close the environment.
        table += """
    \\bottomrule
\\end{tabular}
}
\\end{table}

"""

with open("tables_thesis/results-inference-plt.tex", "w") as f:
    f.write(table)

In [None]:
# Function for creating the plots

import json
import matplotlib.pyplot as plt
import numpy as np
from adjustText import adjust_text
from collections.abc import Iterable
import os

# Defaults shared by the plotting cells.
top_k = 5
multiplier = 100

# Figure margins as fractions of the figure: left, right, bottom, top.
margins = {
    "left": 0.10,
    "right": 0.95,
    "bottom": 0.10,
    "top": 0.90,
}

# Matplotlib defaults. Alternative figure sizes used before:
#   (3, 1.5) smaller plots, (4, 2.5) ICLR paper, (4, 3) NeurIPS paper.
plt.rcParams["figure.figsize"] = (4, 2)  # for smaller plots
plt.rcParams["figure.dpi"] = 300
plt.rcParams["figure.autolayout"] = False
plt.rcParams["text.usetex"] = True
plt.rcParams["mathtext.fontset"] = "stix"
plt.rcParams["font.family"] = "STIXGeneral"
plt.rcParams["savefig.transparent"] = False

# LaTeX preamble used for text rendering; it defines the method-name macros
# that appear in the plot labels.
plt.rcParams.update({"text.latex.preamble": r"""
\usepackage[T1]{fontenc}
\usepackage{bold-extra}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\newcommand{\InfTopK}[1]{Top-$#1$}
\newcommand{\InfPSK}[1]{PS-$#1$}
\newcommand{\InfPowerK}[1]{Pow-$#1$}
\newcommand{\InfLogK}[1]{Log-$#1$}
\newcommand{\InfMacR}[1]{Macro-R$@#1_{\text{prior}}$}
\newcommand{\InfMacBA}[1]{Macro-BA$@#1_{\text{prior}}$}
\newcommand{\InfBCA}[1]{BCA(#1)}
\newcommand{\InfBCAMacP}[1]{\InfBCA{Macro-P$@#1$}}
\newcommand{\InfBCAMacR}[1]{\InfBCA{Macro-R$@#1$}}
\newcommand{\InfBCAMacF}[1]{\InfBCA{Macro-F$_1@#1$}}
\newcommand{\InfBCAMacBA}[1]{\InfBCA{Macro-BA$@#1$}}
\newcommand{\InfBCAMacJS}[1]{\InfBCA{Macro-JS$@#1$}}
\newcommand{\InfBCAMacGM}[1]{\InfBCA{Macro-G-M$@#1$}}
\newcommand{\InfBCAMacHM}[1]{\InfBCA{Macro-H-M$@#1$}}
\newcommand{\InfBCACov}[1]{\InfBCA{Cov$@#1$}}
\newcommand{\InfFW}[1]{FW(#1)}
\newcommand{\InfFWMacP}[1]{\InfFW{Macro-P$@#1$}}
\newcommand{\InfFWMacR}[1]{\InfFW{Macro-R$@#1$}}
\newcommand{\InfFWMacF}[1]{\InfFW{Macro-F$_1@#1$}}
\newcommand{\InfFWMacBA}[1]{\InfFW{Macro-BA$@#1$}}
\newcommand{\InfFWMacJS}[1]{\InfFW{Macro-JS$@#1$}}
\newcommand{\InfFWMacGM}[1]{\InfFW{Macro-G-M$@#1$}}
\newcommand{\InfFWMacHM}[1]{\InfFW{Macro-H-M$@#1$}}
"""})


import sys
import shutil

def load_json(filepath):
    """Return the parsed content of the JSON file at *filepath*.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default text encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filepath, encoding="utf-8") as file:
        return json.load(file)


def _mirror_org7_files(filenames):
    """Create the "org8" twin of the first file's directory and copy every
    Frank-Wolfe result file that lives under an "org7" path into it."""
    os.makedirs(os.path.dirname(filenames[0].replace("org7", "org8")), exist_ok=True)
    for path in filenames:
        if "org7" in path and "frank-wolfe" in path:
            shutil.copy(path, path.replace("org7", "org8"))


def _read_point(entry, fallback_label, x_axis, y_axis, on_plot_labels):
    """Return ``(label, x, y, x_std, y_std)`` for one result entry.

    All values are looked up before anything is returned, so a missing key
    (KeyError) never yields a partially read point.
    """
    if on_plot_labels and "eps=" in entry["_method_raw"]:
        # Label the point with the epsilon value encoded in the method name.
        label = entry["_method_raw"].split("eps=")[-1].split("_")[0]
    else:
        label = fallback_label
    return (
        label,
        entry[x_axis],
        entry[y_axis],
        entry[f"{x_axis}_std"],
        entry[f"{y_axis}_std"],
    )


def plot_results(experiment, results, methods, x_axis, y_axis,
                 x_axis_label=None, y_axis_label=None, title=None, legend=False, add_std=False, on_plot_labels=False):
    """Plot ``y_axis`` against ``x_axis`` for the given methods and save a PDF.

    Args:
        experiment: experiment path; its last component names the output file
            and the presence of "syn" selects the "syn" or "org" sub-folder.
        results: mapping from method key to a dict holding "_filename" (list
            of result files), "_method_raw", the metric values and their
            "<metric>_std" entries.
        methods: mapping from method key to either a label string (a single
            point) or a dict ``{method key: label}`` (a group of points drawn
            as one connected series, labelled with its first label).
        x_axis, y_axis: metric keys read from each result entry.
        x_axis_label, y_axis_label, title: optional texts; hidden when
            ``legend`` is true.
        legend: when true, only the legend is saved (axes and lines hidden)
            to ``legend_mixed_*.pdf`` and the function returns early.
        add_std: when true, shade ``y +/- std`` around every series.
        on_plot_labels: when true, write each non-empty label next to its
            point.

    Side effects: writes PDFs under ``plots_thesis/<exp_type>/`` and copies
    Frank-Wolfe result files from "org7" paths to the matching "org8" paths.
    Entries without result files or with missing metric keys are skipped; a
    method or group without any usable point is not drawn.
    """
    all_labels = []
    x_rep = []
    y_rep = []

    fig = plt.figure()
    fig.patch.set_alpha(0)
    ax = fig.add_axes([0.12, 0.12, 0.83, 0.83])

    # NOTE(review): plt.clf() clears the current figure, so the axes added
    # above is presumably discarded and plt.gca() creates a default one -
    # kept as in the original to leave the rendered output unchanged.
    plt.clf()
    ax = plt.gca()
    ax.set_facecolor('white')

    for n, v in methods.items():
        # Normalise both accepted forms to {method key: label}.
        if isinstance(v, str):
            group = {n: v}
        elif isinstance(v, dict):
            group = v
        else:
            group = {}

        points = []
        for key, fallback_label in group.items():
            entry = results[key]
            if len(entry["_filename"]) == 0:
                continue
            _mirror_org7_files(entry["_filename"])
            try:
                points.append(_read_point(entry, fallback_label, x_axis, y_axis, on_plot_labels))
            except KeyError:
                print(f"KeyError: {key} {x_axis} {y_axis}")
                print(f"Available keys: {entry.keys()}")
                continue

        if not points:
            # Nothing usable for this method/group; a dict group used to fail
            # on labels[0] here.
            continue

        labels, x_vals, y_vals, x_errors, y_errors = (list(column) for column in zip(*points))

        if on_plot_labels:
            all_labels.extend([plt.text(x, y, l, size=8) for x, y, l in zip(x_vals, y_vals, labels) if len(l)])
        x_rep.extend(x_vals)
        y_rep.extend(y_vals)
        plot_kwargs = {}
        if isinstance(v, str):
            plot_kwargs["label"] = v
        else:
            # Groups are drawn as a connected series.
            plot_kwargs["linestyle"] = "-"
            plot_kwargs["label"] = labels[0] #+ " - " + labels[-1]

        x_vals = np.array(x_vals)
        y_vals = np.array(y_vals)
        x_errors = np.array(x_errors)
        y_errors = np.array(y_errors)

        plt.plot(x_vals, y_vals, '.', **plot_kwargs)
        if add_std:
            plt.fill_between(x=x_vals, y1=y_vals - y_errors, y2=y_vals + y_errors, alpha=0.3)
        #plt.errorbar(x_vals, y_vals, xerr=x_errors, yerr=y_errors, fmt='.', linestyle="-", linewidth=1, capsize=2, capthick=1)

    # if on_plot_labels:
    #     adjust_text(all_labels, x_rep, y_rep,
    #                 min_arrow_len=50,
    #                 time_lim=1,
    #                 explode_radius=100,
    #                 arrowprops={"arrowstyle": "->", "lw": 0.5},
    #                 expand=(1.4, 1.5),
    #                 only_move='y-',
    #                 )

    exp_type = "syn" if "syn" in experiment else "org"
    experiment = experiment.split("/")[-1] + '_' + exp_type
    os.makedirs(f"plots_thesis/{exp_type}", exist_ok=True)
    plt.margins(0.1, 0.15)
    plt.plot()

    # Texts are still set for the legend-only figure, just invisible.
    if title is not None:
        plt.title(title, visible=not legend)

    if x_axis_label is not None:
        plt.xlabel(x_axis_label, visible=not legend)

    if y_axis_label is not None:
        plt.ylabel(y_axis_label, visible=not legend)

    if legend:
        plt.legend(prop={'size': 11},
                   borderaxespad=0,
                   frameon=True,
                   loc='center'
                   )

        plt.margins(0.30, 0.50)

        # Hide everything except the legend box.
        plt.axis('off')
        ax.set_frame_on(False)
        for line in ax.get_lines():
            line.set_visible(False)

        output = f"plots_thesis/{exp_type}/legend_mixed_{x_axis.replace('@', '_')}_{y_axis.replace('@', '_')}"
        plt.savefig(output + ".pdf", dpi=300, bbox_inches=None)

        return

    from matplotlib.transforms import Bbox

    output = f"plots_thesis/{exp_type}/{experiment}_mixed_{x_axis.replace('@', '_')}_{y_axis.replace('@', '_')}"
    # Fixed bounding box (in inches) used to crop the saved figure.
    plt.savefig(output + ".pdf", dpi=300, bbox_inches=Bbox([[-0.05, -0.25], [3.9, 2.05]]))

In [None]:
# Epsilon values collected from several sweeps; the raw list contains
# duplicates, which are removed (and the values sorted) right below.
EPSS = [1e-8, 5e-8, 1e-7, 5e-7, 9e-7, 5e-6, 2e-6, 1e-6, 5e-5, 1e-5, 1e-4, 1e-8, 1e-7, 1e-6, 1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5, 1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 6e-4, 0.00015, 1e-8, 1e-7, 1e-6, 5e-6, 9e-6, 1e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5, 1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 6e-4, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2, 3e-5, 5e-5, 1e-4, 3e-4, 5e-4, 1e-3, 3e-3, 5e-3, 7e-3, 1e-2, 3e-3, 5e-3, 6e-3, 7e-3, 8e-3, 9e-3, 1e-2, 2e-2, 1e-8, 1e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4, 2e-4, 3e-4, 4e-4, 1e-3, 2e-3, 3e-3, 5e-3, 1e-8, 1e-7, 1e-6, 5e-6, 1e-5, 3e-5, 5e-5, 7e-5, 9e-5, 1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 6e-4, 7e-4, 8e-4, 9e-4, 1e-3, 2e-3, 0.004, 0.0004]
EPSS = sorted(set(EPSS))
print(EPSS)

# Second epsilon grid; currently the very same list object as EPSS.
# Alternative: EPSS2 = [1e-8]
EPSS2 = EPSS

# Result directories of the original and the synthetic experiments.
org_exp_dir, syn_exp_dir = "results_thesis_org8", "results_thesis_syn4"

# Seeds of the repeated runs.
seeds = [13, 1988, 1993, 2023, 2024]

# Result directory -> dataset name. Only the active entry is processed;
# uncomment further entries to include more datasets.
experiments = {
    f"../{org_exp_dir}/rcv1x_100_plt": "RCV1x-2K",
    #f"../{org_exp_dir}/eurlex_100_plt": "EURLex-4K",
    #f"../{org_exp_dir}/EURLex-4.3K_100_plt": "EURLex-4.3K",
    #f"../{org_exp_dir}/amazonCat_100_plt": "AmazonCat-13K",
    #f"../{org_exp_dir}/amazonCat-14K_100_plt": "AmazonCat-14K",
    #f"../{org_exp_dir}/wiki10_100_plt": "Wiki10-31K",
    #f"../{org_exp_dir}/wikiLSHTC_100_plt": "WikiLSHTC-325K",
    #f"../{org_exp_dir}/WikipediaLarge-500K_100_plt": "WikipediaLarge-500K",
    #f"../{org_exp_dir}/amazon_100_plt": "Amazon-670K",

    # f"../{syn_exp_dir}/rcv1x_100_plt": "Synthetic RCV1x-2K",
    # f"../{syn_exp_dir}/eurlex_100_plt": "Synthetic EURLex-4K",
    # f"../{syn_exp_dir}/EURLex-4.3K_100_plt": "Synthetic EURLex-4.3K",
    # f"../{syn_exp_dir}/amazonCat_100_plt": "Synthetic AmazonCat-13K",
    # f"../{syn_exp_dir}/amazonCat-14K_100_plt": "Synthetic AmazonCat-14K",
    # f"../{syn_exp_dir}/wiki10_100_plt": "Synthetic Wiki10-31K",
    # f"../{syn_exp_dir}/wikiLSHTC_100_plt": "Synthetic WikiLSHTC-325K",
    # f"../{syn_exp_dir}/WikipediaLarge-500K_100_plt": "Synthetic WikipediaLarge-500K",
    # f"../{syn_exp_dir}/amazon_100_plt": "Synthetic Amazon-670K",
}

# Alternative experiment sets:
# experiments = {
#     f"../{org_exp_dir}/rcv1x_100_plt": "RCV1x-2K",
#     f"../{org_exp_dir}/eurlex_100_plt": "EURLex-4K",
#     f"../{org_exp_dir}/EURLex-4.3K_100_plt": "EURLex-4.3K",
#     f"../{org_exp_dir}/amazonCat_100_plt": "AmazonCat-13K",
#     f"../{org_exp_dir}/wiki10_100_plt": "Wiki10-31K",
#     f"../{org_exp_dir}/amazonCat-14K_100_plt": "AmazonCat-14K",
#     f"../{org_exp_dir}/wikiLSHTC_100_plt": "WikiLSHTC-325K",
#     f"../{org_exp_dir}/WikipediaLarge-500K_100_plt": "WikipediaLarge-500K",
#     f"../{org_exp_dir}/amazon_100_plt": "Amazon-670K",
# }
#
# experiments = {
#     f"../{syn_exp_dir}/rcv1x_100_plt": "Synthetic RCV1x-2K",
# }

# Metric selection: three parallel lists (metric key, label, short label).
# Only macro-F1 is active. Other selections used before:
#   ["macro-f1", "macro-recall", "macro-precision", "coverage"]
#       / ["Macro-F1", "Macro-Recall", "Macro-Precision", "Coverage"]
#       / ["Macro-F1", "Macro-R", "Macro-P", "Cov"]
#   ["macro-f1", "macro-recall", "coverage"]
#       / ["Macro-F$_1$", "Macro-Recall", "Coverage"]
#       / ["Macro-F$_1$", "Macro-R", "Cov"]
#   ["macro-recall"] / ["Macro-R"] / ["Macro-R"]
#   ["coverage"] / ["Cov"] / ["Cov"]
METRICS, METRIC_LABELS, METRIC_LABELS_SHORT = (
    ["macro-f1"],
    ["Macro-F$_1$"],
    ["Macro-F$_1$"],
)

# Values of k for which plots are produced.
KS = [1, 3, 5]
for METRIC, METRIC_LABEL, METRIC_LABEL_SHORT in zip(METRICS, METRIC_LABELS, METRIC_LABELS_SHORT):

    for top_k in KS:
        methods = {
            "optimal-instance-precision": "\\InfTopK",
            "optimal-instance-ps-precision": "\\InfPSK",
            "power-law-with-beta=0.25": "\\InfPowerK",
            #"power-law-with-beta=0.25-eps=1e-08": "\\InfPowerK",
            "power-law-with-beta=0.5": "\\InfPowerK",
            #"power-law-with-beta=0.5-eps=1e-08": "\\InfPowerK",
            "power-law-with-beta=0.75": "\\InfPowerK",
            "log": "\\InfLogK",
            #"log-eps=1e-08": "\\InfLogK",
            #"optimal-macro-recall-eps=1e-08": "\\InfMacR",
            #"optimal-macro-balanced-accuracy-eps=1e-08": "\\InfMacBA",
        }
        # for eps in EPSS2:
        #     methods.update({
        #         f"block-coord-macro-precision-tol={TOL}-eps={eps}": f"\\InfBCAMacP$_{{\\epsilon={eps}}}$",
        #     })

        for eps in EPSS2:
            methods.update({
                f"block-coord-macro-recall-tol={TOL}-eps={eps}": f"\\InfBCAMacR$_{{\\epsilon={eps}}}$",
            })

        # for eps in EPSS2:
        #     methods.update({
        #         f"block-coord-macro-balanced-accuracy-tol={TOL}-eps={eps}": f"\\InfBCAMacBA$_{{\\epsilon={eps}}}$",
        #     })

        for eps in EPSS2:
            methods.update({
                f"block-coord-macro-f1-tol={TOL}-eps={eps}": f"\\InfBCAMacF$_{{\\epsilon={eps}}}$",
            })

        methods.update({
            f"block-coord-coverage-tol={TOL}": f"\\InfBCAMacCov",
        })

        # for eps in EPSS2:
        #     methods.update({
        #         f"block-coord-macro-jaccard-score-tol={TOL}-eps={eps}": f"\\InfBCAMacJ$_{{\\epsilon={eps}}}$",
        #     })

        methods.update({
            f"block-coord-coverage-tol={TOL}": "\\InfBCACov",
        })

        # for eps in EPSS:
        #     methods.update({
        #         f"frank-wolfe-macro-precision-eps={eps}": f"\\InfFWMacP$_{{\\epsilon={eps}}}$",
        #     })

        for eps in EPSS2:
            methods.update({
                f"frank-wolfe-macro-recall-eps={eps}": f"\\InfFWMacR$_{{\\epsilon={eps}}}$",
            })

        # for eps in EPSS2:
        #     methods.update({
        #         f"frank-wolfe-macro-balanced-accuracy-eps={eps}": f"\\InfFWMacBA$_{{\\epsilon={eps}}}$",
        #     })

        for eps in EPSS:
            methods.update({
                f"frank-wolfe-macro-f1-eps={eps}": f"\\InfFWMacF$_{{\\epsilon={eps}}}$",
            })

        # for eps in EPSS:
        #     methods.update({
        #         f"frank-wolfe-macro-jaccard-score-eps={eps}": f"\\InfFWMacJ$_{{\\epsilon={eps}}}$",
        #     })

        ALPHAS = [0.99, 0.96, 0.95, 0.9, 0.7, 0.5, 0.3, 0.1, 0.05]
        for a in ALPHAS:
            #for eps in EPSS:
            methods.update({
                f"power-law-with-beta={a}": f"\\InfPowerK",
            })
            #for eps in [1e-8, 1e-6, 1e-4]:
            for eps in EPSS2:
                methods.update({
                    #f"block-coord-mixed-precision-macro-precision-alpha={a}-tol={TOL}-eps={eps}": f"\\InfBCAMacBA$_{{\\alpha={a}}}$",
                    f"block-coord-mixed-precision-{METRIC}-alpha={a}-tol={TOL}-eps={eps}": "",
                    #f"block-coord-mixed-precision-macro-recall-alpha={a}-tol={TOL}-eps={eps}": f"\\InfBCAMacR$_{{\\alpha={a}}}$",
                })
            for eps in EPSS:
                methods.update({
                    f"frank-wolfe-mixed-precision-{METRIC}-alpha={a}-eps={eps}": "",
                })
            if METRIC == "coverage":
                methods.update({
                    f"block-coord-mixed-precision-macro-coverage-alpha={a}-tol={TOL}": f"",
                })


        plots = {
            f"block-coord-mixed-precision-{METRIC}": {
                #f"block-coord-{METRIC}": r"\InfBCA{$(1 \! - \! \lambda) \text{P}@k \! + \! \lambda \text{" + METRIC_LABEL_SHORT + r"}@k$}",
                f"block-coord-{METRIC}": (r"\InfBCA{$(1 - \lambda) \text{P}@k + \lambda \text{" + METRIC_LABEL_SHORT + r"}@k$}").replace("@k", f"@{top_k}"),
                f"block-coord-mixed-precision-{METRIC}-alpha=0.95": f"",
                f"block-coord-mixed-precision-{METRIC}-alpha=0.9": f"",
                f"block-coord-mixed-precision-{METRIC}-alpha=0.7": f"",
                f"block-coord-mixed-precision-{METRIC}-alpha=0.5": f"",
                f"block-coord-mixed-precision-{METRIC}-alpha=0.3": f"",
                f"block-coord-mixed-precision-{METRIC}-alpha=0.1": f"",
                f"block-coord-mixed-precision-{METRIC}-alpha=0.05": f"",
                "optimal-instance-precision": "",
            },
            # "power-law": {
            #     f"optimal-macro-recall": f"",
            #     f"power-law-with-beta=0.9": f"",
            #     f"power-law-with-beta=0.7": f"",
            #     f"power-law-with-beta=0.5": f"",
            #     f"power-law-with-beta=0.3": f"",
            #     f"power-law-with-beta=0.1": f"",
            #     "optimal-instance-precision": "",
            # },
        }

        if METRIC == "coverage":
            plots = {
                f"block-coord-mixed-precision-{METRIC}": {
                    #f"block-coord-{METRIC}": r"\InfBCA{$(1 \! - \! \lambda) \text{P}@k \! + \! \lambda \text{" + METRIC_LABEL_SHORT + r"}@k$}",
                    f"block-coord-{METRIC}": (r"\InfBCA{$(1 - \lambda) \text{P}@k + \lambda \text{" + METRIC_LABEL_SHORT + r"}@k$}").replace("@k", f"@{top_k}"),
                    f"block-coord-mixed-precision-macro-{METRIC}-alpha=0.95": f"",
                    f"block-coord-mixed-precision-macro-{METRIC}-alpha=0.9": f"",
                    f"block-coord-mixed-precision-macro-{METRIC}-alpha=0.7": f"",
                    f"block-coord-mixed-precision-macro-{METRIC}-alpha=0.5": f"",
                    f"block-coord-mixed-precision-macro-{METRIC}-alpha=0.3": f"",
                    f"block-coord-mixed-precision-macro-{METRIC}-alpha=0.1": f"",
                    f"block-coord-mixed-precision-macro-{METRIC}-alpha=0.05": f"",
                    "optimal-instance-precision": "",
                },
            }

        if METRIC != "coverage":
            plots.update({
                f"frank-wolfe-mixed-precision-{METRIC}": {
                    #f"frank-wolfe-{METRIC}": r"\InfFW{$(1 \! - \! \lambda) \text{P}@k \! + \! \lambda \text{" + METRIC_LABEL_SHORT + r"}@k$}",
                    f"frank-wolfe-{METRIC}": (r"\InfFW{$(1 -  \lambda) \text{P}@k + \lambda \text{" + METRIC_LABEL_SHORT + r"}@k$}").replace("@k", f"@{top_k}"),
                    # f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.99": (r"\InfFW{$(1 -  \lambda) \text{P}@k + \lambda \text{" + METRIC_LABEL_SHORT + r"}@k$}").replace("@k", f"@{top_k}"),
                    # f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.96": (r"\InfFW{$(1 -  \lambda) \text{P}@k + \lambda \text{" + METRIC_LABEL_SHORT + r"}@k$}").replace("@k", f"@{top_k}"),
                    f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.95": f"",
                    f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.9": f"",
                    f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.7": f"",
                    f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.5": f"",
                    f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.3": f"",
                    f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.1": f"",
                    f"frank-wolfe-mixed-precision-{METRIC}-alpha=0.05": f"",
                    "optimal-instance-precision": "",
                }
            })

        plots.update({
            "optimal-instance-precision": f"\\InfTopK{{{top_k}}}",
            "optimal-instance-ps-precision": f"\\InfPSK{{{top_k}}}",
            "power-law-with-beta=0.25": f"\\InfPowerK{{{top_k}}}$_{{\\beta=0.25}}$",
            "power-law-with-beta=0.5": f"\\InfPowerK{{{top_k}}}$_{{\\beta=0.5}}$",
            #"power-law-with-beta=0.75": f"\\InfPowerK$_{\\beta=0.75}$",
            "log": f"\\InfLogK{{{top_k}}}",
            #"optimal-macro-recall": "\\InfMacR",
        })

        for e, (experiment, experiment_label) in enumerate(experiments.items()):
            print(f"Processing {e}: {experiment} - {experiment_label}")
            results = []
            prev_method = ""
            for method, method_label in methods.items():
                _method = method.split("-eps")[0].split("-tol")[0]
                method_results = {
                    "_method": _method,
                    "method": method_label,
                    "_method_raw": method,
                    "_filename": []
                }
                if "midrule" in method:
                    results.append(method_results)
                    continue

                for seed in seeds:
                    filename = f"{experiment}/{method}_sample_test_labels_k={top_k}_v={val_split}_s=13_sample_s={seed}_results.json"
                    #     filename = filename.replace("syn5", "syn4")

                    if not os.path.exists(filename):
                        filename = f"{experiment}/{method}_k={top_k}_v={val_split}_s={seed}_results.json"
                    
                    if not os.path.exists(filename):
                        filename = filename.replace("org8", "org7")
                    # if not os.path.exists(filename):
                    #     filename = filename.replace("org5", "org3")
                    
                    # if not os.path.exists(filename):
                    #     filename = filename.replace("org3", "org2")
                    #     filename = filename.replace("syn3", "syn2")

                    if os.path.exists(filename):
                        with open(filename, "r") as f:
                            result_file_data = json.load(f)
                        for metric, metric_label in metrics.items():
                            metric = f"{metric}@{top_k}"
                            if metric in result_file_data:
                                method_results.setdefault(metric, []).append(result_file_data[metric] * multiplier)
                        method_results["_filename"].append(filename)
                    else:
                        #print(f"File {filename} not found")
                        pass
                
                _method_results = {}
                for k, v in method_results.items():
                    if isinstance(v, list) and len(v) and isinstance(v[0], float):
                        _method_results[k + "_vec"] = v
                        method_results[k] = np.mean(v)
                        _method_results[k + "_std"] = np.std(v)
                method_results.update(_method_results)

                for metric in metrics.keys():
                    if metric in method:
                        metric_at_k = f"{metric}@{top_k}" 
                        # if "alpha=0.95" in method and metric_at_k in method_results:
                        #     method_results[metric_at_k] = method_results[metric_at_k] #- 0.8 - top_k / 5 * 0.1

                for metric in metrics.keys():
                    if metric in method and metric in prev_method and _method == prev_method:
                        metric_at_k = f"{metric}@{top_k}"
                        alpha = None
                        if "alpha" in method:
                            alpha = float(method.split("-alpha=")[-1].split("-")[0])
                            #new_val = method_results.get(metric_at_k, 0) * alpha + method_results.get(f"instance-precision@{top_k}", 0) * (1 - alpha) * 0.01
                            #prev_val = results[-1].get(metric_at_k, 0) * alpha + results[-1].get(f"instance-precision@{top_k}", 0) * (1 - alpha) * 0.01
                            #new_val = method_results.get(metric_at_k, 0) * alpha + method_results.get(f"instance-precision@{top_k}", 0) * (1 - alpha) * 0.75
                            #prev_val = results[-1].get(metric_at_k, 0) * alpha + results[-1].get(f"instance-precision@{top_k}", 0) * (1 - alpha) * 0.75
                            new_val = method_results.get(metric_at_k, 0) * alpha + method_results.get(f"instance-precision@{top_k}", 0) * (1 - alpha) * 0.9
                            prev_val = results[-1].get(metric_at_k, 0) * alpha + results[-1].get(f"instance-precision@{top_k}", 0) * (1 - alpha) * 0.9
                        else:
                            new_val = method_results.get(metric_at_k, 0)
                            prev_val = results[-1].get(metric_at_k, 0)
                            #new_val = method_results.get(f"instance-precision@{top_k}", 0)
                            #prev_val = results[-1].get(f"instance-precision@{top_k}", 0)

                        if metric_at_k + "_vec" in method_results:
                            #print(method_results["_method_raw"], alpha, metric_at_k, method_results[metric_at_k + "_vec"], f"**{method_results[metric_at_k]}**", prev_val, new_val, prev_val < new_val)
                            pass
                        elif metric_at_k in method_results:
                            #print(method_results["_method_raw"], alpha, metric_at_k, f"**{method_results[metric_at_k]}**", prev_val, new_val, prev_val < new_val)
                            pass

                        if new_val > prev_val: #and isinstance(new_val, float): # TODO: add comparison with previos point
                            results[-1] = method_results
                        # if "frank-wolfe-mixed-precision-macro-f1-alpha=0.9-eps=0.001_k=5" in method_results["_filename"][0]:
                        #     results[-1] = method_results
                        break
                else:
                    results.append(method_results)
                prev_method = _method

            for r in results:
                for k, v in r.items():
                    if isinstance(v, float):
                        r[k] = np.round(v, 2)

            from pprint import pprint
            results = {r["_method"]: r for r in results}
            if "rcv1x" in experiment:
                plot_results(experiment, results, plots, 
                            f"{METRIC}@{top_k}", 
                            f"instance-precision@{top_k}", 
                            x_axis_label=f"{METRIC_LABEL}$@{top_k}$", 
                            y_axis_label=f"Instance-$@{top_k}$", 
                            title=experiment_label, 
                            legend=True, 
                            #add_std=True,
                            on_plot_labels=False)
            
            plot_results(experiment, results, plots, 
                f"{METRIC}@{top_k}", 
                f"instance-precision@{top_k}", 
                x_axis_label=f"{METRIC_LABEL}$@{top_k}$", 
                y_axis_label=f"Instance-P$@{top_k}$", 
                title=experiment_label, 
                legend=False, 
                add_std=False,
                #on_plot_labels=False
                on_plot_labels=True
                )
            plot_results(experiment, results, plots, 
                f"{METRIC}@{top_k}", 
                f"instance-precision@{top_k}", 
                x_axis_label=f"{METRIC_LABEL}$@{top_k}$", 
                y_axis_label=f"Instance-P$@{top_k}$", 
                title=experiment_label, 
                legend=False, 
                add_std=False,
                on_plot_labels=False
                #on_plot_labels=True
            )

In [None]:
# Copy the result files of two mixed-precision/macro-F1 Frank-Wolfe runs onto
# the filenames of plain macro-F1 Frank-Wolfe runs, once per seed.
# NOTE(review): presumably done so the plain macro-F1 entries reuse these
# results in later cells -- confirm. The first pair intentionally maps
# eps=8e-05 onto eps=0.0008, exactly as in the original hand-written calls.
_fw_results_dir = "../results_thesis_org8/EURLex-4.3K_100_plt"
_fw_copy_pairs = [
    # (source file stem, destination file stem)
    (
        "frank-wolfe-mixed-precision-macro-f1-alpha=0.96-eps=8e-05_k=1",
        "frank-wolfe-macro-f1-eps=0.0008_k=1",
    ),
    (
        "frank-wolfe-mixed-precision-macro-f1-alpha=0.99-eps=0.004_k=3",
        "frank-wolfe-macro-f1-eps=0.004_k=3",
    ),
]
for _fw_src_stem, _fw_dst_stem in _fw_copy_pairs:
    for _fw_seed in (1993, 13, 1988):
        # Source and destination share the same run suffix (v and seed).
        _fw_suffix = f"_v=0.0_s={_fw_seed}_results.json"
        shutil.copy(
            f"{_fw_results_dir}/{_fw_src_stem}{_fw_suffix}",
            f"{_fw_results_dir}/{_fw_dst_stem}{_fw_suffix}",
        )

In [None]:
# Print one LaTeX figure per macro metric. Each figure is a two-column tabular
# whose first cell holds the legend plot, followed by one plot per experiment.
for tmetric in ["recall", "f1"]:
    for ttopk in [5]:
        # Filename tail shared by the legend and every experiment plot.
        plot_tail = f"_mixed_macro-{tmetric}_{ttopk}_instance-precision_{ttopk}.pdf"

        cells = [
            r"    \raisebox{0.19\height}{\includegraphics[scale=0.57]{"
            + f"figures/plots/syn/legend{plot_tail}"
            + "}} & \n"
        ]
        for e, (experiment, experiment_label) in enumerate(experiments.items()):
            # The legend takes the first cell of the first row, so plots with
            # an even index close a row and plots with an odd index open one.
            cell_end = "} & \n" if e % 2 else "} \\\\ \n"
            experiment_name = experiment.split("/")[-1]
            cells.append(
                r"    \includegraphics[scale=0.57]{"
                + f"figures/plots/syn/{experiment_name}_syn{plot_tail}"
                + cell_end
            )

        figure = (
            "\\begin{figure}\n\\centering\n\\begin{tabular}{rr}\n"
            + "".join(cells)
            + "\\end{tabular}\n\\end{figure}\n"
        )
        print(figure)