# Plotting the distance performance metric over multiple values of K

This notebook contains the code to visualise a posterior computed by `results.py`. Before running it, set the `filename` variable to the path of the CSV file containing those results.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

# Path to the CSV file with the aggregated results; change this before running.
filename = 'path/to/posterior.csv'
aggregation = pd.read_csv(filename)

# One figure is drawn per sigma value, with one subplot per metric.
pta_sigmas = [0.03, 0.05, 0.1]
metrics = ["l1", "linf", "lninf"]
param_dropouts = aggregation["param_dropout"].unique()

param_drops_colors = ['#DDAA33', '#004488', '#000000']
# Sort K explicitly: seaborn's lineplot sorts by x on its own, but
# fill_between draws points in the order given, so an unsorted K would make
# the std band disagree with the line it belongs to.
ks = aggregation["K"].sort_values().unique()


def _mean_std_by_k(frame, column):
    """Return (means, stds) of `column` in `frame` for every K in `ks`.

    The arrays are aligned with `ks`; a K that has no rows in `frame`
    yields NaN in both arrays.
    """
    stats = frame.groupby("K")[column].agg(["mean", "std"]).reindex(ks)
    return stats["mean"].to_numpy(), stats["std"].to_numpy()


for pta_sigma in pta_sigmas:
    # All rows for this sigma, regardless of dropout. The per-dropout curves
    # filter this further; the "random" and "best" baselines use it directly.
    sigma_df = aggregation[aggregation["p_theta_alpha_sigma"] == pta_sigma]

    # squeeze=False keeps `ax` indexable even when there is a single metric.
    fig, ax = plt.subplots(nrows=1, ncols=len(metrics),
                           figsize=(len(metrics) * 3, 2.6), squeeze=False)
    ax = ax.ravel()

    for i, metric in enumerate(metrics):

        # Add model performance lines (mean over runs, +/- one std band)
        for j, param_drop in enumerate(param_dropouts):
            # Cycle through the palette so extra dropout values do not
            # raise an IndexError.
            color = param_drops_colors[j % len(param_drops_colors)]
            df = sigma_df[sigma_df["param_dropout"] == param_drop]
            means, stds = _mean_std_by_k(df, metric)
            sns.lineplot(x=ks, y=means, color=color, label=param_drop,
                         legend=False, ax=ax[i])
            ax[i].fill_between(ks, means - stds, means + stds,
                               color=color, alpha=0.2)

        # Add random baseline, averaged over every row of this sigma
        # (not only the rows of the last dropout value plotted above).
        random, _ = _mean_std_by_k(sigma_df, f"{metric}-random")
        sns.lineplot(x=ks, y=random, color="black", label="random",
                     legend=False, ax=ax[i], linestyle="--")

        # Add best performance line, averaged over every row of this sigma.
        best, _ = _mean_std_by_k(sigma_df, f"{metric}-best")
        sns.lineplot(x=ks, y=best, color="black", label="best",
                     legend=False, ax=ax[i])

        ax[i].set_xscale('log')

    # A single legend, placed to the right of the last subplot.
    ax[-1].legend(bbox_to_anchor=(1.02, 0.7), loc='upper left', frameon=False)

    plt.tight_layout()

