In [None]:
import os

import matplotlib.pyplot as plt
from nessai_models import MixtureOfDistributions
import numpy as np
import seaborn as sns
from scipy import stats
from thesis_utils.io import load_hdf5
from thesis_utils.plotting import set_plotting, save_figure, get_default_figsize, pp_plot
from thesis_utils import colours
import pandas as pd

# Apply the shared thesis_utils plotting set-up before any figure is created.
# NOTE(review): presumably configures matplotlib defaults - confirm in thesis_utils.plotting.
set_plotting()

In [None]:
# Load the stored result file of every sampler configuration, keyed by run name.
# The insertion order here fixes the row order of the per-run figure further down.
results = {
    "no_cvm": load_hdf5("outdir/no_cvm/result.hdf5"),
    "no_cvm_fuzz": load_hdf5("outdir/no_cvm_fuzz/result.hdf5"),
    "cvm": load_hdf5("outdir/cvm/result.hdf5"),
    "cvm_98": load_hdf5("outdir/cvm_98/result.hdf5"),
    "cvm_reset": load_hdf5("outdir/cvm_reset/result.hdf5"),
}

In [None]:
# LaTeX-formatted titles for each run, keyed by the same names as `results`.
result_labels = {
    "no_cvm": r"\textbf{No CVM - Default}",
    "no_cvm_fuzz": r"\textbf{No CVM - }$\epsilon_{FF} = 1.3$",
    "cvm": r"\textbf{CVM - Default}",
    "cvm_98": r"\textbf{CVM  - }$\rho_\textrm{CVM}=0.98$",
    "cvm_reset": r"\textbf{CVM - Reset}",
}

In [None]:
# Mixture model built from four distribution families.
# NOTE(review): the value 4 is presumably the number of parameters per family
# (later cells index parameters as "<family>_0" .. "<family>_3") - confirm.
model = MixtureOfDistributions(
    distributions={"gaussian": 4, "uniform": 4, "halfnorm": 4, "gamma": 4}
)


# Reference curves for each family on a 100-point grid spanning its bounds.
# "pdf" is the exponential of model.base_mapping[name] evaluated on the grid
# (presumably a log-density - confirm), and "cdf" is the normalised cumulative
# sum of those grid values, i.e. a discrete approximation.
true_posterior = {}
for name, bounds in model.bounds_mapping.items():
    grid = np.linspace(bounds[0], bounds[1], 100, endpoint=True)
    density = np.exp(model.base_mapping[name](grid))
    true_posterior[name] = {
        "x": grid,
        "pdf": density,
        "cdf": np.cumsum(density) / np.sum(density),
    }

In [None]:

# Frozen scipy distributions used as the analytic reference for each family.
# No overrides are supplied, so every `.get` below falls back to its default.
distributions_kwargs = {}

# scipy's uniform takes (loc, scale): the lower bound and the width of the interval.
uniform_loc = model.bounds_mapping["uniform"][0]
uniform_scale = np.ptp(model.bounds_mapping["uniform"])

# Key order matters: a later cell iterates this dict to build the table labels.
distributions = {
    "gaussian": stats.norm(**distributions_kwargs.get("gaussian", {})),
    "uniform": stats.uniform(
        uniform_loc,
        uniform_scale,
        **distributions_kwargs.get("uniform", {}),
    ),
    "gamma": stats.gamma(**distributions_kwargs.get("gamma", {"a": 1.99})),
    "halfnorm": stats.halfnorm(**distributions_kwargs.get("halfnorm", {})),
}

In [None]:
# For every run, KS-test each parameter's posterior samples against its
# analytic distribution: the samples are mapped through the reference CDF and
# the result is compared with a uniform distribution. The per-parameter
# p-values are then merged with scipy's combine_pvalues (default method) and
# stored under "combined".
p_values = {}
for key, result in results.items():
    run_p_values = {}
    for name in model.bounds_mapping.keys():
        for k in range(4):
            p = f"{name}_{k}"
            samples = np.sort(result["posterior_samples"][p])
            pp_data = distributions[name].cdf(samples)
            run_p_values[p] = stats.kstest(pp_data, "uniform").pvalue
    # Combine before inserting the "combined" entry so it is not part of its own input.
    combined = stats.combine_pvalues(list(run_p_values.values()))[1]
    run_p_values["combined"] = combined
    p_values[key] = run_p_values

In [None]:
# Human-readable name of each distribution family.
dist_labels = {
    "gaussian": "Gaussian",
    "halfnorm": "Half-Gaussian",
    "gamma": "Gamma",
    "uniform": "Uniform",
}

# Map every parameter name ("<family>_<k>") to a 1-based display label such as
# "Gaussian 1"; used to rename the rows of the p-value table.
latex_labels = {
    f"{key}_{k}": f"{dist_labels.get(key)} {k + 1}"
    for key in distributions
    for k in range(4)
}



In [None]:
# Show the parameter-name -> display-label mapping as the cell output.
latex_labels

In [None]:
# Table of p-values: one column per run, one row per parameter (plus "combined").
# Rows are renamed to their display labels and numbers are formatted to two
# decimals. Note that `df` ends up bound to a pandas Styler, not a DataFrame;
# the underlying frame is available as `df.data`.
df = (
    pd.DataFrame(p_values)
    .rename(latex_labels)
    .style.format(decimal='.', precision=2)
)


In [None]:
# `df` is a pandas Styler here (the previous cell rebinds it to
# `DataFrame.style.format(...)`), so the numeric p-values live in `df.data`.
# Comparing the Styler itself with 0.05 raises a TypeError and a Styler has no
# `.style` attribute, which made this cell fail on a fresh Restart & Run All.
p_value_frame = df.data

# Per-cell CSS: green where the p-value exceeds 0.05, red otherwise.
colormat = np.where(
    p_value_frame > 0.05, "background-color: green", "background-color: red"
)

# Style a fresh Styler instead of calling `df.apply(...)`: Styler.apply
# registers the function on the Styler it is called on, which would carry the
# CSS into the `df.to_latex()` export in the next cell.
p_value_frame.style.apply(lambda _: colormat, axis=None)

In [None]:
# Export the formatted p-value table as LaTeX.
# Create the output directory first: open(..., "w") raises FileNotFoundError
# when "results/" does not exist (e.g. on a fresh checkout).
os.makedirs("results", exist_ok=True)

latex_table = df.to_latex()
with open("results/p_values_table.tex", "w") as f:
    f.write(latex_table)

In [None]:
# Compare the marginal posteriors of the "no_cvm" and "cvm_reset" runs with the
# analytic densities. Columns follow the order of model.bounds_mapping (one
# distribution family each); row j shows parameter "<family>_j".
fig, axs = plt.subplots(4, 4, sharex="col", sharey=False)

post_results = [results["no_cvm"], results["cvm_reset"]]
true_key = "pdf"

hist_kwargs = dict(
    density=True,
    histtype="step",
    cumulative=False,
    ls="--",
    bins=32,
)

for i, name in enumerate(model.bounds_mapping.keys()):
    truth = true_posterior[name]
    for j in range(4):
        ax = axs[j, i]
        # Analytic reference curve in black, sampled posteriors as dashed steps.
        ax.plot(truth["x"], truth[true_key], color="k")
        ax.set_xlim(*model.bounds_mapping[name])
        parameter = name + f"_{j}"
        for r in post_results:
            ax.hist(r["posterior_samples"][parameter], **hist_kwargs)

# NOTE(review): these labels assume the columns (model.bounds_mapping order)
# are gaussian, uniform, gamma, halfnorm - confirm against the model.
axs[-1, 0].set_xlabel(r"$x_\textrm{Gaussian}$")
axs[-1, 1].set_xlabel(r"$x_\textrm{Uniform}$")
axs[-1, 2].set_xlabel(r"$x_\textrm{Gamma}$")
axs[-1, 3].set_xlabel(r"$x_\textrm{Half-Gaussian}$")

plt.tight_layout()
plt.show()

# Saved under its own name: the final cell of the notebook also saves to
# "nessai_cvm_posteriors", which silently overwrote this figure on a full run.
save_figure(fig, "nessai_cvm_posteriors_no_cvm_vs_cvm_reset")


In [None]:
# Posterior histograms for every run: one row per run, one column per
# distribution family, with the four parameters of a family overlaid in
# different colours on top of the analytic curve (black).

# Switch between cumulative (CDF) and density (PDF) views.
cdf = False

hist_kwargs = dict(
    density=True,
    histtype="step",
    cumulative=cdf,
    ls="-",
)

true_key = "cdf" if cdf else "pdf"
# Finer binning for the cumulative view; the value does not change inside the loops.
n_bins = 256 if cdf else 32

# NOTE: this rebinds `colours`, which the import cell binds to the
# thesis_utils.colours module; from here on the name refers to the palette.
colours = sns.color_palette("colorblind", n_colors=4)

# Copy before scaling so the object returned by get_default_figsize() is never
# mutated; if that object is shared, re-running the cell would otherwise
# compound the 1.5x height factor.
figsize = list(get_default_figsize())
figsize[1] *= 1.5

n_results = len(results)

fig, axs = plt.subplots(n_results, 4, sharex="col", sharey=cdf, figsize=figsize)

# Analytic reference curve on every panel.
for i, name in enumerate(model.bounds_mapping.keys()):
    for j in range(n_results):
        axs[j, i].plot(
            true_posterior[name]["x"],
            true_posterior[name][true_key],
            color="k"
        )
        axs[j, i].set_xlim(*model.bounds_mapping[name])

# Sampled posteriors: row i is the run, column j the family, colour k the parameter.
for i, (key, result) in enumerate(results.items()):
    for j, name in enumerate(model.bounds_mapping.keys()):
        for k in range(4):
            p = name + f"_{k}"
            samples = result["posterior_samples"][p]
            bins = np.linspace(*model.bounds[p], n_bins)
            axs[i, j].hist(samples, bins=bins, color=colours[k], **hist_kwargs)

    # Row title, placed to the left of the first panel.
    axs[i, 0].set_title(result_labels[key], x=-0.25, loc="left", pad=8)


# NOTE(review): these labels assume the columns (model.bounds_mapping order)
# are gaussian, uniform, gamma, halfnorm - confirm against the model.
axs[-1, 0].set_xlabel(r"$x_\textrm{Gaussian}$")
axs[-1, 1].set_xlabel(r"$x_\textrm{Uniform}$")
axs[-1, 2].set_xlabel(r"$x_\textrm{Gamma}$")
axs[-1, 3].set_xlabel(r"$x_\textrm{Half-Gaussian}$")

plt.tight_layout()
plt.show()
save_figure(fig, "nessai_cvm_posteriors")