In [None]:
import matplotlib.pyplot as plt
import xarray as xr
from pathlib import Path
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from statsmodels.stats import multitest as smm
from matplotlib.ticker import ScalarFormatter
from results_plot import fmt_case

# Use matplotlib's stock "default" style for the figures drawn below.
plt.style.use("default")

# Significance level: p-values below ALPHA are counted as rejections, the same
# value is passed as `alpha` to the FDR correction, and it selects the
# percentiles / quantiles that are summarised and drawn.
ALPHA = 0.01
# The next three values are substituted into the bootstrap output file names.
run_len = "1year"
rolling = 12
niter = 1000

In [None]:
# Every comparison pairs a case with itself, so the (case_a, case_b) tuples are
# generated from a flat list of case names.
_case_names = [
    "ctl",
    "effgw_oro-0p5pct",
    "effgw_oro-1p0pct",
    "effgw_oro-10p0pct",
    "effgw_oro-20p0pct",
    "effgw_oro-30p0pct",
    "effgw_oro-40p0pct",
    "effgw_oro-50p0pct",
    "clubb_c1-1p0pct",
    "clubb_c1-3p0pct",
    "clubb_c1-5p0pct",
    "clubb_c1-10p0pct",
]
cases = [(_name, _name) for _name in _case_names]

pcts = [0.5, 1, 5, 10, 20, 30, 40, 50]
# pcts = [0.5, 1]

# One bootstrap output file per case pair; the slots are filled in the order
# run length, rolling window, first case, second case, iteration count.
_file_template = "bootstrap_data/bootstrap_output.{}_{}avg.{}_{}_n{}.nc"
files = [
    Path(_file_template.format(run_len, rolling, _first, _second, niter))
    for _first, _second in cases
]

# List each expected file together with whether it is present on disk.
print("[")
for _file in files:
    print(f"\t{_file}\t\t{_file.exists()}")
print("]")

# FDR-corrected p-values for each case pair, keyed by (case_a, case_b).
ks_pval_cr = {}

# Rejection counts from the raw p-values, keyed by (case_a, case_b).
# `reject_test` is created here but not filled in by this cell.
n_reject = {}
reject_test = {}

# Rejection counts from the corrected p-values, plus the upper-percentile
# summary for both variants. `reject_test_cr` is not filled in by this cell.
n_reject_cr = {}
reject_test_cr = {}
rejections = {}

# ALPHA expressed as a percentile; 100 - quantile is the matching upper bound.
quantile = ALPHA * 100

for _ix, _file in enumerate(files):
    case_a, case_b = cases[_ix]
    # The file stem ends in "_n<count>"; drop the leading "n" to get the number.
    n_iter = int(_file.stem.split("_")[-1][1:])
    case_a = fmt_case(case_a)
    case_b = fmt_case(case_b)
    _key = (case_a, case_b)

    # Read everything needed inside the context manager so the file handle is
    # released before moving on to the next case; `.values` pulls the p-values
    # into a NumPy array that stays usable after the dataset is closed.
    with xr.open_dataset(_file) as ks_res:
        # NOTE(review): presumably laid out as (iteration, variable, time) —
        # confirm against the code that writes these files.
        ks_pval = ks_res["pval"].values
        time_step = np.arange(ks_res.time.shape[0])

    # Count p-values below ALPHA along axis 1, then summarise along axis 0.
    n_reject[_key] = np.array((ks_pval < ALPHA).sum(axis=1))

    n_reject_mean = np.median(n_reject[_key], axis=0)
    n_reject_lq = np.percentile(n_reject[_key], quantile, axis=0)
    n_reject_uq = np.percentile(n_reject[_key], 100 - quantile, axis=0)

    # Correct each axis-0 slice separately: the slice is flattened so that all
    # of its p-values are adjusted together, then restored to its shape.
    # method="n" selects the Benjamini/Yekutieli (negative correlation) variant.
    _pval_cr = []
    for jdx in range(ks_pval.shape[0]):
        _pval_cr.append(
            smm.fdrcorrection(
                ks_pval[jdx].flatten(),
                alpha=ALPHA,
                method="n",
                is_sorted=False,
            )[1].reshape(ks_pval[jdx].shape)
        )
    ks_pval_cr[_key] = np.array(_pval_cr)

    # Same counting and summary as above, using the corrected p-values.
    n_reject_cr[_key] = np.array((ks_pval_cr[_key] < ALPHA).sum(axis=1))
    n_reject_mean_cr = np.median(n_reject_cr[_key], axis=0)
    n_reject_lq_cr = np.percentile(n_reject_cr[_key], quantile, axis=0)
    n_reject_uq_cr = np.percentile(n_reject_cr[_key], 100 - quantile, axis=0)

    # Keep only the upper percentile of the rejection counts for each variant.
    rejections[_key] = {
        f"{100 * (1 - ALPHA)}%": n_reject_uq,
        f"{100 * (1 - ALPHA)}% [Corrected]": n_reject_uq_cr,
    }

In [None]:
# Build a long-format table with one row per (case, axis-0 index) holding the
# rejection count at the last position of the second axis of n_reject[case].
# The columns are accumulated under a separate name so that `reject_data` is
# only ever bound to the finished DataFrame.
_columns = {"Case": [], "Rejected fields": [], "iteration": [], "Parameter": [], "Median": [], "Pct": []}

for _case, _counts in n_reject.items():
    _label = _case[0]
    _last_step = _counts[:, -1]
    _n_rows = len(_last_step)
    if _n_rows == 0:
        # Nothing to add for this case (and avoids a median over no values).
        continue

    # Per-case values are computed once here instead of once per row.
    # The percentage is the last space-separated token of the label with its
    # final character removed; labels where that is not a number get 0.
    try:
        _pct = float(_label.split(" ")[-1][:-1])
    except ValueError:
        _pct = 0
    _parameter = _label.split(" ")[0]
    _median = np.median(_last_step)

    _columns["Case"].extend([_label] * _n_rows)
    _columns["Parameter"].extend([_parameter] * _n_rows)
    _columns["Rejected fields"].extend(_last_step)
    _columns["iteration"].extend(range(_n_rows))
    _columns["Median"].extend([_median] * _n_rows)
    _columns["Pct"].extend([_pct] * _n_rows)

reject_data = pd.DataFrame(_columns)

In [None]:
# fig, axis = plt.subplots(1, 1, figsize=(10, 5))
# axis.bar(
#     x=np.arange(len(cases)),
#     height=[
#         # np.percentile(n_reject[(fmt_case(_case_a), fmt_case(_case_b))][:, -1], 99)
#         np.max(n_reject[(fmt_case(_case_a), fmt_case(_case_b))][:, -1])
#         for _case_a, _case_b in cases
#     ],
# )
# axis.set_xticks(np.arange(len(cases)), [fmt_case(_case[0]) for _case in cases], rotation=45)

# Figure size is given in centimetres and converted to inches for matplotlib.
_cm_per_inch = 2.54
fig, axis = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(12.5 / _cm_per_inch, 6.25 / _cm_per_inch),
    dpi=300,
)

# One column per case, holding the rejection counts at the last index of the
# second axis. Build from n_reject_cr instead to plot the corrected counts.
_reject = pd.DataFrame(
    {_pair[0]: _counts[:, -1] for _pair, _counts in n_reject.items()}
)
sns.boxenplot(_reject, orient="h", ax=axis)

# Reference line: the (1 - ALPHA) quantile of each case, then the median
# of those per-case values.
_threshold = _reject.quantile(1 - ALPHA).median()
axis.axvline(_threshold, ls="--", color="k")
axis.set_xlabel("Number of rejected variables")

plt.tight_layout()
# plt.savefig("plt_control_nrej_cr.png")

In [None]:
# Boxen plot of the rejection counts per case, coloured by parameter, saved as
# a PDF whose name carries the significance level.
fig, axis = plt.subplots(1, 1, figsize=(12.5 / 2.54, 6.25 / 2.54), dpi=600)
with sns.plotting_context(context="paper", font_scale=.8, rc=None):
    _plot = sns.boxenplot(
        reject_data,
        orient="h",
        x="Rejected fields",
        y="Case",
        hue="Parameter",
        palette="Set2",
        ax=axis,
    )
    axis.tick_params(labelsize=8)

    # Reference line: the (1 - ALPHA) quantile of each case, then the median of
    # those per-case values. Calling np.median on the quantile of the whole
    # column was a no-op on a single number, so the line ended up at the pooled
    # quantile over all cases rather than the per-case median.
    # NOTE(review): confirm the per-case median is the intended threshold.
    _threshold = (
        reject_data.groupby("Case")["Rejected fields"].quantile(1 - ALPHA).median()
    )
    axis.axvline(_threshold, ls="--", color="k")
    plt.tight_layout()
    plt.savefig(f"plt_control_nrej_a{ALPHA}.pdf")