In [None]:
import matplotlib.pyplot as plt
import xarray as xr
from pathlib import Path
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
# Apply matplotlib's "ggplot" style sheet globally, so every figure created below uses it.
plt.style.use("ggplot")

In [None]:
# Threshold compared against the KS p-values throughout the notebook; it is also
# reused (times 100) as the lower percentile of the plotted bands.
REJECT_THR = 0.05

# Bootstrap result files found in the working directory, in sorted order.
files = sorted(Path("./").glob("bootstrap_output_*n500.nc"))

In [None]:
# One diagnostic figure per bootstrap output file:
#   top panel    - median and percentile band of the number of rejected variables,
#   bottom panel - how many axis-0 entries exceed the REJECT_THR share of variables.
for _file in files:
    # The file stem is split on "_": fields 2 and 3 name the two cases, and the last
    # field minus its leading character is parsed as the iteration count.
    stem_fields = _file.stem.split("_")
    case_a, case_b = stem_fields[2:4]
    n_iter = int(stem_fields[-1][1:])

    # Read everything needed as plain numpy arrays, then release the file handle.
    with xr.open_dataset(_file) as ks_res:
        ks_pval = ks_res["pval"].values
        time_step = np.arange(ks_res.time.shape[0])

    fig, axes = plt.subplots(2, 1, figsize=(6, 6))
    quantile = REJECT_THR * 100

    # Axis 1 is the one whose length is used as the variable count below, so summing
    # over it gives the number of rejected variables per (axis-0 entry, time step).
    # Axis 0 is presumably the bootstrap iteration - TODO confirm.
    n_reject = np.array((ks_pval < REJECT_THR).sum(axis=1))
    n_reject_median = np.median(n_reject, axis=0)
    n_reject_lq = np.percentile(n_reject, quantile, axis=0)
    n_reject_uq = np.percentile(n_reject, 100 - quantile, axis=0)

    axes[0].plot(time_step, n_reject_median, color="black", lw=1.5, label="Median")
    # The band runs from the `quantile`-th to the (100 - quantile)-th percentile, so it
    # covers 100 - 2 * quantile percent of the distribution (90% for REJECT_THR = 0.05).
    axes[0].fill_between(
        time_step,
        n_reject_lq,
        n_reject_uq,
        color="C5",
        alpha=0.8,
        label=f"{100 - 2 * quantile:g}% CI",
    )
    axes[0].legend()

    # Reference line: REJECT_THR share of the total number of variables.
    axes[0].axhline(REJECT_THR * ks_pval.shape[1], color="#343", ls="-.")
    axes[0].set_title(f"Number of variables rejected at {(1 - REJECT_THR) * 100}% confidence")
    axes[0].set_xlabel("Timestep")
    axes[0].set_ylabel("N variables")

    # Per time step, count the axis-0 entries whose number of rejected variables
    # exceeds the reference line drawn above.
    test = (n_reject > ks_pval.shape[1] * REJECT_THR).sum(axis=0)
    axes[1].plot(test)
    axes[1].set_xlabel("Timestep")
    axes[1].set_ylabel("Number of iterations")

    fig.suptitle(f"{case_a} x {case_b}")
    fig.tight_layout()
    fig.savefig(f"plt_{case_a}-{case_b}_n{n_iter}.png")

In [None]:
def fmt_case(case):
    """Turn a case token taken from a file name into a human-readable label.

    * ``"ctl"`` becomes ``"Control"``.
    * Any other token has ``"pct"`` stripped and ``"p"`` read as the decimal point,
      and is rendered as a percentage with one decimal (``"2p5pct"`` -> ``"2.5%"``).
    * Tokens containing ``"old"`` additionally drop ``"old-"`` and get an ``"old "``
      prefix (``"old-2p5pct"`` -> ``"old 2.5%"``).

    Raises ``ValueError`` when the remaining text cannot be parsed as a float.
    """
    if case == "ctl":
        return "Control"

    is_old = "old" in case
    token = case.replace("old-", "") if is_old else case
    value = float(token.replace("pct", "").replace("p", "."))
    prefix = "old " if is_old else ""
    return f"{prefix}{value:.1f}%"

In [None]:
# Overlay every comparison on one axes: the line is the upper percentile of the number
# of rejected variables per time step, the grey band spans lower to upper percentile.
# `n_reject` and `reject_test` are keyed by the formatted (case_a, case_b) labels and
# are read again by a later cell.
fig, axes = plt.subplots(1, 1, figsize=(12, 6))
quantile = REJECT_THR * 100
reject_test = {}
n_reject = {}
n_vars = None  # length of axis 1 of the last file read; stays None if `files` is empty
for _file in files:
    case_a, case_b = _file.stem.split("_")[2:4]
    case_a = fmt_case(case_a)
    case_b = fmt_case(case_b)

    # Read everything needed as plain numpy arrays, then release the file handle.
    with xr.open_dataset(_file) as ks_res:
        ks_pval = ks_res["pval"].values
        time_step = np.arange(ks_res.time.shape[0])
    n_vars = ks_pval.shape[1]

    # Summing over axis 1 gives the number of rejected variables per
    # (axis-0 entry, time step); the percentiles are then taken over axis 0.
    case_key = (case_a, case_b)
    n_reject[case_key] = np.array((ks_pval < REJECT_THR).sum(axis=1))
    n_reject_lq = np.percentile(n_reject[case_key], quantile, axis=0)
    n_reject_uq = np.percentile(n_reject[case_key], 100 - quantile, axis=0)

    reject_test[case_key] = n_reject_uq

    axes.plot(time_step, n_reject_uq, lw=1.5, label=f"{case_a} x {case_b}")
    axes.fill_between(time_step, n_reject_lq, n_reject_uq, color="grey", alpha=0.4)

# Reference line at REJECT_THR times the variable count of the last file read.
# Skipped when no file was found, instead of failing on an undefined `ks_pval`.
if n_vars is not None:
    axes.axhline(REJECT_THR * n_vars, color="#343", ls="-.", label=f"{REJECT_THR * 100} % of variables")
axes.set_title(f"Number of variables rejected at {(1 - REJECT_THR) * 100}% confidence")
axes.set_xlabel("Timestep")
axes.set_ylabel("N variables")

axes.legend()

fig.tight_layout()
fig.savefig("plt_all_cases.png")

In [None]:
# For every comparison, count per time step how many axis-0 entries (labelled
# "iterations" on the y axis) have more rejected variables than the
# (1 - REJECT_THR) percentile of the control-vs-control comparison.
ctl_key = ("Control", "Control")

# The control threshold is the same for every comparison, so compute it once.
# A missing control run surfaces here as a KeyError on `ctl_key`.
ctl_threshold = np.percentile(n_reject[ctl_key], 100 * (1 - REJECT_THR), axis=0)
test = {
    _key: (n_reject[_key] > ctl_threshold).sum(axis=0)
    for _key in reject_test
}
fig, axes = plt.subplots(1, 1, figsize=(12, 5))

for _case in test:
    axes.plot(test[_case], label=f"{_case[0]} x {_case[1]}", lw=2.1)
axes.set_ylabel("Number of iterations")
axes.set_xlabel("Time step")
axes.set_title(
    f"Iterations above the control {100 * (1 - REJECT_THR):g}th percentile of rejected variables"
)
axes.legend()

In [None]:
# Histogram grid of `rnd_idx`: one row per file, one column per case in the comparison.
# squeeze=False keeps `axes` two-dimensional even when only one file is found, so
# `axes[row, col]` indexing always works.
fig, axes = plt.subplots(len(files), 2, figsize=(10, 15), sharex=True, squeeze=False)
bins = np.arange(0, 121, 4)
# Height of the dashed reference line drawn on every panel.
# NOTE(review): 500 presumably is the number of bootstrap iterations (it matches the
# "n500" in the file pattern) and 30 the number of draws per iteration - confirm.
ex_val = 500 / ((bins.shape[0] - 1) / 30)

for file_ix, _file in enumerate(files):
    # Fields 2 and 3 of the "_"-separated file stem name the two cases.
    case_a, case_b = _file.stem.split("_")[2:4]
    cases = [case_a, case_b]

    # Plot while the dataset is open, then release the file handle.
    with xr.open_dataset(_file) as ks_res:
        for _case in [0, 1]:
            ax = axes[file_ix, _case]
            ax.axhline(ex_val, color="k", ls="--")
            _ = ks_res.rnd_idx[_case].plot.hist(edgecolor="k", bins=bins, ax=ax)
            # Set after plotting so these override whatever labels the hist call applied.
            ax.set_title(cases[_case])
            ax.set_xlabel("")

In [None]:
# Ad-hoc inspection: count, along axis 0, how many p-values fall below REJECT_THR.
# NOTE(review): `ks_pval` is not defined in this cell; it is whatever the last loop
# iteration of an earlier cell left behind, so the output depends on execution order.
(ks_pval < REJECT_THR).sum(axis=0)

In [None]:
# Heatmap per file of rejection counts (summed over axis 0) by variable and time step,
# restricted to the variables whose total count is above the 90th percentile.
# squeeze=False keeps `axes` two-dimensional even when only one file is found.
fig, axes = plt.subplots(len(files), 1, figsize=(10, 15), sharex=True, squeeze=False)

for file_ix, _file in enumerate(files):
    # Fields 2 and 3 of the "_"-separated stem name the two cases; the last field
    # minus its leading character is parsed as the iteration count.
    stem_fields = _file.stem.split("_")
    case_a, case_b = stem_fields[2:4]
    case_a = fmt_case(case_a)
    case_b = fmt_case(case_b)
    n_iter = int(stem_fields[-1][1:])

    # Build the table while the dataset is open, then release the file handle.
    with xr.open_dataset(_file) as ks_res:
        ks_pval = ks_res.pval
        # Rows are time steps and columns are variables after the transpose.
        reject_by_var = pd.DataFrame((ks_pval < REJECT_THR).sum(axis=0).T, columns=ks_res.vars)

    # Keep only the variables whose total rejection count exceeds the 90th percentile.
    total_by_var = reject_by_var.sum()
    mask = total_by_var > total_by_var.quantile(.9)

    reject_by_var.index.name = "Time Step"
    ax = axes[file_ix, 0]
    # The colour scale tops out at the iteration count parsed from the file name
    # (500 for every file matched by the current "*n500.nc" pattern).
    sns.heatmap(reject_by_var.T[mask], ax=ax, vmin=0, vmax=n_iter)
    ax.set_title(f"{case_a} x {case_b}")

In [None]:
# Scratch check of how a case token maps to a number. The raw token is not a valid
# float literal (float("2p5pct") raises ValueError), so strip the "pct" suffix and
# read "p" as the decimal point first.
s1 = "2p5pct"
s1_value = float(s1.replace("pct", "").replace("p", "."))
s1_value