In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
from statsmodels.stats import multitest as smm
import holoviews as hv
import hvplot.pandas
import panel as pn
import random
from functools import partial
hv.extension("bokeh")

In [None]:
# Vectorised two-sample Kolmogorov-Smirnov tests built on
# ``stats.mstats.ks_2samp``.  The gufunc signature "(n),(n)->(),()" pairs the
# last axis of both inputs and yields two outputs per pair, so each wrapper
# returns a tuple of two arrays.

# Variant that excludes the ``method`` keyword from vectorisation, so it is
# handed to every underlying call unchanged.
ks_test_wmethod = np.vectorize(
    stats.mstats.ks_2samp,
    excluded=("method",),
    signature="(n),(n)->(),()",
)

# Variant without excluded arguments.
ks_test_nomethod = np.vectorize(
    stats.mstats.ks_2samp,
    signature="(n),(n)->(),()",
)

def cvm_2samp(x, y):
    """Run the two-sample Cramer-von Mises test and unpack its result.

    Thin adapter around ``stats.cramervonmises_2samp`` that exposes the two
    fields of the result object as a plain tuple.

    Parameters
    ----------
    x, y
        The two samples, forwarded unchanged to
        ``stats.cramervonmises_2samp``.

    Returns
    -------
    tuple
        ``(statistic, pvalue)`` read from the result object.
    """
    outcome = stats.cramervonmises_2samp(x, y)
    return tuple(getattr(outcome, field) for field in ("statistic", "pvalue"))

# Vectorised Cramer-von Mises test: applies ``cvm_2samp`` to matching pairs
# along the last axis of its two array arguments and returns a tuple of two
# arrays (one per output of ``cvm_2samp``).
cvm_test_nomethod = np.vectorize(cvm_2samp, signature="(n),(n)->(),()")

In [None]:
def explore(rvs, alpha=0.05, n_loc_ens=30):
    """Count rejected tests between random sub-ensembles of two ensembles.

    From each of the two arrays, ``n_loc_ens`` positions along the last axis
    are drawn without replacement using the stdlib ``random`` module's global
    state.  The selected columns are compared with the vectorised K-S and
    Cramer-von Mises tests, and each set of p-values is additionally adjusted
    with the ``fdr_bh`` and ``fdr_by`` methods of
    ``statsmodels.stats.multitest.multipletests``.

    Parameters
    ----------
    rvs : pair of arrays
        The two ensembles.  Each is indexed as ``arr[:, members]``; the caller
        in this notebook passes arrays laid out as (variable, member).
    alpha : float
        Level passed to ``multipletests`` and used as the cut-off below which a
        p-value counts as a rejection.
    n_loc_ens : int
        Number of members sampled from each ensemble.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``K-S``, ``CVM``, ``K-S_fdr_bh``,
        ``K-S_fdr_by``, ``CVM_fdr_bh`` and ``CVM_fdr_by``; every entry is the
        number of p-values strictly below ``alpha``.
    """
    first, second = rvs

    # Independent index draws for the two ensembles (first, then second).
    picks_first = random.sample(range(first.shape[-1]), n_loc_ens)
    picks_second = random.sample(range(second.shape[-1]), n_loc_ens)
    sub_first = first[:, picks_first]
    sub_second = second[:, picks_second]

    # Only the p-values (second output) of each test are kept.
    ks_pvals = np.squeeze(np.array([ks_test_nomethod(sub_first, sub_second)[1]]))
    cvm_pvals = np.squeeze(np.array([cvm_test_nomethod(sub_first, sub_second)[1]]))
    pvals_all = {"K-S": ks_pvals, "CVM": cvm_pvals}

    # Element [1] of the multipletests result holds the corrected p-values.
    for base in ("K-S", "CVM"):
        for correction in ("fdr_bh", "fdr_by"):
            corrected = smm.multipletests(
                pvals=pvals_all[base].flatten(),
                alpha=alpha,
                method=correction,
                is_sorted=False,
            )
            pvals_all[f"{base}_{correction}"] = corrected[1]

    rejected = {}
    for label, pvals in pvals_all.items():
        rejected[label] = [(pvals < alpha).sum(axis=0)]

    return pd.DataFrame(rejected)

# Number of values drawn per variable when all_explore builds a synthetic
# ensemble; also the upper bound of the "NEns" slider.
NTOTENS = 120

def all_explore(alpha, n_loc_ens, scale_min=1, scale_max=5, n_pos=10, n_uncor=0, n_neg_cor=0, ninst=10):
    """Simulate ensembles at several scales and count threshold exceedances.

    For each of ``ninst`` instances a synthetic ensemble is generated at five
    evenly spaced scales between ``scale_min`` and ``scale_max``.  Every
    ensemble holds one base normal variable plus ``n_pos`` variables with the
    base added, ``n_neg_cor`` variables with the base subtracted and
    ``n_uncor`` independent variables, each with ``NTOTENS`` members.  The
    ensemble at the first scale is compared with the ensemble at every scale
    (including itself, which serves as the control) through ``explore``.

    Parameters
    ----------
    alpha : float
        Significance level forwarded to ``explore``; ``1 - alpha`` is also the
        quantile used for the control thresholds.
    n_loc_ens : int
        Number of members ``explore`` sub-samples from each ensemble.
    scale_min, scale_max : float
        End points (inclusive) of the range of standard deviations.
    n_pos, n_neg_cor, n_uncor : int
        Number of variables built as noise + base, noise - base and pure
        noise, respectively.
    ninst : int
        Number of independently generated instances.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Scale`` with the columns ``K-S``, ``CVM``,
        ``K-S_fdr_bh``, ``K-S_fdr_by``, ``CVM_fdr_bh`` and ``CVM_fdr_by``.
        Each entry is the number of instances whose rejection count exceeded
        the threshold of that column: the ``1 - alpha`` quantile of the
        control counts for the raw tests, and zero for the FDR-corrected ones.

    Notes
    -----
    Both the global NumPy generator and the stdlib ``random`` generator are
    re-seeded on every call: the ensembles are drawn with the former, while
    ``explore`` picks its sub-samples with the latter.
    """
    seed = 710319
    np.random.seed(seed)
    random.seed(seed)

    nscales = 5
    scales = np.linspace(scale_min, scale_max, nscales, endpoint=True)
    loc = 2

    def draw(scale):
        # One variable: NTOTENS normal deviates with the shared location.
        return stats.norm.rvs(size=NTOTENS, loc=loc, scale=scale)

    per_instance = []
    for inst in range(ninst):
        ensembles = []
        for scale in scales:
            base = draw(scale)
            members = [base]
            members.extend(draw(scale) + base for _ in range(n_pos))
            members.extend(draw(scale) - base for _ in range(n_neg_cor))
            members.extend(draw(scale) for _ in range(n_uncor))
            ensembles.append(members)
        # Resulting layout: (scale, variable, member).
        ensembles = np.array(ensembles)

        # Index 0 against itself is the control; the rest test larger scales.
        counts = pd.concat(
            [explore((ensembles[0], ensembles[i]), alpha, n_loc_ens) for i in range(ensembles.shape[0])]
        )
        counts["Scale"] = scales
        counts["Inst"] = inst
        per_instance.append(counts)

    rejected = pd.concat(per_instance, ignore_index=True)

    # Control thresholds come from the self-comparison, which is labelled with
    # the first scale.
    control = rejected.groupby("Scale").quantile(1 - alpha).loc[scales[0]]
    thr = {
        "K-S": control["K-S"],
        "CVM": control["CVM"],
        "K-S_fdr_bh": 0,
        "K-S_fdr_by": 0,
        "CVM_fdr_bh": 0,
        "CVM_fdr_by": 0,
    }

    # Flag every (instance, scale) row whose count exceeds its column's
    # threshold, then count the flagged instances per scale.
    exceeds = rejected[list(thr)].gt(pd.Series(thr), axis="columns")
    return exceeds.assign(Scale=rejected["Scale"]).groupby("Scale").sum()

In [None]:
# _exp = partial(all_explore)#, ensembles, scales)
# Upper bound shared by the three "number of variables" sliders.
NVARS = 100

# Sub-ensemble size and significance level.
nens_slider = pn.widgets.IntSlider(
    name=f"NEns (of {NTOTENS})",
    value=30,
    start=2,
    end=NTOTENS,
    step=2,
)
alpha_slider = pn.widgets.FloatSlider(
    name="Alpha",
    value=0.05,
    start=0.01,
    end=0.1,
    step=0.001,
)

# Number of variables of each kind in the synthetic ensembles.
uncor_slider = pn.widgets.IntSlider(
    name="Uncorrelated vars",
    value=5,
    start=0,
    end=NVARS,
    step=1,
)
negcor_slider = pn.widgets.IntSlider(
    name="Negative correlated vars",
    value=5,
    start=0,
    end=NVARS,
    step=1,
)
poscor_slider = pn.widgets.IntSlider(
    name="Positive correlated vars",
    value=5,
    start=0,
    end=NVARS,
    step=1,
)

# Number of instances generated by all_explore.
ninst_slider = pn.widgets.IntSlider(
    name="Bootstrap inst",
    value=10,
    start=5,
    end=100,
    step=2,
)

# Bind the sliders to all_explore and draw one line per test / correction
# against the ensemble scale.
reject_dfi = (
    hvplot.bind(
        all_explore,
        alpha=alpha_slider,
        n_loc_ens=nens_slider,
        n_pos=poscor_slider,
        n_uncor=uncor_slider,
        n_neg_cor=negcor_slider,
        ninst=ninst_slider,
    )
    .interactive()
    .hvplot.line(
        x="Scale",
        y=["K-S", "CVM", "K-S_fdr_bh", "K-S_fdr_by", "CVM_fdr_bh", "CVM_fdr_by"],
        grid=True,
        alpha=0.9,
    )
)

In [None]:
# Set the plot size to 900 x 600; as the last expression of the cell the
# resulting object is what the notebook renders.
reject_dfi.opts(width=900, height=600)

In [None]:
# Non-interactive run with fixed settings: 30 variables of each kind,
# 30 sub-sampled members and 100 instances at alpha = 0.05.
_data  = all_explore(
    alpha=0.05,
    n_loc_ens=30,
    scale_min=1.0,
    scale_max=2.0,
    n_pos=30,
    n_uncor=30,
    n_neg_cor=30,
    ninst=100
)
# all_explore(alpha, n_loc_ens, scale_min=1, scale_max=5, n_pos=10, n_uncor=0, n_neg_cor=0, ninst=10):


In [None]:
# Show the table returned by the fixed-settings all_explore run.
_data

In [None]:
# Pick the K-S and CVM entries of the 0.95 quantile row for Scale == 1.0 and
# print them as a dict.
ctl_thr = dict(
    _data.groupby("Scale").quantile(0.95).loc[1.0][["K-S", "CVM"]]
)
print(ctl_thr)