In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.stats import multitest as smm

# Select matplotlib's "ggplot" style sheet for the figures created in this notebook.
plt.style.use("ggplot")

In [None]:
# Seed NumPy's global random state so repeated runs start from the same stream.
np.random.seed(710319)

# Number of draws in each of the two samples handed to a single K-S test.
n_1 = n_2 = 30

ninst = 5000  # number of repetitions of the whole experiment
n_scales = 20  # number of standard-deviation values scanned
ALPHA = 0.05  # significance threshold applied to the p-values

In [None]:
# Row-wise two-sample Kolmogorov-Smirnov tests.
#
# Both wrappers broadcast `stats.mstats.ks_2samp` over the leading axes of
# their inputs: every call consumes one 1-D sample from each argument and
# yields two scalars (statistic, p-value).  The core signature is
# "(n),(m)" rather than "(n),(n)" so the two samples may have different
# lengths; equal-length inputs behave exactly as before.

# Variant that forwards a `method=` keyword untouched (it is excluded from
# vectorization), e.g. ks_test_wmethod(a, b, method="exact").
ks_test_wmethod = np.vectorize(
    stats.mstats.ks_2samp, signature="(n),(m)->(),()", excluded=["method"]
)

# Variant that relies on the default settings of `ks_2samp`.
ks_test_nomethod = np.vectorize(stats.mstats.ks_2samp, signature="(n),(m)->(),()")

```python
%%time

pvals_ens = []
scales = np.linspace(1, 5, n_scales)

for _ in range(ninst):
    rvs1 = np.array([stats.norm.rvs(size=n_1, loc=0, scale=1) for _ in scales])
    rvs2 = np.array([stats.norm.rvs(size=n_2, loc=0, scale=_scale + (np.random.rand(1) / 5)) for _scale in scales])
    _, p_val = ks_test_wmethod(rvs1, rvs2, method="exact")
    pvals_ens.append(p_val)
```

In [None]:
%%time

# Monte Carlo ensemble of two-sample K-S tests.
#
# For each repetition, one pair of samples is drawn per entry of `scales`:
# the first sample always has standard deviation 1, the second has the
# standard deviation given by that entry.  Only the p-values are kept.
scales = np.linspace(1, 8, n_scales)
pvals_ens = []

for _ in range(ninst):
    # Reference group: one row of n_1 zero-mean, unit-variance draws per scale.
    rvs1 = np.array([stats.norm.rvs(loc=0, scale=1, size=n_1) for _ in scales])

    # Comparison group: one row of n_2 zero-mean draws per scale, with the
    # standard deviation taken from `scales`.
    rvs2 = np.array(
        [stats.norm.rvs(loc=0, scale=_scale, size=n_2) for _scale in scales]
    )

    # Test row i of rvs1 against row i of rvs2; the statistic is discarded.
    _, p_val = ks_test_nomethod(rvs1, rvs2)
    pvals_ens.append(p_val)

# Stack the per-repetition rows into one 2-D array (repetitions along axis 0).
pvals_ens = np.array(pvals_ens)

In [None]:
# Percentile envelope of the uncorrected K-S p-values as a function of the
# standard-deviation difference between the two groups.
qrt = 5  # lower percentile; the upper one is 100 - qrt
pctiles = np.percentile(pvals_ens, [qrt, 50, 100 - qrt], axis=0)

# The first group is drawn with standard deviation 1, so `scales - 1` is the
# difference in standard deviation between the two groups.
_std_diff = scales - 1.0

_, axis = plt.subplots(1, 1, figsize=(7, 5))
axis.plot(_std_diff, pctiles[0], color="#2332aa", lw=2, label=f"{qrt}%")
axis.plot(_std_diff, pctiles[1], color="k", lw=3, label="Median")
axis.plot(_std_diff, pctiles[2], color="#aa2332", lw=2, label=f"{100 - qrt}%")

# Significance threshold: use the shared ALPHA constant rather than a
# hard-coded 0.05 so the line follows the notebook configuration.
axis.axhline(ALPHA, color="#777977", ls="--")
axis.text(0.05, ALPHA + 0.01, f"{ALPHA:.0%}")

axis.set_xlabel("Std Dev difference")
axis.set_ylabel("p-value")
axis.set_title(f"K-S Test ensemble for {ninst} samples, {n_1} ensemble size")
axis.grid(ls="--")
axis.legend()
plt.tight_layout()

# Saved under its own name: later cells in this notebook write
# "ks_explore.png", which used to overwrite this figure.
plt.savefig("ks_explore_percentiles.png")

In [None]:
# Multiple-testing correction of the ensemble p-values.
#
# `pvals_all` / `pctiles_all` map a method name to the (adjusted) p-values and
# their [qrt, 50, 100 - qrt] percentiles; "unc" holds the uncorrected values.
pctiles_all = {"unc": pctiles}
pvals_all = {"unc": pvals_ens}

for _method in ["fdr_bh", "fdr_by", "simes-hochberg", "fdr_tsbh"]:
    # Each column of `pvals_ens` (all repetitions for one scale) is corrected
    # as one family of tests; element [1] of the result is the array of
    # adjusted p-values.  The comprehension yields one row per scale, hence
    # the transpose back to the layout of `pvals_ens`.
    _pvals = np.array(
        [
            smm.multipletests(
                pvals=pvals_ens[:, iix],
                # Use the same level that the adjusted p-values are later
                # compared against.  Per the statsmodels documentation, the
                # two-stage procedure ("fdr_tsbh") returns adjusted p-values
                # that are specific to the alpha passed here, so a mismatched
                # level (previously 0.01) makes the later `< ALPHA`
                # comparison inconsistent.
                alpha=ALPHA,
                method=_method,
                is_sorted=False,
            )[1]
            for iix in range(pvals_ens.shape[1])
        ]
    ).T

    pvals_all[_method] = _pvals
    pctiles_all[_method] = np.percentile(_pvals, [qrt, 50, 100 - qrt], axis=0)

In [None]:
# Upper-percentile (100 - qrt) p-value curve for the uncorrected tests and for
# every multiple-testing correction stored in `pctiles_all`.
_, axis = plt.subplots(1, 1, figsize=(7, 5))

for _ix, _method in enumerate(pctiles_all):
    # Uncorrected results are drawn in black; every correction method takes
    # the colour-cycle entry matching its position in the dictionary.
    _color = "k" if _method == "unc" else f"C{_ix}"
    axis.plot(
        scales - 1.0,
        pctiles_all[_method][2],  # row 2 is the (100 - qrt)th percentile
        ".-",
        color=_color,
        lw=2,
        label=f"{100 - qrt}%: {_method}",
    )

# Axes decoration is independent of the method, so it is applied once after
# the loop instead of stacking identical lines and labels on every iteration.
axis.axhline(ALPHA, color="#777977", ls="--")
# The label sits just above the threshold line; its x position is a plain
# data coordinate and is deliberately not tied to ALPHA.
axis.text(0.05, ALPHA + 0.01, f"{ALPHA:.0%}")
axis.set_xlabel("Std Dev difference")
axis.set_ylabel("p-value")
axis.set_title(f"K-S Test ensemble for {ninst} samples, {n_1} ensemble size")
axis.grid(ls="--")

axis.legend()
plt.tight_layout()

# Saved under its own name so it does not clash with the other figures of
# this notebook that are written to "ks_explore.png".
plt.savefig("ks_explore_corrected.png")

In [None]:
# Number of repetitions whose (adjusted) p-value falls below ALPHA, per scale
# and per correction method.
_, axis = plt.subplots(1, 1, figsize=(9, 6))
rejected = {_method: (pvals_all[_method] < ALPHA).sum(axis=0) for _method in pvals_all}

for _ix, _method in enumerate(pvals_all):
    # Uncorrected results: opaque black.  Corrected results: semi-transparent,
    # coloured by their position in the dictionary.
    if _method == "unc":
        _color, ls_alpha = "k", 1
    else:
        _color, ls_alpha = f"C{_ix}", 0.5

    axis.plot(
        scales - 1.0,
        rejected[_method],
        "o-",
        color=_color,
        lw=4,
        label=f"{_method}",
        alpha=ls_alpha,
    )

# Reference line at ALPHA times the number of repetitions.  Every entry of
# `pvals_all` has the same number of rows, so the line is drawn once here
# rather than once per method inside the loop.
_n_rejected_ref = ALPHA * pvals_all["unc"].shape[0]
axis.axhline(_n_rejected_ref, color="#777977", ls="--")
# The y axis is in counts, so the label is anchored on the reference line
# itself (it used to be placed at y = 0.06, far away from the line).
axis.text(0.05, _n_rejected_ref, f"{ALPHA:.0%}", va="bottom")

axis.set_xlabel("Std Dev difference")
axis.set_ylabel(f"Rejected instances (P < {ALPHA})")
axis.set_title(f"K-S Test ensemble for {ninst} samples, {n_1} ensemble size")
axis.grid(ls="--")

axis.legend()
plt.tight_layout()
plt.savefig("ks_explore.png")

In [None]:
# Histograms of the element-wise difference rvs1 - rvs2, one panel per row.
# `rvs1` / `rvs2` are whatever the simulation loop assigned last.
#
# The grid is sized from the number of rows (four panels per line) instead of
# a fixed 5 x 4 layout, so a different number of rows neither raises an
# IndexError nor leaves blank axes behind.  With 20 rows the layout is the
# same 5 x 4 grid as before.
_n_panels = rvs1.shape[0]
_n_cols = 4
_n_rows = max(1, -(-_n_panels // _n_cols))  # ceiling division, at least one row

fig, axes = plt.subplots(_n_rows, _n_cols, figsize=(16, 9), squeeze=False)
axes = axes.flatten()

for idx in range(_n_panels):
    # Fixed bins and axis limits keep the panels directly comparable; values
    # outside the bin range are not counted by `hist`.
    axes[idx].hist(rvs1[idx] - rvs2[idx], bins=np.arange(-10, 11, 0.25), edgecolor="k")
    axes[idx].set_xlim([-12, 12])
    axes[idx].set_ylim([0, 6])

# Hide panels of the last grid line that have no data.
for _spare_axis in axes[_n_panels:]:
    _spare_axis.set_visible(False)