In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Switch matplotlib to its "ggplot" style sheet for the figures drawn after this call.
plt.style.use("ggplot")

In [None]:
# Seed NumPy's legacy global RNG so that Restart & Run All reproduces the same
# p-values and the same saved figure. Both np.random.rand and scipy.stats'
# rvs (when no random_state is passed) draw from this global state.
np.random.seed(710319)
n_1 = 40  # number of draws in the first sample of each K-S test
n_2 = 40  # number of draws in the second sample of each K-S test
ninst = 100  # how many times the whole sweep over scales is repeated
n_scales = 1000  # number of standard-deviation values in the sweep

In [None]:
# Row-wise two-sample K-S tests: each call consumes two 2-D arrays and applies
# stats.mstats.ks_2samp to matching rows, returning two arrays (one value per
# row for each of the test's two outputs; the second is used as the p-value).
#
# The core dimensions are named differently ("n" and "m") so the two samples
# may have different lengths; equal lengths are still accepted.
#
# ks_test_wmethod excludes `method` from vectorization so it can be passed
# through as a plain keyword (e.g. method="exact").
ks_test_wmethod = np.vectorize(
    stats.mstats.ks_2samp, signature="(n),(m)->(),()", excluded={"method"}
)
# ks_test_nomethod is the same wrapper without the passthrough keyword; the
# underlying test then runs with its default method.
ks_test_nomethod = np.vectorize(stats.mstats.ks_2samp, signature="(n),(m)->(),()")

In [None]:
%%time

pvals_ens = []
scales = np.linspace(1, 5, n_scales)

for _ in range(ninst):
    rvs1 = np.array([stats.norm.rvs(size=n_1, loc=0, scale=1) for _ in scales])
    rvs2 = np.array([stats.norm.rvs(size=n_2, loc=0, scale=_scale + (np.random.rand(1) / 5)) for _scale in scales])
    _, p_val = ks_test_wmethod(rvs1, rvs2, method="exact")
    pvals_ens.append(p_val)

In [None]:
%%time

pvals_ens = []
scales = np.linspace(1, 5, n_scales)

for _ in range(ninst):
    rvs1 = np.array([stats.norm.rvs(size=n_1, loc=0, scale=1) for _ in scales])
    rvs2 = np.array([stats.norm.rvs(size=n_2, loc=0, scale=_scale + (np.random.rand(1) / 5)) for _scale in scales])
    _, p_val = ks_test_nomethod(rvs1, rvs2)
    pvals_ens.append(p_val)

In [None]:
# Turn the collected per-instance p-value rows into a single NumPy array so
# percentiles can be taken across instances (axis 0) in the plotting cell.
pvals_ens = np.array(pvals_ens, copy=True)

In [None]:
# Plot the lower / median / upper percentile of the p-values across instances
# against the nominal std-dev offset (scale - 1), then save the figure.
qrt = 5  # tail percentile: curves are drawn at qrt, 50 and 100 - qrt
pctiles = np.percentile(pvals_ens, [qrt, 50, 100 - qrt], axis=0)

fig, axis = plt.subplots(1, 1, figsize=(7, 5))
std_offset = scales - 1.0

# (values, colour, line width, legend label) for each percentile curve,
# listed in drawing order.
curve_specs = (
    (pctiles[0], "#2332aa", 2, f"{qrt}%"),
    (pctiles[1], "k", 3, "Median"),
    (pctiles[2], "#aa2332", 2, f"{100 - qrt}%"),
)
for curve, colour, width, tag in curve_specs:
    axis.plot(std_offset, curve, color=colour, lw=width, label=tag)

# Dashed reference line at p = 0.05, with its text label just above it.
axis.axhline(0.05, color="#777977", ls="--")
axis.text(0.05, 0.06, "5%")

axis.set_xlabel("Std Dev difference")
axis.set_ylabel("p-value")
axis.set_title(f"K-S Test ensemble for {ninst} samples, {n_1} ensemble size")
axis.grid(ls="--")
axis.legend()
fig.tight_layout()
fig.savefig("ks_explore.png")