In [3]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import math

In [9]:
# F should be numpy-compatible
# todo: implement iterative version
def crit_stat(F, sorted_X):
    n = sorted_X.shape[0]
    k = np.arange(1, n+1) / n
    invn = 1/n
    D = np.max([ F(sorted_X)-k-invn , k-F(sorted_X) ])
    return D


# F should be numpy-compatible
def verify_kolmohorov(F, X, gamma=0.05):
    # 'Limiting distribution of scaled Kolmogorov-Smirnov two-sided test statistic'
    z = sp.stats.kstwobign().ppf(1-gamma)  
    oX = X.sort()  # ordered X
    D = crit_stat(F, X)
    val = np.sqrt(X.shape[0]) * D
    return (val < z, val, z)


# F should be numpy-compatible
def verify_chi2(F, X, r=None, gamma=0.05):
    n = X.shape[0]
    r = r if r is not None else 3 * n / 100
    z = sp.stats.chi2.ppf(1-gamma, r-1)

    Y = F(X) * r
    v = np.zeros(r)
    for y in Y: v[y] += 1

    d = np.square(v)*r/n - n
    return (d < z, d, z)

def get_emptybox_crit_val(n,r,gamma=0.05):
    def calc_q(k):
        j = np.arange(0, r-k+1)
        return math.comb(r,k) * np.sum((1-2*(j%2)) * sp.special.comb(r-k,j) * np.float_power(1 - (k+j)/r, n))
    
    left = r
    right = 0
    mid = int((left-right)/2)
    qs = np.arange(r, -1, -1)
    for i,q in enumerate(qs):
        qs[i] = calc_q(qs[i])
    qs = np.cumsum(qs)
    
    min_k = r
    for v in qs:
        if v > gamma: break
        min_k -= 1
    return min_k


def verify_emptybox(F, X, r=None, gamma=0.05):
    n = X.shape[0]
    r = r if r is not None else 3 * n / 100
    z = sp.stats.chi2.ppf(1-gamma, r-1)

    Y = F(X) * r
    v = np.zeros(r)
    for y in Y: v[y] += 1

    d = np.square(v)*r/n - n
    return (d < z, d, z)


In [11]:
get_emptybox_crit_val(100,50)

-1