# Chi-Square Fit Test (Discrete)

In [5]:
import random
from scipy import stats

In [6]:
def cs_compute_t(n_i_list, h0):
    """Compute the chi-square test statistic T from observed counts.

    Args:
        n_i_list: sequence of ``(value, observed_count)`` pairs.
        h0: callable returning the hypothesised probability of a value.

    Returns:
        The sum over all pairs of ``(observed - expected)**2 / expected``,
        where ``expected`` is the total observed count times ``h0(value)``.
    """
    # total number of observations across all values
    total = sum(pair[1] for pair in n_i_list)

    statistic = 0
    for value, observed in n_i_list:
        expected = total * h0(value)
        statistic += ((observed - expected) ** 2) / expected

    return statistic

In [7]:
def cs_compute_pvalue(t, k):
    """Return the chi-square p-value P(X >= t) for X ~ chi2(k - 1).

    Args:
        t: observed test statistic.
        k: number of categories; the distribution uses ``k - 1`` degrees of
            freedom. When parameters were estimated from the data, callers
            pass ``k - m`` here (``m`` = number of estimated parameters).

    Returns:
        The upper-tail probability of the chi-square distribution at ``t``.
    """
    # Use the survival function rather than ``1 - cdf``: the subtraction
    # cancels catastrophically in the far tail and yields exactly 0.0.
    return stats.chi2.sf(t, k - 1)

In [8]:
def cs_compute_pvalue_simulation(t, h0, valid_values, iterations=1000):
    """Estimate the chi-square p-value by simulation.

    For each iteration a sample set is obtained from
    ``generate_descrete_samples(h0)``, the occurrences of every entry of
    ``valid_values`` are counted, and the statistic of that simulated set is
    compared with ``t``.

    NOTE(review): ``generate_descrete_samples`` is not defined in the visible
    part of this file; it is presumably expected to draw samples under ``h0``
    and to return an object with a ``count`` method -- confirm.

    Args:
        t: observed test statistic.
        h0: hypothesised probability function, passed to the sample
            generator and to ``cs_compute_t``.
        valid_values: the values whose occurrences are counted.
        iterations: number of simulated sample sets.

    Returns:
        The fraction of iterations whose simulated statistic is >= ``t``.
    """
    exceed_count = 0

    for _ in range(iterations):
        samples = generate_descrete_samples(h0)
        # (value, number of occurrences) pairs for this simulated sample set
        counts = [(value, samples.count(value)) for value in valid_values]
        if cs_compute_t(counts, h0) >= t:
            exceed_count += 1

    return exceed_count / iterations
        

In [9]:
def cs_is_hypothesis_accepted(pvalue, threshold=0.05):
    """Tell whether the hypothesis is accepted for the given p-value.

    Args:
        pvalue: p-value of the test.
        threshold: significance level; defaults to 0.05.

    Returns:
        The result of ``pvalue > threshold`` (a p-value equal to the
        threshold is not accepted).
    """
    accepted = pvalue > threshold
    return accepted

### Chi-Square Example

In [10]:
def cs_example():
    """Run the chi-square test on a fixed data set and print the verdict.

    The hypothesis assigns probability 0.2 to each of the five values.
    """
    def h0_function(i):
        # every value has the same hypothesised probability
        return 0.2

    # (value, count) pairs: e.g. (1, 12) means value 1 occurred 12 times
    n_i_list = [(1, 12), (2, 5), (3, 19), (4, 7), (5, 7)]

    t = cs_compute_t(n_i_list, h0_function)
    pvalue = cs_compute_pvalue(t, len(n_i_list))

    if not cs_is_hypothesis_accepted(pvalue):
        print(f'hypothesis was rejected with pvalue: {pvalue} and t: {t}')
        return

    print(f'hypothesis was accepted with pvalue: {pvalue} and t: {t}')

# Run the chi-square example; it prints whether the hypothesis was accepted.
cs_example()

hypothesis was rejected with pvalue: 0.012295523821487131 and t: 12.8


___

# Kolmogorov-Smirnov Fit Test (Continuous)

In [11]:
import math
import random

In [12]:
def ks_compute_d(sim_samples, h0):
    """Compute the Kolmogorov-Smirnov statistic D for a sample set.

    Args:
        sim_samples: iterable of sample values. It is not modified; a sorted
            copy is used internally.
        h0: callable giving the hypothesised CDF value at a sample.

    Returns:
        The maximum, over the sorted samples ``y[1..n]``, of
        ``j/n - h0(y[j])`` and ``h0(y[j]) - (j-1)/n``.

    Raises:
        ValueError: if ``sim_samples`` is empty.
    """
    # 1. sort a copy so the caller's data is left untouched
    y_list = sorted(sim_samples)
    n = len(y_list)
    if n == 0:
        raise ValueError('ks_compute_d() requires at least one sample')

    # evaluate the hypothesised CDF once per sample
    h0_values = [h0(y) for y in y_list]

    # 2. largest (j/n) - H0(y[j])
    max_f_fe = max((j / n) - f for j, f in enumerate(h0_values, start=1))

    # 3. largest H0(y[j]) - ((j-1)/n)
    max_fe_f = max(f - ((j - 1) / n) for j, f in enumerate(h0_values, start=1))

    # 4. d is the larger of the two
    return max(max_f_fe, max_fe_f)

In [13]:
def ks_compute_pvalue(d, n, iterations=1000):
    """Estimate the Kolmogorov-Smirnov p-value P(D >= d) by simulation.

    Each iteration draws ``n`` uniform(0, 1) random numbers, computes the
    statistic D of that simulated sample, and checks whether it reaches the
    observed ``d``.

    Args:
        d: observed statistic.
        n: sample size used for each simulated data set; must be >= 1.
        iterations: number of simulated data sets; must be positive.

    Returns:
        The fraction of iterations whose simulated D is >= ``d``; always in
        the range [0, 1].

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be at least 1')

    hits = 0

    for _ in range(iterations):
        # generate n uniform random variables, in ascending order
        u_list = sorted(random.random() for _ in range(n))

        # D of this simulated sample: the largest deviation in either direction
        sim_d = max(
            max((j / n) - u, u - ((j - 1) / n))
            for j, u in enumerate(u_list, start=1)
        )

        # count the iteration once, not once per deviation that exceeds d;
        # otherwise the estimate is inflated and can exceed 1
        if sim_d >= d:
            hits += 1

    return hits / iterations

In [14]:
def ks_is_hypothesis_accepted(pvalue, threshold=0.05):
    """Tell whether the hypothesis is accepted for the given p-value.

    Args:
        pvalue: p-value of the test.
        threshold: significance level; defaults to 0.05.

    Returns:
        The result of ``pvalue > threshold`` (a p-value equal to the
        threshold is not accepted).
    """
    accepted = pvalue > threshold
    return accepted

### Kolmogorov-Smirnov Example

In [15]:
def ks_example():
    """Run the Kolmogorov-Smirnov test on a fixed sample and print the verdict.

    The hypothesised CDF is ``1 - exp(-x / 100)``.
    """
    def h0_function(x):
        # hypothesised CDF evaluated at x
        return (1 - math.exp((-x) / 100))

    sim_samples = [66, 72, 81, 94, 112, 116, 124, 140, 145, 155]

    d = ks_compute_d(sim_samples, h0_function)
    # 10 is the number of samples above
    pvalue = ks_compute_pvalue(d, 10)

    if not ks_is_hypothesis_accepted(pvalue):
        print(f'hypothesis was rejected with pvalue: {pvalue} and d: {d}')
        return

    print(f'hypothesis was accepted with pvalue: {pvalue} and d: {d}')

# Run the Kolmogorov-Smirnov example; it prints whether the hypothesis was accepted.
ks_example()

hypothesis was rejected with pvalue: 0.011 and d: 0.4831486655083008


___

# Unknown Parameter (Discrete)

- First, estimate the unknown parameters from the samples.
- Write the hypothesis function that returns the probability of each value i under the fitted distribution, for example poisson(i, my_parameters).
- Count the number of occurrences of each i and save the counts in n_i_list.
- Compute t using cs_compute_t.
- Compute the p-value using the chi-square cs_compute_pvalue, but pass "k-m" instead of "k" to the function, where m is the number of unknown parameters.
- If pvalue < threshold => reject, else => accept.

In [16]:
def udp_example_h0(i):
    """Hypothesised probability of category ``i`` for the example below.

    For ``i != 6`` the value is ``exp(-2.9) * 2.9**(i-1) / (i-1)!``.
    For ``i == 6`` it is the remaining probability mass, i.e. one minus the
    sum of the probabilities of categories 1 through 5.
    """
    if i == 6:
        # last category collects whatever mass categories 1..5 leave over
        return 1 - sum([udp_example_h0(x) for x in range(1, 6)])

    k = i - 1
    return (math.exp(-2.9) * (2.9) ** k) / math.factorial(k)

def upd_example():
    """Run the chi-square test with one estimated parameter and print the verdict.

    The category count passed to ``cs_compute_pvalue`` is reduced by the
    number of unknown parameters.
    """
    unknown_params = 1  # number of unknown parameters
    # (category, observed count) pairs
    n_i_list = [(1, 6), (2, 2), (3, 1), (4, 9), (5, 7), (6, 5)]

    t = cs_compute_t(n_i_list, udp_example_h0)
    pvalue = cs_compute_pvalue(t, len(n_i_list) - unknown_params)

    if not cs_is_hypothesis_accepted(pvalue):
        print(f'hypothesis was rejected with pvalue: {pvalue} and t: {t}')
        return

    print(f'hypothesis was accepted with pvalue: {pvalue} and t: {t}')

# Run the unknown-parameter example; it prints whether the hypothesis was accepted.
upd_example()

hypothesis was rejected with pvalue: 0.0005257103814344521 and t: 19.887012712014464
