In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.stats import t as stud
from scipy.stats import chi2
from scipy.stats import kstwobign as kolm
from math import sqrt, log, exp
import scipy.integrate as integrate
%matplotlib inline

# Завдання 1. 

Перевірка гіпотези про вигляд розподілу критерієм $\chi^2$.

In [53]:
def chi2_criteria(n,r):
    boxes_sel = np.random.normal(size = r - 1, loc = 0, scale = 1)
    sel = np.random.normal(size = n, loc = 0, scale = 1)
    sel_min = np.amin(sel)
    sel_max = np.amax(sel)
    sel_sorted = np.sort(sel)
    while (sel_sorted[4] > np.amin(boxes_sel) and sel_sorted[n-5] < np.amax(boxes_sel)):
        sel = np.random.normal(size = n, loc = 0, scale = 1)
        boxes_sel = np.random.normal(size = r - 1, loc = 0, scale = 1)
    bins = np.array(sel_min - 0.0001) + boxes_sel + np.array(sel_max + 0.00001)
    print(bins)
    inds = np.digitize(sel, bins)
    unique = np.arange(r) + 1
    frequencies = dict(((i,0) for i in unique))
    for i in inds:
        frequencies[i] += 1
    probs = np.array([])
    frequencies_list = np.fromiter(frequencies.values(), dtype=float)
    for i in range(len(bins) - 1):
        probs = np.append(probs, norm.cdf(bins[i + 1], loc = 0, scale = 1) - norm.cdf(bins[i], loc = 0, scale = 1))
    delta = np.sum(np.divide((frequencies_list - n*probs)**2, n*probs))
    gamma = 0.05
    z_gamma = chi2.ppf(1 - 0.5*gamma, df = r-1)
    print('Delta = {0}, z_gamma = {1}'.format(delta, z_gamma))
    if(delta > z_gamma):
        return False
    else:
        return True

При $n=100, r=20$:

In [54]:
chi2_criteria(120, 10)

-1.5705422029238507
-2.1057874358788546
2.484491229854187
1.3990940416129518


ValueError: bins must be monotonically increasing or decreasing

При $n=200, r=30$:

In [4]:
chi2_criteria(200, 30)

Delta = 24.32014115263864, z_gamma = 45.7222858041745


True

При $n=300, r=40$:

In [5]:
chi2_criteria(300, 40)

Delta = 44.92069991947452, z_gamma = 58.12005973468631


True

# Завдання 2.

Перевірка гіпотези про вигляд розподілу критерієм пустих ящиків.


In [2]:
def empty_boxes_criteria(n, r):
    sel = np.random.uniform(low = 0.0, high = 1.0, size = n)
    bins = np.linspace(0.0, 1.0, r + 1)
    inds = np.digitize(sel, bins)
    unique = np.arange(r) + 1
    frequencies = dict(((i,0) for i in unique))
    for i in inds:
        frequencies[i] += 1
    probs = np.array([])
    empt_boxes_count = 0
    for i in frequencies:
        if (frequencies[i] == 0):
            empt_boxes_count += 1
    gamma = 0.05
    z_gamma = norm.ppf(1-0.5*gamma, loc = 0, scale=1)
    ro = n/r
    k_star = r*exp(-ro) + z_gamma*np.sqrt(r*exp(-ro)*(1 - (1 + ro)*exp(-ro)))
    print('k = {0}, k* = {1}'.format(empt_boxes_count, k_star))
    if (empt_boxes_count >= k_star):
        return False
    else:
        return True

При $n=40, r=20$:

In [5]:
empty_boxes_criteria(40, 20)

k = 1, k* = 5.191896200055833


True

При $n=400, r=200$:

In [7]:
empty_boxes_criteria(400, 200)

k = 29, k* = 34.92591915843819


True

При $n=4000, r=2000$:

In [10]:
empty_boxes_criteria(4000, 2000)

k = 232, k* = 295.5224718264612


True

# Завдання 3.

Перевірка гіпотези однорідності критерієм Смірнова.

In [13]:
def imp_df(sel, u):
    return np.mean(list(map(lambda x: x <= u, sel)))
    

In [14]:
def smirnov_criteria(n, m):
    sel_1 = np.random.exponential(size=n)
    sel_2 = np.random.exponential(size=m)
    sel_2 = np.sort(sel_2)
    D_plus = 0
    for k in range(m):
        temp = k/m-imp_df(sel_1, sel_2[k])
        if (temp >= D_plus):
            D_plus = temp
    D_minus = 0
    for k in range(m):
        temp = imp_df(sel_1, sel_2[k]) - (k-1)/m
        if (temp >= D_minus):
            D_minus = temp
    D = max(D_plus, D_minus)
    gamma = 0.05
    z_gamma = kolm.ppf(1-0.5*gamma)
    print("D = {0}, z_gamma = {1}".format(D,z_gamma))
    if (D > z_gamma):
        return False
    else:
        return True

При $n=10, m=20$:

In [50]:
smirnov_criteria(10,20)

D = 0.29999999999999993, z_gamma = 1.4802068574249225


True

При $n=100, m=200$:

In [51]:
smirnov_criteria(100,200)

D = 0.09500000000000008, z_gamma = 1.4802068574249225


True

При $n=500, m=600$:

In [52]:
smirnov_criteria(500,600)

D = 0.063, z_gamma = 1.4802068574249225


True