### 1.1 
Сгенерировать выборку X объемом 500 величин, подчиняющихся распределениям Гаусса с параметрами 3, 5 (N(3; 5))
Проверить критерий отсутствия "сдвига дисперсии"

In [111]:
import numpy as np
import scipy.stats as sps

def build_gauss(mean=3, var=5, size=500):
    return np.random.normal(loc=mean, scale=np.sqrt(var), size=size)

def build_cauchy(loc=0, scale=1, size=500):
    return sps.cauchy(loc=loc, scale=scale).rvs(size=size)

In [112]:
def hsu_criterion(x):
    median = np.median(x)
    n = len(x)
    stats = np.sum([(i-1)*(x[i] - median) ** 2 for i in range(n)]) \
            / ((n - 1) * np.sum((x - median) ** 2))
    
    stats = (stats - 0.5) * np.sqrt(6 * (n - 1) * (n + 2) / (n + 1))

    p_value = 2 * (1 - sps.norm.cdf(np.abs(stats)))

    q = sps.norm.ppf(0.025)
    
    if stats < q or stats > -q:
        print(f"Decline H0, pvalue = {p_value}, q = {q}, stats = {stats}")
    
    else:
        print(f"Accept H0, pvalue = {p_value}, q = {q}, stats = {stats}")
    

In [113]:
print("1.1")
hsu_criterion(build_gauss())

1.1
Accept H0, pvalue = 0.4746761912084909, q = -1.9599639845400545, stats = -0.7148913357248162


### 1.2 
Половина выборки из пункта 1.1, вторая половина:
А) Гаусс 3,10
Б) Гаусс 3,6
Тот же критерий

In [114]:
X_a = np.concatenate([build_gauss(size=250), build_gauss(var=10, size=250)])
X_b = np.concatenate([build_gauss(size=250), build_gauss(var=6, size=250)])

print("1.2 A)")
hsu_criterion(X_a)

print("1.2 Б)")
hsu_criterion(X_b)

1.2 A)
Decline H0, pvalue = 5.8651408174625885e-09, q = -1.9599639845400545, stats = 5.820558216822116
1.2 Б)
Accept H0, pvalue = 0.0775780436500475, q = -1.9599639845400545, stats = 1.7649150515705525


### 2.1 
Cтандартная гауссовская (0,1) выборка объемом 195 + 5 выбросов (-4,-3,2.99,3.3,5) 
Проверить правило 3 сигм 

In [115]:
def three_sigma_rule(x):
    outliers = []
    n = len(x)
    dev = np.std(x, ddof=n - 1)

    for elem in x:
        if elem < -3 or elem > 3:
            outliers.append(elem)

    return outliers

 

In [116]:
x_0 = np.concatenate([build_gauss(mean=0, var=1, size = 195), [-4,-3,2.99,3.3,5]])
out = three_sigma_rule(x_0)

print("2.1")
print("Outliers:", ', '.join([str(elem) for elem in out])) if out else print("No outliers were caught")



2.1
Outliers: -3.345535888451954, -4.0, 3.3, 5.0


### 2.2 
То же самое, только проверить боксплот тьюки

In [117]:
def boxplot_tuky(x):
    lq = np.quantile(x, 0.25)
    uq = np.quantile(x, 0.75)

    iqr = uq - lq
    xl = lq - 1.5 * iqr
    xu = uq + 1.5 * iqr

    outliers = []
    for elem in x:
        if elem < xl or elem > xu:
            outliers.append(elem)
            
    return outliers

In [118]:
out = boxplot_tuky(x_0)

print("2.2")
print("Outliers:", ', '.join([str(elem) for elem in out])) if out else print("No outliers were caught")

2.2
Outliers: -3.345535888451954, -4.0, -3.0, 2.99, 3.3, 5.0


### 3. Робастные статистики 
Вычислить среднее, медиану, коэффициент  Хубера, двухэтапную оценку (убрать выбросы и вычислить среднее) для следующих выборок объема 500

3.1 стандартный Гаусс (0,1)
3.2 Коши
3.3 стандартный Гаусс (0,1) + Коши * 0.1

In [119]:
def two_stage_value(x, outliers, type):

    mean = np.mean(x)
    median = np.median(x)
    new_x = []
    for elem in x:
        if elem not in outliers:
            new_x.append(elem)
    mean_wout_outliers = np.mean(new_x)
    x = [np.sign(elem) if elem >= 2.5 or elem <= -2.5 else elem for elem in x]
    huber = np.mean(x)#np.mean(x[x < 2.5]) + np.median(x[x >= 2.5])
    
    print(type)
    print("Mean:", mean)
    print("Median:", median)
    print("Huber", huber)
    print("Mean wout outliers", mean_wout_outliers)
    

In [120]:
print("3.1")
x = build_gauss(mean=0, var=1, size=500)
out = boxplot_tuky(x)
new_x = []
two_stage_value(x, out, "N(0,1)")

print("3.2")
x_cauchy = build_cauchy()
out = boxplot_tuky(x_cauchy)
two_stage_value(x_cauchy, out, "Cauchy")

print("3.3")
out = boxplot_tuky(x + 0.1 * x_cauchy)
two_stage_value(x + 0.1 * x_cauchy, out, "N(0,1) + Cauchy * 0.1")

3.1
N(0,1)
Mean: -0.023611608518862154
Median: -0.0012255891905428198
Huber -0.019059153277479424
Mean wout outliers -0.017231467957049922
3.2
Cauchy
Mean: -9.622754080697232
Median: -0.05710874678487949
Huber -0.02033741407174154
Mean wout outliers -0.061983755766708816
3.3
N(0,1) + Cauchy * 0.1
Mean: -0.9858870165885852
Median: -0.06472012220176879
Huber -0.04744151281185056
Mean wout outliers -0.042164677113336546
