In [11]:
import scipy.stats as ss
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

##### 1 task

In [107]:
def sample_size(p, rel_tol) -> int:
    n = 0
    error = rel_tol + 1
    while error > rel_tol:
        n += 1
        sigma_phi = np.sqrt((1-p)/(n*p))
        error = sigma_phi
    return n


p = 0.025
rel_tol = 0.05

n = sample_size(p, rel_tol)
print(n)

15600


##### 2 task

In [108]:
def sample_size(gamma, alpha, delta):
    error = delta + 1
    n = 1
    while error >= delta:
        z_quantile = ss.norm.ppf(1 - alpha)
        phi_z_quantile = ss.norm.pdf(z_quantile)
        error = (gamma/(6*np.sqrt(n)))*(2*z_quantile**2+1)*phi_z_quantile
        n+=1
    return n-1

result = sample_size(2, 0.025, 0.025 * 0.1)
print(result)

4579


##### 3 task

In [154]:
def pivot_interval(n, k, gamma):
    sample_mean = k/n
    z = ss.norm.ppf((1-gamma)/2)
    a = z**2 + n
    b = (2*n*sample_mean + z**2)
    c = n*sample_mean**2
    p_R = -(-b - np.sqrt(b**2-4*a*c))/(2*a)
    p_L = -(-b + np.sqrt(b**2-4*a*c))/(2*a)
    return p_L, p_R


n = 20
k = 3
gamma = 0.95

p_alpha, p_beta = pivot_interval(n, k, gamma)
print("Доверительный интервал для p:", (p_alpha, p_beta))

Доверительный интервал для p: (0.052368745896216595, 0.36041886474075696)


##### 4 task

In [155]:
def var_interval(sample, gamma):
    n = len(sample)
    alpha = 1 - gamma
    g_1 = ss.chi2.ppf(alpha / 2, n - 1)
    g_2 = ss.chi2.ppf(1 - alpha / 2, n - 1)
    sample_variance = np.var(sample, ddof=1)
    lower_bound = (n - 1) * sample_variance / g_2
    upper_bound = (n - 1) * sample_variance / g_1
    return (lower_bound, upper_bound)


print(var_interval([0, 1], 0.95))

(0.09952454760270645, 509.12913485983375)


##### 5 task

In [207]:
def loc_interval(sample, gamma, number_of_trials):
    # MONTE-CARLO for quantiles
    n = len(sample)
    loc = 1
    scale = 5
    sample_monte_carlo = ss.cauchy(loc=loc, scale=scale).rvs((number_of_trials, n))
    med = np.median(sample_monte_carlo, axis=1)
    MAD = np.median(abs(sample_monte_carlo - np.resize(med, (number_of_trials, n))), axis=1)
    g = (med - loc)/MAD
    alpha = 1 - gamma
    quantile_1 = np.quantile(g, alpha/2)
    quantile_2 = np.quantile(g, 1 - alpha/2)
    # CONFIDENCE INTERVAL
    med_loc = np.median(sample)
    MAD_loc = np.median(abs(sample - np.resize(med_loc, (1, n))))
    loc_2 = med_loc - MAD_loc*quantile_1
    loc_1 = med_loc - MAD_loc*quantile_2
    return loc_1, loc_2


sample = [-0.93, -1.84, -0.84, -0.13, -0.63, 0.06, -0.93, 13.29, 0.9, -2.64,
          -0.37, 0.43, -2.41, 19.33, -0.18, 1.29, 1.32, -0.47, -0.27, 0.27,
          1.07, -1.49, -0.78, 0.59, -0.0, -1.59, -0.28, -1.38, 0.1, 1.72]
print(loc_interval(sample, 0.95, 10000))

(-0.6050185968267746, 0.15172480236498656)


##### 6 task

In [228]:
def mixture(n, eps):
# Задаем параметры компонентов смеси
    mu1, sigma1 = 0, 1
    mu2, sigma2 = 0, 5

    # Генерируем выборку смешанной смеси
    component_choice = np.random.choice([0, 1], size=n, p=[1-eps, eps])
    sample = np.empty(n)
    for i, component in enumerate(component_choice):
        if component == 0:
            sample[i] = np.random.normal(mu1, sigma1)
        else:
            sample[i] = np.random.normal(mu2, sigma2)
    
    return sample

def ratio(eps):
    sigma_1 = 1
    sigma_2 = 5
    sigma = np.sqrt(1-(4/5)*eps)
    # sigma = np.sqrt((1-eps)*sigma_1**2 + eps*sigma_2**2)
    # sample = mixture(10000, eps)
    ratio = sigma
    return c_d_mean/c_d_med

ratio(0)

# from scipy.optimize import root_scalar
# print(root_scalar(lambda x: ratio(x) - 1, bracket=[0, 0.5]).root)

NameError: name 'c_d_mean' is not defined

##### 7 task

In [272]:
def corr_interval(sample, gamma):
    n = len(sample)
    x, y = zip(*sample)
    rho = np.corrcoef(x, y)[1, 0]
    z_conf = ss.norm.ppf(1 - (1 - gamma) / 2)
    ci_left = np.tanh(np.arctanh(rho) - z_conf/np.sqrt(n))
    ci_right = np.tanh(np.arctanh(rho) + z_conf/np.sqrt(n))
    return [ci_left, ci_right]


data = np.array([(576, 3.39), (635, 3.30), (558, 2.81), (578, 3.03), (666, 3.44),
                 (580, 3.07), (555, 3.0), (661, 3.43), (651, 3.36), (605, 3.13),
                 (653, 3.12), (575, 2.74), (545, 2.76), (572, 2.88), (594, 2.96)])

print(corr_interval(data, 0.95))

[0.4854712460301556, 0.9124958350575495]


##### 8 task

In [277]:
# def corr_coeff(sample):
#     corr_coeff_arr = []
#     for i in range(len(sample)):
#         x, y = zip(*sample[i])
#         corr_coeff_arr.append(np.corrcoef(x,y)[0,1])
#     return corr_coeff_arr

def corr_interval(sample, gamma):
    n = len(sample)
    x, y = zip(*sample)
    rho = np.corrcoef(x, y)[1, 0]
    z_conf = ss.norm.ppf(1 - (1 - gamma) / 2)
    ci_left = np.tanh(np.arctanh(rho) - z_conf/np.sqrt(n))
    ci_right = np.tanh(np.arctanh(rho) + z_conf/np.sqrt(n))
    return [ci_left, ci_right]

def true_proba(n, rho, gamma, number_of_trials):
    alpha = np.sqrt(rho / (1 - rho))
    z = ss.expon.rvs(size=(number_of_trials, n))
    x = ss.expon.rvs(size=(number_of_trials, n)) + alpha * z
    y = ss.expon.rvs(size=(number_of_trials, n)) + alpha * z
    samples = np.stack([x, y], axis=-1)
    corr_interval_arr = [corr_interval(sample, gamma) for sample in samples]
    left, right = zip(*corr_interval_arr)
    left = np.array(left)
    right = np.array(right)
    prob_left = np.sum(rho > right)/number_of_trials
    prob_right = np.sum(rho < left)/number_of_trials
    return prob_right, prob_left

print(true_proba(30, 0.6, 0.95, 10000))

(0.1034, 0.0775)


##### 9 task

In [278]:
import numpy as np


def efron_true_proba(n, rho, gamma, number_of_trials, n_of_resamples):
    alpha = np.sqrt(rho / (1 - rho))
    z = ss.expon.rvs(size=(number_of_trials, n))
    x = ss.expon.rvs(size=(number_of_trials, n)) + alpha * z
    y = ss.expon.rvs(size=(number_of_trials, n)) + alpha * z
    samples = np.stack([x, y], axis=-1)
    
    return efron_left, efron_right


print(efron_true_proba(30, 0.6, 0.95, 10000, 1000))

ValueError: a must be 1-dimensional