## Доверительные интервалы

In [39]:
import numpy as np

In [1]:
from scipy import stats

####  1

In [10]:
# Two-sided standard normal quantile z_{1 - alpha/2} for a 99.7% confidence
# level (alpha = 1 - 99.7/100), rounded to 4 decimal places.
round(stats.norm.ppf(1 - (1 - 99.7/100) / 2), 4)

2.9677

####  5

In [110]:
# Event counts and group sizes. The suffixes presumably denote the placebo
# ("plaz") and aspirin ("asp") groups -- confirm against the task statement.
inf_plaz = 189
total_plaz = 11034
inf_asp = 104
total_asp = 11037

# Observed event proportion in each group (float() forces true division).
p1 = inf_plaz / float(total_plaz)
p2 = inf_asp / float(total_asp)

# Difference between the two proportions, rounded to 4 decimal places.
round(p1 - p2, 4)

0.0077

####  6

#### Доверительный интервал для разности долей (независимые выборки)

|        | $X_1$ | $X_2$ |
| ------ | ----- | ----- |
| 1      | a     | b     |
| 0      | c     | d     |
| $\sum$ | $n_1$ | $n_2$ |
  
$$ \hat{p}_1 = \frac{a}{n_1}$$

$$ \hat{p}_2 = \frac{b}{n_2}$$


$$\text{Доверительный интервал для }p_1 - p_2\colon \;\; \hat{p}_1 - \hat{p}_2 \pm z_{1-\frac{\alpha}{2}}\sqrt{\frac{\hat{p}_1(1 - \hat{p}_1)}{n_1} + \frac{\hat{p}_2(1 - \hat{p}_2)}{n_2}}$$

In [103]:
def proportions_confint_diff_ind(inf_plaz, total_plaz, inf_asp, total_asp, alpha=0.05):
    """Confidence interval for the difference of two proportions (independent samples).

    Uses the normal approximation
    ``p1 - p2 +/- z * sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2)``
    where ``z`` is the ``1 - alpha / 2`` quantile of the standard normal.

    Args:
        inf_plaz: Number of events in the first sample.
        total_plaz: Size of the first sample.
        inf_asp: Number of events in the second sample.
        total_asp: Size of the second sample.
        alpha: Significance level; the interval has nominal level ``1 - alpha``.

    Returns:
        Tuple ``(left_boundary, right_boundary)`` for ``p1 - p2``.
    """
    quantile = stats.norm.ppf(1 - alpha / 2.)
    share_first = float(inf_plaz) / total_plaz
    share_second = float(inf_asp) / total_asp

    # Standard error of the difference; shared by both interval ends.
    std_err = np.sqrt(
        share_first * (1 - share_first) / total_plaz
        + share_second * (1 - share_second) / total_asp
    )
    half_width = quantile * std_err
    centre = share_first - share_second

    return (centre - half_width, centre + half_width)

In [111]:
# 95% confidence interval (alpha = 0.05) for the difference between the two
# group proportions; the bare name on the last line displays it in the notebook.
interval = proportions_confint_diff_ind(
    inf_plaz, total_plaz,
    inf_asp, total_asp,
    alpha=0.05
)
interval

(0.0046877506750494392, 0.010724297276960124)

In [112]:
# Upper (right) boundary of the confidence interval, rounded to 4 decimal places.
round(interval[1], 4)

0.0107

####  7

In [20]:
def odds(p):
    """Return the odds ``p / (1 - p)`` that correspond to probability ``p``."""
    complement = 1 - p
    return p / complement

In [23]:
# Odds ratio: odds at proportion 189/11034 divided by odds at proportion
# 104/11037, rounded to 4 decimal places.
round(odds(189. / 11034) / odds(104. / 11037), 4)

1.8321

####  8

In [24]:
def get_bootstrap_samples(data, n_samples):
    """Draw bootstrap resamples (sampling with replacement) from ``data``.

    Args:
        data: One-dimensional array-like of observations. Lists and tuples
            are accepted and converted to a NumPy array.
        n_samples: Number of resamples to draw.

    Returns:
        Array of shape ``(n_samples, len(data))``; each row is one resample
        of the same size as ``data``.

    Note:
        Uses the global ``np.random`` state, so call ``np.random.seed``
        beforehand for reproducible results.
    """
    # Integer-array (fancy) indexing only works on arrays; a plain list would
    # raise TypeError. asanyarray leaves existing ndarrays/subclasses untouched.
    data = np.asanyarray(data)
    size = len(data)
    indices = np.random.randint(0, size, (n_samples, size))
    return data[indices]

In [87]:
def stat_intervals(stat, alpha):
    """Return the two-sided percentile interval of ``stat`` at level ``1 - alpha``.

    Args:
        stat: Array-like of statistic values (e.g. bootstrap estimates).
        alpha: Significance level; ``alpha / 2`` of the mass is cut from each tail.

    Returns:
        NumPy array ``[lower, upper]`` holding the ``100 * alpha / 2`` and
        ``100 * (1 - alpha / 2)`` percentiles of ``stat``.
    """
    lower_pct = 100 * alpha / 2.
    upper_pct = 100 * (1 - alpha / 2.)
    return np.percentile(stat, [lower_pct, upper_pct])

In [74]:
# Binary outcome vector: 104 ones followed by 11037 - 104 zeros.
# Built from plain lists because np.array(map(...)) wraps the lazy map object
# in a 0-d object array on Python 3 instead of producing a numeric vector.
data_asp_kontr = np.array([1] * 104 + [0] * (11037 - 104))

In [75]:
# Binary outcome vector: 189 ones followed by 11034 - 189 zeros.
# Built from plain lists because np.array(map(...)) wraps the lazy map object
# in a 0-d object array on Python 3 instead of producing a numeric vector.
data_plaz_test = np.array([1] * 189 + [0] * (11034 - 189))

In [76]:
# Fix the global NumPy RNG seed so the resampling below is reproducible.
np.random.seed(0)

# 1000 bootstrap resamples per group. The call order matters: both draws
# consume the same seeded global random stream.
bootstrap_kontr = get_bootstrap_samples(data_asp_kontr, 1000)
bootstrap_test = get_bootstrap_samples(data_plaz_test, 1000)

In [83]:
# Odds of the event in every bootstrap resample; each row mean of the 0/1
# matrix is that resample's event proportion.
# List comprehensions replace map(): on Python 3 map() returns a one-shot
# iterator, which cannot be reused or measured with len().
odds_kontr = [odds(p) for p in bootstrap_kontr.mean(axis=1)]
odds_test = [odds(p) for p in bootstrap_test.mean(axis=1)]

In [84]:
# Odds ratio (test odds / control odds) for each pair of bootstrap resamples.
# A list comprehension keeps the result a real list on Python 3, where map()
# would return an iterator that breaks the later len() and repeated use.
odds_scores = [test_odds / kontr_odds
               for kontr_odds, test_odds in zip(odds_kontr, odds_test)]

In [86]:
# Sanity check: number of odds-ratio values computed from the bootstrap resamples.
len(odds_scores)

1000

In [88]:
# 95% percentile interval (alpha = 0.05) over the bootstrap odds ratios.
stat_intervals(odds_scores, 0.05)

array([ 1.44419465,  2.34321168])

In [89]:
# Lower bound of the 95% bootstrap percentile interval for the odds ratio,
# rounded to 4 decimal places.
round(stat_intervals(odds_scores, 0.05)[0], 4)

1.4442