# Week 2

In [34]:
import numpy as np
from scipy import stats
import pandas as pd
from sympy import *


## Week 2-1
### Approximation method 1
When the random variable $X$ follows $\mathcal{B}(n,p)$, the adjusted estimator $\hat{p}^* = \frac{X+0.5}{n+1}$ also follows, approximately, $\mathcal{N}\left(p, \frac{p(1-p)}{n}\right)$.
### Approximation method 2
Let $L$ be the function $L(x)=\ln\frac{x}{1-x}$. Transforming a number with this function is called the **logit transformation**.  
When the random variable $X$ follows $\mathcal{B}(n,p)$, $L(\hat{p}^*)$ approximately follows $\mathcal{N}\left(L(p), \frac{1}{np(1-p)}\right)$.

In [6]:
def approx_1(n, p0, X):
    """Test statistic for approximation method 1 (direct normal approximation).

    The adjusted estimate ``(X + 0.5) / (n + 1)`` is centred on ``p0`` and
    divided by ``sqrt(p0 * (1 - p0) / n)``.

    Parameters
    ----------
    n : number of trials.
    p0 : hypothesised success probability.
    X : observed number of successes.

    Returns
    -------
    The standardised statistic ``U0``.
    """
    adjusted_estimate = (X + 0.5) / (n + 1)
    null_std_error = np.sqrt(p0 * (1 - p0) / n)
    return (adjusted_estimate - p0) / null_std_error


def logit_transform(p):
    """Return the logit ``ln(p / (1 - p))`` of ``p``.

    ``p`` must lie strictly between 0 and 1; the assertion below fails
    otherwise.
    """
    assert 0 < p < 1
    odds = p / (1 - p)
    return np.log(odds)


def approx_2(n, p0, X):
    """Test statistic for approximation method 2 (logit scale).

    The difference between the logit of the adjusted estimate
    ``(X + 0.5) / (n + 1)`` and the logit of ``p0`` is multiplied by
    ``sqrt(n * p0 * (1 - p0))``.

    Parameters
    ----------
    n : number of trials.
    p0 : hypothesised success probability.
    X : observed number of successes.

    Returns
    -------
    The standardised statistic ``U0``.
    """
    logit_estimate = logit_transform((X + 0.5) / (n + 1))
    logit_null = logit_transform(p0)
    scale = np.sqrt(n * p0 * (1 - p0))
    return (logit_estimate - logit_null) * scale

In [29]:
# Alternative example (disabled): n, X, p0 = 20, 11, 1/3 with alpha = 0.01

# Observed data and null hypothesis: X successes in n trials, tested against p0.
n = 500
X = 20
p0 = 0.05
alpha = 0.05

# Standardised test statistics from the two normal approximations.
U_01, U_02 = approx_1(n, p0, X), approx_2(n, p0, X)
print("Approximation method 1 = {:.3f}, Approximation method 2 = {:.3f}".format(U_01, U_02))

Approximation method 1 = -0.932, Approximation method 2 = -1.023


### Estimation of the parameters of the binomial distribution
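Estimate the population parameter $p$ of the binomial distribution.  
There are two types of parameter estimation: point estimation and interval estimation.   
For point estimation, $\hat{p}=\frac{X}{n}$ or $\hat{p}^*=\frac{X+0.5}{n+1}$ is used as the estimator.  
For interval estimation, the confidence interval is derived from the following relation, where $z(q)$ denotes the $q$-quantile of the standard normal distribution:
$$
\mathrm{Pr}\left\{z\left(\frac{\alpha}{2}\right) \le U_0 \le z\left(1-\frac{\alpha}{2}\right)\right\} \approx 1-\alpha
$$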

In [33]:
# Two-sided standard-normal quantiles for significance level alpha.
alpha_half = alpha / 2
n_statistic_left = stats.norm.ppf(alpha_half)
n_statistic_right = stats.norm.ppf(1 - alpha_half)
n_statistic = n_statistic_right  # same quantile; this is the value reported below
print("rejection region statistic={:.3f}".format(n_statistic))

# Adjusted point estimate shared by both interval methods.
p_hs = (X + 0.5) / (n + 1)

# Method 1: interval built directly on the probability scale.
# NOTE: despite its name, `var` holds a square root (a standard error), not a variance.
var = np.sqrt(p_hs * (1 - p_hs) / n)
lower_interval1, upper_interval1 = (
    p_hs + z * var for z in (n_statistic_left, n_statistic_right)
)
print("Method 1 Interval=({:.3f}, {:.3f})".format(lower_interval1, upper_interval1))

# Method 2: interval built on the logit scale, then mapped back to a
# probability with the inverse logit 1 / (1 + exp(-x)).
Lp_hs = logit_transform(p_hs)
var = 1 / np.sqrt(n * p_hs * (1 - p_hs))
lower_interval2, upper_interval2 = (
    1 / (1 + np.exp(-(Lp_hs + z * var)))
    for z in (n_statistic_left, n_statistic_right)
)

print("Method 2 Interval=({:.3f}, {:.3f})".format(lower_interval2, upper_interval2))

rejection region statistic=1.960
Method 1 Interval=(0.024, 0.058)
Method 2 Interval=(0.027, 0.062)


## Week 2-2