## Chapter 7
 - Confidence Interval for Population Proportion
 - z critical value calculator
 - t critical value calculator
 - chi-squared value calculator

In [5]:
import scipy.stats as st
import numpy as np

In [4]:
def CI_proportion(p, alpha, n):
    """Two-sided score (Wilson) confidence interval for a proportion.

    Parameters
    ----------
    p : sample proportion.
    alpha : confidence level of the two-tailed interval (e.g. 0.95); the
        critical value is taken at the 1 - (1 - alpha)/2 normal quantile.
    n : number of observations the proportion is based on.

    Returns
    -------
    (p_tilda, next_part, ci) : the adjusted centre of the interval, its
        half-width, and the (lower, upper) tuple centre -/+ half-width.
    """
    z = st.norm.ppf(1-(1-alpha)/2)
    # Both the centre and the half-width are divided by the same factor.
    shrink = 1+z**2/n
    centre = (p + z**2/(2*n))/shrink
    spread = np.sqrt(p*(1-p)/n + (z**2)/(4*(n**2)))
    half_width = z*(spread/shrink)
    return centre, half_width, (centre - half_width, centre + half_width)
# Worked example: sample proportion 16/48 from n = 48 observations,
# 95% two-sided confidence level.
p = 16/48
alpha = 0.95
n = 48
# CI_proportion returns (centre, half-width, (lower, upper)).
p_tilda, next_part, CI = CI_proportion(p,alpha,n)
print("p_tilda is ", p_tilda)
print("next_part is ", next_part)
print("Confidence Interval is ", CI)

p_tilda is  0.34568335494435287
next_part is  0.12891554799616722
Confidence Interval is  (0.21676780694818565, 0.4745989029405201)


In [42]:
def z_critical_val(alpha):
    """Two-tailed standard-normal critical value.

    alpha is the confidence level (e.g. 0.95); the remaining probability
    1 - alpha is split equally between the two tails.
    """
    upper_quantile = 1-(1-alpha)/2
    return st.norm.ppf(upper_quantile)
# Example: two-tailed z critical value at the 95% confidence level.
alpha = 0.95
z_critical = z_critical_val(alpha)
z_critical

1.959963984540054

In [9]:
def t_critical_val(alpha, dof):
    """Two-tailed Student-t critical value.

    alpha is the confidence level (e.g. 0.90) and dof the degrees of
    freedom; 1 - alpha is split equally between the two tails.
    """
    upper_quantile = 1-(1-alpha)/2
    return st.t.ppf(upper_quantile, dof)
# Example: two-tailed t critical value at the 90% confidence level.
alpha = 0.90
n = 30
# NOTE(review): dof is n - 2 here; a one-sample t interval would use n - 1.
# Confirm which problem this example targets.
degree_of_freedom = n - 2
t_critical = t_critical_val(alpha,degree_of_freedom)
t_critical

1.701130934265931

In [44]:
def chi_square_critical_val(alpha, dof):
    """Upper chi-square critical value for a two-sided confidence level.

    alpha is the confidence level (e.g. 0.95) and dof the degrees of
    freedom; the value returned is the 1 - (1 - alpha)/2 quantile.
    """
    upper_quantile = 1-(1-alpha)/2
    return st.chi2.ppf(upper_quantile, dof)
# Example: upper chi-square critical value at the 95% level for n = 17.
alpha = 0.95
n = 17
degree_of_freedom = n - 1
chi_square_critl = chi_square_critical_val(alpha,degree_of_freedom)
chi_square_critl
# Be careful when calculating a CI for the variance: the lower critical value is needed too.
# NOTE(review): the original hint was st.chi2.ppf( alpha/2 , degree_of_freedom); with alpha
# holding the confidence level (0.95 here) the lower-tail quantile would presumably be
# st.chi2.ppf((1-alpha)/2, degree_of_freedom) -- confirm.

28.845350723404753

## Chapter 8
- Type II error probability for proportion test
- calculate sample size n that satisfies beta

In [45]:
def type_II_proportion(p0,p_prime, alpha, n, mode):
    """Type II error probability beta(p') of the large-sample proportion z test.

    Parameters
    ----------
    p0 : proportion stated in the null hypothesis.
    p_prime : alternative proportion at which beta is evaluated.
    alpha : significance level (a small value such as 0.1 or 0.05).
    n : sample size.
    mode : 'larger' (upper-tailed), 'smaller' (lower-tailed) or
        'not_equal' (two-tailed, uses alpha/2 in each tail).

    Returns
    -------
    The probability of not rejecting the null hypothesis when the true
    proportion is p_prime.

    Raises
    ------
    ValueError : if mode is not one of the three supported strings.
    """
    if mode not in ('larger', 'smaller', 'not_equal'):
        raise ValueError('wrong mode')

    shift = p0-p_prime
    null_se = np.sqrt(p0*(1-p0)/n)
    alt_se = np.sqrt(p_prime*(1-p_prime)/n)

    if mode == 'not_equal':
        z_critical = st.norm.ppf(1-alpha/2)
    else:
        z_critical = st.norm.ppf(1-alpha)

    # Standardised edges of the acceptance region under p_prime.
    upper = (shift+z_critical*null_se)/alt_se
    lower = (shift-z_critical*null_se)/alt_se

    if mode == 'larger':
        return st.norm.cdf(upper)
    if mode == 'smaller':
        return 1-st.norm.cdf(lower)
    # Two-tailed: probability mass between the two edges. The previous
    # version subtracted (1 - cdf(lower)), which gave 0 at p_prime == p0
    # where the answer must be 1 - alpha.
    return st.norm.cdf(upper)-st.norm.cdf(lower)

In [46]:
# Example: type II error probability when the null proportion is 0.9 and
# the proportion under the alternative is 0.8, with alpha = 0.01 and n = 225.
p0 = 0.9
p_prime = 0.8
alpha = 0.01
n = 225
# "smaller" selects the lower-tailed branch of type_II_proportion.
mode = "smaller"
type_II_proportion(p0,p_prime, alpha, n, mode)

0.022468746245518756

Sample size n for which the level $\alpha$ test also satisfies $\beta(p^{\prime}) = \beta$

In [47]:
def sample_size_for_beta_proportion(alpha, beta, p0,p_prime, mode):
    """Sample size n at which a level-alpha proportion test has beta(p') = beta.

    Parameters
    ----------
    alpha : significance level (a small value such as 0.1 or 0.05).
    beta : target type II error probability at p_prime.
    p0 : proportion stated in the null hypothesis.
    p_prime : alternative proportion.
    mode : 'one_tail' uses z at alpha, 'two_tail' uses z at alpha/2.

    Returns the (generally non-integer) sample size; any other mode raises
    Exception('wrong mode').
    """
    if mode == 'one_tail':
        tail_area = alpha
    elif mode == 'two_tail':
        tail_area = alpha/2
    else:
        raise Exception('wrong mode')

    z_alpha = st.norm.ppf(1-tail_area)
    z_beta = st.norm.ppf(1-beta)
    null_term = z_alpha*np.sqrt(p0*(1-p0))
    alt_term = z_beta*np.sqrt(p_prime*(1-p_prime))
    root_n = (null_term+alt_term)/(p_prime-p0)
    return root_n**2

In [48]:
# Example: sample size needed so that a one-tailed level-0.01 test of
# p0 = 0.9 has type II error probability 0.01 at p' = 0.8.
p0 = 0.9
p_prime = 0.8
alpha = 0.01
beta = 0.01
# NOTE(review): n is assigned here but is not passed to the call below.
n = 225
mode = "one_tail"
sample_size_for_beta_proportion(alpha,beta, p0,p_prime,mode)


265.18282712166274

## Chapter 9
 - t test statistic for two-sample t test
 - degree of freedom for t test statistic
 - pooled variance
 - type II error probability for Two sample Proportion
 - sample size for type II of two sample proportion

In [49]:
def t_statistic_two_sample_t(X, Y, diff, s1, s2, m, n):
    """Two-sample t statistic with unpooled variances.

    Parameters
    ----------
    X, Y : means of sample 1 and sample 2.
    diff : hypothesised difference that is subtracted from X - Y.
    s1, s2 : standard deviations of sample 1 and sample 2.
    m, n : sizes of sample 1 and sample 2.
    """
    observed_gap = X-Y-diff
    standard_error = np.sqrt(s1**2/m + s2**2/n)
    return observed_gap/standard_error

In [50]:
# Example: two-sample t statistic for two samples of size 10 with a
# hypothesised difference of 0.
mean1 = 51.71
mean2 = 136.14
diff = 0
s1 = 0.79
s2 = 3.59
m = 10
n = 10
t_statistic_two_sample_t(mean1, mean2, diff, s1, s2, m, n)

-72.63296227738981

In [51]:
def degree_of_freedom_two_sample_t(X, Y, diff, s1, s2, m, n):
    """Approximate (Welch-Satterthwaite) degrees of freedom for the two-sample t test.

    Only s1, s2 (sample standard deviations) and m, n (sample sizes) enter
    the computation; X, Y and diff are accepted but not used.
    The result is generally not an integer.
    """
    std_err_1 = s1/np.sqrt(m)
    std_err_2 = s2/np.sqrt(n)
    top = (std_err_1**2+ std_err_2**2)**2
    bottom = std_err_1**4/(m-1) + std_err_2**4 / (n-1)
    return top/bottom

In [52]:
# Example: degrees of freedom for the two-sample t statistic, two samples of size 10.
# mean1, mean2 and diff are passed to match the signature; the function's
# computation uses only s1, s2, m and n.
mean1 = 51.71
mean2 = 136.14
diff = 0
s1 = 0.79
s2 = 3.59
m = 10
n = 10
degree_of_freedom_two_sample_t(mean1, mean2, diff, s1, s2, m, n)

9.869602122157502

In [58]:
def pooled_variance_for_two_sample_t(s1, s2, m, n):
    """Pooled estimate of spread for the pooled two-sample t procedure.

    s1, s2 are the sample standard deviations and m, n the sample sizes.
    The variances are weighted by (m-1)/(m+n-2) and (n-1)/(m+n-2).

    NOTE: despite the function name, the value returned is the square root
    of the pooled variance, i.e. the pooled standard deviation.
    """
    weight_1 = (m-1)/(m+n-2)
    weight_2 = (n-1)/(m+n-2)
    pooled_var = weight_1*s1**2 + weight_2*s2**2
    return np.sqrt(pooled_var)

2.2621571627409915

In [None]:
# Example: pooled spread for two samples of size 10 with standard
# deviations 1.5 and 1.7.
s1 = 1.5
s2 = 1.7
m = 10
n = 10
pooled_variance_for_two_sample_t(s1,s2, m, n)

In [None]:
def type_II_error_for_two_sample_proportion(p1,p2,m,n,alpha,mode):
    """Type II error probability of the large-sample two-proportion z test.

    Parameters
    ----------
    p1, p2 : the two population proportions under the alternative.
    m, n : sizes of sample 1 and sample 2.
    alpha : significance level (a small value such as 0.1 or 0.05).
    mode : 'larger' (upper-tailed), 'smaller' (lower-tailed) or
        'not_equal' (two-tailed, uses alpha/2 in each tail).

    Returns
    -------
    The probability of not rejecting the null hypothesis of equal
    proportions when the true proportions are p1 and p2.

    Raises
    ------
    ValueError : if mode is not one of the three supported strings
        (previously such a call silently returned None).
    """
    if mode not in ('larger', 'smaller', 'not_equal'):
        raise ValueError('wrong mode')

    q1 = 1-p1
    q2 = 1-p2
    # Sample-size-weighted averages used for the null standard error.
    p_bar = (m*p1 + n*p2)/(m+n)
    q_bar = (m*q1 + n*q2)/(m+n)
    null_se = np.sqrt(p_bar*q_bar*(1/m+1/n))
    # Standard deviation of the difference in sample proportions under (p1, p2).
    sigma = np.sqrt(p1*q1/m + p2*q2/n)
    delta = p1-p2

    if mode == 'not_equal':
        z_alpha = st.norm.ppf(1-alpha/2)
    else:
        z_alpha = st.norm.ppf(1-alpha)

    # z_alpha scales only the null standard error; the true difference is
    # then subtracted. The previous version multiplied z_alpha into the
    # whole (null_se - delta) term.
    upper = (z_alpha*null_se-delta)/sigma
    lower = (-z_alpha*null_se-delta)/sigma

    if mode == 'larger':
        return st.norm.cdf(upper)
    if mode == 'smaller':
        return 1-st.norm.cdf(lower)
    return st.norm.cdf(upper)-st.norm.cdf(lower)

In [1]:
def sample_size_for_beta_of_two_sample_proportion(alpha,beta,p1, p2,d):
    """Common sample size for a two-proportion test with a target type II error.

    Parameters
    ----------
    alpha : significance level; the critical value is taken at 1 - alpha
        (a single tail).
    beta : target type II error probability.
    p1, p2 : the two proportions under the alternative.
    d : the difference p1 - p2, supplied by the caller; only d**2 is used.

    Returns the (generally non-integer) sample size.
    """
    z_alpha, z_beta = st.norm.ppf(1-alpha), st.norm.ppf(1-beta)
    q1, q2 = 1-p1, 1-p2
    null_term = z_alpha*np.sqrt((p1+p2)*(q1+q2)/2)
    alt_term = z_beta*np.sqrt(p1*q1+p2*q2)
    return (null_term+alt_term)**2/(d**2)

### Chapter 10
- [Anova](https://goodcalculators.com/one-way-anova-calculator/)
