In [8]:
import math
import scipy.stats as st

### Null and Alternative Hypothesis

Rules when determine H0 and Ha:
- Ha cannot include equality
- The asserteds value in H0 should also appear in Ha. i.e They should measure the same value

Correct Example:
- H0: p1 − p2 = −0.1, Ha: p1 − p2 < −0.1
- H0: σ1/σ2 = 1, Ha: σ1/σ2 ≠ 1

Wrong Example:
- H0: σ = 20, Ha: σ ≤ 20 (has equal in Ha)
- H0: σ = 20, Ha: σ ≤ 20
- H0: μ = 120, Ha: μ = 150

### Errors in Hypothesis Testing
Define:
- A **Type I error**  consists of rejecting the null hypothesis H0 when it is True
- A **Type II error** involves not rejecting the null hypothesis H0 when it is False

Changing The rejection Region:
- beta is the probability that the test statistic falls

Proposition:
- Suppose an experiment and a sample size are fixed and a test statistic is chosen. Then decreasing the size of the
rejection region to obtain a smaller value of alpha result in a larger value of beta

In [9]:
# Assume that population has normal distirbution with know sd and mean
def calculate_Z_and_P(x_value, mean, sd, n):
    z_value = (x_value - mean)/(sd /math.sqrt(n))
    print("Z value is ", z_value )
    print("The corresponding P is: ", (st.norm.cdf(z_value), 1-st.norm.cdf(z_value)))
calculate_Z_and_P(x_value= 249.66666666666666, mean=200, sd=145.14903521636796, n=12 )

Z value is  1.185336023501911
The corresponding P is:  (0.8820576920795353, 0.11794230792046467)


In [10]:
st.norm.cdf(-0.02)

0.492021686283098

### Rejection Area
Define:
- If P-value <= alpha, then H0 will be rejected in favor of Ha
- if P-value > alpha, the H0 fails to be rejected

Note:
- P-value is a probability
- This probability is calculated assuming H0 is true
- The smaller P-value, the stronger is the evidence against H0

### Beta and Sample Size Determination
![image](https://drive.google.com/uc?export=view&id=1yDmCgMjVvvOTEuGaf1BKwftUsh5d33eN)

In [11]:
# When Ha: x > x0
def beta_larger(conf, mean, x_value, sd, n):
    print("Beta for level alpha test is ", st.norm.cdf(st.norm.ppf(conf) + (mean- x_value)/(sd /math.sqrt(n))))
# When Ha: x < x0
def beta_smaller(conf, mean, x_value, sd, n):
    # print("Beta for level alpha test is ", 1-st.norm.cdf(-st.norm.ppf(conf) + (mean- x_value)/(sd /math.sqrt(n))))
    print("Beta for level alpha test is ",1-st.norm.cdf((mean- x_value)/(sd /math.sqrt(n))+st.norm.ppf(0.003)))
# When Ha: x != x0
def beta_not_equal(conf, mean, x_value, sd, n):
    return st.norm.cdf(st.norm.ppf(conf/2) + (mean- x_value)/(sd /math.sqrt(n)))-st.norm.cdf(-st.norm.ppf(conf/2) + (mean- x_value)/(sd /math.sqrt(n)))

In [12]:
beta_larger()
print(1+ beta_not_equal(conf = 0.01, mean=5.5, x_value = 5.6,sd= 0.32,n= 16))

TypeError: beta_larger() missing 5 required positional arguments: 'conf', 'mean', 'x_value', 'sd', and 'n'

In [None]:
# sample size n for one-tailed test (upper or lower)
def n_beta_one_tail(alpha, beta, mean, x_value,sd):
    return (sd*(abs(st.norm.ppf(alpha))+abs(st.norm.ppf(beta)))/(mean-x_value))**2
# sample size n for one-tailed test (upper or lower)
def n_beta_double_tail(alpha, beta, mean, x_value,sd):
    return (sd*(abs(st.norm.ppf(alpha/2))+abs((st.norm.ppf(beta))))/(mean-x_value))**2

In [None]:
print(n_beta_one_tail(alpha = 0.003, beta = 0.01, mean = 74, x_value=70, sd =7))
print(n_beta_double_tail(alpha = 0.01, beta = 0.01, mean = 5.5, x_value=5.6, sd = 0.32))

## The One-Sample t test
![image](https://drive.google.com/uc?export=view&id=12fBjdrIJoVAR1T7YY3mMKcJBiMltRAh-)

In [21]:
# !!! For review: check if it is mean-xvalue
from scipy import stats
# Calculate t:
def t_value(x_value, mean, sd, n):
    t_value = (mean-x_value)/(sd/math.sqrt(n))
    probability = stats.t.sf(t_value,n)
    return t_value, probability
t_value, probability = t_value(x_value=96.41, mean=100, sd =8.21184104408587, n = 10)
print("t_value is: ", t_value)
print("probability: ", probability)

t_value is:  1.3824642658153454
probability:  0.0984612017581452


In [20]:
stats.t.sf(-1.3824642658153454,10)

0.9015387982418548

### Beta and sample size for t value

In [None]:
from scipy import stats
stats.t.sf(1.6,7)

## Large Sample population proportion test
![image](https://drive.google.com/uc?export=view&id=1jdt1IoR4D5rbbV-qea7wcICr1IF-p6YE)

In [23]:
def proportion_z(p,p0, n):
    z = (p-p0)/(math.sqrt(p0*(1-p0)/n))
    p_values = stats.norm.sf((z)) #one-sided
    return z, p_values

In [40]:
z, p_value = proportion_z(76/121, 2/3,121)
print("Z value is: ", z)
print("p_value is: ", 1-p_value)

Z value is:  -0.8999540851465136
p_value is:  0.18407234286445195


In [39]:
stats.norm.sf(-0.90)

0.8159398746532405

In [None]:
# Beta:
# for p >p0:
def beta_larger(po,p_prime,n, alpha):
    print("z—alpha", stats.norm.ppf(1-abs(alpha)))
    z = (po-p_prime+stats.norm.ppf(1-abs(alpha))*math.sqrt(po*(1-po)/n))/(math.sqrt(p_prime*(1-p_prime)/n))
    return stats.norm.cdf(z)
# for p<p0
def beta_smaller(po,p_prime,n, alpha):
    z = (po-p_prime+stats.norm.ppf(1-abs(alpha))*math.sqrt(po*(1-po)/n))/(math.sqrt(p_prime*(1-p_prime)/n))
    return 1-stats.norm.cdf(z)
# for p!= po
def beta_not_equal(po,p_prime,n, alpha):
    z_1 =  (po-p_prime+stats.norm.ppf(1-abs(alpha/2))*math.sqrt(po*(1-po)/n))/(math.sqrt(p_prime*(1-p_prime)/n))
    z_2 = (po-p_prime-stats.norm.ppf(1-abs(alpha/2))*math.sqrt(po*(1-po)/n))/(math.sqrt(p_prime*(1-p_prime)/n))
    return  stats.norm.cdf(z_1)- stats.norm.cdf(z_2)

## !!! For review: what is p_prime and po
# sample size
def sample_size_proportion_one_tail(p_prime, po, alpha,beta):
    return ((stats.norm.ppf(1-abs(alpha))*math.sqrt(po*(1-po))+stats.norm.ppf(1-abs(beta))*math.sqrt(p_prime*(1-p_prime)))\
           /(p_prime-po))**2

# sample size
def sample_size_proportion_two_tail(p_prime, po, alpha,beta):
    return ((stats.norm.ppf(1-abs(alpha/2))*math.sqrt(po*(1-po))+stats.norm.ppf(1-abs(beta))*math.sqrt(p_prime*(1-p_prime)))\
           /(p_prime-po))**2

In [None]:
probability = beta_larger(200,300,12,0.05)
probability

In [None]:
sample_size_proportion_two_tail(p_prime=0.1, po=0.05, alpha=0.01,beta=0.1)

In [38]:
stats.binom.cdf(76,121,2/3)+1-stats.binom.cdf((76+(121*2/3-76)*2),121,2/3)

0.3857768881003494

In [35]:
121*2/3

80.66666666666667

## Small Sample population proportion test