# Hypothesis Testing

In [136]:
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import pandas as pd

## Unfair coin

### Simulation

In [3]:
# Probability that a single flip of the simulated (unfair) coin lands on 1.
ONE_CHANCE = 0.55


def flip_coin(sample_size: int, one_chance: float = ONE_CHANCE) -> np.ndarray:
    """Simulate independent flips of a biased coin.

    Args:
        sample_size: Number of flips to draw.
        one_chance: Probability of a flip being 1; the remaining
            probability mass goes to 0. Defaults to ``ONE_CHANCE``.

    Returns:
        A one-dimensional NumPy array of length ``sample_size`` whose
        entries are 1 or 0.
    """
    # np.random.choice returns an ndarray (not a list) and draws from the
    # global NumPy random state, so results vary from call to call unless
    # the caller seeds that state.
    return np.random.choice([1, 0], sample_size, p=[one_chance, 1 - one_chance])


flip_coin(10)

array([0, 1, 0, 0, 1, 1, 0, 0, 0, 0])

### Run the test

In [62]:
# Mean assumed under the null hypothesis (a fair coin).
H0_VALUE = 0.5


def calculate_p_value(
    sample: "np.typing.ArrayLike",
    h0_value: float = H0_VALUE,
    alpha: float = 0.05,
) -> tuple[float, float, tuple[float, float]]:
    """Run a two-sided one-sample z-test of the sample mean.

    The standard error is estimated from the sample itself using the
    population standard deviation (NumPy's default ``ddof=0``).

    Args:
        sample: Numeric observations (e.g. an array of 0/1 coin flips).
        h0_value: Mean assumed under the null hypothesis. Defaults to
            ``H0_VALUE``.
        alpha: Significance level used to size the confidence interval;
            the interval has coverage ``1 - alpha``. Defaults to 0.05.

    Returns:
        A tuple ``(p_value, z, confidence_interval)`` where ``z`` is the
        absolute z statistic and ``confidence_interval`` is a
        ``(lower, upper)`` pair around the sample mean.

    Raises:
        ValueError: If ``sample`` is empty or ``alpha`` is not strictly
            between 0 and 1.
    """
    values = np.asarray(sample, dtype=float)
    n = values.size
    if n == 0:
        raise ValueError("sample must contain at least one observation")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")

    x_bar = values.mean()
    se = values.std() / (n ** 0.5)

    if se == 0:
        # Every observation is identical, so dividing by the standard error
        # would be a division by zero. Use the limiting values explicitly.
        z = 0.0 if x_bar == h0_value else float("inf")
    else:
        z = abs(h0_value - x_bar) / se
    p_value = 2 * st.norm.cdf(-z)

    # Use the exact normal quantile instead of a rounded 1.96 so that
    # "h0_value lies inside the interval" and "p_value > alpha" always
    # lead to the same decision.
    z_crit = st.norm.ppf(1 - alpha / 2)
    confidence_interval = (x_bar - z_crit * se, x_bar + z_crit * se)
    return p_value, z, confidence_interval

In [145]:
def run_test(sample_size:int):
    """Simulate one coin-flip experiment and summarise the z-test outcome.

    Draws ``sample_size`` flips with ``flip_coin``, evaluates them with
    ``calculate_p_value`` and packs the statistics into a dict.

    The two boolean entries are True when the null hypothesis is NOT
    rejected: "result z test" when the p-value exceeds 0.05, and
    "result confidence interval" when ``H0_VALUE`` lies strictly inside
    the confidence interval.
    """
    p_value, z_score, confidence_interval = calculate_p_value(flip_coin(sample_size))
    lower, upper = confidence_interval[0], confidence_interval[1]

    summary = {"sample size": sample_size}
    summary["p_value"] = p_value
    summary["z score"] = z_score
    summary["confidence interval"] = confidence_interval
    summary["result z test"] = p_value > 0.05
    summary["result confidence interval"] = lower < H0_VALUE and H0_VALUE < upper
    return summary

In [146]:
# Run the experiment once per sample size, collecting one result dict per run.
tmp = [run_test(sample_size) for sample_size in (30, 100, 1000)]

# Tabulate the runs; interval bounds are rounded to 3 decimals for display.
df = pd.DataFrame(tmp)
df['confidence interval'] = df['confidence interval'].apply(
    lambda bounds: tuple(round(bound, 3) for bound in bounds)
)

In [150]:
df

Unnamed: 0,sample size,p_value,z score,confidence interval,result z test,result confidence interval
0,30,0.714393,0.365963,"(0.288, 0.645)",True,True
1,100,0.00709,2.692602,"(0.535, 0.725)",False,False
2,1000,0.026484,2.219038,"(0.504, 0.566)",False,False
