In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In [132]:
def calculate_zscore(n, mu, std, M, alpha=0.05, tail_num=2):
    se = std / np.sqrt(n)
    return round((M - mu) / se, 2)

def calculate_norm_portion(alpha, tail_num):
    return round(stats.norm.ppf(1 - alpha/tail_num), 3)

def hypothesis_tester_basic(n, mu, std, M, alpha=0.05, tail_num=2):
    
    z, cr = calculate_zscore(n, mu, std, M), calculate_norm_portion(alpha, tail_num)
    
    if tail_num == 2:
        
        rejection_decision = (z > cr) | (z < -1 * cr)
        region = f'z > {cr} or z < -{cr}'
        criteria = f'two tail, alpha {alpha}'
        
    elif tail_num == 1:
        
        if z > 0:
        
            rejection_decision = (z > cr)
            region = f'z > {cr}'
            
        else:
            
            rejection_decision = (z < -1 * cr)
            region = f'z < -{cr}'
        
        criteria = f'one tail, alpha {alpha}'
        
    else:
        print('Should use tail_num 1 or 2.')
        return None
            
    print(f'[{criteria}] z_statistic:{z}, critical_region:{region}\n=> null hypothesis rejection [{rejection_decision}]')
    
def calculate_cohens_d(mu, std, M):
    return round(abs((M - mu) / std), 2)

def calculate_stat_power(n, mu, std, M):
    se = std / np.sqrt(n)
    z = ((mu + 1.96 * se) - M) / se
    return round(1 - stats.norm.cdf(z), 4)

# 유형
1. 가설검정 단계에 따라, Treatment 효과(M - mu) 통계적 유의성(significance)를 판단하는 문제
2. 단측(one-sided test) / 양측(two-sided test), 유의수준(alpha)에 따른 결과 차이 확인 문제
3. cohens d를 구하는 문제
4. 표준편차(sigma), 샘플 개수(n)에 따른 귀무가설(null hypothesis) 기각의 관계
5. 통계적 검증력 (statistical power)

#### 세광님
- cohens d

### 가설검정 단계에 따라, Treatment 효과(M - mu) 통계적 유의성(significance)를 판단

---
![''](./08_src/08_06_01.png)

![''](./08_src/08_06_02.png)

In [146]:
n, mu, std, M = 16, 50, 12, 54

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:1.33, critical_region:z > 1.96 or z < -1.96
=> null hypothesis rejection [False]


---
![''](./08_src/08_07_01.png)

In [54]:
n, mu, std, M = 64, 14, 4.8, 12.5

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:-2.5, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [True]


---
![''](./08_src/08_08_01.png)

In [55]:
n, mu, std, M = 100, 50, 15, 53.8

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:2.53, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [True]


In [56]:
calculate_cohens_d(mu, std, M)

0.25

---
![''](./08_src/08_15_01.png)

In [80]:
n, mu, std, M = 36, 400, 40, 392

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:-1.2, critical_region:z > 1.96 or z < -1.96
=> null hypothesis rejection [False]


In [83]:
calculate_cohens_d(mu, std, M)

0.2

---
![''](./08_src/08_18_01.png)

In [87]:
n, mu, std, M = 16, 45, 9, 50.2

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:2.31, critical_region:z > 1.96 or z < -1.96
=> null hypothesis rejection [True]


In [88]:
calculate_cohens_d(mu, std, M)

0.58

### 표준편차(sigma), 샘플 개수(n)에 따른 귀무가설(null hypothesis) 기각의 관계

---
![''](./08_src/08_09_01.png)

In [57]:
n, mu, std, M = 36, 71, 12, 76

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:2.5, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [True]


In [58]:
n, mu, std, M = 36, 71, 18, 76

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:1.67, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [False]


---
![''](./08_src/08_10_01.png)

![''](./08_src/08_10_02.png)

In [59]:
n, mu, std, M = 16, 30, 8, 33

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:1.5, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [False]


In [60]:
n, mu, std, M = 64, 30, 8, 33

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:3.0, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [True]


---
![''](./08_src/08_11_01.png)

In [61]:
n, mu, std, M = 25, 40, 5, 44

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:4.0, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [True]


In [62]:
n, mu, std, M = 25, 40, 15, 44

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:1.33, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [False]


### 단측(one-sided test) / 양측(two-sided test), 유의수준(alpha)에 따른 결과 차이

---
![''](./08_src/08_12_01.png)

In [75]:
n, mu, std, M = 36, 4.22, 0.6, 4.48

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:2.6, critical_region:z > 1.96 or z < -1.96
=> null hypothesis rejection [True]


In [76]:
n, mu, std, M = 25, 4.22, 0.6, 4.01

hypothesis_tester_basic(n, mu, std, M, alpha=0.05, tail_num=1)

[one tail, alpha 0.05] z_statistic:-1.75, critical_region:z < -1.645
=> null hypothesis rejection [True]


---
![''](./08_src/08_14_01.png)

In [77]:
n, mu, std, M = 25, 400, 40, 422

hypothesis_tester_basic(n, mu, std, M, alpha=0.01)

[two tail, alpha 0.01] z_statistic:2.75, critical_region:z > 2.576 or z < -2.576
=> null hypothesis rejection [True]


In [78]:
n, mu, std, M = 25, 400, 40, 422

hypothesis_tester_basic(n, mu, std, M, alpha=0.01, tail_num=1)

[one tail, alpha 0.01] z_statistic:2.75, critical_region:z > 2.326
=> null hypothesis rejection [True]


In [79]:
calculate_cohens_d(mu, std, M)

0.55

---
![''](./08_src/08_16_01.png)

![''](./08_src/08_16_02.png)

In [84]:
n, mu, std, M = 4, 9.6, 1.9, 12.25

hypothesis_tester_basic(n, mu, std, M, alpha=0.05, tail_num=1)

[one tail, alpha 0.05] z_statistic:2.79, critical_region:z > 1.645
=> null hypothesis rejection [True]


---
![''](./08_src/08_17_01.png)

In [85]:
n, mu, std, M = 20, 500, 100, 562

hypothesis_tester_basic(n, mu, std, M, alpha=0.01, tail_num=1)

[one tail, alpha 0.01] z_statistic:2.77, critical_region:z > 2.326
=> null hypothesis rejection [True]


In [86]:
calculate_cohens_d(mu, std, M)

0.62

### 통계적 검증력 (statistical power)

---
![''](./08_src/08_19_01.png)

![''](./08_src/08_19_02.png)

In [118]:
n, mu, std, M = 9, 40, 12, 46

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:1.5, critical_region:z > 1.96 or z < -1.96
=> null hypothesis rejection [False]


In [126]:
calculate_stat_power(n, mu, std, M)

0.3228

In [127]:
n, mu, std, M = 16, 40, 12, 46

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:2.0, critical_region:z > 1.96 or z < -1.96
=> null hypothesis rejection [True]


In [128]:
calculate_stat_power(n, mu, std, M)

0.516

---
![''](./08_src/08_20_01.png)

In [133]:
n, mu, std, M = 9, 240, 30, 210

hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:-3.0, critical_region:z > 1.96 or z < -1.96
=> null hypothesis rejection [True]


In [134]:
calculate_stat_power(n, mu, std, M)

0.0

In [139]:
# def calculate_stat_power(n, mu, std, M):
se = std / np.sqrt(n)
z = ((mu + -1.96 * se) - M) / se
# return round(1 - stats.norm.cdf(z), 4)

In [144]:
(mu + -1.96 * se)

220.4

In [143]:
z

1.0400000000000005

In [145]:
stats.norm.cdf(z)

0.8508300496690187