In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In [132]:
def calculate_zscore(n, mu, std, M, alpha=0.05, tail_num=2):
    """Return the z statistic of a sample mean, rounded to two decimals.

    Parameters
    ----------
    n : sample size.
    mu : mean the sample mean is compared against.
    std : standard deviation used to build the standard error.
    M : observed sample mean.
    alpha, tail_num : accepted for signature parity with the tester; they are
        not used in this computation.

    Returns
    -------
    (M - mu) divided by the standard error ``std / sqrt(n)``, rounded to
    2 decimal places.
    """
    standard_error = std / np.sqrt(n)
    mean_difference = M - mu
    return round(mean_difference / standard_error, 2)

def calculate_norm_portion(alpha, tail_num):
    """Return the standard-normal critical value for the given alpha and tails.

    The significance level is split evenly over ``tail_num`` tails, and the
    standard-normal quantile at ``1 - alpha / tail_num`` is returned, rounded
    to 3 decimal places (e.g. alpha=0.05, tail_num=2 gives 1.96).
    """
    per_tail_area = alpha / tail_num
    critical_value = stats.norm.ppf(1 - per_tail_area)
    return round(critical_value, 3)

def hypothesis_tester_basic(n, mu, std, M, alpha=0.05, tail_num=2):
    """Run a basic z-test on a sample mean and print the decision.

    Parameters
    ----------
    n : sample size.
    mu : mean the sample mean is compared against (null hypothesis value).
    std : standard deviation used for the standard error.
    M : observed sample mean.
    alpha : significance level (default 0.05).
    tail_num : 2 for a two-tailed test, 1 for a one-tailed test.

    Returns
    -------
    None. The z statistic, the critical region and the rejection decision are
    printed. For any other ``tail_num`` a usage message is printed instead.

    Notes
    -----
    For a one-tailed test the tested tail follows the sign of the observed
    z statistic: upper tail when z > 0, lower tail otherwise.
    """
    # Validate before computing anything: the critical-value helper divides
    # alpha by tail_num, so tail_num=0 would otherwise raise ZeroDivisionError
    # and the usage message below would never be shown.
    if tail_num not in (1, 2):
        print('Should use tail_num 1 or 2.')
        return None

    z = calculate_zscore(n, mu, std, M)
    cr = calculate_norm_portion(alpha, tail_num)

    if tail_num == 2:
        # Two-tailed: reject when z falls beyond either critical boundary.
        rejection_decision = (z > cr) | (z < -1 * cr)
        region = f'z > {cr} or z < -{cr}'
        criteria = f'two tail, alpha {alpha}'
    else:
        # One-tailed: the direction is taken from the sign of the observed z.
        if z > 0:
            rejection_decision = (z > cr)
            region = f'z > {cr}'
        else:
            rejection_decision = (z < -1 * cr)
            region = f'z < -{cr}'
        criteria = f'one tail, alpha {alpha}'

    print(f'[{criteria}] z_statistic:{z}, critical_region:{region}\n=> null hypothesis rejection [{rejection_decision}]')
    
def calculate_cohens_d(mu, std, M):
    """Return Cohen's d: |M - mu| / std, rounded to two decimals.

    Parameters
    ----------
    mu : reference mean.
    std : standard deviation used to standardize the difference.
    M : observed mean.
    """
    standardized_difference = (M - mu) / std
    return round(abs(standardized_difference), 2)

def calculate_stat_power(n, mu, std, M, alpha=0.05, tail_num=2):
    """Return the statistical power of an upper-tail z-test, rounded to 4 decimals.

    Parameters
    ----------
    n : sample size.
    mu : mean under the null hypothesis.
    std : standard deviation used for the standard error.
    M : mean of the sampling distribution under the treatment effect.
    alpha : significance level (default 0.05).
    tail_num : number of tails alpha is split over (default 2).

    Returns
    -------
    The area of a normal distribution centred at ``M`` (standard error
    ``std / sqrt(n)``) that lies above the upper critical boundary
    ``mu + critical * se``. Only the upper boundary is considered.

    Notes
    -----
    The critical value is the standard-normal quantile at
    ``1 - alpha / tail_num`` rounded to 3 decimals, so the defaults use
    exactly 1.96, the value this function previously hard-coded.
    """
    se = std / np.sqrt(n)
    # Previously fixed at 1.96; now derived from alpha and tail_num.
    critical = round(stats.norm.ppf(1 - alpha / tail_num), 3)
    # Position of the upper critical boundary, in z units relative to M.
    z = ((mu + critical * se) - M) / se
    return round(1 - stats.norm.cdf(z), 4)

# 유형
1. t test - 가설검정 단계에 따라, Treatment 효과(M - mu)의 통계적 유의성(significance)을 판단
2. 표준편차(sigma), 샘플 개수(n)에 따른 귀무가설(null hypothesis) 기각의 관계
3. 단측(one-sided test) / 양측(two-sided test), 유의수준(alpha)에 따른 결과 차이
4. 신뢰구간(confidence interval) 구하는 법
5. 결정계수(r squared) 구하는 법
6. 샘플 수(n)과 결정계수(r squared)의 관계
7. raw datapoint 기반 문제풀이 

### t test - 가설검정 단계에 따라, Treatment 효과(M - mu)의 통계적 유의성(significance)을 판단

---
![''](./09_src/09_09_01.png)

In [53]:
# Example inputs: sample size, reference mean, standard deviation, sample mean.
n = 16
mu = 50
std = 12
M = 54

# alpha and tail_num are left at their defaults (0.05, two-tailed).
hypothesis_tester_basic(n, mu, std, M)

[two tail, alpha 0.05] z_statistic:1.33, critical_region:-1.96 < z < 1.96
=> null hypothesis rejection [False]


---
![''](./09_src/09_10_01.png)

### 표준편차(sigma), 샘플 개수(n)에 따른 귀무가설(null hypothesis) 기각의 관계

---
![''](./09_src/09_11_01.png)

---
![''](./09_src/09_12_01.png)

![''](./09_src/09_12_02.png)

### 단측(one-sided test) / 양측(two-sided test), 유의수준(alpha)에 따른 결과 차이

---
![''](./09_src/09_13_01.png)

---
![''](./09_src/09_16_01.png)

---
![''](./09_src/09_21_01.png)

### 신뢰구간(confidence interval), 결정계수(r squared) 구하는 법

---
![''](./09_src/09_14_01.png)

![''](./09_src/09_14_02.png)

---
![''](./09_src/09_15_01.png)

---
![''](./09_src/09_17_01.png)

![''](./09_src/09_17_02.png)

---
![''](./09_src/09_20_01.png)

![''](./09_src/09_20_02.png)

### 샘플 수(n)과 결정계수(r squared)의 관계

---
![''](./09_src/09_18_01.png)

---
![''](./09_src/09_19_01.png)

### raw datapoint 기반 문제풀이

---
![''](./09_src/09_22_01.png)

![''](./09_src/09_22_02.png)

---
![''](./09_src/09_23_01.png)

![''](./09_src/09_23_02.png)