In [1]:
import numpy as np
import pandas as pd
from scipy import stats

import slow_statistic

In [38]:
def __calculate_t_statistic_rel__(M_d, SS, n, mu_d=0):
    """Compute the repeated-measures (paired) t statistic from summary statistics.

    The estimated standard error of the mean difference is
    ``sqrt((SS / (n - 1)) / n)`` and the statistic is
    ``(M_d - mu_d) / std_error``.

    Parameters
    ----------
    M_d : float
        Mean of the difference scores.
    SS : float
        Sum of squared deviations of the difference scores.
    n : int
        Number of difference scores (pairs).
    mu_d : float, optional
        Mean difference stated by the null hypothesis. Defaults to 0, which
        reproduces the previous hard-coded behaviour.

    Returns
    -------
    float
        The t statistic.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 2, because ``n - 1`` is used as a divisor.
    """
    # With n == 1 the divisor (n - 1) is zero: plain ints raise
    # ZeroDivisionError and numpy ints silently yield inf, so fail loudly.
    if n < 2:
        raise ValueError(f'n must be at least 2 to estimate the variance, got {n}.')

    # Arithmetic order is kept as-is so printed results do not change.
    std_error = np.sqrt((SS / (n - 1)) / n)
    t_statistic = (M_d - mu_d) / std_error

    return t_statistic

def __calculate_t_portion_rel__(n, alpha, tail_num):
    """Return the critical t value, rounded to three decimals.

    The value is the ``1 - alpha / tail_num`` quantile of Student's t
    distribution with ``n - 1`` degrees of freedom.

    Parameters
    ----------
    n : int
        Number of difference scores; degrees of freedom are ``n - 1``.
    alpha : float
        Significance level.
    tail_num : int
        Number of tails the significance level is split across.

    Returns
    -------
    float
        Critical value rounded to 3 decimal places.
    """
    tail_probability = alpha / tail_num
    critical_value = stats.t.ppf(1 - tail_probability, df=n - 1)

    return round(critical_value, 3)

def ttest_rel_from_stats(M_d, SS, n, alpha=0.05, tail_num=2):
    """Run a repeated-measures t test from summary statistics and print the result.

    Parameters
    ----------
    M_d : float
        Mean of the difference scores.
    SS : float
        Sum of squared deviations of the difference scores.
    n : int
        Number of difference scores (pairs).
    alpha : float, optional
        Significance level. Defaults to 0.05.
    tail_num : int, optional
        2 for a two-tailed test (default) or 1 for a one-tailed test.

    Returns
    -------
    None
        The test summary is printed, not returned. For an unsupported
        ``tail_num`` a hint is printed instead.
    """
    # Validate before doing any work: the critical-value helper divides alpha
    # by tail_num, so tail_num=0 used to raise ZeroDivisionError before the
    # hint below could ever be printed.
    if tail_num not in (1, 2):
        print('Should use tail_num 1 or 2.')
        return None

    t = __calculate_t_statistic_rel__(M_d, SS, n)
    cr = __calculate_t_portion_rel__(n, alpha, tail_num)

    if tail_num == 2:

        rejection_decision = (t > cr) | (t < -1 * cr)
        region = f't > {cr} or t < -{cr}'
        criteria = f'two tail, alpha {alpha}'

    else:

        # NOTE: the one-tailed critical region is placed on the side of the
        # observed t (t == 0 falls in the lower-tail branch); the caller
        # cannot specify the direction explicitly.
        if t > 0:

            rejection_decision = (t > cr)
            region = f't > {cr}'

        else:

            rejection_decision = (t < -1 * cr)
            region = f't < -{cr}'

        criteria = f'one tail, alpha {alpha}'

    print(f'[{criteria}] t_statistic:{t}, critical_region:{region}\n=> null hypothesis rejection [{rejection_decision}]')

def cohens_d_rel_from_stats(M_d, SS, n):
    """Estimate Cohen's d for difference scores from summary statistics.

    Parameters
    ----------
    M_d : float
        Mean of the difference scores.
    SS : float
        Sum of squared deviations of the difference scores.
    n : int
        Number of difference scores (pairs).

    Returns
    -------
    float
        ``M_d`` divided by the sample standard deviation ``sqrt(SS / (n - 1))``,
        rounded to 3 decimal places.
    """
    sample_std = np.sqrt(SS / (n - 1))

    return round(M_d / sample_std, 3)

def r_squared_rel(M_d, SS, n):
    """Return r squared for a repeated-measures t test from summary statistics.

    Computed as ``t**2 / (t**2 + n - 1)``, where ``t`` comes from
    ``__calculate_t_statistic_rel__``. The result is rounded to 4 decimals.

    Parameters
    ----------
    M_d : float
        Mean of the difference scores.
    SS : float
        Sum of squared deviations of the difference scores.
    n : int
        Number of difference scores (pairs).

    Returns
    -------
    float
        r squared rounded to 4 decimal places.
    """
    t_squared = __calculate_t_statistic_rel__(M_d, SS, n) ** 2

    return round(t_squared / (t_squared + n - 1), 4)

def confidence_interval_rel_from_stats(M_d, SS, n, alpha=0.05, tail_num=2):
    """Print a confidence interval for the mean difference.

    The interval is ``M_d -/+ t_crit * std_error``, where ``t_crit`` comes from
    ``__calculate_t_portion_rel__(n, alpha, tail_num)`` and ``std_error`` is
    ``sqrt((SS / (n - 1)) / n)``. Both bounds are rounded to 4 decimals.

    Parameters
    ----------
    M_d : float
        Mean of the difference scores.
    SS : float
        Sum of squared deviations of the difference scores.
    n : int
        Number of difference scores (pairs).
    alpha : float, optional
        Significance level passed to the critical-value helper. Defaults to 0.05.
    tail_num : int, optional
        Number of tails passed to the critical-value helper. Defaults to 2.

    Returns
    -------
    None
        The interval is printed, not returned.
    """
    sample_variance = SS / (n - 1)
    std_error = np.sqrt(sample_variance / n)

    # Half-width of the interval; the same critical value serves both bounds.
    margin = __calculate_t_portion_rel__(n, alpha, tail_num) * std_error
    lower = round(M_d - margin, 4)
    upper = round(M_d + margin, 4)

    print(f'[confidence interval] {lower} ~ {upper}')

# 유형
1. repeated-measure t test (using statistic)
2. repeated-measure t test (using raw dataset)
3. sample standard deviation/mean difference/size 와 null hypothesis 기각 여부의 관계
4. independent-measure와 repeated-measure 비교

# repeated-measure t test (using statistic)

---
![''](./11_src/11_06_01.png)

![''](./11_src/11_06_02.png)

In [6]:
n = 12
SS = 33
M_d = 1.2

In [7]:
ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:2.4, critical_region:t > 2.201 or t < -2.201
=> null hypothesis rejection [True]


---
![''](./11_src/11_08_01.png)

In [9]:
n = 16
SS = 135
M_d = 2.6

In [10]:
ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:3.466666666666667, critical_region:t > 2.131 or t < -2.131
=> null hypothesis rejection [True]


In [12]:
cohens_d_rel_from_stats(M_d, SS, n)

0.867

---
![''](./11_src/11_09_01.png)

![''](./11_src/11_09_02.png)

In [15]:
n = 25
SS = 150
M_d = 1.32

In [16]:
ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:2.64, critical_region:t > 2.064 or t < -2.064
=> null hypothesis rejection [True]


In [19]:
r_squared_rel(M_d, SS, n)

0.225

---
![''](./11_src/11_10_01.png)

In [39]:
# Number of difference scores.
n = 20
# 125 is used here as the sample variance of the difference scores:
# variance = SS / (n - 1), hence SS = variance * (n - 1).
SS = 125 * (n - 1)
# Mean of the difference scores.
M_d = 4.8

n, SS, M_d

(20, 2375, 4.8)

In [40]:
ttest_rel_from_stats(M_d, SS, n, tail_num=1)

[one tail, alpha 0.05] t_statistic:1.92, critical_region:t > 1.729
=> null hypothesis rejection [True]


In [41]:
confidence_interval_rel_from_stats(M_d, SS, n, alpha=0.2)

[confidence interval] 1.48 ~ 8.12


---
![''](./11_src/11_11_01.png)

![''](./11_src/11_11_02.png)

In [42]:
n = 16
SS = 2940
M_d = 21

n, SS, M_d

(16, 2940, 21)

In [43]:
ttest_rel_from_stats(M_d, SS, n, alpha=0.01)

[two tail, alpha 0.01] t_statistic:6.0, critical_region:t > 2.947 or t < -2.947
=> null hypothesis rejection [True]


In [44]:
confidence_interval_rel_from_stats(M_d, SS, n)

[confidence interval] 13.5415 ~ 28.4585


---
![''](./11_src/11_12_01.png)

In [46]:
n = 16
SS = 2940
M_d = 21

n, SS, M_d

(16, 2940, 21)

In [47]:
ttest_rel_from_stats(M_d, SS, n, alpha=0.01)

[two tail, alpha 0.01] t_statistic:6.0, critical_region:t > 2.947 or t < -2.947
=> null hypothesis rejection [True]


In [48]:
cohens_d_rel_from_stats(M_d, SS, n)

1.5

---
![''](./11_src/11_13_01.png)

In [50]:
# s is the sample standard deviation of the difference scores.
s = 2
n = 25
# Recover the sum of squares from s: s**2 = SS / (n - 1).
SS = s**2 * (n - 1)
M_d = 2.7

n, SS, M_d

(25, 96, 2.7)

In [52]:
ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:6.75, critical_region:t > 2.064 or t < -2.064
=> null hypothesis rejection [True]


---
![''](./11_src/11_14_01.png)

![''](./11_src/11_14_02.png)

In [53]:
n = 9
SS = 288
M_d = 7

n, SS, M_d

(9, 288, 7)

In [54]:
ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:3.5, critical_region:t > 2.306 or t < -2.306
=> null hypothesis rejection [True]


In [55]:
confidence_interval_rel_from_stats(M_d, SS, n)

[confidence interval] 2.388 ~ 11.612


# repeated-measure t test (using raw dataset)

---
![''](./11_src/11_07_01.png)

In [84]:
exp_df = pd.DataFrame({'A':[7, 2, 4, 5, 5, 3], 
                       'B':[8, 9, 6, 7, 6, 8]})
exp_df

Unnamed: 0,A,B
0,7,8
1,2,9
2,4,6
3,5,7
4,5,6
5,3,8


In [85]:
# Difference score for every pair (A minus B).
diff_sr = exp_df['A'] - exp_df['B']

n = diff_sr.size
M_d = diff_sr.mean()
# Computational formula: SS = sum(D^2) - (sum(D))^2 / n
SS = (diff_sr ** 2).sum() - diff_sr.sum() ** 2 / n

n, M_d, SS

(6, -3.0, 30.0)

In [86]:
ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:-3.0, critical_region:t > 2.571 or t < -2.571
=> null hypothesis rejection [True]


In [87]:
stats.ttest_rel(exp_df.A, exp_df.B)

Ttest_relResult(statistic=-3.0, pvalue=0.03009924789746257)

---
![''](./11_src/11_15_01.png)

In [88]:
exp_df = pd.DataFrame({'A':[4, 6, 5, 5, 6, 4, 3, 8, 6], 
                       'B':[7, 7, 8, 9, 9, 7, 9, 9, 9]})
exp_df

Unnamed: 0,A,B
0,4,7
1,6,7
2,5,8
3,5,9
4,6,9
5,4,7
6,3,9
7,8,9
8,6,9


In [89]:
# Difference score for every pair (A minus B).
diff_sr = exp_df['A'] - exp_df['B']

n = diff_sr.size
M_d = diff_sr.mean()
# Computational formula: SS = sum(D^2) - (sum(D))^2 / n
SS = (diff_sr ** 2).sum() - diff_sr.sum() ** 2 / n

n, M_d, SS

(9, -3.0, 18.0)

In [90]:
ttest_rel_from_stats(M_d, SS, n, alpha=0.01)

[two tail, alpha 0.01] t_statistic:-6.0, critical_region:t > 3.355 or t < -3.355
=> null hypothesis rejection [True]


In [91]:
stats.ttest_rel(exp_df.A, exp_df.B)

Ttest_relResult(statistic=-6.0, pvalue=0.0003233932218851488)

---
![''](./11_src/11_16_01.png)

![''](./11_src/11_16_02.png)

In [92]:
exp_df = pd.DataFrame({'A':[94, 70, 52, 83, 46, 117, 69, 39, 51, 73], 
                       'B':[59, 61, 47, 60, 35, 92, 53, 30, 56, 61]})
exp_df

Unnamed: 0,A,B
0,94,59
1,70,61
2,52,47
3,83,60
4,46,35
5,117,92
6,69,53
7,39,30
8,51,56
9,73,61


In [93]:
# Difference score for every pair (A minus B).
diff_sr = exp_df['A'] - exp_df['B']

n = diff_sr.size
M_d = diff_sr.mean()
# Computational formula: SS = sum(D^2) - (sum(D))^2 / n
SS = (diff_sr ** 2).sum() - diff_sr.sum() ** 2 / n

n, M_d, SS

(10, 14.0, 1152.0)

In [94]:
ttest_rel_from_stats(M_d, SS, n, alpha=0.01)

[two tail, alpha 0.01] t_statistic:3.913118960624632, critical_region:t > 3.25 or t < -3.25
=> null hypothesis rejection [True]


In [95]:
stats.ttest_rel(exp_df.A, exp_df.B)

Ttest_relResult(statistic=3.913118960624632, pvalue=0.00354788605082344)

In [96]:
r_squared_rel(M_d, SS, n)

0.6298

# sample standard deviation/mean difference/size 와 null hypothesis 기각 여부의 관계

---
![''](./11_src/11_17_01.png)

In [98]:
# s is the sample standard deviation of the difference scores; only s
# differs between this cell and the next one (n and M_d are held fixed).
s = 4
n, M_d = 16, 3
# Recover the sum of squares from s: s**2 = SS / (n - 1).
SS = s**2 * (n - 1)

ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:3.0, critical_region:t > 2.131 or t < -2.131
=> null hypothesis rejection [True]


In [99]:
s = 12
n, M_d = 16, 3
SS = s**2 * (n - 1)

ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:1.0, critical_region:t > 2.131 or t < -2.131
=> null hypothesis rejection [False]


> repeated-measures t test에서 sample std가 증가하면 standard error($s_{M_D} = s / \sqrt{n}$)가 커져 t 값이 작아지므로, null hypothesis 기각 가능성이 감소한다.

---
![''](./11_src/11_18_01.png)

![''](./11_src/11_18_02.png)

In [100]:
s = 8
n, M_d = 16, 4
SS = s**2 * (n - 1)

ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:2.0, critical_region:t > 2.131 or t < -2.131
=> null hypothesis rejection [False]


In [101]:
s = 8
n, M_d = 16, 10
SS = s**2 * (n - 1)

ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:5.0, critical_region:t > 2.131 or t < -2.131
=> null hypothesis rejection [True]


> repeated-measures t test에서 sample mean difference가 증가하면 t 값의 분자($M_D$)가 커져 t 값이 커지므로, null hypothesis 기각 가능성이 증가한다.

---
![''](./11_src/11_19_01.png)

In [102]:
s = 4
n, M_d = 4, 3
SS = s**2 * (n - 1)

ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:1.5, critical_region:t > 3.182 or t < -3.182
=> null hypothesis rejection [False]


In [103]:
s = 4
n, M_d = 16, 3
SS = s**2 * (n - 1)

ttest_rel_from_stats(M_d, SS, n)

[two tail, alpha 0.05] t_statistic:3.0, critical_region:t > 2.131 or t < -2.131
=> null hypothesis rejection [True]


> repeated-measures t test에서 sample size가 증가하면 standard error($s_{M_D} = s / \sqrt{n}$)가 작아져 t 값이 커지고, df가 커져 critical value도 작아지므로, null hypothesis 기각 가능성이 증가한다.

# independent-measure와 repeated-measure 비교

---
![''](./11_src/11_21_01.png)

![''](./11_src/11_21_02.png)

---
![''](./11_src/11_22_01.png)
![''](./11_src/11_22_02.png)