In [4]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# Use the ggplot look for any matplotlib figures.
plt.style.use('ggplot')

# Seed NumPy's global RandomState. scipy.stats' rvs() draws from that
# generator when no random_state is passed, so "Restart Kernel -> Run All"
# yields the same samples and test statistics on every run.
SEED = 42
np.random.seed(SEED)

# 평균을 비교하는 검정 방법: 1-표본 · 독립표본 · 대응표본 t-검정

## 1-표본 t-검정

하나의 표본에서 구한 평균이 기준값(모평균 `popmean`)과 같은지 검정한다. 귀무가설은 "모집단의 평균은 `popmean`이다"이다.

### scipy.stats.ttest_1samp
- [바로가기](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_1samp.html#scipy.stats.ttest_1samp)
- 함수 원형
```
scipy.stats.ttest_1samp(
    a, 
    popmean, 
    axis=0, 
    nan_policy='propagate', 
    alternative='two-sided', 
    *, 
    keepdims=False
)
```

In [6]:
from scipy import stats
from scipy.stats import ttest_1samp

In [7]:
# Draw 50 observations from the uniform distribution on [0, 1]
# (loc=0, scale=1 are the defaults, spelled out here for the reader)
# and echo the sample as the cell output.
rvs = stats.uniform.rvs(loc=0, scale=1, size=50)
rvs

array([0.25691911, 0.43241898, 0.7326889 , 0.63158396, 0.2808299 ,
       0.25246772, 0.56543942, 0.82385406, 0.76467736, 0.4472907 ,
       0.86489482, 0.47100526, 0.51030858, 0.37065626, 0.18033817,
       0.64428006, 0.36896558, 0.55330826, 0.92948127, 0.02073572,
       0.35590362, 0.96954492, 0.46370323, 0.85590093, 0.50330489,
       0.76555636, 0.91585124, 0.16673639, 0.27592016, 0.90471617,
       0.0603725 , 0.45840118, 0.37958094, 0.39954663, 0.77094704,
       0.04780625, 0.48910092, 0.75954953, 0.8612784 , 0.0216658 ,
       0.68094188, 0.19456357, 0.44886555, 0.92208057, 0.86952715,
       0.10335217, 0.23461862, 0.77008103, 0.30421902, 0.2421083 ])

In [8]:
# One-sample t-test on `rvs`.
# H0: the mean of the population `rvs` was drawn from equals 0.5
# (two-sided alternative, the function's default).
ttest_1samp(rvs, popmean=0.5)

Ttest_1sampResult(statistic=0.15041820561146496, pvalue=0.8810521735195176)

In [10]:
# Replace `rvs` with 50 draws from the standard normal distribution
# (loc=0, scale=1 are the defaults) and test it against popmean=0.5.
# The generating mean is 0, so H0 "mean == 0.5" is false by construction.
rvs = stats.norm.rvs(loc=0, scale=1, size=50)
ttest_1samp(rvs, popmean=0.5)

Ttest_1sampResult(statistic=-3.15977004064562, pvalue=0.00270547830242588)

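## 독립표본 t-검정

서로 독립인 두 표본의 모평균이 같은지 검정한다. 기본값 `equal_var=True`는 두 집단의 분산이 같다고 가정하며, `equal_var=False`로 지정하면 등분산을 가정하지 않는 Welch의 t-검정을 수행한다.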

### scipy.stats.ttest_ind
- [바로가기](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html#scipy.stats.ttest_ind)
- 함수 원형
```
scipy.stats.ttest_ind(
    a, b, 
    axis=0, 
    equal_var=True, 
    nan_policy='propagate', 
    permutations=None, 
    random_state=None, 
    alternative='two-sided', 
    trim=0
)
```

In [11]:
from scipy.stats import ttest_ind

In [17]:
# Two separate draws of 500 observations each from the same normal
# distribution (mean 5, standard deviation 10); rvs1 is drawn first.
rvs1, rvs2 = (
    stats.norm.rvs(loc=5, scale=10, size=500)
    for _ in range(2)
)

In [18]:
# Independent two-sample t-test of rvs1 against rvs2.
# equal_var=True is the default (pooled variance); it is written out
# here to make the assumption visible.
ttest_ind(rvs1, rvs2, equal_var=True)

Ttest_indResult(statistic=-0.5100072468266222, pvalue=0.6101591522653469)

In [23]:
# Third sample: normal with mean 5, standard deviation 20, 100 observations.
rvs3 = stats.norm.rvs(loc=5, scale=20, size=100)

# Compare rvs1 with rvs3 twice: first with the pooled-variance test
# (equal_var=True, the default), then with equal_var=False (Welch's test).
for pooled in (True, False):
    print(ttest_ind(rvs1, rvs3, equal_var=pooled))

Ttest_indResult(statistic=-1.057508488707401, pvalue=0.2907065809390743)
Ttest_indResult(statistic=-0.6743455315767852, pvalue=0.501531096392113)


In [25]:
# Fourth sample: normal with mean 8, standard deviation 20, 100 observations.
rvs4 = stats.norm.rvs(loc=8, scale=20, size=100)

# Compare rvs1 with rvs4 twice: first with the pooled-variance test
# (equal_var=True, the default), then with equal_var=False (Welch's test).
for pooled in (True, False):
    print(ttest_ind(rvs1, rvs4, equal_var=pooled))

Ttest_indResult(statistic=-2.6730441206912032, pvalue=0.0077218051227199115)
Ttest_indResult(statistic=-1.727698920566838, pvalue=0.08688409419284876)


## 대응표본 t-검정

같은 대상에서 짝을 이루어 측정된 두 표본(예: 처치 전·후)의 평균 차이가 0인지 검정한다. 두 표본의 크기는 같아야 한다.

### scipy.stats.ttest_rel
- [바로가기](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html#scipy.stats.ttest_rel)
- 함수 원형
```
scipy.stats.ttest_rel(
    a, b, 
    axis=0, 
    nan_policy='propagate', 
    alternative='two-sided', 
    *, 
    keepdims=False
)
```

In [26]:
from scipy.stats import ttest_rel

In [27]:
# First sample: 500 draws from a normal with mean 5, standard deviation 10.
rvs1 = stats.norm.rvs(loc=5, scale=10, size=500)

# Second sample: a fresh draw from the same distribution plus small
# normal noise (standard deviation 0.2). It is generated independently of
# rvs1; it is not rvs1 with noise added.
rvs2 = stats.norm.rvs(loc=5, scale=10, size=500)
rvs2 = rvs2 + stats.norm.rvs(scale=0.2, size=500)

In [28]:
# Paired t-test of rvs1 against rvs2, called through the directly
# imported name like the other tests in this notebook.
ttest_rel(rvs1, rvs2)

Ttest_relResult(statistic=1.6087429083883615, pvalue=0.10830501950364764)

In [29]:
# Shifted sample: 500 draws from a normal with mean 8, standard deviation 10,
# plus small normal noise (standard deviation 0.2).
rvs3 = stats.norm.rvs(loc=8, scale=10, size=500)
rvs3 = rvs3 + stats.norm.rvs(scale=0.2, size=500)

In [30]:
# Paired t-test of rvs1 against the mean-8 sample rvs3, called through the
# directly imported name like the other tests in this notebook.
ttest_rel(rvs1, rvs3)

Ttest_relResult(statistic=-3.2730639987375207, pvalue=0.0011376410245255264)