<img src="https://i.esdrop.com/d/7o0dj05m8rnz/JNGCMedl18.png" width="45%">

# Student's t-test

1. One-sample t-test
2. Independent two-sample t-test
3. Paired samples t-test

---

## Import Packages
- Visual Python: Data Analysis > Import

In [1]:
# Visual Python: Data Analysis > Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

## 1 One-sample t-test

In [2]:
# Visual Python: Data Analysis > File
# Read the CSV used by the one-sample t-test section (path is relative to the notebook).
df1 = pd.read_csv(
    filepath_or_buffer="./data/08_1_일표본t검정.csv",
)
df1  # bare last expression -> rich DataFrame preview

Unnamed: 0,신장
0,148.0
1,150.0
2,149.0
3,144.0
4,152.0
5,150.0
6,155.0
7,147.0
8,148.0
9,151.0


In [3]:
# Visual Python: Student's t-test
# One-sample t-test: compares the mean of column '신장' with the test value 150 (two-sided),
# preceded by a Shapiro-Wilk normality check and a descriptive summary table.
from IPython.display import Markdown, display
from scipy import stats

# Analysis copy of df1 with every row that contains a missing value removed.
vp_df = df1.dropna().copy()
_sample = vp_df['신장']
_n = _sample.size
_sample_mean = _sample.mean()
_sample_sd = _sample.std()

# Normality test (Shapiro-Wilk)
_shapiro = stats.shapiro(_sample)
_normality = {'Statistic': _shapiro.statistic, 'p-value': _shapiro.pvalue}
display(Markdown('### Normality test (Shapiro-Wilk)'))
display(pd.DataFrame(data=_normality, index=['Normality test (Shapiro-Wilk)']))

# Statistics
_summary = {
    'N': _n,
    'Mean': _sample_mean,
    'Std. Deviation': _sample_sd,
    # Standard error of the mean = standard deviation / sqrt(N).
    'Std. Error Mean': _sample_sd / np.sqrt(_n),
}
display(Markdown('### Statistics'))
display(pd.DataFrame(data=_summary, index=['Statistics']))

# One-sample t-test
_res = stats.ttest_1samp(_sample, popmean=150, alternative='two-sided')
# NOTE: per the SciPy docs this interval is around the population mean itself,
# not around the 'Mean difference' shown in the same row.
_lower, _upper = _res.confidence_interval(confidence_level=0.95)
_ttest = {
    'Statistic': _res.statistic,
    'dof': _res.df,
    'Alternative': 'two-sided',
    'p-value': _res.pvalue,
    'Test Value': 150,
    'Mean difference': _sample_mean - 150,
    'Confidence interval': 0.95,
    'Lower': _lower,
    'Upper': _upper,
}
display(Markdown('### One-sample t-test'))
display(pd.DataFrame(data=_ttest, index=['One-sample t-test']))

### Normality test (Shapiro-Wilk)

Unnamed: 0,Statistic,p-value
Normality test (Shapiro-Wilk),0.907768,0.013077


### Statistics

Unnamed: 0,N,Mean,Std. Deviation,Std. Error Mean
Statistics,30,149.766667,4.847206,0.884975


### One-sample t-test

Unnamed: 0,Statistic,dof,Alternative,p-value,Test Value,Mean difference,Confidence interval,Lower,Upper
One-sample t-test,-0.263661,29,two-sided,0.793906,150,-0.233333,0.95,147.95669,151.576643


## 2 Independent two-sample t-test

In [4]:
# Visual Python: Data Analysis > File
# Read the CSV used by the independent two-sample t-test section (path is relative to the notebook).
df2 = pd.read_csv(
    filepath_or_buffer="./data/08_2_독립표본t검정.csv",
)
df2  # bare last expression -> rich DataFrame preview

Unnamed: 0,id,성별,수학성적,국어성적
0,1,남성,78.0,88.0
1,2,남성,84.0,76.0
2,3,남성,76.0,74.0
3,4,남성,88.0,90.0
4,5,남성,70.0,69.0
5,6,남성,75.0,74.0
6,7,남성,77.0,66.0
7,8,남성,79.0,77.0
8,9,남성,80.0,78.0
9,10,남성,84.0,85.0


In [5]:
# Visual Python: Student's t-test
# Independent two-sample t-test: compares '수학성적' between the rows of df2 where
# '성별' is '남성' (Variable1) and the rows where it is '여성' (Variable2).
from IPython.display import Markdown, display
from scipy import stats

# One Series per group, with missing scores removed.
vp_df1 = df2.loc[df2['성별'] == '남성', '수학성적'].dropna().copy()
vp_df2 = df2.loc[df2['성별'] == '여성', '수학성적'].dropna().copy()
_samples = (vp_df1, vp_df2)
_labels = ['Variable1', 'Variable2']

# Normality test (Shapiro-Wilk), run separately on each group
_shapiro1, _shapiro2 = (stats.shapiro(s) for s in _samples)
_normality = pd.DataFrame(
    data={
        'Statistic': [_shapiro1.statistic, _shapiro2.statistic],
        'p-value': [_shapiro1.pvalue, _shapiro2.pvalue],
    },
    index=[['Normality test (Shapiro-Wilk)'] * 2, _labels],
)
display(Markdown('### Normality test (Shapiro-Wilk)'))
display(_normality)

# Equal Variance test (Levene), centred on the group means
display(Markdown('### Equal Variance test (Levene)'))
_res = stats.levene(vp_df1, vp_df2, center='mean')
display(pd.DataFrame(
    data={'Statistic': _res.statistic, 'p-value': _res.pvalue},
    index=['Equal Variance test (Levene)'],
))

# Statistics
_descriptives = pd.DataFrame(
    data={
        'N': [s.size for s in _samples],
        'Mean': [s.mean() for s in _samples],
        'Std. Deviation': [s.std() for s in _samples],
        # Standard error of the mean = standard deviation / sqrt(N).
        'Std. Error mean': [s.std() / np.sqrt(s.size) for s in _samples],
    },
    index=[['Statistics'] * 2, _labels],
)
display(Markdown('### Statistics'))
display(_descriptives)

# Independent two-sample t-test: pooled-variance version first, Welch version second
_res1 = stats.ttest_ind(vp_df1, vp_df2, equal_var=True, alternative='two-sided')
_res2 = stats.ttest_ind(vp_df1, vp_df2, equal_var=False, alternative='two-sided')
_mean_gap = vp_df1.mean() - vp_df2.mean()
_ttest = pd.DataFrame(
    data={
        'Statistic': [_res1.statistic, _res2.statistic],
        'Alternative': ['two-sided'] * 2,
        'p-value': [_res1.pvalue, _res2.pvalue],
        'Mean difference': [_mean_gap] * 2,
    },
    index=[
        ['Independent two-sample t-test'] * 2,
        ['Equal variance'] * 2,
        [True, False],
    ],
)
display(Markdown('### Independent two-sample t-test'))
display(_ttest)
display(Markdown("If equal_var is False, perform Welch's t-test, which does not assume equal population variance"))

### Normality test (Shapiro-Wilk)

Unnamed: 0,Unnamed: 1,Statistic,p-value
Normality test (Shapiro-Wilk),Variable1,0.958391,0.66446
Normality test (Shapiro-Wilk),Variable2,0.932664,0.298956


### Equal Variance test (Levene)

Unnamed: 0,Statistic,p-value
Equal Variance test (Levene),1.556848,0.222462


### Statistics

Unnamed: 0,Unnamed: 1,N,Mean,Std. Deviation,Std. Error mean
Statistics,Variable1,15,80.666667,5.627314,1.452966
Statistics,Variable2,15,84.533333,7.927048,2.046755


### Independent two-sample t-test

Unnamed: 0,Unnamed: 1,Unnamed: 2,Statistic,Alternative,p-value,Mean difference
Independent two-sample t-test,Equal variance,True,-1.540478,two-sided,0.13467,-3.866667
Independent two-sample t-test,Equal variance,False,-1.540478,two-sided,0.135883,-3.866667


If equal_var is False, perform Welch's t-test, which does not assume equal population variance

## 3 Paired samples t-test

In [6]:
# Visual Python: Data Analysis > File
# Read the CSV used by the paired samples t-test section (path is relative to the notebook).
df3 = pd.read_csv(
    filepath_or_buffer="./data/08_3_대응표본t검정.csv",
)
df3  # bare last expression -> rich DataFrame preview

Unnamed: 0,id,사전체력,사후체력
0,1,50.0,36.67
1,2,50.0,61.67
2,3,67.5,85.00
3,4,95.0,75.00
4,5,67.5,75.00
...,...,...,...
145,146,90.0,83.33
146,147,72.5,75.00
147,148,57.5,66.67
148,149,80.0,88.33


In [7]:
# Visual Python: Student's t-test
# Paired samples t-test: compares '사전체력' with '사후체력' row by row (two-sided),
# preceded by a Shapiro-Wilk normality check on the paired differences and a
# descriptive summary table.
from IPython.display import display, Markdown
from scipy import stats

# Drop incomplete rows, then renumber the remaining rows once at frame level so
# both columns share the same 0..n-1 index.
# This replaces per-column `reset_index(..., inplace=True)` calls wrapped in bare
# `except: pass`: an in-place reset on a column selection only reaches the frame
# through pandas' internal item cache (it does nothing under Copy-on-Write), and
# the bare except would also hide real errors such as a missing column.
vp_df = df3.dropna().reset_index(drop=True)
_pre = vp_df['사전체력']
_post = vp_df['사후체력']
# Paired differences (pre - post); both Series come from the same frame, so they
# align row by row.
_diff = _pre - _post
_n = _diff.size

# Normality test (Shapiro-Wilk) on the paired differences
_shapiro = stats.shapiro(_diff)
display(Markdown('### Normality test (Shapiro-Wilk)'))
display(pd.DataFrame(data={'Statistic': _shapiro.statistic, 'p-value': _shapiro.pvalue},
                     index=['Normality test (Shapiro-Wilk): Paired differences']))

# Statistics
_series = (_pre, _post, _diff)
display(Markdown('### Statistics'))
display(pd.DataFrame(
    data={
        'N': [s.size for s in _series],
        'Mean': [s.mean() for s in _series],
        'Std. Deviation': [s.std() for s in _series],
        # Standard error of the mean = standard deviation / sqrt(N).
        'Std. Error mean': [s.std() / np.sqrt(s.size) for s in _series],
    },
    index=[['Statistics'] * 3, ['Variable1', 'Variable2', 'Paired differences']],
))

# Paired samples t-test
_res = stats.ttest_rel(_pre, _post, alternative='two-sided')
# Per the SciPy docs, this interval is for the difference in population means,
# so it belongs with the 'Mean difference' column.
_lower, _upper = _res.confidence_interval(confidence_level=0.95)
display(Markdown('### Paired samples t-test'))
display(pd.DataFrame(
    data={
        'Statistic': _res.statistic,
        'dof': _res.df,
        'Alternative': 'two-sided',
        'p-value': _res.pvalue,
        'Mean difference': _diff.mean(),
        'Confidence interval': 0.95,
        'Lower': _lower,
        'Upper': _upper,
    },
    index=['Paired samples t-test'],
))

### Normality test (Shapiro-Wilk)

Unnamed: 0,Statistic,p-value
Normality test (Shapiro-Wilk): Paired differences,0.993535,0.740626


### Statistics

Unnamed: 0,Unnamed: 1,N,Mean,Std. Deviation,Std. Error mean
Statistics,Variable1,150,64.066667,21.60806,1.764291
Statistics,Variable2,150,68.244867,16.007447,1.307003
Statistics,Paired differences,150,-4.1782,17.385172,1.419493


### Paired samples t-test

Unnamed: 0,Statistic,dof,Alternative,p-value,Mean difference,Confidence interval,Lower,Upper
Paired samples t-test,-2.943445,149,two-sided,0.003766,-4.1782,0.95,-6.983138,-1.373262


---

In [8]:
# End of file