# Student's t-test

1. One-sample t-test
2. Independent two-sample t-test
3. Paired samples t-test

---

## Import Packages
- Visual Python: Data Analysis > Import

In [None]:
# Visual Python: Data Analysis > Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

## 1 One-sample t-test

In [None]:
# Visual Python: Data Analysis > File
# Read the one-sample t-test dataset; the bare name on the last line makes
# the loaded frame the cell's displayed output.
df1 = pd.read_csv(filepath_or_buffer='./data/08_1_일표본t검정.csv')
df1

In [None]:
# Visual Python: Student's t-test
# One-sample t-test
# Tests the mean of the '신장' column against a fixed test value and shows
# three tables: a Shapiro-Wilk normality check, descriptive statistics, and
# the t-test result with its confidence interval.
from IPython.display import Markdown, display
from scipy import stats

# Rows with a missing value in any column are removed before the analysis.
vp_df = df1.dropna().copy()

# Bind the analysed column and the test settings once so that the test call
# and the reported table cannot drift apart.
_sample = vp_df['신장']
_test_value = 150
_conf_level = 0.95
_side = 'two-sided'

# Normality test (Shapiro-Wilk)
_normality = stats.shapiro(_sample)
display(Markdown('### Normality test (Shapiro-Wilk)'))
display(pd.DataFrame(
    data={
        'Statistic': _normality.statistic,
        'p-value': _normality.pvalue,
    },
    index=['Normality test (Shapiro-Wilk)'],
))

# Statistics
display(Markdown('### Statistics'))
_n = _sample.size
_mean = _sample.mean()
_std = _sample.std()
display(pd.DataFrame(
    data={
        'N': _n,
        'Mean': _mean,
        'Std. Deviation': _std,
        # standard error of the mean = sample std / sqrt(n)
        'Std. Error Mean': _std / np.sqrt(_n),
    },
    index=['Statistics'],
))

# One-sample t-test
_res = stats.ttest_1samp(_sample, popmean=_test_value, alternative=_side)
_lower, _upper = _res.confidence_interval(confidence_level=_conf_level)
display(Markdown('### One-sample t-test'))
display(pd.DataFrame(
    data={
        'Statistic': _res.statistic,
        'dof': _res.df,
        'Alternative': _side,
        'p-value': _res.pvalue,
        'Test Value': _test_value,
        'Mean difference': _mean - _test_value,
        'Confidence interval': _conf_level,
        'Lower': _lower,
        'Upper': _upper,
    },
    index=['One-sample t-test'],
))

## 2 Independent two-sample t-test

In [None]:
# Visual Python: Data Analysis > File
# Read the independent two-sample t-test dataset; the bare name on the last
# line makes the loaded frame the cell's displayed output.
df2 = pd.read_csv(filepath_or_buffer='./data/08_2_독립표본t검정.csv')
df2

In [None]:
# Visual Python: Student's t-test
# Independent two-sample t-test
# Compares '수학성적' between the '남성' and '여성' rows of '성별' and shows
# four tables: per-group Shapiro-Wilk normality, Levene equal-variance test,
# descriptive statistics, and the t-test with and without the equal-variance
# assumption.
from IPython.display import Markdown, display
from scipy import stats

# One score series per group, with missing scores removed.
vp_df1 = df2.loc[df2['성별'] == '남성', '수학성적'].dropna().copy()
vp_df2 = df2.loc[df2['성별'] == '여성', '수학성적'].dropna().copy()

_groups = (vp_df1, vp_df2)
_labels = ['Variable1', 'Variable2']
_side = 'two-sided'

# Normality test (Shapiro-Wilk)
_norm1, _norm2 = (stats.shapiro(_g) for _g in _groups)
display(Markdown('### Normality test (Shapiro-Wilk)'))
display(pd.DataFrame(
    data={
        'Statistic': [_norm1.statistic, _norm2.statistic],
        'p-value': [_norm1.pvalue, _norm2.pvalue],
    },
    index=[['Normality test (Shapiro-Wilk)'] * 2, _labels],
))

# Equal Variance test (Levene)
display(Markdown('### Equal Variance test (Levene)'))
_res = stats.levene(vp_df1, vp_df2, center='mean')
display(pd.DataFrame(
    data={'Statistic': _res.statistic, 'p-value': _res.pvalue},
    index=['Equal Variance test (Levene)'],
))

# Statistics
display(Markdown('### Statistics'))
_sizes = [_g.size for _g in _groups]
_means = [_g.mean() for _g in _groups]
_stds = [_g.std() for _g in _groups]
display(pd.DataFrame(
    data={
        'N': _sizes,
        'Mean': _means,
        'Std. Deviation': _stds,
        # standard error of the mean = sample std / sqrt(n), per group
        'Std. Error mean': [_s / np.sqrt(_n) for _s, _n in zip(_stds, _sizes)],
    },
    index=[['Statistics'] * 2, _labels],
))

# Independent two-sample t-test
# _res1 assumes equal variances (Student); _res2 does not (Welch).
_res1, _res2 = (
    stats.ttest_ind(vp_df1, vp_df2, equal_var=_equal, alternative=_side)
    for _equal in (True, False)
)
display(Markdown('### Independent two-sample t-test'))
_mean_diff = _means[0] - _means[1]
display(pd.DataFrame(
    data={
        'Statistic': [_res1.statistic, _res2.statistic],
        'Alternative': [_side] * 2,
        'p-value': [_res1.pvalue, _res2.pvalue],
        'Mean difference': [_mean_diff] * 2,
    },
    index=[
        ['Independent two-sample t-test'] * 2,
        ['Equal variance'] * 2,
        [True, False],
    ],
))
display(Markdown("If equal_var is False, perform Welch's t-test, which does not assume equal population variance"))

## 3 Paired samples t-test

In [None]:
# Visual Python: Data Analysis > File
# Read the paired samples t-test dataset; the bare name on the last line
# makes the loaded frame the cell's displayed output.
df3 = pd.read_csv(filepath_or_buffer='./data/08_3_대응표본t검정.csv')
df3

In [None]:
# Visual Python: Student's t-test
# Paired samples t-test
# Tests whether the mean of ('사전체력' - '사후체력') differs from zero and
# shows three tables: a Shapiro-Wilk normality check on the paired
# differences, descriptive statistics, and the t-test result with its
# confidence interval.
from IPython.display import display, Markdown
from scipy import stats

# Drop incomplete rows and renumber the remaining rows 0..n-1 in one explicit
# step. Both columns come from this same frame, so they are already aligned
# row by row; no in-place index edits on column selections (and no
# error-swallowing try/except) are needed to keep the pairs matched.
# NOTE(review): dropna() removes rows with a missing value in ANY column, not
# only the two analysed ones — confirm that is intended for this dataset.
vp_df = df3.dropna().reset_index(drop=True)

# A missing column now raises KeyError here instead of being silently ignored.
_pre = vp_df['사전체력']
_post = vp_df['사후체력']
_diff = _pre - _post          # paired differences, computed once and reused

# Normality test (Shapiro-Wilk)
_res = stats.shapiro(_diff)
display(Markdown('### Normality test (Shapiro-Wilk)'))
display(pd.DataFrame(data={'Statistic':_res.statistic,'p-value':_res.pvalue},
                     index=['Normality test (Shapiro-Wilk): Paired differences']))

# Statistics
display(Markdown('### Statistics'))
_series = [_pre, _post, _diff]
display(pd.DataFrame(data={'N':[_s.size for _s in _series],
                           'Mean':[_s.mean() for _s in _series],
                           'Std. Deviation':[_s.std() for _s in _series],
                           # standard error of the mean = sample std / sqrt(n)
                           'Std. Error mean':[_s.std()/np.sqrt(_s.size) for _s in _series]},
                     index=[['Statistics' for i in range(3)],['Variable1','Variable2','Paired differences']]))

# Paired samples t-test
_res = stats.ttest_rel(_pre, _post, alternative='two-sided')
_lower, _upper = _res.confidence_interval(confidence_level=0.95)
display(Markdown('### Paired samples t-test'))
display(pd.DataFrame(data={'Statistic':_res.statistic,'dof':_res.df,'Alternative':'two-sided',
                           'p-value':_res.pvalue,'Mean difference':_diff.mean(),
                           'Confidence interval':0.95,'Lower':_lower,'Upper':_upper},
                     index=['Paired samples t-test']))

---

In [None]:
# End of file