# Normality test

1. Shapiro-Wilk test
2. Anderson-Darling test
3. Kolmogorov-Smirnov test
4. D'Agostino and Pearson's test
5. Jarque-Bera test

---

In [None]:
# Visual Python: Data Analysis > Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
from matplotlib import rcParams

# Apply both plotting defaults in one call.
#   font.family        - font family used for all figure text.
#                        NOTE(review): presumably chosen so the Korean column
#                        label renders; confirm the font is installed.
#   axes.unicode_minus - False draws negative tick labels with the ASCII
#                        hyphen instead of the Unicode minus sign.
rcParams.update({
    'font.family': 'New Gulim',
    'axes.unicode_minus': False,
})

## Read data
- Visual Python: Data Analysis > File

In [None]:
# Visual Python: Data Analysis > File
# Load the sample data; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='./data/03_1_정규성검정.csv')
# Bare expression on the last line of the cell so the notebook renders the frame.
df

## 1. Shapiro-Wilk test
- Perform the Shapiro-Wilk test for normality.

In [None]:
# Visual Python: Normality test
# Shapiro-Wilk test
import warnings

import seaborn as sns
from IPython.display import Markdown, display
from scipy import stats

# Work on a copy from which every row containing a missing value (in any
# column) has been removed; `df` itself is left untouched.
vp_df = df.dropna().copy()

# Normality test (Shapiro-Wilk)
_res = stats.shapiro(vp_df['만족감'])
display(Markdown('### Normality test (Shapiro-Wilk)'))
# One-row summary table: test statistic and p-value.
display(
    pd.DataFrame(
        {'Statistic': _res.statistic, 'p-value': _res.pvalue},
        index=['Normality test (Shapiro-Wilk)'],
    )
)

# Charts: three diagnostic panels on a 2x2 grid (the fourth slot stays empty).
with warnings.catch_warnings():
    # Suppress every warning category while the figure is drawn.
    warnings.simplefilter('ignore', Warning)

    # Panel 1: density-scaled histogram with a KDE curve.
    plt.subplot(221)
    sns.histplot(vp_df['만족감'], stat='density', kde=True)
    plt.title('Histogram')

    # Panel 2: vertical boxplot.
    plt.subplot(222)
    sns.boxplot(y=vp_df['만족감'])
    plt.title('Boxplot')

    # Panel 3: probability plot drawn through pyplot; the title is set
    # afterwards so it reads 'Q-Q Plot'.
    plt.subplot(223)
    stats.probplot(vp_df['만족감'], plot=plt)
    plt.title('Q-Q Plot')

    plt.tight_layout()
    plt.show()

## 2. Anderson-Darling test
- Perform the Anderson-Darling test for data coming from a particular distribution (here, the normal distribution).

In [None]:
# Visual Python: Normality test
# Anderson-Darling test
import warnings

import seaborn as sns
from IPython.display import Markdown, display
from scipy import stats

# Work on a copy from which every row containing a missing value (in any
# column) has been removed; `df` itself is left untouched.
vp_df = df.dropna().copy()

# Normality test (Anderson-Darling)
_res = stats.anderson(vp_df['만족감'])
display(Markdown('### Normality test (Anderson-Darling)'))
# One-row summary table. Each value is wrapped in a list so that the arrays of
# critical values and significance levels are stored whole in a single cell.
display(
    pd.DataFrame(
        {
            'Statistic': [_res.statistic],
            'Critical values': [_res.critical_values],
            'Significance level(%)': [_res.significance_level],
        },
        index=['Normality test (Anderson-Darling)'],
    )
)

# Charts: three diagnostic panels on a 2x2 grid (the fourth slot stays empty).
with warnings.catch_warnings():
    # Suppress every warning category while the figure is drawn.
    warnings.simplefilter('ignore', Warning)

    # Panel 1: density-scaled histogram with a KDE curve.
    plt.subplot(221)
    sns.histplot(vp_df['만족감'], stat='density', kde=True)
    plt.title('Histogram')

    # Panel 2: vertical boxplot.
    plt.subplot(222)
    sns.boxplot(y=vp_df['만족감'])
    plt.title('Boxplot')

    # Panel 3: probability plot drawn through pyplot; the title is set
    # afterwards so it reads 'Q-Q Plot'.
    plt.subplot(223)
    stats.probplot(vp_df['만족감'], plot=plt)
    plt.title('Q-Q Plot')

    plt.tight_layout()
    plt.show()

## 3. Kolmogorov-Smirnov test
- Perform the (one-sample or two-sample) Kolmogorov-Smirnov test for goodness of fit.

In [None]:
# Visual Python: Normality test
# Kolmogorov-Smirnov test
# Work on a copy from which every row containing a missing value (in any
# column) has been removed; `df` itself is left untouched.
vp_df = df.dropna().copy()

# Normality test (Kolmogorov-Smirnov)
from IPython.display import display, Markdown
from scipy import stats
_sample = vp_df['만족감']
# Without `args`, kstest compares the data against the *standard* normal
# N(0, 1), so any variable that is not already standardized is rejected because
# of its location/scale rather than its shape. Pass the sample mean and
# standard deviation so the reference is a normal distribution with the same
# location and scale as the data.
# NOTE: because these parameters are estimated from the same sample, the
# classical KS p-value is conservative; a Lilliefors-corrected test would be
# needed for an exact significance level.
_res = stats.kstest(_sample, 'norm',
                    args=(_sample.mean(), _sample.std(ddof=1)),
                    alternative='two-sided')
display(Markdown('### Normality test (Kolmogorov-Smirnov)'))
# One-row summary table: test statistic and p-value.
display(pd.DataFrame(data={'Statistic':_res.statistic,'p-value':_res.pvalue},
                     index=['Normality test (Kolmogorov-Smirnov)']))

# Charts: three diagnostic panels on a 2x2 grid (the fourth slot stays empty).
import seaborn as sns
import warnings
with warnings.catch_warnings():
    # Suppress every warning category while the figure is drawn.
    warnings.simplefilter(action='ignore', category=Warning)

    # Panel 1: density-scaled histogram with a KDE curve.
    plt.subplot(2,2,1)
    sns.histplot(_sample, stat='density', kde=True)
    plt.title('Histogram')

    # Panel 2: vertical boxplot.
    plt.subplot(2,2,2)
    sns.boxplot(y=_sample)
    plt.title('Boxplot')

    # Panel 3: probability plot drawn through pyplot; the title is set
    # afterwards so it reads 'Q-Q Plot'.
    plt.subplot(2,2,3)
    stats.probplot(_sample, plot=plt)
    plt.title('Q-Q Plot')

    plt.tight_layout()
    plt.show()

## 4. D'Agostino and Pearson's test
- Test whether a sample differs from a normal distribution.

In [None]:
# Visual Python: Normality test
# D'Agostino and Pearson's test
import warnings

import seaborn as sns
from IPython.display import Markdown, display
from scipy import stats

# Work on a copy from which every row containing a missing value (in any
# column) has been removed; `df` itself is left untouched.
vp_df = df.dropna().copy()

# Normality test (D Agostino and Pearson)
_res = stats.normaltest(vp_df['만족감'])
display(Markdown('### Normality test (D Agostino and Pearson)'))
# One-row summary table: test statistic and p-value.
display(
    pd.DataFrame(
        {'Statistic': _res.statistic, 'p-value': _res.pvalue},
        index=['Normality test (D Agostino and Pearson)'],
    )
)

# Charts: three diagnostic panels on a 2x2 grid (the fourth slot stays empty).
with warnings.catch_warnings():
    # Suppress every warning category while the figure is drawn.
    warnings.simplefilter('ignore', Warning)

    # Panel 1: density-scaled histogram with a KDE curve.
    plt.subplot(221)
    sns.histplot(vp_df['만족감'], stat='density', kde=True)
    plt.title('Histogram')

    # Panel 2: vertical boxplot.
    plt.subplot(222)
    sns.boxplot(y=vp_df['만족감'])
    plt.title('Boxplot')

    # Panel 3: probability plot drawn through pyplot; the title is set
    # afterwards so it reads 'Q-Q Plot'.
    plt.subplot(223)
    stats.probplot(vp_df['만족감'], plot=plt)
    plt.title('Q-Q Plot')

    plt.tight_layout()
    plt.show()

## 5. Jarque-Bera test
- The Jarque-Bera test tests whether the sample data has the skewness and kurtosis matching a normal distribution.

In [None]:
# Visual Python: Normality test
# Jarque-Bera test
import warnings

import seaborn as sns
from IPython.display import Markdown, display
from scipy import stats

# Work on a copy from which every row containing a missing value (in any
# column) has been removed; `df` itself is left untouched.
vp_df = df.dropna().copy()

# Normality test (Jarque-Bera)
_res = stats.jarque_bera(vp_df['만족감'])
display(Markdown('### Normality test (Jarque-Bera)'))
# One-row summary table: test statistic and p-value.
display(
    pd.DataFrame(
        {'Statistic': _res.statistic, 'p-value': _res.pvalue},
        index=['Normality test (Jarque-Bera)'],
    )
)

# Charts: three diagnostic panels on a 2x2 grid (the fourth slot stays empty).
with warnings.catch_warnings():
    # Suppress every warning category while the figure is drawn.
    warnings.simplefilter('ignore', Warning)

    # Panel 1: density-scaled histogram with a KDE curve.
    plt.subplot(221)
    sns.histplot(vp_df['만족감'], stat='density', kde=True)
    plt.title('Histogram')

    # Panel 2: vertical boxplot.
    plt.subplot(222)
    sns.boxplot(y=vp_df['만족감'])
    plt.title('Boxplot')

    # Panel 3: probability plot drawn through pyplot; the title is set
    # afterwards so it reads 'Q-Q Plot'.
    plt.subplot(223)
    stats.probplot(vp_df['만족감'], plot=plt)
    plt.title('Q-Q Plot')

    plt.tight_layout()
    plt.show()

---

In [None]:
# End of file