# Testing for Normality — Applications with Python


#  Shapiro-Wilk test

In [6]:
import numpy as np
from scipy import stats
# Seed NumPy's legacy global generator so the sample drawn below is the same
# on every run of this cell.
np.random.seed(12345678)

# Draw 100 observations from a normal distribution with loc=5 and scale=3,
# then run the Shapiro-Wilk test on them.
x = stats.norm.rvs(loc=5, scale=3, size=100)
shapiro_test = stats.shapiro(x)

# Show the whole result object first, then its two fields, one per line.
print(shapiro_test, shapiro_test.statistic, shapiro_test.pvalue, sep="\n")


ShapiroResult(statistic=0.9772805571556091, pvalue=0.08144091814756393)
0.9772805571556091
0.08144091814756393


# Kolmogorov-Smirnov test

In [16]:
# Draw four samples of increasing size from a normal distribution with loc=5
# and scale=3.  The generator is consumed left to right, so the samples are
# drawn in the same order as the names on the left-hand side.
# NOTE: no seed is set in this cell, so the values drawn depend on the state
# of the random generator at the time the cell runs.
x_10, x_50, x_100, x_1000 = (
    stats.norm.rvs(loc=5, scale=3, size=n_obs) for n_obs in (10, 50, 100, 1000)
)

# Two small hand-written integer lists whose value ranges do not overlap.
data_1 = list(range(1, 6))
data_2 = list(range(6, 11))

In [17]:
# Two-sample KS test: the two small integer lists are compared with each
# other (no reference distribution is involved in this call).
print(stats.kstest(data_1, data_2))

# One-sample KS tests: each simulated sample is compared with a normal
# distribution with mean = 5 and sd = 3, passed to 'norm' as args=(5, 3).
# One result is printed per line, from the smallest sample to the largest.
print(
    *(
        stats.kstest(sample, 'norm', args=(5, 3))
        for sample in (x_10, x_50, x_100, x_1000)
    ),
    sep="\n",
)

KstestResult(statistic=1.0, pvalue=0.007936507936507936)
KstestResult(statistic=0.1254726070423322, pvalue=0.9914238282974819)
KstestResult(statistic=0.12275524064207721, pvalue=0.4056725943853564)
KstestResult(statistic=0.0677489896149095, pvalue=0.722325628728014)
KstestResult(statistic=0.01960583416307521, pvalue=0.8293179503932067)


# Anderson-Darling test

In [11]:
# Run the Anderson-Darling test (stats.anderson) with dist='norm' on each
# simulated sample, from the smallest to the largest.  Unlike the kstest
# calls, no location or scale is passed here.
(
    anderson_results_10,
    anderson_results_50,
    anderson_results_100,
    anderson_results_1000,
) = (
    stats.anderson(sample, dist='norm')
    for sample in (x_10, x_50, x_100, x_1000)
)

# Print one result per line; each shows the statistic together with the
# critical values and their significance levels.
print(
    anderson_results_10,
    anderson_results_50,
    anderson_results_100,
    anderson_results_1000,
    sep="\n",
)

AndersonResult(statistic=0.26644783748957934, critical_values=array([0.501, 0.57 , 0.684, 0.798, 0.95 ]), significance_level=array([15. , 10. ,  5. ,  2.5,  1. ]))
AndersonResult(statistic=0.29418336318070715, critical_values=array([0.538, 0.613, 0.736, 0.858, 1.021]), significance_level=array([15. , 10. ,  5. ,  2.5,  1. ]))
AndersonResult(statistic=0.1591254293426232, critical_values=array([0.555, 0.632, 0.759, 0.885, 1.053]), significance_level=array([15. , 10. ,  5. ,  2.5,  1. ]))
AndersonResult(statistic=0.3318263625445752, critical_values=array([0.574, 0.653, 0.784, 0.914, 1.088]), significance_level=array([15. , 10. ,  5. ,  2.5,  1. ]))
