<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

# Frequentist methods of normality tests

In [1]:
import numpy as np, pandas as pd, scipy as sp
from math import *
from scipy import stats
from scipy.stats import norm
import statsmodels.api as sm
from scipy import stats,random
import pandas_datareader.data as web
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style="whitegrid")
p = print

In [2]:
n_samples = 2500
# data = stats.norm.rvs(size=n_samples,random_state=1234)
data = np.random.normal(loc=0.0,scale=1.0,size=n_samples)

In [3]:
stats.shapiro(data)

(0.9996313452720642, 0.945408046245575)

In [4]:
result = stats.shapiro(data)
result

(0.9996313452720642, 0.945408046245575)

In [5]:
p(f'W-test: {result[0]}, p-value: {result[1]}')

W-test: 0.9996313452720642, p-value: 0.945408046245575


The p-value is greater than the 0.05 alpha level, and so the sample came from a normally distriuted poopulation, as expected since the we generated a normally distribute data.

In [6]:
def shapiro_normality_test(ticker, sample):
    """
    H0: The null hypothesis
        That a sample came from a normally distributed population.
    """
    
    p(f'ticker= {ticker}, W-test and p-value') 
    p(stats.shapiro(sample))
    # normality test
    stat, p_value = stats.shapiro(sample)
    p(f'Statistics= {stat}, p-value= {p_value}\n')
    # interpretation
    alpha = 0.05
    if p_value > alpha:
        p(f'{ticker} sample looks Gaussian (fail to reject H0)')
    else:
        p(f'{ticker} sample does not look Gaussian (reject H0)')
    return

In [7]:
n_samples = 2500
# data = stats.norm.rvs(size=n_samples,random_state=1234)
data = np.random.normal(loc=0.0,scale=1.0,size=n_samples)

In [8]:
shapiro_normality_test('Simulated data',data)

ticker= Simulated data, W-test and p-value
(0.9992918968200684, 0.47128817439079285)
Statistics= 0.9992918968200684, p-value= 0.47128817439079285

Simulated data sample looks Gaussian (fail to reject H0)


In [9]:
# tickers = ['AAPL','IBM','MSFT']
df = web.DataReader('AAPL','yahoo','01/01/2007','10/18/2019')
rets = df['Adj Close'].pct_change().dropna()

In [10]:
shapiro_normality_test('AAPL', rets)

ticker= AAPL, W-test and p-value
(0.938154399394989, 1.103788047478426e-34)
Statistics= 0.938154399394989, p-value= 1.103788047478426e-34

AAPL sample does not look Gaussian (reject H0)


In [11]:
df = web.DataReader('IBM','yahoo','01/01/2007','10/18/2019')
rets = df['Adj Close'].pct_change().dropna()

In [12]:
shapiro_normality_test('IBM', rets)

ticker= IBM, W-test and p-value
(0.928572952747345, 1.0501538756446162e-36)
Statistics= 0.928572952747345, p-value= 1.0501538756446162e-36

IBM sample does not look Gaussian (reject H0)


In [13]:
df = web.DataReader('MSFT','yahoo','01/01/2007','10/18/2019')
rets = df['Adj Close'].pct_change().dropna()

In [14]:
shapiro_normality_test('MSFT', rets)

ticker= MSFT, W-test and p-value
(0.910578191280365, 5.597346585899049e-40)
Statistics= 0.910578191280365, p-value= 5.597346585899049e-40

MSFT sample does not look Gaussian (reject H0)


In [15]:
n_samples = 2500
data = stats.norm.rvs(size=n_samples,random_state=1234)

In [16]:
result = stats.anderson(data)
result

AndersonResult(statistic=0.5129308097734793, critical_values=array([0.575, 0.655, 0.786, 0.917, 1.09 ]), significance_level=array([15. , 10. ,  5. ,  2.5,  1. ]))

The statistic 0.5129 is less than critical value 0.786 at the significance level of 5.0%. 

In [17]:
result.statistic

0.5129308097734793

In [18]:
result.significance_level

array([15. , 10. ,  5. ,  2.5,  1. ])

In [19]:
result.significance_level[2]

5.0

In [20]:
result.critical_values

array([0.575, 0.655, 0.786, 0.917, 1.09 ])

In [21]:
result.critical_values[2]

0.786

In [22]:
def anderson_normality_test(ticker, sample): 
    """
    H0: The Null Hypothesis
        The Anderson-Darling tests the null hypothesis that a sample is drawn 
        from a normally distributed population.
    """
    
    p(f'ticker=',ticker,'W-test, and P-value') 

    # normality test
    result = stats.anderson(sample, dist='norm')
    p(f'Statistic: {round(result.statistic,3)}')
    #p = 0
    for i in range(len(result.critical_values)):
        sl, cv = result.significance_level[i], result.critical_values[i]
        if result.statistic < result.critical_values[i]:
            p(f'Significance level: {round(sl,3)}; Critical value: {round(cv,3)}: Sample looks Gaussian (fail to reject H0)')
        else:
            p(f'Significance level: {round(sl,3)}; Critical value: {round(cv,3)}: Sample does not look Gaussian (reject H0)')
    return

In [23]:
n_samples = 2500
# data = stats.norm.rvs(size=n_samples,random_state=1234)
data = np.random.normal(loc=0.0,scale=1.0,size=n_samples)

In [24]:
anderson_normality_test('Simulated Series',data)

ticker= Simulated Series W-test, and P-value
Statistic: 0.272
Significance level: 15.0; Critical value: 0.575: Sample looks Gaussian (fail to reject H0)
Significance level: 10.0; Critical value: 0.655: Sample looks Gaussian (fail to reject H0)
Significance level: 5.0; Critical value: 0.786: Sample looks Gaussian (fail to reject H0)
Significance level: 2.5; Critical value: 0.917: Sample looks Gaussian (fail to reject H0)
Significance level: 1.0; Critical value: 1.09: Sample looks Gaussian (fail to reject H0)


In [25]:
df = web.DataReader('AAPL','yahoo','01/01/2007','10/18/2019')
rets = df['Adj Close'].pct_change().dropna()

In [26]:
anderson_normality_test('AAPL',rets)

ticker= AAPL W-test, and P-value
Statistic: 39.027
Significance level: 15.0; Critical value: 0.575: Sample does not look Gaussian (reject H0)
Significance level: 10.0; Critical value: 0.655: Sample does not look Gaussian (reject H0)
Significance level: 5.0; Critical value: 0.786: Sample does not look Gaussian (reject H0)
Significance level: 2.5; Critical value: 0.917: Sample does not look Gaussian (reject H0)
Significance level: 1.0; Critical value: 1.091: Sample does not look Gaussian (reject H0)


In [27]:
df = web.DataReader('IBM','yahoo','01/01/2007','10/18/2019')
rets = df['Adj Close'].pct_change().dropna()

In [28]:
anderson_normality_test('IBM',rets)

ticker= IBM W-test, and P-value
Statistic: 44.371
Significance level: 15.0; Critical value: 0.575: Sample does not look Gaussian (reject H0)
Significance level: 10.0; Critical value: 0.655: Sample does not look Gaussian (reject H0)
Significance level: 5.0; Critical value: 0.786: Sample does not look Gaussian (reject H0)
Significance level: 2.5; Critical value: 0.917: Sample does not look Gaussian (reject H0)
Significance level: 1.0; Critical value: 1.091: Sample does not look Gaussian (reject H0)


In [29]:
df = web.DataReader('MSFT','yahoo','01/01/2007','10/18/2019')
rets = df['Adj Close'].pct_change().dropna()

In [30]:
anderson_normality_test('MSFT',rets)

ticker= MSFT W-test, and P-value
Statistic: 48.662
Significance level: 15.0; Critical value: 0.575: Sample does not look Gaussian (reject H0)
Significance level: 10.0; Critical value: 0.655: Sample does not look Gaussian (reject H0)
Significance level: 5.0; Critical value: 0.786: Sample does not look Gaussian (reject H0)
Significance level: 2.5; Critical value: 0.917: Sample does not look Gaussian (reject H0)
Significance level: 1.0; Critical value: 1.091: Sample does not look Gaussian (reject H0)


In [31]:
data = np.random.normal(loc=0.0,scale=1.0,size=n_samples)
shapiro_normality_test('test',data)
p('\n')
anderson_normality_test('test',data)

ticker= test, W-test and p-value
(0.9990682601928711, 0.21895110607147217)
Statistics= 0.9990682601928711, p-value= 0.21895110607147217

test sample looks Gaussian (fail to reject H0)


ticker= test W-test, and P-value
Statistic: 0.581
Significance level: 15.0; Critical value: 0.575: Sample does not look Gaussian (reject H0)
Significance level: 10.0; Critical value: 0.655: Sample looks Gaussian (fail to reject H0)
Significance level: 5.0; Critical value: 0.786: Sample looks Gaussian (fail to reject H0)
Significance level: 2.5; Critical value: 0.917: Sample looks Gaussian (fail to reject H0)
Significance level: 1.0; Critical value: 1.09: Sample looks Gaussian (fail to reject H0)


In [32]:
anderson_normality_test('test',data)

ticker= test W-test, and P-value
Statistic: 0.581
Significance level: 15.0; Critical value: 0.575: Sample does not look Gaussian (reject H0)
Significance level: 10.0; Critical value: 0.655: Sample looks Gaussian (fail to reject H0)
Significance level: 5.0; Critical value: 0.786: Sample looks Gaussian (fail to reject H0)
Significance level: 2.5; Critical value: 0.917: Sample looks Gaussian (fail to reject H0)
Significance level: 1.0; Critical value: 1.09: Sample looks Gaussian (fail to reject H0)
