In [99]:
import numpy as np
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
from scipy.stats import norm, kstest, expon, t, ksone
from statsmodels.distributions.empirical_distribution import ECDF

%matplotlib inline

In [100]:
data = pd.read_csv('dataset1.txt', header=None)[0]

In [101]:
def compare_to_normal(title, sample, critical_value = 0.04301):
    ecdf = ECDF(sample)
    D = np.max(np.abs(ECDF(sample)(sample) -  st.norm(np.mean(sample), np.std(sample)).cdf(sample)))
    if D > critical_value:
        print("Rejecting the H0 for {}, since D={} (>{})".format(title, D, critical_value))
    else:
        print("Accepting the H0 for {}, since D={} (<{})".format(title, D, critical_value))

In [102]:
compare_to_normal("normal", st.norm().rvs(1000))
compare_to_normal("double exponential", st.laplace().rvs(1000))
compare_to_normal("T-3", st.t(3).rvs(1000))
compare_to_normal("lognormal", st.lognorm(1.).rvs(1000))
compare_to_normal("custom data", data)

Accepting the H0 for normal, since D=0.01938550423813956 (<0.04301)
Rejecting the H0 for double exponential, since D=0.063796555507114 (>0.04301)
Rejecting the H0 for T-3, since D=0.0812016664180738 (>0.04301)
Rejecting the H0 for lognormal, since D=0.22639149541488007 (>0.04301)
Rejecting the H0 for custom data, since D=0.15366213968015685 (>0.04301)


In [106]:
critical_value = 1.22385 / np.sqrt(len(data)) # for alpha = 0.1
critical_value

0.038701535143970713

In [107]:
compare_to_normal("normal", st.norm().rvs(1000), critical_value)
compare_to_normal("double exponential", st.laplace().rvs(1000), critical_value)
compare_to_normal("T-3", st.t(3).rvs(1000), critical_value)
compare_to_normal("lognormal", st.lognorm(1.).rvs(1000), critical_value)
compare_to_normal("custom data", data, critical_value)

Accepting the H0 for normal, since D=0.0157460422860361 (<0.03870153514397071)
Rejecting the H0 for double exponential, since D=0.06908949266538644 (>0.03870153514397071)
Rejecting the H0 for T-3, since D=0.09039374162553093 (>0.03870153514397071)
Rejecting the H0 for lognormal, since D=0.21900454568341576 (>0.03870153514397071)
Rejecting the H0 for custom data, since D=0.15366213968015685 (>0.03870153514397071)
