# Can we automatically test whether a random sample is approximately normally distributed?

In [None]:
# Notebook setup: IPython display options, module autoreload, inline plotting
# and INFO-level logging.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Load the autoreload extension and select mode 2, which reloads imported
# modules before each execution.
%load_ext autoreload
%autoreload 2
# Display matplotlib figures inline in the notebook output.
%matplotlib inline

import logging

# Configure the root logger to emit messages at INFO level and above.
logging.basicConfig(level=logging.INFO)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Print floats with exactly four decimals and without scientific notation.
np.set_printoptions(
    suppress=True,
    floatmode="fixed",
    precision=4,
)
# Unseeded generator: its draws differ from run to run.
rng = np.random.default_rng(seed=None)

In [None]:
import numpy as np
from scipy.stats import kstest, norm as normal

# Seed the generator behind scipy's shared ``norm`` object so that every run of
# this cell draws the same samples.
normal.random_state = np.random.default_rng(seed=1234)

# For each size n: draw a standard normal vector x, build a random matrix A
# whose entries are standard normal draws divided by sqrt(n), form y = A x, and
# run a Kolmogorov-Smirnov test of both x and y against the standard normal CDF.
for n in (10, 100, 1000, 10_000):
    x = normal.rvs(size=(n,))
    A = np.divide(normal.rvs(size=(n, n)), np.sqrt(n))
    y = np.matmul(A, x)
    x_test, y_test = (kstest(sample, normal.cdf) for sample in (x, y))
    # One item per line: the size, then the two test results.
    print(*(n, x_test, y_test), sep="\n")

In [None]:
import matplotlib.pyplot as plt

# Overlay density-normalised histograms of x and y on the standard normal pdf.
# x, y, x_test and y_test are read from the notebook namespace and must already
# have been computed by an earlier cell.
fig, ax = plt.subplots(constrained_layout=True)
T = np.linspace(-4, 4)
# Attach each label to the artist it describes instead of passing a bare list
# to ``legend``; a bare list is matched to artists purely by creation order and
# silently mislabels them if a plot call is added, removed or reordered.
ax.plot(T, normal.pdf(T), ":k", label="$N(0,1)$")
ax.hist(
    [x, y],
    bins=50,
    density=True,
    label=[f"$x: p={x_test.pvalue:.2f}$", f"$y: p={y_test.pvalue:.2f}$"],
)
ax.legend()
fig.savefig("example.png")