# 06 — Normal distribution, Z standardization, CLT, and normal approximations

We’ll demonstrate:
- Normal PDF shape via histogram
- Standardization: Z = (X−μ)/σ
- Φ(t) = P(Z ≤ t) using `scipy` if available; otherwise Monte Carlo
- Central Limit Theorem (CLT): standardized sums/means of iid variables approach a normal distribution
- Normal approximation to Binomial


In [None]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

# One seeded generator shared by the sampling cells below; edit the seed to get
# a different (but still repeatable) sequence of draws.
rng = np.random.default_rng(seed=42)


In [None]:
# scipy is optional: it supplies an accurate standard normal CDF (Phi) through
# `norm.cdf`. When the import fails for any reason, `have_scipy` is False and the
# cells that need Phi fall back to a Monte Carlo estimate.
try:
    from scipy.stats import norm
except Exception:
    # Deliberately broad: any import failure is treated as "scipy unavailable".
    have_scipy = False
else:
    have_scipy = True

# Bare expression so the notebook displays which path will be used.
have_scipy


## Normal(μ,σ²) sampling and histogram


In [None]:
# Draw n samples from Normal(mu, sigma^2), plot them as a density histogram, and
# compare the sample mean/variance with the theoretical values mu and sigma^2.
mu, sigma = 10.0, 2.5
n = 200000
X = rng.normal(loc=mu, scale=sigma, size=n)

# density=True rescales bar heights so the histogram integrates to 1, making it
# comparable to a probability density.
fig, ax = plt.subplots()
ax.hist(X, bins=60, density=True)
ax.set_title(f"Histogram of Normal(μ={mu}, σ={sigma}) samples (density)")
ax.set_xlabel("x")
ax.set_ylabel("density")
plt.show()

print("Empirical mean/var:", np.mean(X), np.var(X))
print("Theoretical mean/var:", mu, sigma**2)


## Standardization: Z = (X−μ)/σ should be ~ N(0,1)


In [None]:
# Standardize the samples: subtract the mean and divide by the standard
# deviation, so Z should have mean ~0 and variance ~1.
# NOTE: X and mu are module-level names that later cells rebind, so re-running
# this cell out of order standardizes whatever X/mu were assigned last.
Z = (X - mu) / sigma

print("Z mean/var (empirical):", np.mean(Z), np.var(Z), " (theory: 0,1)")

fig, ax = plt.subplots()
ax.hist(Z, bins=60, density=True)
ax.set(title="Histogram of standardized Z", xlabel="z", ylabel="density")
plt.show()


## Φ(t) probabilities and symmetry Φ(−t)=1−Φ(t)


In [None]:
# Evaluate Phi at t and at -t, then print 1 - Phi(t) next to Phi(-t) so the
# symmetry Phi(-t) = 1 - Phi(t) can be checked by eye.
t = 1.0

if not have_scipy:
    # No scipy: estimate Phi as the fraction of standard normal draws that fall
    # at or below the threshold. Both estimates reuse the same draws.
    Zs = rng.normal(loc=0, scale=1, size=2_000_000)
    Phi_t = np.mean(Zs <= t)
    Phi_minus_t = np.mean(Zs <= -t)
else:
    Phi_t = norm.cdf(t)
    Phi_minus_t = norm.cdf(-t)

print("Phi(t)    =", Phi_t)
print("Phi(-t)   =", Phi_minus_t)
print("1-Phi(t)  =", 1 - Phi_t)


## CLT: standardized sums of iid random variables approach Normal(0,1)


In [None]:
# CLT demo on a non-normal starting distribution: each row of X holds m iid
# Exponential(scale=1) draws; the row sum S and row mean Xbar are standardized
# with the true mean and variance and plotted as density histograms.
mu_x, var_x = 1.0, 1.0   # mean and variance of Exponential(scale=1)
m = 60                   # iid terms per sum
reps = 50000             # number of sums (rows) to simulate

X = rng.exponential(scale=1.0, size=(reps, m))
S = X.sum(axis=1)
Xbar = X.mean(axis=1)

# E[S] = m*mu_x, Var[S] = m*var_x; E[Xbar] = mu_x, Var[Xbar] = var_x/m.
S_std = (S - m*mu_x) / math.sqrt(m*var_x)
Xbar_std = (Xbar - mu_x) / math.sqrt(var_x/m)

# One figure per standardized statistic, in the order (sum, mean).
for values, title, xlabel in (
    (S_std, "CLT: standardized sum S (should look ~ N(0,1))", "standardized S"),
    (Xbar_std, "CLT: standardized mean X̄ (should look ~ N(0,1))", "standardized X̄"),
):
    plt.figure()
    plt.hist(values, bins=60, density=True)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel("density")
    plt.show()

print("Empirical mean/var of standardized S:", S_std.mean(), S_std.var())
print("Empirical mean/var of standardized X̄:", Xbar_std.mean(), Xbar_std.var())


## Normal approximation to Binomial(n,p)


In [None]:
# Normal approximation to Binomial(n_trials, p): simulate counts, standardize
# them with the exact mean n*p and variance n*p*(1-p), plot the standardized
# histogram, and compare an empirical CDF value with the continuity-corrected
# normal approximation.
n_trials = 80
p = 0.4
reps = 200000
X = rng.binomial(n_trials, p, size=reps)

mu = n_trials*p
var = n_trials*p*(1-p)
sd = math.sqrt(var)
Z = (X - mu) / sd

# X is integer-valued, so Z only takes values on a lattice with spacing 1/sd.
# Equal-width bins chosen without regard to that lattice (e.g. bins=60) leave
# some bins empty and populate others unevenly, producing a comb-shaped plot
# that hides how good the normal approximation is. Instead, centre exactly one
# bin on each observed count j: edges at (j - 0.5 - mu)/sd for
# j = min(X), ..., max(X) + 1.
bin_edges = (np.arange(X.min(), X.max() + 2) - 0.5 - mu) / sd

plt.figure()
plt.hist(Z, bins=bin_edges, density=True)
plt.title("Standardized Binomial (normal approximation)")
plt.xlabel("z")
plt.ylabel("density")
plt.show()

# Example probability: P(X <= k)
k = 28
emp = (X <= k).mean()

# Continuity correction: the discrete event {X <= k} is matched to the
# continuous event {Y <= k + 0.5} before standardizing.
z_cc = (k + 0.5 - mu) / sd

if have_scipy:
    approx = norm.cdf(z_cc)
else:
    # No scipy: Monte Carlo estimate of Phi(z_cc) from standard normal draws.
    Zs = rng.normal(0, 1, size=2_000_000)
    approx = (Zs <= z_cc).mean()

print(f"Empirical P(X <= {k}) =", emp)
print("Normal approx (with continuity correction) =", approx)
