# Module 04：数理统计练习
用模拟数据理解估计与假设检验流程。

## 1. 正态分布参数估计 (MLE)
从正态分布中抽取模拟样本，用极大似然估计 (MLE) 估计均值与方差，并与真实参数对比。

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Fix the seed so the simulated sample and the printed estimates are reproducible.
np.random.seed(0)
true_mu, true_sigma = 2.0, 1.5
data = np.random.normal(true_mu, true_sigma, size=500)

# Gaussian MLE: the sample mean and the ddof=0 (divide-by-n) standard deviation.
mu_hat = data.mean()
sigma_hat = data.std(ddof=0)
print(f'真实均值={true_mu}, 估计均值={mu_hat:.3f}')
print(f'真实方差={true_sigma**2:.3f}, 估计方差={sigma_hat**2:.3f}')

plt.hist(data, bins=30, density=True, alpha=0.6, label='样本分布')
# Evaluate the densities over the observed data range instead of a fixed
# interval, so the curves span the whole histogram even when the true
# parameters or the sample size above are changed.
x = np.linspace(data.min(), data.max(), 200)
plt.plot(x, stats.norm.pdf(x, mu_hat, sigma_hat), label='MLE 拟合')
plt.plot(x, stats.norm.pdf(x, true_mu, true_sigma), '--', label='真实分布')
plt.legend()
plt.title('正态参数估计对比')
plt.show()


## 2. 置信区间覆盖率实验
重复抽样构造 95% 置信区间，统计覆盖真实均值的比例。

In [None]:
# TODO: change the number of trials and the sample size to see how coverage varies
trials = 200
sample_size = 50
cover = 0
for _ in range(trials):
    sample = np.random.normal(true_mu, true_sigma, size=sample_size)
    # Use a dedicated name so the global `mu_hat` (the MLE estimate computed
    # from `data`) is not overwritten by the last resampled mean.
    sample_mean = sample.mean()
    # Standard error of the mean from the unbiased (ddof=1) sample std.
    se = sample.std(ddof=1) / np.sqrt(sample_size)
    # The standard deviation is estimated from the sample, so the exact 95%
    # interval uses Student's t with n-1 degrees of freedom; the normal
    # quantile would give intervals that are slightly too narrow.
    low, high = stats.t.interval(
        0.95, df=sample_size - 1, loc=sample_mean, scale=se
    )
    cover += int(low <= true_mu <= high)
print(f'覆盖率: {cover/trials:.3f}')


## 3. A/B 测试假设检验
比较两组点击率差异，使用合并比例的两比例 z 检验计算双侧 p 值。

In [None]:
# TODO: change the click rates and sample sizes to see how significance varies
pa, pb = 0.12, 0.15
na, nb = 800, 820

# Simulate one Bernoulli click outcome (0/1) per user in each group.
clicks_a = np.random.binomial(1, pa, size=na)
clicks_b = np.random.binomial(1, pb, size=nb)

conv_a, conv_b = clicks_a.mean(), clicks_b.mean()
# Pooled click rate over both groups, used for the standard error of the
# difference in the two-proportion z-test.
pooled = (clicks_a.sum() + clicks_b.sum()) / (na + nb)
se = np.sqrt(pooled * (1 - pooled) * (1/na + 1/nb))
z = (conv_b - conv_a) / se
# Two-sided p-value via the survival function: `1 - cdf` suffers from
# cancellation and collapses to exactly 0 for large |z|, whereas `sf`
# keeps full precision in the tail.
p_value = 2 * stats.norm.sf(abs(z))
print(f'A 组点击率={conv_a:.3f}, B 组={conv_b:.3f}, z={z:.3f}, p={p_value:.4f}')
