# 第 3 章　使用 Python 进行数据分析｜用 Python 动手学统计学

## 第 8 节　假设检验

### 13. t 检验的实现：环境准备

In [21]:
# Libraries for numerical computation
import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats

# Libraries for plotting
from matplotlib import pyplot as plt
import seaborn as sns
# Apply seaborn's default plot styling
sns.set()

# Set the display precision for floating-point output
%precision 3
# Render figures inline in the Jupyter Notebook
%matplotlib inline

In [22]:
# Load the junk-food data set and keep only its "weight" column
URL = "https://raw.githubusercontent.com/pineapple-666/Learn-Statistics-with-Python/main/data/3-8-1-junk-food-weight.csv"
junk_food = pd.read_csv(URL).loc[:, "weight"]
# Preview the first five observations
junk_food.head(5)

Unnamed: 0,weight
0,58.52982
1,52.353039
2,74.446169
3,52.983263
4,55.876879


### 14. t 检验的实现：计算 t 值

In [23]:
# Sample mean of the observed weights
mu = junk_food.mean()
mu

np.float64(55.38496619666667)

In [24]:
# Degrees of freedom: number of observations minus one
df = junk_food.shape[0] - 1
df

19

In [25]:
# Standard error of the mean: unbiased sample standard deviation
# (ddof=1) divided by the square root of the number of observations
sigma = junk_food.std(ddof=1)
se = sigma / np.sqrt(junk_food.shape[0])
se

np.float64(1.9579276805755885)

In [26]:
# t-value: how far the sample mean lies from the hypothesized mean (50),
# measured in units of the standard error
t_value = np.divide(mu - 50, se)
t_value

np.float64(2.7503396831713434)

### 15. t 检验的实现：计算 p 值

In [27]:
# Two-sided p-value of the observed t-value under Student's t
# distribution with df degrees of freedom.
# alpha is the lower-tail probability P(T <= t_value); 1 - alpha is the
# upper-tail probability.
alpha = stats.t.cdf(t_value, df=df)
# Double the smaller of the two tails so the result is also valid when
# t_value is negative; doubling 1 - alpha unconditionally would then
# yield a "p-value" greater than 1.
min(alpha, 1 - alpha) * 2

np.float64(0.012725590012524268)

In [33]:
# Show the documentation of stats.ttest_1samp (IPython "?" help syntax)
stats.ttest_1samp?

In [28]:
# One-sample t-test of the weights against a hypothesized mean of 50
stats.ttest_1samp(a=junk_food, popmean=50)

TtestResult(statistic=np.float64(2.7503396831713434), pvalue=np.float64(0.012725590012524155), df=np.int64(19))

### 16. 通过模拟实验计算 p 值

In [29]:
# Sample information reused by the simulation:
# number of observations and unbiased standard deviation (ddof=1)
size = junk_food.shape[0]
sigma = junk_food.std(ddof=1)

In [30]:
# Container for the simulated t-values: 50,000 slots initialised to zero
t_value_array = np.zeros(shape=(50000,), dtype=float)

In [31]:
# Simulate the distribution of the t-value under the null hypothesis:
# assuming the population mean is 50 (with the standard deviation set to
# sigma), draw one sample of `size` observations for every slot of
# t_value_array and store the t-value computed from that sample.
np.random.seed(1)  # fixed seed so the simulation is reproducible
norm_dist = stats.norm(loc=50, scale=sigma)
sqrt_size = np.sqrt(size)  # loop-invariant, so computed only once
# Iterate over the array's own length so the number of replications can
# never drift out of sync with the number of slots allocated for them.
for i in range(len(t_value_array)):
    sample = norm_dist.rvs(size=size)
    sample_mean = np.mean(sample)
    sample_std = np.std(sample, ddof=1)
    sample_se = sample_std / sqrt_size
    t_value_array[i] = (sample_mean - 50) / sample_se

In [32]:
# Simulated two-sided p-value.
# np.mean over the boolean mask gives the proportion of simulated t-values
# above the observed one; it runs vectorised (the builtin sum would iterate
# the array element by element) and needs no hard-coded replication count.
upper_tail = np.mean(t_value_array > t_value)
# Double the smaller tail so the result stays valid for a negative t_value.
min(upper_tail, 1 - upper_tail) * 2

np.float64(0.01324)