In [2]:
import scipy.stats as ss
import numpy as np

https://blog.csdn.net/qq_35125180/category_10161271.html

In [9]:
# pdf 概率密度函数
# cdf 累积概率密度函数，求左侧检验P值
# ppf 下分位数
# sf 1 -cdf  求右侧检验P值
# isf 上分位数
# mean()均值 var 方差 std 标准差 
# fit 参数估计
# expect 求期望
# rvs 生成随机值

# 正态总体参数的假设检验

## 单个正态总体方差已知时总体均值的检验

In [8]:
l1 = [10512,10623,10668,10554,10776,10707,10557,10581,10666,10670]
# 右侧检验 H0 mu = 10560 H1:mu > 10560

Z = ss.norm(loc = 0,scale = 1)#标准正态分布
mu0 = 10560.0 # H0假设的均值
sigma = 80 # 已知方差

x_bar = np.array(l1,dtype = np.float32).mean()# 计算样本均值
z = (x_bar- mu0)/(sigma/len(l1)) #计算检验统计量

print("P_value = {}".format(Z.sf(z)))#输出P值

P_value = 2.22751320200631e-19


In [6]:
z,x_bar

(8.925048828125, 10631.4)

In [15]:
def ztest_1simple(X,u0,sigma,side = 'both'):
    '''
    需import numpy as np
        import scipy.stats as ss
    参数 
        进行方差已知的均值检验（单样本）
        X：样本数据
        u0:零假设均值 
        sigma :已知的均值 
        side = 'both','left','right'
    返回值： 
        return 假设检验的P值 
    '''
    x_mean = np.array(X).mean()
    x_num = len(X)
    Z = ss.norm()
    z = (x_mean - u0)/(sigma/x_num)
    if side == 'both':
        z0 = np.abs(z)
        return Z.cdf(-z0) + Z.sf(z0)
    if side == 'left':
        return Z.cdf(z)
    if side =='right':
        return Z.sf(z)

In [16]:
ztest_1simple(l1,10560,80,'right')

2.2284960498548793e-19

## 单个正态总体方差未知时的总体均值检验

输入：数据 和零假设均值<br>
输出：检验统计量和P值（双尾检验）当为单尾检验时，要除以2

In [18]:
#使用t检验函数

In [19]:
ss.ttest_1samp(l1,popmean=10560)


Ttest_1sampResult(statistic=2.78759777774592, pvalue=0.021136656081453067)

## 单个正态总体方差的检验

In [35]:
def chi2test(X,sigma,side = 'both'):
    '''
    需import numpy as np
        import scipy.stats as ss
    参数 
        进行方差已知的均值检验（单样本）
        X：样本数据
        u0:零假设均值 
        sigma :已知的均值 
        side = 'both','left','right'
    返回值： 
        return 假设检验的P值 
    '''
    x_var = np.cov(np.array(X))
    x_num = len(X)
    chi2 = ss.chi2(x_num-1)
    z = (x_num-1)*x_var/np.square(sigma)
    if side == 'both':
        p = chi2.cdf(z)
        return min(p,1.0-p)
    if side == 'left':
        return chi2.cdf(z)
    if side =='right':
        return chi2.sf(z)

In [37]:
chi2test(l1,80,'right')

0.4167076285222403

In [38]:
chi2test(l1,80,'left')

0.5832923714777597

## 两个正态总体均值相等的检验

### 两个方差都已知

In [39]:
def ztest_2simple(X,Y,sigma1,sigma2,side = 'both'):
    '''
    需import numpy as np
        import scipy.stats as ss
    参数 
        进行方差已知的均值检验（单样本）
        X：样本数据
        u0:零假设均值 
        sigma :已知的均值 
        side = 'both','left','right'
    返回值： 
        return 假设检验的P值 
    '''
    x_mean = np.array(X).mean()
    y_mean = np.array(Y).mean()
    x_num = len(X)
    y_num = len(Y)
    Z = ss.norm()
    z = (x_mean - y_mean)/np.sqrt(sigma1**2/x_num + sigma2**2/y_num)
    if side == 'both':
        z0 = np.abs(z)
        return Z.cdf(-z0) + Z.sf(z0)
    if side == 'left':
        return Z.cdf(z)
    if side =='right':
        return Z.sf(z)

In [42]:
l2 = [x + np.random.randint(100,200) for x in l1]
ztest_2simple(l1,l2,80,80)

2.9686634344057317e-05

### 两正态总体方差未知但相等

In [43]:
ss.ttest_ind(l1,l2,equal_var=True)

Ttest_indResult(statistic=-3.674486498868449, pvalue=0.0017344565506176805)

### 两正态总体方差未知且不等

In [44]:
ss.ttest_ind(l1,l2,equal_var=False)

Ttest_indResult(statistic=-3.674486498868449, pvalue=0.0018384314986924293)

## 两个正态总体方差相等的检验

In [49]:
import numpy as np
from scipy import stats

def ftest(data1, data2, side='both'):
    n1=len(data1)
    n2=len(data2)
    F = ss.f(dfn=n1-1, dfd=n2-1)
    tmp = np.var(data1, ddof=1)/np.var(data2, ddof=1)
    ret_left = F.cdf(tmp)
    ret_right = F.sf(tmp)
    if side=='both':
        return 2*min(ret_left, ret_right)
    elif side=='left':
        return ret_left
    return ret_right


In [50]:
ftest(l1,l2)

0.5427965408986852