Analysis of variance

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import f

In [2]:
# Build the observation table: five numbered rows, one column per group
# (Mimizu, Batta, Mix).
data = np.array(
    [
        [33, 31, 33],
        [30, 29, 31],
        [33, 28, 32],
        [29, 29, 32],
        [32, 27, 36],
    ]
)
df = pd.DataFrame(
    data=data,
    index=[1, 2, 3, 4, 5],  # rows are labelled 1..5 rather than 0..4
    columns=['Mimizu', 'Batta', 'Mix'],
)
df

Unnamed: 0,Mimizu,Batta,Mix
1,33,31,33
2,30,29,31
3,33,28,32
4,29,29,32
5,32,27,36


In [3]:
# Grand mean: the average over every cell of the table, ignoring which
# column a value belongs to.
all_mean = df.to_numpy().mean()
all_mean

31.0

In [4]:
# Column effect: each column's own mean expressed as a deviation from the
# grand mean.
df_effect = df.mean(axis='index').sub(all_mean)
df_effect

Mimizu    0.4
Batta    -2.2
Mix       1.8
dtype: float64

In [5]:
# Error (residual) table: every observation minus the mean of the column
# it sits in. The column means are matched to the columns by label.
df_error = df.sub(df.mean(axis='index'), axis='columns')
df_error

Unnamed: 0,Mimizu,Batta,Mix
1,1.6,2.2,0.2
2,-1.4,0.2,-1.8
3,1.6,-0.8,-0.8
4,-2.4,0.2,-0.8
5,0.6,-1.8,3.2


In [6]:
# Sanity check: the residuals of each column add up to zero (the displayed
# values differ from 0 only by floating-point round-off).
df_error.sum(axis='index')

Mimizu    7.105427e-15
Batta    -3.552714e-15
Mix       1.421085e-14
dtype: float64

不偏分散の式
$$
V = \frac{\sum(x_i - \bar{x})^2}{n-1}
$$

In [7]:
# Unbiased variance of the column effects, V_1

# Numerator degrees of freedom: number of columns minus one. The 1 is written
# out explicitly instead of being read from `df_effect.mean().size`, which
# only yields 1 when the mean comes back as a NumPy scalar and hides the
# "k - 1" intent.
dfn = df.columns.size - 1
# Each squared, centred effect is weighted by the number of rows (observations
# per column) before being summed and divided by the degrees of freedom.
V_1 = (df.index.size * np.square(df_effect - df_effect.mean())).sum() / dfn
V_1

20.59999999999996

In [8]:
# Unbiased variance of the errors, V_2

# Flatten the residual table into one long Series.
err = df_error.stack()
# Denominator degrees of freedom: number of residuals minus the number of
# columns.
dfd = err.size - df_error.columns.size
V_2 = np.square(err.sub(err.mean())).sum() / dfd
V_2

3.0666666666666664

In [9]:
# F statistic: variance of the column effects (V_1) divided by the variance
# of the errors (V_2).
F = np.divide(V_1, V_2)
F

6.717391304347813

In [10]:
# Critical values of the F distribution with (dfn, dfd) degrees of freedom.
# `f` is scipy.stats.f, imported in the notebook's import cell.
five = f.ppf(0.95, dfn, dfd)  # value with 5% of the distribution above it
one = f.ppf(0.99, dfn, dfd)  # value with 1% of the distribution above it
print('上側確率 5%',five)
print('上側確率 1%',one)

上側確率 5% 3.8852938346523933
上側確率 1% 6.9266081401913
