# Analysis of variance

1因子の場合

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build the one-factor data set: one row per replicate (1-5) and one column
# per group (Mimizu, Batta, Mix).
data = np.array(
    [
        [33, 31, 33],
        [30, 29, 31],
        [33, 28, 32],
        [29, 29, 32],
        [32, 27, 36],
    ]
)
df = pd.DataFrame(
    data,
    index=list(range(1, len(data) + 1)),
    columns=['Mimizu', 'Batta', 'Mix'],
)
df

Unnamed: 0,Mimizu,Batta,Mix
1,33,31,33
2,30,29,31
3,33,28,32
4,29,29,32
5,32,27,36


In [3]:
# Grand mean: flatten the table into a single Series and average every observation.
observations = df.stack()
all_mean = observations.mean()
all_mean

31.0

In [4]:
# Column effect: each column's mean expressed as a deviation from the grand mean.
column_means = df.mean(axis=0)
df_effect = column_means - all_mean
df_effect

Mimizu    0.4
Batta    -2.2
Mix       1.8
dtype: float64

In [5]:
# Error: every observation minus the mean of its own column.
df_error = df.sub(df.mean(axis=0), axis='columns')
df_error

Unnamed: 0,Mimizu,Batta,Mix
1,1.6,2.2,0.2
2,-1.4,0.2,-1.8
3,1.6,-0.8,-0.8
4,-2.4,0.2,-0.8
5,0.6,-1.8,3.2


In [6]:
# Within each column the errors sum to zero (up to floating-point rounding).
df_error.sum(axis=0)

Mimizu    7.105427e-15
Batta    -3.552714e-15
Mix       1.421085e-14
dtype: float64

不偏分散の式
$$
V = \frac{\sum(x_i - \bar{x})^2}{n-1}
$$

In [7]:
# Unbiased variance of the column effect, V1.

# Numerator degrees of freedom: number of columns minus one. The 1 is written
# explicitly instead of being read from `.size` of the scalar mean, which only
# works when that mean happens to be a NumPy scalar.
dfn = df.columns.size - 1
# Each column effect applies to df.index.size observations, so its squared
# deviation is weighted by that count before dividing by the degrees of freedom.
V_1 = np.sum(df.index.size * np.square(df_effect - df_effect.mean())) / dfn
V_1

20.59999999999996

In [8]:
# Unbiased variance of the error, V2.

err = df_error.stack()
# Denominator degrees of freedom: number of observations minus number of columns.
dfd = err.size - len(df_error.columns)
residuals = err - err.mean()
V_2 = np.square(residuals).sum() / dfd
V_2

3.0666666666666664

In [9]:
# F statistic: the column-effect variance (V1) divided by the error variance (V2).
F = V_1 / V_2
F

6.717391304347813

In [10]:
from scipy.stats import f

# Critical values of the F distribution with (dfn, dfd) degrees of freedom:
# the quantiles that leave 5% and 1% in the upper tail.
f_dist = f(dfn, dfd)
five = f_dist.ppf(0.95)
one = f_dist.ppf(0.99)
print('上側確率 5%',five)
print('上側確率 1%',one)

上側確率 5% 3.8852938346523933
上側確率 1% 6.9266081401913


# Analysis of Variance
2因子の場合

In [47]:
import numpy as np
import pandas as pd
from scipy.stats import f

In [2]:
# Two-factor data set: three rows (feed) by four columns (soil), with a single
# observation for every row/column combination.
data = np.array(
    [
        [29, 35, 29, 29],
        [26, 33, 30, 28],
        [32, 34, 33, 34],
    ]
)
df = pd.DataFrame(
    data,
    index=['ミ', 'バ', '混'],
    columns=['火', '砂', '腐', '粘'],
)
df

Unnamed: 0,火,砂,腐,粘
ミ,29,35,29,29
バ,26,33,30,28
混,32,34,33,34


In [5]:
# Row effect (effect of the feed): row means as deviations from the grand mean.
row_effect = df.mean(axis=1).sub(df.stack().mean())
row_effect

ミ   -0.50
バ   -1.75
混    2.25
dtype: float64

In [6]:
# Column effect (effect of the soil): column means as deviations from the grand mean.
col_effect = df.mean(axis=0).sub(df.stack().mean())
col_effect

火   -2.000000
砂    3.000000
腐   -0.333333
粘   -0.666667
dtype: float64

In [29]:
# Deviation of every observation from the grand mean. At this stage the value
# still contains the row and column effects; it is not yet the pure error.
df_error = df.sub(df.stack().mean())
df_error

Unnamed: 0,火,砂,腐,粘
ミ,-2.0,4.0,-2.0,-2.0
バ,-5.0,2.0,-1.0,-3.0
混,1.0,3.0,2.0,3.0


In [30]:
# Isolate the error: remove the grand mean, the row effect and the column
# effect from every observation.
# The result is rebuilt from `df` rather than updated from the previous
# `df_error`, so re-running this cell does not subtract the effects twice.
df_error = (
    df.sub(df.stack().mean())
    .subtract(row_effect, axis=0)
    .subtract(col_effect, axis=1)
)
df_error

Unnamed: 0,火,砂,腐,粘
ミ,0.5,1.5,-1.166667,-0.833333
バ,-1.25,0.75,1.083333,-0.583333
混,0.75,-2.25,0.083333,1.416667


In [40]:
# Unbiased variance of the error: sum of squared errors divided by its degrees
# of freedom, (number of columns - 1) * (number of rows - 1).
n_rows, n_cols = df.shape
phi_2 = (n_cols - 1) * (n_rows - 1)
V_2 = np.square(df_error.stack()).sum() / phi_2
V_2

2.638888888888889

In [45]:
# Unbiased variance of the row effect.
# Degrees of freedom: number of rows minus one. The 1 is written explicitly
# instead of being read from `.size` of the scalar grand mean, which only works
# when that mean happens to be a NumPy scalar.
phi_11 = row_effect.size - 1
# Each row effect applies to df.columns.size observations, hence the weighting.
V_11 = np.sum(np.square(row_effect) * df.columns.size) / phi_11
V_11

16.75

In [48]:
# F1: significance of the row effect, i.e. the row variance over the error variance,
# shown next to the upper 5% and 1% points of F(phi_11, phi_2).
F_1 = V_11 / V_2

f_dist = f(phi_11, phi_2)
five = f_dist.ppf(0.95)
one = f_dist.ppf(0.99)
print('上側確率 5%',five)
print('上側確率 1%',one)
print('F1 = ',F_1)

上側確率 5% 5.143252849784718
上側確率 1% 10.92476650083833
F1 =  6.347368421052631


In [49]:
# Unbiased variance of the column effect.
# Degrees of freedom: number of columns minus one. The 1 is written explicitly
# instead of being read from `.size` of the scalar grand mean, which only works
# when that mean happens to be a NumPy scalar.
phi_12 = col_effect.size - 1
# Each column effect applies to df.index.size observations, hence the weighting.
V_12 = np.sum(np.square(col_effect) * df.index.size) / phi_12
V_12

13.555555555555555

In [52]:
# F2: significance of the column effect, i.e. the column variance over the error
# variance, shown next to the upper 5% and 1% points of F(phi_12, phi_2).
F_2 = V_12 / V_2

f_dist = f(phi_12, phi_2)
five = f_dist.ppf(0.95)
one = f_dist.ppf(0.99)
print('上側確率 5%',five)
print('上側確率 1%',one)
print('F2 = ',F_2)

上側確率 5% 4.757062663089414
上側確率 1% 9.779538240923273
F =  5.136842105263158
