In [6]:
import os
# NOTE(review): this changes the working directory to the directory that is
# already current, so it appears to have no effect. Confirm whether a specific
# data directory was intended here (the notebook reads "anova1_csv/..." paths
# relative to the working directory).
os.chdir(os.getcwd())


In [7]:
import math
import numpy as np
import pandas as pd

## One-Factor ANOVA - Equal Sample Size: Example 1

### SciPy (`scipy.stats`)

In [8]:
# Read the fabric data set from a CSV file (path is relative to the working
# directory) and display it. The displayed table has one column per polymer.
df = pd.read_csv("anova1_csv/fabric.csv")
df

Unnamed: 0,polymer1,polymer2,polymer3
0,0.56,0.72,0.62
1,1.12,0.69,1.08
2,0.9,0.87,1.07
3,1.07,0.78,0.99
4,0.94,0.91,0.93


In [9]:
from scipy.stats import f_oneway
# One-way ANOVA across the three polymer columns; each column is passed as a
# separate sample, and the cell displays the resulting F statistic and p-value.
f_oneway(df["polymer1"], df["polymer2"], df["polymer3"])

F_onewayResult(statistic=0.9865974924340688, pvalue=0.4011558781296862)

### Statsmodels

In [10]:
# Re-read the same fabric CSV that was loaded for the scipy.stats example, so
# `df` is rebound to a freshly loaded DataFrame before the statsmodels steps.
df = pd.read_csv("anova1_csv/fabric.csv")
df

Unnamed: 0,polymer1,polymer2,polymer3
0,0.56,0.72,0.62
1,1.12,0.69,1.08
2,0.9,0.87,1.07
3,1.07,0.78,0.99
4,0.94,0.91,0.93


In [11]:
# Reshape the wide table (one column per polymer) into long format: one row
# per measurement, carrying the original row id, the polymer label, and the
# soiling value. The default names produced by reset_index() are renamed.
stacked_data = (
    df.stack()
    .reset_index()
    .rename(columns={'level_0': 'id', 'level_1': 'polymer', 0: 'soiling'})
)
stacked_data

Unnamed: 0,id,polymer,soiling
0,0,polymer1,0.56
1,0,polymer2,0.72
2,0,polymer3,0.62
3,1,polymer1,1.12
4,1,polymer2,0.69
5,1,polymer3,1.08
6,2,polymer1,0.9
7,2,polymer2,0.87
8,2,polymer3,1.07
9,3,polymer1,1.07


In [12]:
from statsmodels.formula.api import ols
# Fit an ordinary least squares model of soiling on polymer, with polymer
# treated as a categorical factor via C(...), then display the fit summary.
results = ols(formula='soiling ~ C(polymer)', data=stacked_data).fit()
results.summary()



0,1,2,3
Dep. Variable:,soiling,R-squared:,0.141
Model:,OLS,Adj. R-squared:,-0.002
Method:,Least Squares,F-statistic:,0.9866
Date:,"Thu, 24 Feb 2022",Prob (F-statistic):,0.401
Time:,09:39:17,Log-Likelihood:,6.4816
No. Observations:,15,AIC:,-6.963
Df Residuals:,12,BIC:,-4.839
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9180,0.079,11.689,0.000,0.747,1.089
C(polymer)[T.polymer2],-0.1240,0.111,-1.116,0.286,-0.366,0.118
C(polymer)[T.polymer3],0.0200,0.111,0.180,0.860,-0.222,0.262

0,1,2,3
Omnibus:,4.856,Durbin-Watson:,1.783
Prob(Omnibus):,0.088,Jarque-Bera (JB):,2.757
Skew:,-1.041,Prob(JB):,0.252
Kurtosis:,3.278,Cond. No.,3.73


In [13]:
import statsmodels.api as sm
# Build the ANOVA table from the fitted OLS results (`results`), requesting
# the type-2 ANOVA via typ=2, and display it.
aov_table = sm.stats.anova_lm(results, typ=2)
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(polymer),0.060853,2.0,0.986597,0.401156
Residual,0.37008,12.0,,


In [14]:
def anova_table(aov):
    """Return an ANOVA table extended with a mean-square column.

    Parameters
    ----------
    aov : pandas.DataFrame
        ANOVA table containing at least the columns 'sum_sq', 'df', 'F'
        and 'PR(>F)'.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the columns ordered as
        'sum_sq', 'df', 'mean_sq', 'F', 'PR(>F)', where
        mean_sq = sum_sq / df for every row.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``aov``.

    Notes
    -----
    The input DataFrame is not modified; the mean-square column is added to
    a copy so the caller's table keeps its original columns.
    """
    # assign() returns a new frame, so the caller's table is left untouched.
    with_mean_sq = aov.assign(mean_sq=aov['sum_sq'] / aov['df'])

    cols = ['sum_sq', 'df', 'mean_sq', 'F', 'PR(>F)']
    return with_mean_sq[cols]

# Display the ANOVA table with the mean-square column included.
anova_table(aov_table)

Unnamed: 0,sum_sq,df,mean_sq,F,PR(>F)
C(polymer),0.060853,2.0,0.030427,0.986597,0.401156
Residual,0.37008,12.0,0.03084,,


In [15]:
from scipy.stats import f
# 0.99 quantile of the F distribution with 2 numerator and 12 denominator
# degrees of freedom (matching Df Model = 2 and Df Residuals = 12 above).
f.ppf(0.99, dfn=2, dfd=12)

6.9266081401913

## One-Factor ANOVA - Unequal Sample Size: Example 1

In [16]:
# Read the second data set from a CSV file (path is relative to the working
# directory) and display it. In the displayed table the Plaster column has
# fewer values than the other two, so its last rows are empty.
df2 = pd.read_csv("anova1_csv/mg.csv")
df2

Unnamed: 0,PermMold,DieCast,Plaster
0,45.5,44.2,46.0
1,45.3,43.9,45.9
2,45.4,44.7,44.8
3,44.4,44.2,46.2
4,44.6,44.0,45.1
5,43.9,43.8,45.5
6,44.6,44.6,
7,44.0,43.1,


In [17]:
# Reshape the wide table (one column per process) into long format: one row
# per measurement, carrying the original row id, the process label, and the
# strength value.
#
# The groups have unequal sizes, so the shorter column is padded with missing
# values in df2. dropna() removes that padding explicitly instead of relying
# on stack() to discard it implicitly.
# NOTE(review): the pandas docs describe a newer stack implementation
# (future_stack=True) for which the `dropna` option no longer applies; confirm
# the behaviour of the installed pandas version. The explicit dropna() is a
# no-op when stack() has already removed the missing values.
stacked_data2 = (
    df2.stack()
    .dropna()
    .reset_index()
    .rename(columns={'level_0': 'id', 'level_1': 'process', 0: 'strength'})
)
stacked_data2

Unnamed: 0,id,process,strength
0,0,PermMold,45.5
1,0,DieCast,44.2
2,0,Plaster,46.0
3,1,PermMold,45.3
4,1,DieCast,43.9
5,1,Plaster,45.9
6,2,PermMold,45.4
7,2,DieCast,44.7
8,2,Plaster,44.8
9,3,PermMold,44.4


In [18]:
from statsmodels.formula.api import ols
# Fit an ordinary least squares model of strength on process, with process
# treated as a categorical factor via C(...), then display the fit summary.
results = ols(formula='strength ~ C(process)', data=stacked_data2).fit()
results.summary()

0,1,2,3
Dep. Variable:,strength,R-squared:,0.569
Model:,OLS,Adj. R-squared:,0.524
Method:,Least Squares,F-statistic:,12.56
Date:,"Thu, 24 Feb 2022",Prob (F-statistic):,0.000334
Time:,09:39:18,Log-Likelihood:,-16.917
No. Observations:,22,AIC:,39.83
Df Residuals:,19,BIC:,43.11
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,44.0625,0.199,221.853,0.000,43.647,44.478
C(process)[T.PermMold],0.6500,0.281,2.314,0.032,0.062,1.238
C(process)[T.Plaster],1.5208,0.303,5.013,0.000,0.886,2.156

0,1,2,3
Omnibus:,2.206,Durbin-Watson:,1.224
Prob(Omnibus):,0.332,Jarque-Bera (JB):,1.208
Skew:,-0.213,Prob(JB):,0.546
Kurtosis:,1.933,Cond. No.,3.62


In [19]:
import statsmodels.api as sm
# Build the ANOVA table from the fitted OLS results (`results`), requesting
# the type-2 ANOVA via typ=2, and display it. This rebinds `aov_table`, which
# an earlier cell also assigns.
aov_table = sm.stats.anova_lm(results, typ=2)
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(process),7.930076,2.0,12.564679,0.000334
Residual,5.995833,19.0,,


In [20]:
from scipy.stats import f
# 0.999 quantile of the F distribution with 2 numerator and 19 denominator
# degrees of freedom (matching Df Model = 2 and Df Residuals = 19 above).
f.ppf(0.999, dfn=2, dfd=19)

10.1568117705905