## Using the following data, perform a one-way analysis of variance (ANOVA) at α = .05

## This method uses Python and SciPy

In [1]:
# Hypotheses for the one-way ANOVA.

# Null hypothesis, H0: m1 = m2 = m3 (all group means are equal)
H0 = "There is no difference between the means."

# Alternative hypothesis, Ha: not all of m1, m2, m3 are equal
Ha = "At least two of the means are significantly different."

In [2]:
import scipy.stats as st
import math
import pandas as pd

In [3]:
# Path of the CSV file that holds the observations
datafile = "onewayanalysis.csv"

In [4]:
# Load the observations into a DataFrame.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, presumably an
# index saved with the CSV -- confirm whether it should be read as the index.
df = pd.read_csv(datafile)

In [5]:
# Display the loaded frame to check its columns and group labels
df

Unnamed: 0,Values,Groups
0,51,Group1
1,43,Group1
2,33,Group1
3,45,Group1
4,67,Group1
5,23,Group2
6,43,Group2
7,23,Group2
8,43,Group2
9,45,Group2


In [6]:
# Box plot of Values with one box per group, for a visual comparison of the
# group distributions
df.boxplot(column='Values', by='Groups', figsize=(12, 8))

<matplotlib.axes._subplots.AxesSubplot at 0x1a11e8d1d0>

In [7]:
# Observations of the first group (not referenced by the other cells shown)
ctrl = df['Values'][df.Groups == 'Group1']

# Map each group label to the Series of its observations
grps = pd.unique(df.Groups.values)
d_data = {grp: df['Values'][df.Groups == grp] for grp in grps}

# Values needed for the degrees of freedom
k = len(grps)        # number of groups
N = len(df.Values)   # total number of observations
# Size of the first group. The Series returned by size() is indexed by group
# label, so the first entry is taken by position with .iloc; a bare [0] on a
# label-indexed Series relies on a deprecated positional fallback.
# NOTE: n describes every group only when all groups have the same size.
n = df.groupby('Groups').size().iloc[0]

In [8]:
# One-way ANOVA with SciPy. Every group collected in d_data is passed in, so
# the test always covers all k groups instead of three hard-coded labels.
F, p = st.f_oneway(*d_data.values())
print(F, p)

9.703324808184142 0.003111414131216391


In [9]:
# Degrees of freedom for k groups and N observations:
#   between groups: k - 1, within groups: N - k, total: N - 1
DFbetween, DFwithin, DFtotal = k - 1, N - k, N - 1

In [10]:
# Sum of squares between groups, computational formula:
#   SSbetween = sum_i(T_i**2 / n_i) - G**2 / N
# where T_i is the total of group i, n_i its size, G the grand total and N the
# number of observations. Dividing each squared total by its own group size
# keeps the result correct when the groups differ in size; with equal sizes it
# reduces to sum_i(T_i**2) / n.
group_values = df.groupby('Groups')['Values']
SSbetween = (group_values.sum()**2 / group_values.size()).sum() \
- (df['Values'].sum()**2)/N

In [11]:
# Sum of squares within groups, computational formula:
#   SSwithin = sum(y**2) - sum_i(T_i**2 / n_i)
# where T_i is the total of group i and n_i its size. Using each group's own
# size keeps the result correct when the groups differ in size.
sum_y_squared = (df['Values']**2).sum()   # sum of the squared observations
group_values = df.groupby('Groups')['Values']
SSwithin = sum_y_squared - (group_values.sum()**2 / group_values.size()).sum()

In [12]:
# Total sum of squares: sum of the squared observations minus G**2 / N,
# where G is the grand total of all observations
grand_total = df['Values'].sum()
SStotalS = sum_y_squared - (grand_total**2)/N

In [13]:
# Mean squares: each sum of squares divided by its own degrees of freedom
# (between-group first, then within-group)
MSbetween, MSwithin = SSbetween/DFbetween, SSwithin/DFwithin

In [14]:
# F statistic: ratio of the between-group to the within-group mean square.
# This reassigns F, replacing the value returned earlier by st.f_oneway.
F = MSbetween/MSwithin

In [15]:
# p-value: survival function of the F distribution with
# (DFbetween, DFwithin) degrees of freedom, evaluated at the computed F.
# This reassigns p, replacing the value returned earlier by st.f_oneway.
p= st.f.sf(F, DFbetween, DFwithin)

In [16]:
# Eta squared effect size: between-group share of the total sum of squares
eta_sqrd  = SSbetween/SStotalS

In [17]:

# Omega squared effect size:
#   (SSbetween - DFbetween * MSwithin) / (SStotal + MSwithin)
om_sqrd = (SSbetween - (DFbetween * MSwithin))/(SStotalS + MSwithin)

In [18]:
# Report the hand-computed F statistic followed by eta squared and omega squared
print(F,eta_sqrd,om_sqrd)

9.703324808184119 0.6179153094462535 0.5371320337779173


In [19]:
# One-way ANOVA rejection criterion:
# if the critical value of F is less than the calculated value of F, we reject
# the null hypothesis.

# Critical F value at significance level alpha, taken from the F distribution
# with (DFbetween, DFwithin) degrees of freedom instead of a rounded table
# entry. For between = 2 and within = 12 this is approximately 3.89.
alpha = .05
f = st.f.ppf(1 - alpha, DFbetween, DFwithin)


In [22]:
# Decide by comparing the computed F statistic with the critical value f:
# the null hypothesis is kept only when F is strictly below f.
reject_null = not (F < f)
if reject_null:
    print("Reject null hypothesis accept alternative:\n", Ha)
else:
    print("Accept null hypothesis")

Reject null hypothesis accept alternative:
 At least two of the means are significantly different.


## Another way to do a one-way ANOVA is with statsmodels

In [26]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# pandas.core.datetools does not exist in current pandas releases, so an
# unconditional import stops this cell with an ImportError. Nothing in this
# cell uses the name, so it is imported only where it is still available.
try:
    from pandas.core import datetools  # noqa: F401
except ImportError:
    pass

# Ordinary least squares fit of Values on the Groups factor
mod = ols('Values ~ Groups', data=df).fit()

# ANOVA table (typ=2) for the fitted model: sum of squares, degrees of
# freedom, F statistic and p-value
aov_table = sm.stats.anova_lm(mod, typ=2)
print(aov_table)

          sum_sq    df         F    PR(>F)
Groups    3035.2   2.0  9.703325  0.003111
Residual  1876.8  12.0       NaN       NaN


In [28]:
# Effect size (eta squared) from the ANOVA table: the Groups sum of squares
# divided by the Groups plus Residual sums of squares.
# The table is indexed by row label ('Groups', 'Residual'), so rows are
# selected by label; an integer key on a label-indexed Series relies on a
# deprecated positional fallback.
ss_groups = aov_table.loc['Groups', 'sum_sq']
ss_residual = aov_table.loc['Residual', 'sum_sq']
esq_sm = ss_groups/(ss_groups + ss_residual)

In [29]:
# Compare the F statistic of the Groups row in the statsmodels table with the
# critical value f. The row is selected by label because the table is indexed
# by row label; an integer key relies on a deprecated positional fallback.
if aov_table.loc['Groups', 'F'] < f:
    print("Accept null hypothesis")
else:
    print("Reject null hypothesis accept alternative:\n", Ha)

Reject null hypothesis accept alternative:
 At least two of the means are significantly different.
