<a href="https://colab.research.google.com/github/tejas-gkt/Statistics/blob/master/ANOVA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import f_oneway
from statsmodels.formula.api import ols

In [2]:
# Raw observations: each key is one treatment group and each list holds
# that group's ten recorded values.
data = {
    'Treatment-1': [67, 42, 67, 56, 62, 64, 59, 72, 71, 60],
    'Treatment-2': [50, 52, 43, 67, 67, 59, 67, 64, 63, 65],
    'Treatment-3': [48, 49, 50, 55, 56, 61, 61, 60, 59, 64],
    'Treatment-4': [47, 67, 54, 67, 68, 65, 65, 56, 60, 65],
}

# Wide layout: one column per treatment, one row per observation slot.
df = pd.DataFrame(data)
df

Unnamed: 0,Treatment-1,Treatment-2,Treatment-3,Treatment-4
0,67,50,48,47
1,42,52,49,67
2,67,43,50,54
3,56,67,55,67
4,62,67,56,68
5,64,59,61,65
6,59,67,61,65
7,72,64,60,56
8,71,63,59,60
9,60,65,64,65


In [3]:
# Reshape from wide to long so the data can be passed to ols:
# the former column names go into 'Treatment' and the measurements
# go into 'Value', giving one observation per row.
data_melt = pd.melt(df, var_name='Treatment', value_name='Value')

In [4]:
# Preview the long-format frame. It has 40 rows (4 treatments x 10
# observations), so show only the first 10 rather than dumping all of it.
data_melt.head(10)

Unnamed: 0,Treatment,Value
0,Treatment-1,67
1,Treatment-1,42
2,Treatment-1,67
3,Treatment-1,56
4,Treatment-1,62
5,Treatment-1,64
6,Treatment-1,59
7,Treatment-1,72
8,Treatment-1,71
9,Treatment-1,60


In [6]:
# Hypotheses
# Null hypothesis (H0): the means of all treatments are equal
# Alternative hypothesis (H1): at least one treatment mean differs from the others

In [7]:
# We fail to reject the null hypothesis if:
# the F-value is close to 1
# the p-value > 0.05 (alpha)


In [8]:
# Fit an ordinary least squares model of Value on Treatment;
# C(...) marks Treatment as a categorical factor in the formula.
anova_formula = 'Value ~ C(Treatment)'
model = ols(anova_formula, data=data_melt).fit()

# Perform the ANOVA on the fitted model.
anova_table = sm.stats.anova_lm(model)

# Print the resulting table.
print(anova_table)

                df  sum_sq    mean_sq         F   PR(>F)
C(Treatment)   3.0   196.5  65.500000  1.144327  0.34436
Residual      36.0  2060.6  57.238889       NaN      NaN


In [12]:
# R-squared of the fitted model, reported as a percentage.
r_square = model.rsquared
r_square_pct = r_square * 100
print(r_square_pct)

8.705861503699442


In [13]:
# Performing ANOVA without a DataFrame: f_oneway takes each group's
# observations as a separate sequence.

# One plain list of observations per treatment group.
Treatment_1 = [67, 42, 67, 56, 62, 64, 59, 72, 71, 60]
Treatment_2 = [50, 52, 43, 67, 67, 59, 67, 64, 63, 65]
Treatment_3 = [48, 49, 50, 55, 56, 61, 61, 60, 59, 64]
Treatment_4 = [47, 67, 54, 67, 68, 65, 65, 56, 60, 65]

# The result unpacks into the F statistic and its p-value.
f_stats, p_value = f_oneway(Treatment_1, Treatment_2, Treatment_3, Treatment_4)

# Output
print(f'F-Stats: {f_stats}', f'P-Value: {p_value}', sep='\n')

F-Stats: 1.144326895079103
P-Value: 0.3443595629359094
