In [1]:
import pandas as pd
import numpy as np
from scipy.stats import f

In [10]:
class Anova:
    """One-way analysis of variance (ANOVA) on a long-format DataFrame.

    ``fit`` computes the F statistic and its p-value and stores them in
    ``f_stats`` and ``p_value``; ``predict`` prints the outcome of the
    hypothesis test at a chosen significance level.
    """

    def __init__(self):
        # Both stay None until fit() has completed successfully.
        self.f_stats = None
        self.p_value = None

    def fit(self, data, value_col, group_col):
        """Run a one-way ANOVA of ``value_col`` across the groups in ``group_col``.

        Parameters
        ----------
        data : pandas.DataFrame
            Long-format table with one row per observation.
        value_col : str
            Name of the numeric column holding the measurements.
        group_col : str
            Name of the column holding each observation's group label.

        Raises
        ------
        ValueError
            If fewer than two groups remain after dropping incomplete rows,
            or if there are no within-group degrees of freedom (the number
            of usable observations does not exceed the number of groups).

        Notes
        -----
        Rows whose value or group label is missing are ignored. The F value
        and p-value are printed and stored on the instance; nothing is
        returned.
        """
        self.data = data
        self.value_col = value_col
        self.group_col = group_col
        # Clear earlier results so a failed fit cannot leave stale statistics.
        self.f_stats = None
        self.p_value = None

        # A row with a missing value or label belongs to no group, so it must
        # not contribute to any sum of squares or to the degrees of freedom.
        observed = data[[value_col, group_col]].dropna()
        values = observed[value_col]
        by_group = values.groupby(observed[group_col], sort=False, observed=True)

        group_sizes = by_group.size()
        group_means = by_group.mean()
        grand_mean = values.mean()

        dfb = len(group_sizes) - 1
        dfw = len(values) - len(group_sizes)
        if dfb < 1:
            raise ValueError("ANOVA requires at least two groups with data.")
        if dfw < 1:
            raise ValueError(
                "ANOVA requires more observations than groups "
                "(no within-group degrees of freedom)."
            )

        # Between-group sum of squares computed directly from the group means;
        # deriving it as SST - SSW can go slightly negative through cancellation.
        ssb = (group_sizes * (group_means - grand_mean) ** 2).sum()
        # Within-group sum of squares: deviation of each value from its own group mean.
        ssw = ((values - by_group.transform('mean')) ** 2).sum()

        msb = ssb / dfb
        msw = ssw / dfw

        # Zero within-group variance makes F infinite (or NaN when the group
        # means are also identical); let that through without a numpy warning.
        with np.errstate(divide='ignore', invalid='ignore'):
            self.f_stats = float(msb / msw)
        # The survival function keeps precision for tiny p-values, where
        # 1 - cdf would round to exactly 0.
        self.p_value = float(f.sf(self.f_stats, dfb, dfw))

        print('F-Value: ', self.f_stats)
        print('P-Value: ', self.p_value)

    def predict(self, alpha=0.05):
        """Print the test decision for the fitted model.

        Parameters
        ----------
        alpha : float, default 0.05
            Significance level; the null hypothesis is rejected when the
            stored p-value is strictly below it.

        Raises
        ------
        RuntimeError
            If called before ``fit`` has produced a p-value.

        Notes
        -----
        The verdict is printed; the method returns None.
        """
        if self.p_value is None:
            raise RuntimeError("Call fit() before predict().")

        if self.p_value < alpha:
            print("Reject the null hypothesis: At least one group mean is different.")
        else:
            print("Fail to reject the null hypothesis: No significant difference between group means.")

In [11]:
# Example data set: three methods (A, B, C) with three scores each,
# stored in long format (one row per observation).
data = pd.DataFrame(
    {
        'Method': [label for label in 'ABC' for _ in range(3)],
        'Score': [
            82, 85, 88,   # A
            78, 79, 80,   # B
            90, 92, 94,   # C
        ],
    }
)

print(data)

  Method  Score
0      A     82
1      A     85
2      A     88
3      B     78
4      B     79
5      B     80
6      C     90
7      C     92
8      C     94


In [12]:
# Create the ANOVA helper; its F statistic and p-value stay None until fit() is called.
model = Anova()

In [13]:
# Run the one-way ANOVA of Score across the Method groups (prints F and p).
model.fit(data, 'Score', 'Method')
# predict() prints its verdict itself and returns None, so it is called bare;
# wrapping it in print() would emit a stray "None" line after the verdict.
model.predict()

F-Value:  27.21428571428571
P-Value:  0.0009788739453649997
Reject the null hypothesis: At least one group mean is different.
None
