In [30]:
import numpy as np
from scipy.stats import f

class TwoWayAnova:
    def __init__(self):
        self.results = {}

    def fit(self, data, value_col, factor_a, factor_b):
        self.data = data
        self.value_col = value_col
        self.factor_a = factor_a
        self.factor_b = factor_b

        a_levels = data[factor_a].unique()
        b_levels = data[factor_b].unique()
        grand_mean = data[value_col].mean()

        dfA = len(a_levels) - 1
        dfB = len(b_levels) - 1
        dfAB = dfA * dfB
        dfW = len(data) - (len(a_levels) * len(b_levels))

        # SS for Factor A
        SSA = 0
        for a in a_levels:
            group = data[data[factor_a] == a][value_col]
            SSA += len(group) / len(b_levels) * (group.mean() - grand_mean) ** 2

        # SS for Factor B
        SSB = 0
        for b in b_levels:
            group = data[data[factor_b] == b][value_col]
            SSB += len(group) / len(a_levels) * (group.mean() - grand_mean) ** 2

        # SS for Interaction
        SSAB = 0
        for a in a_levels:
            for b in b_levels:
                group = data[(data[factor_a] == a) & (data[factor_b] == b)][value_col]
                if len(group) == 0:
                    continue
                mean_ab = group.mean()
                mean_a = data[data[factor_a] == a][value_col].mean()
                mean_b = data[data[factor_b] == b][value_col].mean()
                SSAB += len(group) * ((mean_ab - mean_a - mean_b + grand_mean) ** 2)

        # SS Within/Error
        SSW = 0
        for a in a_levels:
            for b in b_levels:
                group = data[(data[factor_a] == a) & (data[factor_b] == b)][value_col]
                if len(group) == 0:
                    continue
                mean_group = group.mean()
                SSW += ((group - mean_group) ** 2).sum()

        # Mean Squares
        MSA = SSA / dfA
        MSB = SSB / dfB
        MSAB = SSAB / dfAB
        MSW = SSW / dfW

        # F-statistics
        FA = MSA / MSW
        FB = MSB / MSW
        FAB = MSAB / MSW

        # p-values
        pA = 1 - f.cdf(FA, dfA, dfW)
        pB = 1 - f.cdf(FB, dfB, dfW)
        pAB = 1 - f.cdf(FAB, dfAB, dfW)

        self.results = {
            'Factor A': {'SS': SSA, 'DF': dfA, 'MS': MSA, 'F': FA, 'p': pA},
            'Factor B': {'SS': SSB, 'DF': dfB, 'MS': MSB, 'F': FB, 'p': pB},
            'Interaction': {'SS': SSAB, 'DF': dfAB, 'MS': MSAB, 'F': FAB, 'p': pAB},
            'Error': {'SS': SSW, 'DF': dfW, 'MS': MSW}
        }

    def summary(self):
        print(f"{'Source':<12} {'SS':>10} {'DF':>5} {'MS':>10} {'F':>10} {'p-value':>10}")
        print("-" * 60)
        for k, v in self.results.items():
            line = f"{k:<12} {v['SS']:>10.2f} {v['DF']:>5} {v['MS']:>10.2f}"
            if 'F' in v:
                line += f" {v['F']:>10.2f} {v['p']:>10.4f}"
            print(line)


In [18]:
import pandas as pd

In [74]:
df = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DataSet.xlsx")

In [20]:
df.head()

Unnamed: 0,Alginate,Guargum,Pectin,Days,Weight_loss,Firmness,TSS,pH,L*,a*,b*,delta_E,TPC,DPPH
0,0.0,0.0,0.0,0,0.0,721.0,7.9,4.12,19.62,9.51,21.46,0.0,510.83,67.76
1,0.0,0.0,0.0,2,18.19776,468.69,4.76,4.11,15.48,6.85,18.87,6.508748,716.88,123.2
2,3.0,0.0,0.0,2,18.43008,500.96,4.85,4.07,16.47,7.51,19.52,4.172649,498.13,120.1
3,0.0,3.0,0.0,2,18.33792,513.9,4.8,4.08,15.93,6.98,19.46,5.33911,436.88,161.5
4,0.0,0.0,3.0,2,18.47232,591.24,5.04,4.11,17.41,8.42,19.81,2.965586,413.13,162.3


In [73]:
model = TwoWayAnova()
model.fit(df, 'delta_E', 'Days', 'Pectin')
model.summary()

Source               SS    DF         MS          F    p-value
------------------------------------------------------------
Factor A         169.03     3      56.34       9.44     0.0029
Factor B           8.61     5       1.72       0.29     0.9088
Interaction        6.94    15       0.46       0.08     1.0000
Error             59.69    10       5.97


In [89]:
df_1 = pd.read_excel(r"C:\Users\saqli\Desktop\FULL DATASET_ORIGINAL.xlsx")
df_new = pd.concat([df, df_1, df], ignore_index=True)
print(df_new)


     Alginate  Guargum  Pectin  Days  Weight_loss  Firmness   TSS    pH  \
0         0.0      0.0     0.0     0      0.00000    721.00  7.90  4.12   
1         0.0      0.0     0.0     2     18.19776    468.69  4.76  4.11   
2         3.0      0.0     0.0     2     18.43008    500.96  4.85  4.07   
3         0.0      3.0     0.0     2     18.33792    513.90  4.80  4.08   
4         0.0      0.0     3.0     2     18.47232    591.24  5.04  4.11   
..        ...      ...     ...   ...          ...       ...   ...   ...   
97        0.0      1.5     1.5     6     15.33504    230.17  1.30  4.42   
98        2.0      0.5     0.5     6     17.61216    402.05  4.48  4.38   
99        0.5      2.0     0.5     6     16.12608    301.92  2.88  4.36   
100       0.5      0.5     2.0     6     16.79808    317.31  3.18  4.42   
101       1.0      1.0     1.0     6     16.29696    382.75  4.19  4.46   

        L*    a*     b*    delta_E      TPC    DPPH  
0    19.62  9.51  21.46   0.000000   510.83  

In [96]:
model = TwoWayAnova()
model.fit(df_new, 'b*', 'Days', 'Pectin')
model.summary()

Source               SS    DF         MS          F    p-value
------------------------------------------------------------
Factor A         290.18     3      96.73     505.69     0.0000
Factor B           2.24     5       0.45       2.35     0.0488
Interaction        9.32    15       0.62       3.25     0.0003
Error             14.92    78       0.19
