In [2]:
# ONE WAY ANOVA
import numpy as np
from scipy.stats import f, f_oneway
 
# Dataset
year_1_scores = np.array([82, 93, 61, 74, 69, 70, 53])
year_2_scores = np.array([71, 62, 85, 94, 78, 66, 71])
year_3_scores = np.array([64, 73, 87, 91, 56, 78, 87])
 
# Combine all scores for one-way ANOVA
all_scores = np.concatenate([year_1_scores, year_2_scores, year_3_scores])
 
# Perform one-way ANOVA
f_statistic, p_value = f_oneway(year_1_scores, year_2_scores, year_3_scores)
 
# Calculate the overall mean
overall_mean = np.mean(all_scores)
 
# Calculate the total sum of squares (SST)
sst = np.sum((all_scores - overall_mean)**2)
 
# Calculate the sum of squares between groups (SSC)
ssc = np.sum(len(year_1_scores) * (np.mean(year_1_scores) - overall_mean)**2 +
             len(year_2_scores) * (np.mean(year_2_scores) - overall_mean)**2 +
             len(year_3_scores) * (np.mean(year_3_scores) - overall_mean)**2)
 
# Calculate the sum of squares within groups (SSB)
ssb = sst - ssc
 
# Calculate the degrees of freedom for each component
df_ssc = 3 - 1  # Number of groups minus 1
df_ssb = len(all_scores) - 3  # Total number of observations minus number of groups
df_total = len(all_scores) - 1  # Total number of observations minus 1
 
# Calculate the mean square between (MSC) and mean square error (MSE)
msc = ssc / df_ssc
mse = ssb / df_ssb
 
# Calculate the critical value for the F-statistic
alpha = 0.05
critical_value = f.ppf(1 - alpha, df_ssc, df_ssb)
 
# Display results
print("One-Way ANOVA F-Statistic:", f_statistic)
print("P-Value:", p_value)
print("SST (Total Sum of Squares):", sst)
print("SSC (Between-Group Sum of Squares):", ssc)
print("SSB (Within-Group Sum of Squares):", ssb)
print("MSC (Mean Square Between):", msc)
print("MSE (Mean Square Error):", mse)
print("Critical Value:", critical_value)

One-Way ANOVA F-Statistic: 0.2837261275904103
P-Value: 0.7562784116739912
SST (Total Sum of Squares): 2901.2380952380954
SSC (Between-Group Sum of Squares): 88.66666666666691
SSB (Within-Group Sum of Squares): 2812.5714285714284
MSC (Mean Square Between): 44.33333333333346
MSE (Mean Square Error): 156.25396825396825
Critical Value: 3.554557145661787


In [1]:
# TWO WAY ANOVA WITH REPLICATION
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import f
 
# Provided data
data = {
    'FRESHMAN': [3, 1, 3, 2, 4, 4, 3, 5, 4, 3, 8, 6, 4, 5, 5],
    'SOPHOMORE': [4, 2, 3, 4, 3, 3, 5, 4, 5, 5, 6, 8, 6, 7, 5],
    'JUNIOR': [5, 5, 4, 3, 4, 6, 4, 5, 5, 6, 6, 8, 7, 7, 9],
    'SENIOR': [6, 6, 7, 8, 5, 7, 8, 7, 9, 6, 7, 8, 10, 9, 8]
}
 
ICECREAM = ['SWOOPS', 'SWOOPS', 'SWOOPS', 'SWOOPS', 'SWOOPS',
         'BALDYS', 'BALDYS', 'BALDYS', 'BALDYS', 'BALDYS',
         'THE ROOST', 'THE ROOST', 'THE ROOST', 'THE ROOST', 'THE ROOST']
 
# Create a DataFrame
df = pd.DataFrame(data)
df['Brand'] = ICECREAM
 
# Melt the DataFrame for easier analysis
melted_df = pd.melt(df, id_vars=['Brand'], value_vars=['FRESHMAN', 'SOPHOMORE', 'JUNIOR', 'SENIOR'], var_name='Factor', value_name='Score')
 
# Fit a two-way ANOVA model with replication
model = ols('Score ~ C(Factor) * C(Brand)', data=melted_df).fit()
 
# Perform ANOVA
anova_table = sm.stats.anova_lm(model, type=2)
 
# Display the ANOVA table
print(anova_table)
 
# Calculate critical values for F-statistics
alpha = 0.05
 
# Critical value for Factor A
dof_factor_a = anova_table['df'][0]
dof_within = anova_table['df'][-1]
#print("Dof Within", dof_within)
critical_value_factor_a = f.ppf(1 - alpha, dof_factor_a, dof_within)
print("Critical Value for Factor A:", critical_value_factor_a)
 
# Critical value for Brand B
dof_brand_b = anova_table['df'][1]
critical_value_brand_b = f.ppf(1 - alpha, dof_brand_b, dof_within)
print("Critical Value for Brand B:", critical_value_brand_b)
 
# Critical value for Factor A x Brand B interaction
dof_interaction = anova_table['df'][2]
critical_value_interaction = f.ppf(1 - alpha, dof_interaction, dof_within)
print("Critical Value for Interaction:", critical_value_interaction)
 
# Compare F-statistics with critical values
is_factor_a_significant = anova_table['F'][0] > critical_value_factor_a
is_brand_b_significant = anova_table['F'][1] > critical_value_brand_b
is_interaction_significant = anova_table['F'][2] > critical_value_interaction
 
# Print results
print("Factor A is significant:", is_factor_a_significant)
print("Brand B is significant:", is_brand_b_significant)
print("Interaction is significant:", is_interaction_significant)

                      df     sum_sq    mean_sq          F        PR(>F)
C(Factor)            3.0  98.050000  32.683333  28.627737  9.246665e-11
C(Brand)             2.0  82.633333  41.316667  36.189781  2.609297e-10
C(Factor):C(Brand)   6.0   3.100000   0.516667   0.452555  8.395798e-01
Residual            48.0  54.800000   1.141667        NaN           NaN
Critical Value for Factor A: 2.79806063543561
Critical Value for Brand B: 3.1907273359284987
Critical Value for Interaction: 2.294601313470631
Factor A is significant: True
Brand B is significant: True
Interaction is significant: False
