In [None]:
pip install pingouin

# 1 Way ANOVA

In [8]:
import pandas as pd
import numpy as np
from scipy import stats

# Create dataframe
data = {
    'Method': ['Bounding Box'] * 4 + ['SAM'] * 4 + ['SAM2'] * 4 + ['Automated'] * 4,
    'Annotations': [299, 328, 639, 179,  # Bounding Box
                   376, 306, 1025, 251,  # SAM
                   295, 326, 1333, 298,  # SAM2
                   561, 770, 1090, 630]  # Automated
}

# Add Site information
data['Site'] = ['Natural Reef', 'Seagrass', 'Art Reef', 'Sand'] * 4

# Create DataFrame
df = pd.DataFrame(data)

# Prepare data for analysis
methods = df['Method'].unique()
sites = df['Site'].unique()

# Print descriptive statistics
print("Descriptive Statistics:")
print(df.groupby(['Method', 'Site'])['Annotations'].describe())

# Perform two-way ANOVA using scipy
from scipy import stats

# Reshape data for scipy
data_by_method_site = []
for method in methods:
    method_data = []
    for site in sites:
        values = df[(df['Method'] == method) & (df['Site'] == site)]['Annotations'].values
        method_data.append(values)
    data_by_method_site.append(method_data)

# Perform two-way ANOVA
f_statistic, p_value = stats.f_oneway(*[df[df['Method'] == method]['Annotations'] for method in methods])

print("\nOne-way ANOVA Results (Method effect):")
print(f"F-statistic: {f_statistic}")
print(f"p-value: {p_value}")

# Calculate means for each method
print("\nMean annotations by Method:")
print(df.groupby('Method')['Annotations'].mean())

print("\nMean annotations by Site:")
print(df.groupby('Site')['Annotations'].mean())

Descriptive Statistics:
                           count    mean  std     min     25%     50%     75%  \
Method       Site                                                               
Automated    Art Reef        1.0  1090.0  NaN  1090.0  1090.0  1090.0  1090.0   
             Natural Reef    1.0   561.0  NaN   561.0   561.0   561.0   561.0   
             Sand            1.0   630.0  NaN   630.0   630.0   630.0   630.0   
             Seagrass        1.0   770.0  NaN   770.0   770.0   770.0   770.0   
Bounding Box Art Reef        1.0   639.0  NaN   639.0   639.0   639.0   639.0   
             Natural Reef    1.0   299.0  NaN   299.0   299.0   299.0   299.0   
             Sand            1.0   179.0  NaN   179.0   179.0   179.0   179.0   
             Seagrass        1.0   328.0  NaN   328.0   328.0   328.0   328.0   
SAM          Art Reef        1.0  1025.0  NaN  1025.0  1025.0  1025.0  1025.0   
             Natural Reef    1.0   376.0  NaN   376.0   376.0   376.0   376.0   
    

# 2 Way ANOVA

In [9]:
import pandas as pd
import numpy as np
from scipy import stats
import pingouin as pg

# Create dataframe
data = {
    'Method': ['Bounding Box'] * 4 + ['SAM'] * 4 + ['SAM2'] * 4 + ['Automated'] * 4,
    'Site': ['Natural Reef', 'Seagrass', 'Art Reef', 'Sand'] * 4,
    'Annotations': [299, 328, 639, 179,  # Bounding Box
                   376, 306, 1025, 251,  # SAM
                   295, 326, 1333, 298,  # SAM2
                   561, 770, 1090, 630]   # Automated
}

# Create DataFrame
df = pd.DataFrame(data)

# Perform two-way ANOVA using pingouin
aov = pg.anova(dv='Annotations', 
               between=['Method', 'Site'],
               data=df,
               detailed=True)

print("Two-way ANOVA Results:")
print(aov)

# Print means for reference
print("\nMean annotations by Method:")
print(df.groupby('Method')['Annotations'].mean())
print("\nMean annotations by Site:")
print(df.groupby('Site')['Annotations'].mean())

Two-way ANOVA Results:
          Source          SS  DF             MS  np2
0         Method   338321.25   3  112773.750000  1.0
1           Site  1233996.25   3  411332.083333  1.0
2  Method * Site   228130.25   9   25347.805556  1.0
3       Residual        0.00   0            NaN  NaN

Mean annotations by Method:
Method
Automated       762.75
Bounding Box    361.25
SAM             489.50
SAM2            563.00
Name: Annotations, dtype: float64

Mean annotations by Site:
Site
Art Reef        1021.75
Natural Reef     382.75
Sand             339.50
Seagrass         432.50
Name: Annotations, dtype: float64


  ms_resid = ss_resid / df_resid


# Tukey's HSD

In [10]:
import pandas as pd
import numpy as np
from statsmodels.stats.multicomp import pairwise_tukeyhsd

data = {
    'Method': ['Bounding Box'] * 4 + ['SAM'] * 4 + ['SAM2'] * 4 + ['Automated'] * 4,
    'Annotations': [299, 328, 639, 179,  # Bounding Box
                   376, 306, 1025, 251,  # SAM
                   295, 326, 1333, 298,  # SAM2
                   561, 770, 1090, 630]  # Automated
}
df = pd.DataFrame(data)

# Perform Tukey's HSD
tukey = pairwise_tukeyhsd(df['Annotations'], df['Method'])

print("Tukey's HSD Test Results:")
print(tukey)

# Calculate mean for each method
method_means = df.groupby('Method')['Annotations'].mean()
print("\nMean annotations per method:")
print(method_means)

# The HSD value is calculated as:
# HSD = q * sqrt(MSE / n)
# where:
# q = studentized range statistic
# MSE = Mean Square Error from ANOVA
# n = number of observations per group

# Manual calculation example
def calculate_hsd(q, mse, n):
    return q * np.sqrt(mse / n)

# Example values (these would normally come from your ANOVA)
q_value = 3.63  # for α=0.05, k=4 groups, df=12
mse = np.var(df['Annotations'], ddof=1)  # approximation
n = 4  # observations per group

hsd = calculate_hsd(q_value, mse, n)
print("\nCalculated HSD value:", hsd)

# Compare two means manually
print("\nExample manual comparison:")
mean_diff = abs(method_means['Automated'] - method_means['Bounding Box'])
print(f"Difference between Automated and Bounding Box: {mean_diff}")
print(f"Is difference significant? {mean_diff > hsd}")

Tukey's HSD Test Results:
        Multiple Comparison of Means - Tukey HSD, FWER=0.05         
   group1       group2    meandiff p-adj    lower     upper   reject
--------------------------------------------------------------------
   Automated Bounding Box   -401.5  0.401 -1134.2952 331.2952  False
   Automated          SAM  -273.25 0.6922 -1006.0452 459.5452  False
   Automated         SAM2  -199.75 0.8489  -932.5452 533.0452  False
Bounding Box          SAM   128.25 0.9528  -604.5452 861.0452  False
Bounding Box         SAM2   201.75 0.8452  -531.0452 934.5452  False
         SAM         SAM2     73.5 0.9903  -659.2952 806.2952  False
--------------------------------------------------------------------

Mean annotations per method:
Method
Automated       762.75
Bounding Box    361.25
SAM             489.50
SAM2            563.00
Name: Annotations, dtype: float64

Calculated HSD value: 628.8126371314829

Example manual comparison:
Difference between Automated and Bounding Box: 401.5