In [1]:
import numpy as np
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd

def analyze_classifiers(all_data):
    """
    Run ANOVA + Tukey HSD for three classifiers (CvT, Swin, vViT).
    ------
    ANOVA results, Tukey HSD table, and plain-English interpretation.
    """
    
    # Flatten classifier results across experiments
    cvt_vals = np.concatenate([exp[0] for exp in all_data])
    swin_vals = np.concatenate([exp[1] for exp in all_data])
    vit_vals  = np.concatenate([exp[2] for exp in all_data])

    # One-way ANOVA
    f_stat, p_val = stats.f_oneway(cvt_vals, swin_vals, vit_vals)
    print("ANOVA results (classifiers only):")
    print(f"F-statistic = {f_stat:.2f}, p-value = {p_val:.3e}")

    # Tukey HSD
    values = np.concatenate([cvt_vals, swin_vals, vit_vals])
    groups = (["CvT"] * len(cvt_vals) +
              ["Swin"] * len(swin_vals) +
              ["vViT"] * len(vit_vals))

    tukey = pairwise_tukeyhsd(endog=values, groups=groups, alpha=0.05)
    print("\nTukey HSD results (classifiers only):")
    print(tukey.summary())

    # Interpretation
    print("\nInterpretation:")
    if p_val < 0.05:
        print("ANOVA shows significant differences among CvT, Swin, and vViT.")
    else:
        print("No overall group differences found (ANOVA not significant).")

    for res in tukey.summary().data[1:]:
        g1, g2, meandiff, p_adj, lower, upper, reject = res
        if reject:
            direction = "higher" if meandiff > 0 else "lower"
            print(f"{g1} is significantly {direction} than {g2} "
                  f"(mean diff = {meandiff:.2f}, p < {p_adj:.3f}).")
        else:
            print(f"No significant difference between {g1} and {g2}.")


In [11]:
all_data_pl = [[[4.74, 4.79, 4.83], [4.72, 4.71, 4.69], [4.81, 4.74, 4.75], [4.72, 4.70, 4.81]],
            [[4.77, 5.10, 4.99], [4.74, 4.73, 4.81], [5.36, 5.32, 5.25], [4.86, 4.96, 4.75]],
            [[4.81, 5.9, 4.77], [4.72, 4.72, 4.73], [4.76, 4.8, 4.89], [4.71, 4.73, 4.73]],
            [[5.58, 5.58, 5.40], [4.71, 4.71, 4.73], [5.14, 5.04, 5.11], [5.18, 5.00, 5.12]],
            # new
            [[5.37, 5.37, 5.38], [4.74, 4.74, 4.75], [5.23, 5.06, 5.11], [5.19, 5.01, 5.10]]]


all_data_pa = [[[4.79, 4.86, 4.51], [4.22, 4.21, 4.21],[5.48, 5.42, 5.46], [4.66, 4.75, 4.87]], #bar
            [[4.46, 4.49, 4.57], [4.21, 4.22, 4.22],[5.49, 5.51, 5.46], [5.10, 4.39, 4.52]], #pie
            [[4.67, 4.53, 4.83], [4.21, 4.21, 4.22],[5.39, 5.46, 5.48], [4.87, 5.08, 4.68]]] #pie_aa

all_data_bfr = [[[4.77, 4.73, 4.79], [4.74, 4.77, 4.75], [5.49, 5.4, 5.49], [5.49, 4.94, 4.78]], # rect  [[cvt],[swin],[vit]]
            [[4.79, 4.71, 4.71], [4.76, 4.74, 4.76], [5.34, 5.23, 5.21], [5.22, 5.50, 5.25]]] # bar


all_data_weber = [[[9, 9, 9.37], [5.21, 7.48, 7.17], [8.14, 8.27, 8.25], [9.01, 9, 9.03]],
            [[9, 9, 9], [8.43, 4.8, 4.87], [7.32, 7.45, 7.30], [8.16, 8.18, 8.15]],
            [[6.19, 7.48, 7.72], [5.48, 5.79, 5.93], [4.79, 4.79, 4.79],[4.8, 4.8, 4.8]]]

all_data_pcs =[[[4.6475, 4.6867, 4.2233, 4.7090, 4.7528, 4.3433, 4.7120, 4.9461, 4.1102],
                [1.0408, 1.4773, 3.3962, 0.8783, 1.4136, 3.8111, 3.5096, 1.6000],
                [4.6881, 4.6780, 4.6841, 4.7034, 4.6774, 4.6807, 4.7091, 4.6832, 4.6843]], # po_c_s
               [[1.3326, 4.1530, 4.1737, 4.9184, 0.9315, 0.3265, 5.1861, 4.4959, 0.3879],
               [4.1712, 4.2372, 5.2907, 2.8838, -0.1424, 5.3088, 3.8682, -0.3532],
               [4.7558, 4.7375, 5.1604, 4.7013, 4.6974, 5.1417, 4.7086, 4.6866]], # po_naS
               [[-0.2615, 2.3640, 1.1541, 1.3233, 0.0740, 2.3684, 1.1078, 1.3976, 4.8730, 5.4232, 0.9908, 1.3588, 5.9090, 5.6354, 7.7115, 1.3453],
               [-0.1031, 1.4313, 2.0132, 1.0071, 0.0512, 1.0731, 2.0359,  5.4074, 5.2941, 0.9899, 2.0211, 4.6754, 4.4382, 5.7243, 1.9769],
               [1.2783, 1.1193, 1.9675, 1.8651, 1.2908, 1.1779, 2.0805, 1.8956, 5.4032, 5.4260, 2.0062, 2.0678, 5.4649, 5.4918, 4.0204, 2.0240]], # length
               [[2.0529, 1.2522, 2.0714, 4.4208, 1.3300, 2.2085, 4.6764, 4.6566, 2.0680],
                [0.6481, 1.0000, 4.6830,  3.7663, 0.8735, 4.6861, 4.9196, 4.6377, 4.6812],
                [3.4153, 4.5852, 4.6709,4.6877, 4.6075, 4.6691,4.7324, 4.6751, 4.6723]], # direction
               [[1.6381, 1.9553, 2.1922, 3.7514, 2.0162, 2.1734, 3.9181, 2.4947, 2.1794],
                [0.8057, 1.7375, 2.3087, 5.3989, 1.6938, 2.2149, 5.8056, 4.3184, 2.2126],
                [4.7106, 4.7000, 4.6601, 4.6933, 4.6829, 4.6753, 4.7034, 4.6931]], # angle
               [[2.8226, 1.2471, 4.0007, 3.3340, 1.6536, 3.9931, 3.3288, 2.3639, 3.9676], 
                [0.9847, 2.4734, 1.1695, 1.6539, 2.4587, 1.1194, 1.8373, 2.4617, 1.1364], 
                [3.6141, 2.8862, 2.7350, 3.3415, 2.9742, 2.6803, 3.3714, 3.1759, 2.7153]], # area
               [[3.6220, 3.6698, 2.0363, 3.9530, 3.5850, 2.0441, 4.0943, 3.7903, 1.9940],
                [3.6646, 2.9127, 4.4665, 3.5003, 2.9205, 4.3878, 3.9148, 3.0704, 4.2699],
                [3.3659, 3.5218, 2.1463, 3.7234, 3.5971, 2.1760, 4.4058, 4.4301, 2.2110]], # volume
               [[2.0191, 1.9570, 1.7805, 3.6163, 1.2731, 1.7446, 3.6929, 2.1529, 1.7479], 
                [1.6122, 0.8810, 4.4474, 1.5979, 0.2939, 4.3190, 2.0893, 0.3884], 
               [2.7471, 2.8682, 2.7412, 4.0743, 3.0306, 2.6357, 4.0604, 3.0324, 2.7224]], # curvature
               [[2.8101, 2.7428, 0.8762, 3.9363, 2.6926, 0.8854, 4.0893, 3.0997, 0.8423],
                [0.9985, 1.4175, 0.1173, 2.0813, 1.4762, 0.1114, 1.9979, 1.5778, 0.0680],
                [3.3665, 2.5764, 3.3512, 3.6944, 2.7153, 3.4323, 3.7542, 2.8038, 3.5089]]]


print("for position length")
analyze_classifiers(all_data_pl)
print("------------------------------------------------\n")
print("for position angle")
analyze_classifiers(all_data_pa)
print("------------------------------------------------\n")
print("for bar rectangle")
analyze_classifiers(all_data_bfr)
print("------------------------------------------------\n")
print("for weber")
analyze_classifiers(all_data_weber)
print("------------------------------------------------\n")
print("for elementary perceptual")
analyze_classifiers(all_data_pcs)



for position length
ANOVA results (classifiers only):
F-statistic = 11.44, p-value = 1.084e-04

Tukey HSD results (classifiers only):
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   CvT   Swin  -0.4287 0.0001 -0.6515 -0.2059   True
   CvT   vViT   -0.134 0.3197 -0.3568  0.0888  False
  Swin   vViT   0.2947  0.007  0.0719  0.5175   True
----------------------------------------------------

Interpretation:
ANOVA shows significant differences among CvT, Swin, and vViT.
CvT is significantly lower than Swin (mean diff = -0.43, p < 0.000).
No significant difference between CvT and vViT.
Swin is significantly higher than vViT (mean diff = 0.29, p < 0.007).
------------------------------------------------

for position angle
ANOVA results (classifiers only):
F-statistic = 419.14, p-value = 2.161e-19

Tukey HSD results (classifiers only):
Multiple Comparison of Means - Tukey HSD, FW