In [1]:
import pandas as pd
import numpy as np
from scipy.stats import f_oneway, levene
import pingouin as pg
from statsmodels.stats.multicomp import pairwise_tukeyhsd

def analyze_groups(df, group_col='diagnosis', alpha=0.05, *, levene_alpha=0.05):
    """Compare every numeric column of *df* across the levels of *group_col*.

    For each numeric column (other than *group_col* itself) the rows with a
    missing group label or missing value are dropped, Levene's test is run
    on the per-group values, and then either Welch's ANOVA (Levene
    p < *levene_alpha*) or a standard one-way ANOVA is applied. When the
    omnibus test is significant at *alpha*, a post-hoc table is attached:
    Tukey HSD after one-way ANOVA, Games-Howell after Welch's ANOVA.

    Args:
        df: DataFrame holding the grouping column and the measurements.
        group_col: Name of the column that defines the groups.
        alpha: Significance level for the ANOVA and the post-hoc tests.
        levene_alpha: Significance level for Levene's test, i.e. the
            threshold below which variances are treated as unequal.

    Returns:
        A list with one dict per analysed column. Every dict has the keys
        'variable', 'method', 'p_value', 'levene_p' and 'significant';
        significant results additionally carry 'tukey_summary' or
        'gameshowell_summary' (a DataFrame). Columns with fewer than two
        groups after dropping missing values are skipped.
    """
    results = []

    # The grouping column may itself be numeric (e.g. 0/1 coded); it must
    # not be analysed against itself.
    numeric_cols = [
        col
        for col in df.select_dtypes(include=[np.number]).columns
        if col != group_col
    ]

    for col in numeric_cols:
        # One NaN-free view shared by every test below, so the omnibus test
        # and the post-hoc test always see the same observations.
        subset = df[[group_col, col]].dropna()
        groups = subset.groupby(group_col)[col].apply(list)

        if len(groups) < 2:
            continue  # Need at least two groups to compare

        # Levene's test for equal variances
        _, levene_p = levene(*groups)

        if levene_p < levene_alpha:
            # Variances differ: Welch's ANOVA does not assume homogeneity.
            welch_result = pg.welch_anova(dv=col, between=group_col, data=subset)
            p_val = welch_result['p-unc'].values[0]
            method = "Welch's ANOVA"
        else:
            _, p_val = f_oneway(*groups)
            method = "One-way ANOVA"

        significant = p_val < alpha
        result = {
            'variable': col,
            'method': method,
            'p_value': p_val,
            'levene_p': levene_p,
            'significant': significant,
        }

        if significant and method == "One-way ANOVA":
            # Tukey HSD assumes equal variances, matching the one-way ANOVA.
            tukey = pairwise_tukeyhsd(
                endog=subset[col], groups=subset[group_col], alpha=alpha
            )
            # summary() is the public accessor for the results table; the
            # first row of its data is the header.
            table = tukey.summary().data
            result['tukey_summary'] = pd.DataFrame(data=table[1:], columns=table[0])
        elif significant and method == "Welch's ANOVA":
            # Games-Howell is the unequal-variance counterpart of Tukey HSD.
            result['gameshowell_summary'] = pg.pairwise_gameshowell(
                dv=col, between=group_col, data=subset
            )

        results.append(result)

    return results

In [None]:
# Load the dataset that analyze_groups() will be run on.
df = pd.read_csv("/dementia_prediction/data.csv")

In [None]:
# Run the group comparison on every numeric column of df using the
# function's defaults (group_col='diagnosis', alpha=0.05).
analyze_groups(df)