In [1]:
import sys
import os

# Put the directory two levels above the current working directory at the
# front of the import search path, unless it is already listed there.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import shapiro, pearsonr

In [3]:
# Older-adult cohort: the filtered measurements file and the averaged one.
older_adults_trial_level, older_adults_average_level = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/processed/older_adults/filtered_measurements.csv",
        "../../data/processed/older_adults/average_measurements.csv",
    )
)

# Student cohort: the measurements file and the averaged one.
students_trial_level, students_average_level = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/processed/students/measurements.csv",
        "../../data/processed/students/average_measurements.csv",
    )
)

In [4]:
def _default_assumptions_filename(df):
    """Return the CSV filename for `df`, matched by identity against known notebook globals."""
    known_datasets = (
        ("older_adults_trial_level", "oa_trial_assumptions.csv"),
        ("older_adults_average_level", "oa_average_assumptions.csv"),
        ("students_trial_level", "st_trial_assumptions.csv"),
        ("students_average_level", "st_average_assumptions.csv"),
    )
    # Look the globals up lazily so a missing dataset variable does not raise
    # NameError; an unmatched frame simply falls through to the generic name.
    module_globals = globals()
    for global_name, csv_name in known_datasets:
        candidate = module_globals.get(global_name)
        if candidate is not None and df is candidate:
            return csv_name
    return "unknown_dataset_assumptions.csv"


def check_statistical_assumptions_print_and_save(df, average_level=False, alpha=0.05,
                                                 r_threshold=0.3, output_dir="assumptions",
                                                 filename=None):
    """
    Checks and prints normality and heteroscedasticity of the paired
    Force_Plate - ZED_COM differences for each (state, metric) group,
    then saves the per-group results as a CSV file inside `output_dir`
    (relative to the current working directory unless an absolute path is given).

    Checks performed per group (groups with fewer than 3 complete pairs are
    reported as 'Too few'):
    - Normality: Shapiro-Wilk test on the differences; 'Normal' when p >= alpha.
    - Heteroscedasticity: Pearson correlation between the pairwise mean and the
      absolute difference; flagged 'Yes' when p < alpha and |r| > r_threshold.

    Parameters:
    - df: pandas DataFrame with ['participant name', 'state', 'device', 'metric', 'value' or 'average_value']
          (an optional 'trial' column is used as a pairing key at trial level)
    - average_level: bool, if True uses 'average_value', otherwise 'value'
    - alpha: significance level applied to both tests (default 0.05)
    - r_threshold: minimum |r| required to flag heteroscedasticity (default 0.3)
    - output_dir: directory the CSV is written to; created if missing
    - filename: CSV filename; if None it is derived from which known notebook
                dataset `df` is, falling back to 'unknown_dataset_assumptions.csv'

    Returns:
    - None. Results are printed and written to disk.
    """
    devices = ['Force_Plate', 'ZED_COM']
    result_columns = ['state', 'metric', 'n', 'normality', 'heteroscedasticity', 'summary']
    value_col = 'average_value' if average_level else 'value'
    df_filtered = df[df['device'].isin(devices)]
    results = []

    # The flag column reports heteroscedasticity ('Yes' = assumption violated),
    # so the header is labelled accordingly.
    header = f"{'State':<10} {'Metric':<25} {'n':<4} {'Normality':<15} {'Heteroscedasticity':<20} {'Summary'}"
    print(header)
    print("-" * len(header))

    for (state, metric), group in df_filtered.groupby(['state', 'metric']):
        pivot_index = ['participant name']
        if not average_level and 'trial' in group.columns:
            pivot_index.append('trial')

        pivot = group.pivot_table(
            index=pivot_index,
            columns='device',
            values=value_col
        )
        # A group measured by only one device has no column for the other one;
        # reindex so it yields zero complete pairs instead of raising KeyError.
        pivot = pivot.reindex(columns=devices).dropna(subset=devices)

        n = pivot.shape[0]
        if n < 3:
            summary = {
                'state': state, 'metric': metric, 'n': n,
                'normality': 'Too few', 'heteroscedasticity': 'Too few',
                'summary': 'Too few samples'
            }
            results.append(summary)
            print(f"{state:<10} {metric:<25} {n:<4} {'Too few':<15} {'Too few':<20} Too few samples")
            continue

        diff = pivot['Force_Plate'] - pivot['ZED_COM']
        mean = (pivot['Force_Plate'] + pivot['ZED_COM']) / 2
        abs_diff = np.abs(diff)

        shapiro_p = shapiro(diff).pvalue
        normality = 'Normal' if shapiro_p >= alpha else 'Not normal'

        # Require both statistical significance and a non-trivial effect size
        # before calling the spread magnitude-dependent.
        r_val, r_p = pearsonr(mean, abs_diff)
        hetero = 'Yes' if r_p < alpha and abs(r_val) > r_threshold else 'No'

        if normality == 'Normal' and hetero == 'No':
            summary_text = 'Assumptions met'
        elif normality != 'Normal' and hetero == 'Yes':
            summary_text = 'Both violated'
        elif normality != 'Normal':
            summary_text = 'Non-normal'
        else:
            summary_text = 'Heterosced'

        results.append({
            'state': state,
            'metric': metric,
            'n': n,
            'normality': normality,
            'heteroscedasticity': hetero,
            'summary': summary_text
        })

        print(f"{state:<10} {metric:<25} {n:<4} {normality:<15} {hetero:<20} {summary_text}")

    # Determine the destination
    os.makedirs(output_dir, exist_ok=True)
    if filename is None:
        filename = _default_assumptions_filename(df)

    # Save results; explicit columns keep the CSV header even with no groups
    save_path = os.path.join(output_dir, filename)
    pd.DataFrame(results, columns=result_columns).to_csv(save_path, index=False)
    print(f" Saved assumptions summary to: {save_path}")

## Older Adults

In [5]:
# Trial-level check for older adults (reads the 'value' column).
check_statistical_assumptions_print_and_save(df=older_adults_trial_level, average_level=False)

State      Metric                    n    Normality       Homoscedasticity     Summary
--------------------------------------------------------------------------------------
closed     AP MAD                    184  Normal          Yes                  Heterosced
closed     AP Max abs dev            184  Not normal      Yes                  Both violated
closed     AP MedAD                  184  Not normal      Yes                  Both violated
closed     AP RMS                    184  Not normal      Yes                  Both violated
closed     AP Range                  184  Not normal      Yes                  Both violated
closed     AP STD                    184  Not normal      Yes                  Both violated
closed     AP Variance               184  Not normal      Yes                  Both violated
closed     Ellipse area              184  Not normal      Yes                  Both violated
closed     ML MAD                    184  Normal          Yes                  Heterosced

In [6]:
# Average-level check for older adults (reads the 'average_value' column).
check_statistical_assumptions_print_and_save(df=older_adults_average_level, average_level=True)

State      Metric                    n    Normality       Homoscedasticity     Summary
--------------------------------------------------------------------------------------
closed     AP MAD                    37   Normal          No                   Assumptions met
closed     AP Max abs dev            37   Not normal      Yes                  Both violated
closed     AP MedAD                  37   Normal          No                   Assumptions met
closed     AP RMS                    37   Normal          No                   Assumptions met
closed     AP Range                  37   Not normal      Yes                  Both violated
closed     AP STD                    37   Normal          No                   Assumptions met
closed     AP Variance               37   Not normal      Yes                  Both violated
closed     Ellipse area              37   Not normal      Yes                  Both violated
closed     ML MAD                    37   Normal          No                   Assumptions met

## Students

In [7]:
# Trial-level check for students (reads the 'value' column).
check_statistical_assumptions_print_and_save(df=students_trial_level, average_level=False)

State      Metric                    n    Normality       Homoscedasticity     Summary
--------------------------------------------------------------------------------------
closed     AP MAD                    134  Not normal      No                   Non-normal
closed     AP Max abs dev            134  Not normal      No                   Non-normal
closed     AP MedAD                  134  Not normal      No                   Non-normal
closed     AP RMS                    134  Normal          No                   Assumptions met
closed     AP Range                  134  Normal          No                   Assumptions met
closed     AP STD                    134  Not normal      No                   Non-normal
closed     AP Variance               134  Not normal      Yes                  Both violated
closed     Ellipse area              134  Normal          Yes                  Heterosced
closed     ML MAD                    134  Normal          No                   Assumptions met

In [8]:
# Average-level check for students (reads the 'average_value' column).
check_statistical_assumptions_print_and_save(df=students_average_level, average_level=True)

State      Metric                    n    Normality       Homoscedasticity     Summary
--------------------------------------------------------------------------------------
closed     AP MAD                    27   Normal          No                   Assumptions met
closed     AP Max abs dev            27   Normal          No                   Assumptions met
closed     AP MedAD                  27   Normal          No                   Assumptions met
closed     AP RMS                    27   Normal          No                   Assumptions met
closed     AP Range                  27   Normal          No                   Assumptions met
closed     AP STD                    27   Normal          No                   Assumptions met
closed     AP Variance               27   Normal          No                   Assumptions met
closed     Ellipse area              27   Normal          No                   Assumptions met
closed     ML MAD                    27   Normal          No                   Assumptions met