In [15]:
import pandas as pd
import numpy as np
import pingouin as pg
from scipy import stats

In [16]:
# Load the questionnaire responses; the path is resolved relative to the
# notebook's working directory.
data = pd.read_csv(filepath_or_buffer='sample_ssq_data.csv')

In [None]:
# Questionnaire items (column names in `data`) that feed each subscale.
# Some items deliberately appear in more than one list, e.g.
# 'general_discomfort' counts toward both nausea and oculomotor.
nausea_symptoms = [
    'general_discomfort',
    'salivation_increased',
    'sweating',
    'nausea',
    'difficulty_concentrating',
    'stomach_awareness',
    'burping',
]

oculomotor_symptoms = [
    'general_discomfort',
    'fatigue',
    'headache',
    'eye_strain',
    'difficulty_focusing',
    'difficulty_concentrating',
    'blurred_vision',
]

disorientation_symptoms = [
    'difficulty_focusing',
    'nausea',
    'fullness_of_head',
    'blurred_vision',
    'dizziness_eyes_open',
    'dizziness_eyes_closed',
    'vertigo',
]

In [18]:
# Raw (unweighted) subscale totals: for every row, add up the ratings of the
# items that belong to each subscale.
data['N_sum'] = data.loc[:, nausea_symptoms].sum(axis='columns')
data['O_sum'] = data.loc[:, oculomotor_symptoms].sum(axis='columns')
data['D_sum'] = data.loc[:, disorientation_symptoms].sum(axis='columns')

In [19]:
# Scaling constants applied to the raw subscale sums.
NAUSEA_WEIGHT = 9.54
OCULOMOTOR_WEIGHT = 7.58
DISORIENTATION_WEIGHT = 13.92
TOTAL_WEIGHT = 3.74

# Each subscale score is its raw sum times the subscale's own weight.
data['nausea_score'] = NAUSEA_WEIGHT * data['N_sum']
data['oculomotor_score'] = OCULOMOTOR_WEIGHT * data['O_sum']
data['disorientation_score'] = DISORIENTATION_WEIGHT * data['D_sum']

# The total score weights the three raw sums added together (not the already
# weighted subscale scores).
raw_total = data['N_sum'] + data['O_sum'] + data['D_sum']
data['total_score'] = TOTAL_WEIGHT * raw_total

In [28]:
# Restrict the analysis to participants who have data for all 4 methods.
# Count the distinct methods seen per participant, then keep only the rows of
# participants whose count is exactly 4.
participant_counts = data.groupby('participant_id')['method'].nunique()
has_all_methods = participant_counts == 4
complete_participants = participant_counts.loc[has_all_methods].index
data_complete = data.loc[data['participant_id'].isin(complete_participants)]
data_complete.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 25 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   participant_id            40 non-null     int64  
 1   method                    40 non-null     int64  
 2   general_discomfort        40 non-null     int64  
 3   fatigue                   40 non-null     int64  
 4   headache                  40 non-null     int64  
 5   eye_strain                40 non-null     int64  
 6   difficulty_focusing       40 non-null     int64  
 7   salivation_increased      40 non-null     int64  
 8   sweating                  40 non-null     int64  
 9   nausea                    40 non-null     int64  
 10  difficulty_concentrating  40 non-null     int64  
 11  fullness_of_head          40 non-null     int64  
 12  blurred_vision            40 non-null     int64  
 13  dizziness_eyes_open       40 non-null     int64  
 14  dizziness_ey

In [None]:
# Compare every score across methods in a within-subject design.
# Per score:
#   1. Shapiro-Wilk normality check for each method.
#   2. All methods normal -> one-way repeated-measures ANOVA (with
#      Greenhouse-Geisser correction when sphericity is violated);
#      otherwise -> Friedman test.
#   3. Significant omnibus test -> Bonferroni-corrected pairwise post-hocs.
#   4. Mean ± SD per method.
scores = ['nausea_score', 'oculomotor_score', 'disorientation_score', 'total_score']
ALPHA = 0.05  # significance threshold used for every test in this cell


def report_posthoc(frame, score, parametric):
    """Run and print Bonferroni-corrected pairwise comparisons between methods.

    Parameters
    ----------
    frame : pandas.DataFrame
        Long-format data with 'participant_id', 'method' and the `score` column.
    score : str
        Name of the dependent-variable column.
    parametric : bool
        Forwarded to ``pg.pairwise_tests``; True follows the ANOVA branch,
        False follows the Friedman branch (pairwise Wilcoxon tests).

    Returns
    -------
    tuple of pandas.DataFrame
        The full post-hoc table and the subset of rows with p-corr < ALPHA.
    """
    posthoc = pg.pairwise_tests(data=frame, dv=score, within='method',
                                subject='participant_id', padjust='bonf',
                                parametric=parametric)
    print("\nPost-hoc Pairwise Comparisons:")
    print(posthoc)

    # Report significant pairs
    significant_pairs = posthoc[posthoc['p-corr'] < ALPHA]
    if not significant_pairs.empty:
        print("\nSignificant pairwise differences:")
        for _, row in significant_pairs.iterrows():
            print(f"Method {row['A']} vs Method {row['B']}: p = {row['p-corr']:.4f}")
    else:
        print("\nNo significant pairwise differences after Bonferroni correction.")
    return posthoc, significant_pairs


for score in scores:
    print(f"\nAnalyzing {score}")

    # Check normality for each method using Shapiro-Wilk test
    normality_passed = True
    for method in data_complete['method'].unique():
        data_method = data_complete.loc[data_complete['method'] == method, score]
        if data_method.nunique() == 1:  # Check if all values are the same
            # Constant samples are not tested; they are treated as passing.
            print(f"Method {method} has constant values, assuming normality.")
        else:
            normality = pg.normality(data_method, alpha=ALPHA)
            if not normality['normal'].iloc[0]:
                normality_passed = False
                print(f"Normality test for method {method}: p = {normality['pval'].iloc[0]:.4f}, not normal")
            else:
                print(f"Normality test for method {method}: p = {normality['pval'].iloc[0]:.4f}, normal")

    if normality_passed:
        print(f"All methods are normally distributed for {score}")
        # Perform one-way repeated-measures ANOVA.
        # correction=True: always return the sphericity / Greenhouse-Geisser
        #   columns; with the default 'auto' they are only present when
        #   sphericity is violated, so reading them would raise KeyError
        #   whenever sphericity holds.
        # effsize='np2': request partial eta-squared explicitly, since that is
        #   the column read and the label printed below.
        aov = pg.rm_anova(data=data_complete, dv=score, within='method',
                          subject='participant_id', correction=True,
                          detailed=True, effsize='np2')
        print("\nANOVA Results:")
        print(aov)

        # Row 0 of the detailed table is the 'method' effect (row 1 is the error term).
        # Check sphericity (Mauchly’s test). If the columns are absent (e.g. fewer
        # than 3 levels, where sphericity does not apply) fall back to p-unc.
        has_sphericity_info = 'sphericity' in aov.columns and 'p-GG-corr' in aov.columns
        if not has_sphericity_info or bool(aov['sphericity'].iloc[0]):
            p_value = aov['p-unc'].iloc[0]
            print(f"Sphericity holds, p-unc = {p_value:.4f}")
        else:
            p_value = aov['p-GG-corr'].iloc[0]
            print(f"Sphericity violated, using Greenhouse-Geisser correction, p-GG-corr = {p_value:.4f}")

        # Check significance and report effect size
        partial_eta_sq = aov['np2'].iloc[0]
        if p_value < ALPHA:
            print(f"Significant difference found in ANOVA for {score} (Partial η² = {partial_eta_sq:.4f})")
            # Post-hoc pairwise comparisons with Bonferroni correction
            posthoc, significant_pairs = report_posthoc(data_complete, score, parametric=True)
        else:
            print(f"No significant difference found in ANOVA for {score} (Partial η² = {partial_eta_sq:.4f})")
    else:
        print(f"Normality not satisfied for all methods in {score}, switching to Friedman test")
        # Perform nonparametric Friedman test
        friedman = pg.friedman(data=data_complete, dv=score, within='method',
                               subject='participant_id')
        print("\nFriedman Test Results:")
        print(friedman)

        if friedman['p-unc'].iloc[0] < ALPHA:
            print(f"Significant difference found in Friedman test for {score}")
            # Post-hoc pairwise Wilcoxon tests with Bonferroni correction
            posthoc, significant_pairs = report_posthoc(data_complete, score, parametric=False)
        else:
            print(f"No significant difference found in Friedman test for {score}")

    # Calculate and print means ± SD for each method
    means_sd = data_complete.groupby('method')[score].agg(['mean', 'std'])
    print(f"\nMeans ± SD for {score} across methods:")
    for method in means_sd.index:
        mean = means_sd.loc[method, 'mean']
        sd = means_sd.loc[method, 'std']
        print(f"Method {method}: {mean:.2f} ± {sd:.2f}")


Analyzing nausea_score
Normality test for method 0: p = 0.0081, not normal
Normality test for method 1: p = 0.0199, not normal
Normality test for method 2: p = 0.0000, not normal
Normality test for method 3: p = 0.2078, normal
Normality not satisfied for all methods in nausea_score, switching to Friedman test

Friedman Test Results:
          Source         W  ddof1          Q     p-unc
Friedman  method  0.890588      3  26.717647  0.000007
Significant difference found in Friedman test for nausea_score

Post-hoc Pairwise Comparisons:
  Contrast  A  B  Paired  Parametric  W-val alternative     p-unc    p-corr  \
0   method  0  1    True       False    0.0   two-sided  0.001953  0.011719   
1   method  0  2    True       False    0.0   two-sided  0.062500  0.375000   
2   method  0  3    True       False    0.0   two-sided  0.125000  0.750000   
3   method  1  2    True       False    0.0   two-sided  0.001953  0.011719   
4   method  1  3    True       False    0.0   two-sided  0.00195