In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pingouin as pg
from scipy.stats import kruskal

In [2]:
exp1 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Questionnaires/FRS_exp1.csv")
exp2 = pd.read_csv("/Volumes/TwoTeras/1_Experiment_2/Questionnaires/FRS_Exp2.csv")
control = pd.read_csv("/Volumes/TwoTeras/Resources/SpaRe_FRS_Scores_BothConditions.csv")

In [3]:
control.columns

Index(['Participant_ID', 'Index', 'Condition', 'FRS_2_bool', 'Gender', 'Age',
       'Field_of_study', 'FRS_1_01', 'FRS_1_02', 'FRS_1_03', 'FRS_1_04',
       'FRS_1_05', 'FRS_1_06', 'FRS_1_07', 'FRS_1_08', 'FRS_1_09', 'FRS_1_10',
       'FRS_1_11', 'FRS_1_12', 'FRS_1_13', 'FRS_1_14', 'FRS_1_15', 'FRS_1_16',
       'FRS_1_17', 'FRS_1_18', 'FRS_1_19', 'FRS_2_01', 'FRS_2_02', 'FRS_2_03',
       'FRS_2_04', 'FRS_2_05', 'FRS_2_06', 'FRS_2_07', 'FRS_2_08', 'FRS_2_09',
       'FRS_2_10', 'FRS_2_11', 'FRS_2_12', 'FRS_2_13', 'FRS_2_14', 'FRS_2_15',
       'FRS_2_16', 'FRS_2_17', 'FRS_2_18', 'FRS_2_19'],
      dtype='object')

In [4]:
exp1.columns

Index(['Zeitstempel', 'Please select your prefered languague', 'ID', 'Item1',
       'Item2', 'Item3', 'Item4', 'Item5', 'Item6', 'Item7', 'Item8', 'Item9',
       'Item10', 'Item11', 'Item12', 'Item13', 'Item14', 'Item15', 'Item16',
       'Item17', 'Item18', 'Item19', 'Alter ', 'Unnamed: 23'],
      dtype='object')

In [5]:
exp1["Experiment"] = 'One'
exp2["Experiment"] = 'Two'
control["Experiment"] = 'Control'

In [6]:
exp1[exp1['ID'].isnull()]

Unnamed: 0,Zeitstempel,Please select your prefered languague,ID,Item1,Item2,Item3,Item4,Item5,Item6,Item7,...,Item13,Item14,Item15,Item16,Item17,Item18,Item19,Alter,Unnamed: 23,Experiment
4,12.11.2021 11:13:08,German,,4.0,3,3,2,2,2,3,...,3,2,2,3,3,2,4,,,One
9,09.12.2021 14:36:36,English,,6.0,4,5,5,4,5,5,...,4,5,6,6,6,5,5,,,One


In [7]:
# Rename 'Participant_ID' to 'ID'
control.rename(columns={'Participant_ID': 'ID', 'Age': 'Alter '}, inplace=True)

# Define columns to keep

# Define columns to keep
columns_to_keep = ['ID', 'Alter ', 'Experiment'] + [f'FRS_1_{i:02d}' for i in range(1, 20)]

# Rename the 'FRS_1_01' to 'FRS_1_19' columns to 'Item1' to 'Item19'
rename_mapping = {f'FRS_1_{i:02d}': f'Item{i}' for i in range(1, 20)}

# Filter columns and rename them
control =  control[columns_to_keep].rename(columns=rename_mapping)

In [8]:
control.ID.unique()

array([1004, 1005, 1008, 1010, 1011, 1013, 1017, 1018, 1019, 1021, 1022,
       1023, 1027, 1029, 1031, 1034, 1035, 1036, 1041, 1043, 1044, 1045,
       1047, 1049, 1054, 1055, 1056, 1057, 1058, 1060, 1062, 1068, 1069,
       1072, 1073, 1074, 1075, 1077, 1079, 1080, 1082, 1083, 1086, 1136,
       1038, 1088, 1089, 1090, 1091, 1092, 1093, 1100, 1101])

In [9]:
Complete_Experiments = pd.concat([exp1, exp2, control])

In [10]:
Complete_Experiments

Unnamed: 0,Zeitstempel,Please select your prefered languague,ID,Item1,Item2,Item3,Item4,Item5,Item6,Item7,...,Item14,Item15,Item16,Item17,Item18,Item19,Alter,Unnamed: 23,Experiment,Sex
0,29.10.2021 16:40:20,English,365.0,7.0,7.0,6.0,6.0,5.0,2.0,7.0,...,6.0,6.0,7.0,3.0,3.0,5.0,37.0,weiblich,One,
1,01.11.2021 14:55:03,German,3572.0,6.0,4.0,2.0,5.0,6.0,1.0,3.0,...,6.0,5.0,4.0,1.0,5.0,3.0,23.0,männlich,One,
2,01.11.2021 15:40:16,German,7264.0,6.0,6.0,4.0,4.0,7.0,1.0,5.0,...,5.0,4.0,5.0,1.0,5.0,5.0,23.0,männlich,One,
3,02.11.2021 10:11:51,German,8469.0,5.0,2.0,2.0,2.0,6.0,1.0,4.0,...,4.0,3.0,3.0,1.0,3.0,3.0,29.0,weiblich,One,
4,12.11.2021 11:13:08,German,,4.0,3.0,3.0,2.0,2.0,2.0,3.0,...,2.0,2.0,3.0,3.0,2.0,4.0,,,One,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48,,,1091.0,7.0,6.0,6.0,7.0,4.0,1.0,2.0,...,6.0,6.0,6.0,6.0,6.0,5.0,,,Control,
49,,,1092.0,5.0,3.0,5.0,5.0,6.0,3.0,3.0,...,5.0,5.0,4.0,3.0,6.0,3.0,,,Control,
50,,,1093.0,6.0,3.0,5.0,4.0,6.0,2.0,4.0,...,4.0,5.0,4.0,4.0,5.0,3.0,,,Control,
51,,,1100.0,5.0,5.0,6.0,3.0,4.0,1.0,2.0,...,4.0,5.0,3.0,1.0,6.0,2.0,,,Control,


In [11]:
Global = Complete_Experiments[["Item1", "Item4", "Item5", "Item8", "Item10", "Item12", "Item13", "Item14", "Item15","Item18"]]
Complete_Experiments['Global'] = Complete_Experiments[["Item1", "Item4", "Item5", "Item8", "Item10", "Item12", "Item13", "Item14", "Item15","Item18"]].sum(axis=1)
pg.cronbach_alpha(data=Global, ci=.99)

(0.8938857854137204, array([0.85 , 0.928]))

In [12]:
Overview = Complete_Experiments[["Item2", "Item3", "Item7", "Item9", "Item11", "Item16", "Item19"]]
Complete_Experiments['Overview'] = Complete_Experiments[['Item2', 'Item3', 'Item7', 'Item9', 'Item11', 'Item16', 'Item19']].sum(axis=1)
pg.cronbach_alpha(data=Overview, ci=.99)

(0.8683323593444701, array([0.812, 0.912]))

In [13]:
Cardinal = Complete_Experiments[["Item6", "Item17"]]
Complete_Experiments['Cardinal'] = Complete_Experiments[["Item6", "Item17"]].sum(axis=1)
pg.cronbach_alpha(data=Cardinal, ci=.99)


(0.7466852756454987, array([0.585, 0.845]))

In [14]:
Complete_Experiments

Unnamed: 0,Zeitstempel,Please select your prefered languague,ID,Item1,Item2,Item3,Item4,Item5,Item6,Item7,...,Item17,Item18,Item19,Alter,Unnamed: 23,Experiment,Sex,Global,Overview,Cardinal
0,29.10.2021 16:40:20,English,365.0,7.0,7.0,6.0,6.0,5.0,2.0,7.0,...,3.0,3.0,5.0,37.0,weiblich,One,,56.0,44.0,5.0
1,01.11.2021 14:55:03,German,3572.0,6.0,4.0,2.0,5.0,6.0,1.0,3.0,...,1.0,5.0,3.0,23.0,männlich,One,,57.0,22.0,2.0
2,01.11.2021 15:40:16,German,7264.0,6.0,6.0,4.0,4.0,7.0,1.0,5.0,...,1.0,5.0,5.0,23.0,männlich,One,,47.0,35.0,2.0
3,02.11.2021 10:11:51,German,8469.0,5.0,2.0,2.0,2.0,6.0,1.0,4.0,...,1.0,3.0,3.0,29.0,weiblich,One,,42.0,20.0,2.0
4,12.11.2021 11:13:08,German,,4.0,3.0,3.0,2.0,2.0,2.0,3.0,...,3.0,2.0,4.0,,,One,,26.0,25.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48,,,1091.0,7.0,6.0,6.0,7.0,4.0,1.0,2.0,...,6.0,6.0,5.0,,,Control,,53.0,36.0,7.0
49,,,1092.0,5.0,3.0,5.0,5.0,6.0,3.0,3.0,...,3.0,6.0,3.0,,,Control,,54.0,26.0,6.0
50,,,1093.0,6.0,3.0,5.0,4.0,6.0,2.0,4.0,...,4.0,5.0,3.0,,,Control,,51.0,26.0,6.0
51,,,1100.0,5.0,5.0,6.0,3.0,4.0,1.0,2.0,...,1.0,6.0,2.0,,,Control,,45.0,21.0,2.0


In [15]:
nan_rows = Complete_Experiments[Complete_Experiments['ID'].isnull()]
Complete_Experiments_cleaned = Complete_Experiments.dropna(subset=['ID'])

In [16]:
duplicates = Complete_Experiments_cleaned[Complete_Experiments_cleaned['ID'].duplicated(keep=False)]
print(duplicates)

            Zeitstempel Please select your prefered languague      ID  Item1  \
3   14.11.2022 15:16:48                                German  1031.0    5.0   
19  06.03.2023 13:17:37                                German  5851.0    NaN   
20  06.03.2023 15:56:27                                German  5851.0    6.0   
14                  NaN                                   NaN  1031.0    4.0   

    Item2  Item3  Item4  Item5  Item6  Item7  ...  Item17  Item18  Item19  \
3     6.0    3.0    5.0    4.0    1.0    6.0  ...     1.0     6.0     6.0   
19    NaN    NaN    NaN    NaN    NaN    NaN  ...     NaN     NaN     NaN   
20    5.0    7.0    6.0    6.0    3.0    5.0  ...     2.0     5.0     6.0   
14    7.0    2.0    2.0    7.0    1.0    2.0  ...     1.0     3.0     2.0   

    Alter   Unnamed: 23  Experiment       Sex  Global  Overview  Cardinal  
3     20.0          NaN         Two  weiblich    56.0      35.0       2.0  
19     NaN          NaN         Two       NaN     0.0       0

In [17]:
Complete_Experiments_cleaned = Complete_Experiments_cleaned.drop(index=19)
print(Complete_Experiments_cleaned)

            Zeitstempel Please select your prefered languague      ID  Item1  \
0   29.10.2021 16:40:20                               English   365.0    7.0   
1   01.11.2021 14:55:03                                German  3572.0    6.0   
2   01.11.2021 15:40:16                                German  7264.0    6.0   
3   02.11.2021 10:11:51                                German  8469.0    5.0   
5   17.11.2021 11:12:24                                German  6642.0    5.0   
..                  ...                                   ...     ...    ...   
48                  NaN                                   NaN  1091.0    7.0   
49                  NaN                                   NaN  1092.0    5.0   
50                  NaN                                   NaN  1093.0    6.0   
51                  NaN                                   NaN  1100.0    5.0   
52                  NaN                                   NaN  1101.0    2.0   

    Item2  Item3  Item4  Item5  Item6  

In [25]:
# List of IDs to include
ids_to_include = [
    365, 1005, 1008, 1010, 1011, 1013, 1017, 1018, 1019, 1021, 1022, 1023, 1031, 1054, 1055, 
    1056, 1057, 1058, 1068, 1069, 1072, 1073, 1074, 1075, 1077, 1079, 1080, 1268, 1574, 1754, 
    1843, 2258, 2693, 3310, 4176, 4580, 4597, 4598, 4796, 4847, 4875, 4917, 5161, 5189, 5741, 
    5743, 5766, 5851, 5972, 6406, 6642, 7081, 7093, 7412, 7823, 7842, 7935, 8007, 8469, 8629, 
    8673, 9297, 9472, 9502, 9586, 9601, 9627
]

# Filter the DataFrame
filtered_df = Complete_Experiments_cleaned[Complete_Experiments_cleaned['ID'].isin(ids_to_include)]

In [26]:
filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67 entries, 0 to 39
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   ID          67 non-null     float64
 1   Experiment  67 non-null     object 
 2   Global      67 non-null     float64
 3   Cardinal    67 non-null     float64
 4   Overview    67 non-null     float64
dtypes: float64(4), object(1)
memory usage: 3.1+ KB


In [27]:
# Assuming Complete_Experiments_cleaned is already loaded
df = filtered_df



# Extract the data for each subscale and perform the Kruskal-Wallis H Test
global_data = [df[df['Experiment'] == exp]['Global'].values for exp in df['Experiment'].unique()]
cardinal_data = [df[df['Experiment'] == exp]['Cardinal'].values for exp in df['Experiment'].unique()]
overview_data = [df[df['Experiment'] == exp]['Overview'].values for exp in df['Experiment'].unique()]

# Perform the Kruskal-Wallis H Test for each subscale
stat_global, p_global = kruskal(*global_data)
stat_cardinal, p_cardinal = kruskal(*cardinal_data)
stat_overview, p_overview = kruskal(*overview_data)

print(f'Kruskal-Wallis test for Global: statistic={stat_global}, p-value={p_global}')
print(f'Kruskal-Wallis test for Cardinal: statistic={stat_cardinal}, p-value={p_cardinal}')
print(f'Kruskal-Wallis test for Overview: statistic={stat_overview}, p-value={p_overview}')


Kruskal-Wallis test for Global: statistic=1.686026874770786, p-value=0.4304115489701872
Kruskal-Wallis test for Cardinal: statistic=0.13497468866189613, p-value=0.9347395502955694
Kruskal-Wallis test for Overview: statistic=0.9806060787962264, p-value=0.612440772377435


In [21]:
len(global_data[2])

26

In [22]:
# Keep only relevant columns: ID and the three conditions
relevant_columns = ['ID', 'Experiment', 'Global', 'Cardinal', 'Overview']
Complete_Experiments_cleaned = Complete_Experiments_cleaned[relevant_columns]

# Pivot the data to have one row per participant and columns for each condition
df_pivot = Complete_Experiments_cleaned.pivot(index='ID', columns='Experiment', values=['Global', 'Cardinal', 'Overview'])

# Define the Friedman test function
def perform_friedman_test(df, variables):
    results = {}
    for var in variables:
        data = [df[var]['One'], df[var]['Two'], df[var]['Control']]
        stat, p_value = friedmanchisquare(*data)
        results[var] = {'statistic': stat, 'p-value': p_value}
    return results

# Define the post-hoc test function
def perform_posthoc_tests(df, variables):
    pairwise_results = {}
    conditions = df.columns.levels[1]
    for var in variables:
        p_values = []
        comparisons = []
        for i, cond1 in enumerate(conditions):
            for cond2 in conditions[i + 1:]:
                stat, p_val = wilcoxon(df[var][cond1], df[var][cond2])
                p_values.append(p_val)
                comparisons.append(f'{cond1} vs {cond2}')
        
        # Adjust p-values for multiple comparisons
        adjusted_p_values = multipletests(p_values, method='bonferroni')[1]
        
        pairwise_results[var] = {comparisons[i]: {'p-value': p_values[i], 'adjusted p-value': adjusted_p_values[i]} for i in range(len(comparisons))}
    
    return pairwise_results

# List of variables to test
variables_to_test = ['Global', 'Cardinal', 'Overview']

# Perform the Friedman test for each variable
friedman_results = perform_friedman_test(df_pivot, variables_to_test)

# Perform post-hoc tests if Friedman test is significant
if any(result['p-value'] < 0.05 for result in friedman_results.values()):
    posthoc_results = perform_posthoc_tests(df_pivot, variables_to_test)
else:
    posthoc_results = {}

#import ace_tools as tools; tools.display_dataframe_to_user(name="Pivoted DataFrame", dataframe=df_pivot)

friedman_results, posthoc_results

NameError: name 'friedmanchisquare' is not defined

In [None]:
import pandas as pd
from scipy.stats import friedmanchisquare

# Sample DataFrame
data = {
    'ID': [365.0, 3572.0, 7264.0, 8469.0, 6642.0, 1091.0, 1092.0, 1093.0, 1100.0, 1101.0],
    'Experiment': ['One', 'One', 'One', 'One', 'One', 'Control', 'Control', 'Control', 'Control', 'Control'],
    'Global': [56.0, 57.0, 47.0, 42.0, 49.0, 53.0, 54.0, 51.0, 45.0, 37.0],
    'Cardinal': [5.0, 2.0, 2.0, 2.0, 8.0, 7.0, 6.0, 6.0, 2.0, 2.0],
    'Overview': [44.0, 22.0, 35.0, 20.0, 33.0, 36.0, 26.0, 26.0, 21.0, 31.0]
}

df = pd.DataFrame(data)

# Ensure data is sorted by Experiment and ID (if needed)
df = df.sort_values(by=['Experiment', 'ID'])

# Perform the Friedman test
stat, p = friedmanchisquare(df['Global'], df['Cardinal'], df['Overview'])

print(f'Friedman test statistic: {stat}')
print(f'p-value: {p}')


In [None]:
exp1 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Questionnaires/Results/FRSsubs_exp1.csv")
exp2 = pd.read_csv("/Volumes/TwoTeras/1_Experiment_2/Questionnaires/Results/FRSsubs.csv")
plt.style.use('default')
fig, axes = plt.subplots(1, 2, sharey=True,  figsize=(8, 6))
fig.suptitle('FRS questionnaire subscales at baseline \n', fontsize=18)
fig.tight_layout()

axes[0].boxplot(x=exp1[["Global", "Overview", "Cardinal"]])
axes[0].set_ylabel("Individual subscale aggregate")
axes[0].set_title('Experiment 1', fontsize=16)
axes[0].set_xticks([1, 2, 3], ["Global", "Survey", "Cardinal"])

axes[1].boxplot(x=exp2[["Global", "Overview", "Cardinal"]])
axes[1].set_title('Experiment 2', fontsize=16)
axes[1].set_xticks([1, 2, 3], ["Global", "Survey", "Cardinal"], fontsize=16)

for n, subplot in np.ndenumerate(axes):
    subplot.grid(True, color = "grey", linewidth = "0.3", linestyle = "-.")
    subplot.tick_params(axis='both', labelsize=16)
    subplot.yaxis.get_label().set_fontsize(16)
    #subplot.yaxis.set_major_locator(MaxNLocator(integer=True))
    
plt.savefig("/Users/tracysanchezpacheco/Desktop/FRS.png",  bbox_inches="tight",
            pad_inches=0.3, transparent=True)

In [None]:
import scipy.stats as stats
stats.mannwhitneyu(x=exp1['Global'], y=exp2['Global'], alternative = 'greater')

In [None]:
stats.mannwhitneyu(x=exp1['Overview'], y=exp2['Overview'], alternative = 'greater')

In [None]:
stats.mannwhitneyu(x=exp1['Cardinal'], y=exp2['Cardinal'], alternative = 'greater')