In [1]:
import pandas as pd

# Opt in to pandas' future behaviour in which replace()/fillna()-style operations no
# longer silently downcast their result; load_data therefore converts dtypes explicitly
# with astype() after calling replace().
pd.set_option('future.no_silent_downcasting', True)

In [2]:
# Number of the exercise under analysis: it is interpolated into the name of the input
# CSV read by load_data and into the name of the results JSON written by the analysis cell.
EXERCISE = 3


def load_data(catched: bool, exercise=None, data_dir="./data") -> pd.DataFrame:
    """Load one exercise file and return per-student means for TrialInfo 1 and 4.

    Parameters
    ----------
    catched : bool
        Interpolated verbatim into the file name
        ``Exercise_<exercise>_valid_catched_<catched>.csv``.
    exercise : int, optional
        Exercise number used in the file name. Defaults to the module-level
        ``EXERCISE`` constant, which keeps existing ``load_data(catched=...)``
        calls working unchanged.
    data_dir : str or path-like, optional
        Directory containing the exercise CSV and ``gender.csv``.

    Returns
    -------
    pandas.DataFrame
        One row per (ID, TrialInfo) with columns ``ID``, ``TrialInfo``,
        ``Mean_Degree``, ``Mean_Catch``, ``Group`` and ``Gender``, restricted to
        TrialInfo 1 and 4 and sorted by ID and TrialInfo.
    """
    from pathlib import Path

    if exercise is None:
        exercise = EXERCISE
    data_dir = Path(data_dir)

    df = pd.read_csv(data_dir / f'Exercise_{exercise}_valid_catched_{catched}.csv')

    # Normalise dtypes explicitly (silent downcasting is disabled notebook-wide).
    df = df.astype({"ID": int, "TrialInfo": int, "Degree": float})
    df["Catch"] = df["Catch"].replace({True: 1, False: 0}).astype(int)
    # Collapse the two RL labels into one group and unify the control group spelling.
    df["Group"] = df["Group"].replace(
        {"RL 7e": "RL", "RL 7f": "RL", "kontrollgruppe": "Kontrollgruppe"}
    )

    # Gender lookup (columns: ID, Gender). Keep one row per ID so that a repeated ID
    # cannot multiply the trial rows in the merge below; the first entry wins, which
    # matches the "first" aggregation used for Gender further down.
    df_gender = pd.read_csv(data_dir / "gender.csv").drop_duplicates(subset="ID")
    # Left merge: students without a gender entry are kept with Gender = NaN.
    df = pd.merge(df, df_gender, on="ID", how="left")

    df_mean = df.groupby(["ID", "TrialInfo"]).agg(
        Mean_Degree=("Degree", "mean"),
        Mean_Catch=("Catch", "mean"),
        Group=("Group", "first"),
        Gender=("Gender", "first"),
    ).reset_index()

    # (ID, TrialInfo) is unique after the groupby, so these two keys fully order the rows.
    df_mean = df_mean.sort_values(by=["ID", "TrialInfo"])

    # Only keep TrialInfo 1 and 4.
    return df_mean[df_mean["TrialInfo"].isin([1, 4])]

In [3]:
import json
import math
from pathlib import Path

from scipy import stats

ALPHA = 0.05       # threshold used for the normality check and for significance
MIN_SHAPIRO_N = 3  # scipy.stats.shapiro needs at least 3 observations


def _json_float(value):
    """Return ``value`` as a plain float, or None if it is missing, NaN or infinite.

    NaN/Infinity are not valid JSON, so they are stored as ``null`` instead.
    """
    if value is None:
        return None
    value = float(value)
    return value if math.isfinite(value) else None


def _paired_week_test(data, gender_label, objective):
    """Test Week1 against Week4 for the students in ``data`` and return one result entry.

    Only students with a value in both weeks are used. If the Shapiro-Wilk test on the
    paired differences yields p > ALPHA a paired t-test is run, otherwise (including
    when there are too few pairs to assess normality) a Wilcoxon signed-rank test.
    When there are no pairs, or every difference is zero, no test is run and the
    p-value is stored as None.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide-format frame with the columns ``Week1`` and ``Week4``.
    gender_label : str
        Value stored under the ``"gender"`` key of the result.
    objective : str
        Value stored under the ``"objective"`` key of the result.

    Returns
    -------
    dict
        JSON-serialisable entry; ``"size"`` is the number of complete pairs tested.
    """
    # A paired test is only defined for students measured in both weeks.
    pairs = data[['Week1', 'Week4']].dropna()
    week1 = pairs['Week1']
    week4 = pairs['Week4']
    diff = week1 - week4
    n_pairs = len(pairs)

    p_normality = None
    p_value = None

    if n_pairs == 0 or (diff == 0).all():
        # Nothing to test: neither test is meaningful without non-zero differences.
        test_used = 'not computed'
    else:
        if n_pairs >= MIN_SHAPIRO_N:
            # Check normality of the differences.
            _, p_normality = stats.shapiro(diff)

        if p_normality is not None and p_normality > ALPHA:
            # Differences look normal --> paired t-test.
            _, p_value = stats.ttest_rel(week1, week4)
            test_used = 'Paired t-test'
        else:
            # Not normal, or normality could not be assessed --> Wilcoxon signed-rank.
            _, p_value = stats.wilcoxon(week1, week4)
            test_used = 'Wilcoxon signed-rank'

    p_value = _json_float(p_value)
    return {
        'test': test_used,
        'p_value': p_value,
        'significant': p_value is not None and p_value < ALPHA,
        "p-normality": _json_float(p_normality),
        "gender": gender_label,
        "size": n_pairs,
        "objective": objective,
    }


results = {}

objectives = ["Mean_Degree", "Mean_Catch"]
for objective in objectives:

    # Mean_Degree is read from the catched=True file, Mean_Catch from the catched=False file.
    df_filtered = load_data(catched=(objective == "Mean_Degree"))

    # Pivot to wide format: one row = one student.
    # NOTE(review): Gender is part of the pivot index, so students without a gender
    # entry (possible after the left merge in load_data) are presumably dropped here
    # and therefore also missing from the "gesamt" rows -- confirm this is intended.
    df_pivot = df_filtered.pivot_table(index=['ID', 'Group', "Gender"],
                                       columns='TrialInfo',
                                       values=objective).reset_index()

    # Rename the TrialInfo columns for easier handling.
    df_pivot = df_pivot.rename(columns={1: 'Week1', 4: 'Week4'})

    genders = df_pivot["Gender"].unique()
    groups = df_pivot["Group"].unique()

    for group in groups:
        group_data = df_pivot[df_pivot['Group'] == group]

        # Whole group, all genders together.
        print(group)
        results[f"{group}-{objective}"] = _paired_week_test(group_data, "gesamt", objective)

        # Same comparison within each gender of the group.
        for gender in genders:
            gender_data = group_data[group_data["Gender"] == gender]
            entry = _paired_week_test(gender_data, gender, objective)
            print(f"{group}-{gender}-{entry['size']}")
            results[f"{group}-{gender}-{objective}"] = entry

# Save results to JSON (create the output directory on a fresh checkout).
results_path = Path(f'./results/significancy_{EXERCISE}.json')
results_path.parent.mkdir(parents=True, exist_ok=True)
with open(results_path, 'w') as f:
    # allow_nan=False guarantees the file is strictly valid JSON.
    json.dump(results, f, indent=4, allow_nan=False)


VR
VR-w-10
VR-m-16
RL
RL-w-21
RL-m-25
Kontrollgruppe
Kontrollgruppe-w-2
Kontrollgruppe-m-6
VR
VR-w-14
VR-m-16
RL
RL-w-22
RL-m-25
Kontrollgruppe
Kontrollgruppe-w-4
Kontrollgruppe-m-7


  stat, p_normality = stats.shapiro(week1 - week4)
