In [7]:
import numpy as np
import pandas as pd

pd.set_option('future.no_silent_downcasting', True)

In [8]:
EXERCISE = 4


def load_data(catched: bool):
    df = pd.read_csv(f'./data/Exercise_{EXERCISE}_valid_catched_{catched}.csv')
    df.ID = df.ID.astype(int)
    df.TrialInfo = df.TrialInfo.astype(int)
    df.Distance = df.Distance.astype(float)
    df.Catch = df.Catch.replace({True: 1, False: 0}).astype(int)
    df.Group = df.Group.replace({"RL 7e": "RL", "RL 7f": "RL", "kontrollgruppe": "Kontrollgruppe"})

    # Load gender data: ID, gender
    df_gender = pd.read_csv("data/gender.csv")
    # add gender to the df
    df = pd.merge(df, df_gender, on="ID", how="left")

    df_mean = df.groupby(["ID", "TrialInfo"]).agg(
        Mean_Distance=("Distance", "mean"),
        Mean_Catch=("Catch", "mean"),
        Group=("Group", "first"),
        Gender=("Gender", "first"),
    ).reset_index()

    #sort by ID and TrialInfo
    df_mean = df_mean.sort_values(by=["ID", "TrialInfo", "Gender"])
    # Only keep TrialInfo 1 and 4
    df_filtered = df_mean[df_mean['TrialInfo'].isin([1, 4])]
    return df_filtered


In [9]:
import json
from scipy import stats

results = {}

objectives = ["Mean_Distance", "Mean_Catch"]
for objective in objectives:

    if objective == "Mean_Distance":
        df_filtered = load_data(catched=True)
    else:
        df_filtered = load_data(catched=False)

    # Pivot to wide format: one row = one student
    df_pivot = df_filtered.pivot_table(index=['ID', 'Group', "Gender"],
                                       columns='TrialInfo',
                                       values=objective).reset_index()

    # Rename columns. for easier handling
    df_pivot = df_pivot.rename(columns={1: 'Week1', 4: 'Week4'})

    genders = df_pivot["Gender"].unique()
    groups = df_pivot["Group"].unique()

    for group in groups:
        # Subset the group
        group_data = df_pivot[df_pivot['Group'] == group]

        week1 = group_data['Week1']
        week4 = group_data['Week4']

        # Check normality of the differences
        stat, p_normality = stats.shapiro(week1 - week4)

        if p_normality > 0.05:
            # Differences are normal --> Paired t-test
            t_stat, p_value = stats.ttest_rel(week1, week4)
            test_used = 'Paired t-test'
        else:
            # Differences are not normal --> Wilcoxon signed-rank test
            w_stat, p_value = stats.wilcoxon(week1, week4)
            test_used = 'Wilcoxon signed-rank'

        # Determine if significant
        significant = bool(p_value < 0.05)

        # Save results
        print(group)
        results[f"{group}-{objective}"] = {
            'test': test_used,
            'p_value': float(p_value),
            'significant': significant,
            "p-normality": float(p_normality),
            "gender": "gesamt",
            "size": len(week1),
            "objective": objective
        }

        for gender in genders:
            gender_data = group_data[group_data["Gender"] == gender]
            week1 = gender_data['Week1']
            week4 = gender_data['Week4']

            print(f"{group}-{gender}-{len(week1)}")

            stat, p_normality = stats.shapiro(week1 - week4)

            if p_normality > 0.05:
                t_stat, p_value = stats.ttest_rel(week1, week4)
                test_used = 'Paired t-test'
            else:
                w_stat, p_value = stats.wilcoxon(week1, week4)
                test_used = 'Wilcoxon signed-rank'

            significant = bool(p_value < 0.05)

            results[f"{group}-{gender}-{objective}"] = {
                'test': test_used,
                'p_value': float(p_value),
                'significant': significant,
                "p-normality": float(p_normality),
                "gender": gender,
                "size": len(week1),
                "objective": objective
            }

# Save results to JSON
with open(f'./results/significancy_{EXERCISE}.json', 'w') as f:
    json.dump(results, f, indent=4)


VR
VR-m-2
VR-w-0
RL
RL-m-17
RL-w-11
Kontrollgruppe
Kontrollgruppe-m-6
Kontrollgruppe-w-0
VR
VR-w-13


  stat, p_normality = stats.shapiro(week1 - week4)
  stat, p_normality = stats.shapiro(week1 - week4)
  return fun(*args, **kwargs)


VR-m-16
RL
RL-w-22
RL-m-25
Kontrollgruppe
Kontrollgruppe-w-4
Kontrollgruppe-m-7


# Step 3: Across all groups

In [10]:
# Pivot to wide format: one row = one student
df_pivot = df_mean.pivot_table(index=['ID', 'Group'],
                               columns='TrialInfo',
                               values='Mean_Distance').reset_index()

# Rename columns for easier handling
df_pivot = df_pivot.rename(columns={1: "Week1", 2: "Week2", 3: "Week3", 4: 'Week4'})

print(df_pivot)

TrialInfo   ID           Group     Week1     Week2     Week3     Week4
0            8              VR  0.053097  0.055863  0.036504  0.309735
1           16              VR  0.088496  0.172566  0.164159  0.088274
2           61              RL  0.136816  0.000000  0.074627  0.093284
3           62              RL  0.059701  0.039801  0.039801  0.013930
4           63              RL  0.000000  0.116086  0.215589  0.075871
5           64              RL  0.124378  0.248756  0.149254  0.005970
6           66              RL  0.031509  0.009950  0.148259  0.055721
7           69              RL  0.331675  0.132670  0.268657  0.058043
8           70              RL  0.569154  0.131841  0.023632  0.009950
9           72              RL  0.290216  0.149254  0.149254  0.072968
10          74              RL  0.000000  0.009950  0.014925  0.000000
11          77              RL  0.016169  0.012438  0.004975  0.001990
12          78              RL  0.000995  0.000000  0.000000  0.000000
13    

In [11]:
from sklearn.linear_model import LinearRegression

# Define X once, same for all
X = np.array([1, 2, 3, 4]).reshape(-1, 1)


# Function to calculate slope for one student
def calculate_slope(row):
    y = np.array([row['Week1'], row['Week2'], row['Week3'], row['Week4']])
    model = LinearRegression().fit(X, y)
    return model.coef_[0]  # coef_ returns an array, take first element (slope)


# Apply to each row
df_pivot['Slope'] = df_pivot.apply(calculate_slope, axis=1)

# Add metrics
df_pivot['Improvement_%'] = (df_pivot['Week4'] - df_pivot['Week1']) / df_pivot['Week1'] * 100


ModuleNotFoundError: No module named 'sklearn'

In [18]:
import scipy.stats as stats

# Define all pairs you want to compare
group_pairs = [('VR', 'Kontrollgruppe'), ('RL', 'Kontrollgruppe'), ('VR', 'RL')]

# Metrics to analyze
metrics = ['Improvement_%', 'Slope']

results = {}

for metric in metrics:
    print(f"\n\n--- Analyzing {metric} ---")
    results[metric] = {}

    for group1, group2 in group_pairs:
        data1 = df_pivot[df_pivot['Group'] == group1][metric]
        data2 = df_pivot[df_pivot['Group'] == group2][metric]

        # Check normality for both groups
        p_norm1 = stats.shapiro(data1)[1]
        p_norm2 = stats.shapiro(data2)[1]

        normal = (p_norm1 > 0.05) and (p_norm2 > 0.05)

        if normal:
            # Use independent t-test
            stat, p_value = stats.ttest_ind(data1, data2)
            test_used = 'Independent t-test'
        else:
            # Use Mann-Whitney U test
            stat, p_value = stats.mannwhitneyu(data1, data2, alternative='two-sided')
            test_used = 'Mann-Whitney U'

        # Save results
        results[metric][(group1, group2)] = {
            'test': test_used,
            'p_value': p_value
        }

        # Print results
        print(f"\n{group1} vs {group2}")
        print(f"Test used: {test_used}")
        print(f"p-value: {p_value:.4f}")
        if p_value < 0.05:
            print("✅ Significant difference!")
        else:
            print("❌ No significant difference.")




--- Analyzing Improvement_% ---

VR vs Kontrollgruppe
Test used: Mann-Whitney U
p-value: 0.2857
❌ No significant difference.

RL vs Kontrollgruppe
Test used: Mann-Whitney U
p-value: nan
❌ No significant difference.

VR vs RL
Test used: Mann-Whitney U
p-value: nan
❌ No significant difference.


--- Analyzing Slope ---

VR vs Kontrollgruppe
Test used: Mann-Whitney U
p-value: 0.4286
❌ No significant difference.

RL vs Kontrollgruppe
Test used: Independent t-test
p-value: 0.0400
✅ Significant difference!

VR vs RL
Test used: Mann-Whitney U
p-value: 0.0736
❌ No significant difference.


  p_norm1 = stats.shapiro(data1)[1]


# Table

In [None]:
import pandas as pd
import numpy as np

# Metrics you want to report
metrics = ['Improvement_%', 'Slope']

# Create empty list to collect rows
summary_data = []

# Loop through each group
for group in df_pivot['Group'].unique():
    row = {'Gruppe': group}

    for metric in metrics:
        group_data = df_pivot[df_pivot['Group'] == group][metric]

        # Remove inf, -inf, and NaN before calculation
        group_data_clean = group_data.replace([np.inf, -np.inf], np.nan).dropna()

        mean = group_data_clean.mean()
        std = group_data_clean.std()

        # Format "mean ± std"
        row[metric] = f"{mean:.2f} ± {std:.2f}"

    # Add number of participants (after cleaning inf values if you want)
    n_participants = df_pivot[df_pivot['Group'] == group]['ID'].nunique()
    row['Anzahl der Teilnehmenden'] = n_participants

    summary_data.append(row)

# Create DataFrame
summary_df = pd.DataFrame(summary_data)

# Reorder columns nicely
summary_df = summary_df[['Gruppe', 'Improvement_%', 'Slope', 'Anzahl der Teilnehmenden']]

# Rename columns for nice output
summary_df.columns = ['Gruppe', 'Metrik 1 (Improvement %)', 'Metrik 2 (Slope)', 'Anzahl der Teilnehmenden']

# Show final table
print(summary_df)

# save to ./results as csv
summary_df.to_csv(f'./results/summary_table_{EXERCISE}.csv', index=False)
df_pivot.to_csv(f'./results/df_metrics_{EXERCISE}.csv', index=False)
