In [51]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from utils import (
    amputees,
    channel_names,
    intact,
    participants,
    recordings,
    targets,
    test_recordings,
)

pd.set_option('display.width', 200)          # Total width of the display
pd.set_option('display.max_columns', None)   # Show all columns
pd.set_option('display.max_colwidth', None)  # Don't truncate column contents
pd.set_option('display.max_rows', None)  # Don't truncate column contents

In [4]:
movements = recordings + test_recordings
data_folder = Path('/home/haptix/haptix/biomech_PCP/paper_utils/paper_data/trajectories')

ptcID = {
    'P_149': 'P1',
    'P_238': 'P2',
    'P_407': 'P3',
    'P_426': 'P4',
    'P_577': 'P5',
    'P_668': 'P6',
    'P_711': 'P7',
    'P_950': 'P8',
    'P7_453': 'A1',
    'P6_820': 'A2',
}

In [52]:
# now we will read the data from each participant and movement and calculate correlations and MSE by movement
online_participants = [part for part in participants if 'P6' not in part]
for perturb in [True, False]:
    for condition in ['before', 'after']:
        df_mse = pd.DataFrame(index=online_participants, columns=movements); df_mse.loc[:, 'Participant'] = df_mse.index
        df_corr = pd.DataFrame(index=online_participants, columns=targets); df_corr.loc[:, 'Participant'] = df_corr.index
        for participant in online_participants:
            gt_cat = []
            pred_cat = []
            for movement in movements:
                gt = np.load(data_folder / participant / movement / 'GT/pred.npy'); gt_cat.append(gt)
                pred = np.load(data_folder / participant / movement / f'perturb_{str(perturb)}/{condition}_online/pred.npy'); pred_cat.append(pred)

                mse = np.mean((gt - pred) ** 2, axis=0)
                df_mse.loc[participant, movement] = np.mean(mse)
            
            gt_cat = np.concatenate(gt_cat, axis=0)
            pred_cat = np.concatenate(pred_cat, axis=0)

            for i, target in enumerate(targets):
                corr = np.corrcoef(gt_cat[:, i], pred_cat[:, i])[0, 1]
                df_corr.loc[participant, target] = corr

        # Save the results to CSV files
        save_name = ('non_' if not perturb else '') + 'perturbed_' + condition + 'Online'
        df_mse.to_csv(f'MSE_{save_name}.csv')
        df_corr.to_csv(f'corr_{save_name}.csv')
        # print(f'Participant {participant} - Perturbation: {perturb}, Condition: {condition} - MSE and Correlation calculated.')
        # print(df_mse)
        # print(df_corr)

In [26]:
import pandas as pd
import numpy as np
from pathlib import Path

mse_rows = []
corr_rows = []

# online_participants = [part for part in participants if 'P6' not in part]
online_participants = participants

for perturb in [True, False]:
    for condition in ['before', 'after']:
        for participant in online_participants:
            if 'P6_' in participant and condition == 'after':
                continue
            gt_cat = []
            pred_cat = []
            for movement in movements:
                gt = np.load(data_folder / participant / movement / 'GT/pred.npy')
                pred = np.load(data_folder / participant / movement / f'perturb_{str(perturb)}/{condition}_online/pred.npy')

                gt_cat.append(gt)
                pred_cat.append(pred)

                mse = np.mean((gt - pred) ** 2, axis=0)
                mse_row = {
                    'Participant': participant,
                    'Perturb': perturb,
                    'Condition': condition,
                    'Movement': movement,
                    'MSE': np.mean(mse)
                }
                mse_rows.append(mse_row)

            # Concatenate across all movements for correlation
            gt_cat = np.concatenate(gt_cat, axis=0)
            pred_cat = np.concatenate(pred_cat, axis=0)

            for i, target in enumerate(targets):
                corr = np.corrcoef(gt_cat[:, i], pred_cat[:, i])[0, 1]
                corr_row = {
                    'Participant': participant,
                    'Perturb': perturb,
                    'Condition': condition,
                    'Target': target,
                    'Correlation': corr
                }
                corr_rows.append(corr_row)

# Create final long-form dataframes
df_mse = pd.DataFrame(mse_rows)
df_corr = pd.DataFrame(corr_rows)

# Optional: save
df_mse.to_csv('all_mse_results.csv', index=False)
df_corr.to_csv('all_correlation_results.csv', index=False)


In [27]:
import pandas as pd
import numpy as np
from pathlib import Path

corr_rows = []
corr_rows_known = []
corr_rows_new = []

# online_participants = [part for part in participants if 'P6' not in part]
online_participants = participants # i guess P6 was in here?


for perturb in [True, False]:
    for condition in ['before', 'after']:
        for participant in online_participants:
            if 'P6_' in participant and condition == 'after':
                print(f'Skipping participant {participant} for perturbation {perturb} and condition {condition}')
                continue
            gt_cat_new = []
            pred_cat_new = []
            gt_cat_known = []
            pred_cat_known = []
            gt_cat = []
            pred_cat = []

            for movement in movements:
                gt = np.load(data_folder / participant / movement / 'GT/pred.npy')
                pred = np.load(data_folder / participant / movement / f'perturb_{str(perturb)}/{condition}_online/pred.npy')

                gt_cat.append(gt)
                pred_cat.append(pred)
                if movement in recordings:
                    gt_cat_known.append(gt)
                    pred_cat_known.append(pred)
                elif movement in test_recordings:
                    gt_cat_new.append(gt)
                    pred_cat_new.append(pred)

            # Concatenate across all movements for correlation
            gt_cat = np.concatenate(gt_cat, axis=0)
            pred_cat = np.concatenate(pred_cat, axis=0)
            gt_cat_known = np.concatenate(gt_cat_known, axis=0)
            pred_cat_known = np.concatenate(pred_cat_known, axis=0)
            gt_cat_new = np.concatenate(gt_cat_new, axis=0)
            pred_cat_new = np.concatenate(pred_cat_new, axis=0)

            for i, target in enumerate(targets):
                corr = np.corrcoef(gt_cat[:, i], pred_cat[:, i])[0, 1]
                corr_known = np.corrcoef(gt_cat_known[:, i], pred_cat_known[:, i])[0, 1]
                corr_new = np.corrcoef(gt_cat_new[:, i], pred_cat_new[:, i])[0, 1]
                corr_row = {
                    'Participant': participant,
                    'Perturb': perturb,
                    'Condition': condition,
                    'Target': target,
                    'Correlation': corr
                }
                corr_rows.append(corr_row)
                corr_row_known = {
                    'Participant': participant,
                    'Perturb': perturb,
                    'Condition': condition,
                    'Target': target,
                    'Correlation': corr_known
                }
                corr_rows_known.append(corr_row_known)
                corr_row_new = {
                    'Participant': participant,
                    'Perturb': perturb,
                    'Condition': condition,
                    'Target': target,
                    'Correlation': corr_new
                }
                corr_rows_new.append(corr_row_new)


# Create final long-form dataframes
df_corr = pd.DataFrame(corr_rows)
df_corr_known = pd.DataFrame(corr_rows_known)
df_corr_new = pd.DataFrame(corr_rows_new)

# Optional: save
df_corr.to_csv('all_correlation_results.csv', index=False)
df_corr_known.to_csv('all_correlation_results_known.csv', index=False)
df_corr_new.to_csv('all_correlation_results_new.csv', index=False)


Skipping participant P6_820 for perturbation True and condition after
Skipping participant P6_820 for perturbation False and condition after


In [57]:
df = pd.read_csv('all_mse_results.csv')
# print(df)

# now use some groupby wizardry to generate a table for each of the conditions:
# first, we want the non perturbed before results
df_non_perturbed_before = df[(df['Perturb'] == False) & (df['Condition'] == 'before')]; print(df_non_perturbed_before['Participant'].unique())
df_perturbed_after = df[(df['Perturb'] == True) & (df['Condition'] == 'after')]; print(df_perturbed_after['Participant'].unique())
df_non_perturbed_after = df[(df['Perturb'] == False) & (df['Condition'] == 'after')]; print(df_non_perturbed_after['Participant'].unique())
df_perturbed_before = df[(df['Perturb'] == True) & (df['Condition'] == 'before')]; print(df_perturbed_before['Participant'].unique())

# print(df_non_perturbed_after)

# now we want to create a new table has columns for each movement and rows for each participant
def summaryTable(df):
    # Pivot the DataFrame to have movements as columns and participants as rows
    df_pivot = df.pivot(index='Participant', columns='Movement', values='MSE')
    df_pivot.reset_index(inplace=True)
    df_pivot.columns.name = None  # Remove the name of the columns index
    df_pivot = df_pivot.rename_axis(None, axis=1)  # Remove the name of the columns index

    # Replace participant IDs with their numbers, but only if that participant has data in this table
    for i, participant in enumerate(df_pivot['Participant']):
        if participant in ptcID:
            df_pivot.loc[i, 'Participant'] = ptcID[participant]
        else:
            df_pivot.loc[i, 'Participant'] = participant

    df_pivot.sort_values(by='Participant', inplace=True)

    # Calculate mean across all movements
    df_pivot['Mean All Movements'] = df_pivot.loc[:, movements].mean(axis=1)
    # Calculate mean across known movements
    df_pivot['Mean Known Movements'] = df_pivot.loc[:, recordings].mean(axis=1)
    # Calculate mean across new movements
    df_pivot['Mean New Movements'] = df_pivot.loc[:, test_recordings].mean(axis=1)
    
    return df_pivot
# print(df_non_perturbed_before_pivot)

df_non_perturbed_before_pivot = summaryTable(df_non_perturbed_before)
df_perturbed_after_pivot = summaryTable(df_perturbed_after)
df_non_perturbed_after_pivot = summaryTable(df_non_perturbed_after)
df_perturbed_before_pivot = summaryTable(df_perturbed_before)

# Print the summary tables
# print("Non Perturbed Before:")
# print(df_non_perturbed_before_pivot)
# print("\nPerturbed After:")
# print(df_perturbed_after_pivot)
# print("\nNon Perturbed After:")
# print(df_non_perturbed_after_pivot)
# print("\nPerturbed Before:")
# print(df_perturbed_before_pivot)

# # convert to latex
# print("\nNon Perturbed Before LaTeX:")  
# print(df_non_perturbed_before_pivot.to_latex(index=False, float_format="%.3f", escape=False))
# print("\nPerturbed After LaTeX:")
# print(df_perturbed_after_pivot.to_latex(index=False, float_format="%.3f", escape=False))
# print("\nNon Perturbed After LaTeX:")
# print(df_non_perturbed_after_pivot.to_latex(index=False, float_format="%.3f", escape=False))
# print("\nPerturbed Before LaTeX:")
# print(df_perturbed_before_pivot.to_latex(index=False, float_format="%.3f", escape=False))


['P_149' 'P_238' 'P_407' 'P_426' 'P_577' 'P_668' 'P_711' 'P_950' 'P7_453'
 'P6_820']
['P_149' 'P_238' 'P_407' 'P_426' 'P_577' 'P_668' 'P_711' 'P_950' 'P7_453']
['P_149' 'P_238' 'P_407' 'P_426' 'P_577' 'P_668' 'P_711' 'P_950' 'P7_453']
['P_149' 'P_238' 'P_407' 'P_426' 'P_577' 'P_668' 'P_711' 'P_950' 'P7_453'
 'P6_820']


In [73]:
df = pd.read_csv('all_correlation_results_known.csv')
# print(df)

# now use some groupby wizardry to generate a table for each of the conditions:
# first, we want the non perturbed before results
df_non_perturbed_before = df[(df['Perturb'] == False) & (df['Condition'] == 'before')]
df_perturbed_after = df[(df['Perturb'] == True) & (df['Condition'] == 'after')]
df_non_perturbed_after = df[(df['Perturb'] == False) & (df['Condition'] == 'after')]
df_perturbed_before = df[(df['Perturb'] == True) & (df['Condition'] == 'before')]

# now we want to create a new table has columns for each movement and rows for each participant
def summaryTable(df):
    # Pivot the DataFrame to have movements as columns and participants as rows
    df_pivot = df.pivot(index='Participant', columns='Target', values='Correlation')
    df_pivot.reset_index(inplace=True)
    df_pivot.columns.name = None  # Remove the name of the columns index
    df_pivot = df_pivot.rename_axis(None, axis=1)  # Remove the name of the columns index

    # Replace participant IDs with short names
    # df_pivot['Participant'] = df_pivot['Participant'].replace(ptcID)
    for i, participant in enumerate(df_pivot['Participant']):
        if participant in ptcID:
            df_pivot.loc[i, 'Participant'] = ptcID[participant]
        else:
            df_pivot.loc[i, 'Participant'] = participant

    df_pivot.sort_values(by='Participant', inplace=True)

    # do a fisher transformation on the correlations
    transformed_corr = np.arctanh(df_pivot.loc[:, targets])

    # have to do this by cohort
    # mean_transformed_corr_amp = transformed_corr.loc[df_pivot['Participant'].str.startswith('A')].mean(axis=1)
    # mean_transformed_corr_intact = transformed_corr.loc[df_pivot['Participant'].str.startswith('P')].mean(axis=1)

    mean_transformed_corr = transformed_corr.mean(axis=1)
    # apply the inverse fisher transformation to get back to correlation space

    # Calculate mean across all movements
    df_pivot['Mean Correlation'] = df_pivot.loc[:, targets].mean(axis=1)
    df_pivot['Mean Transformed Correlation'] = np.tanh(mean_transformed_corr)

    # add an extra row that is the mean across all participants
    mean_row = df_pivot.loc[df_pivot['Participant'].str.startswith('A'), targets].mean()
    mean_transform = transformed_corr.loc[df_pivot['Participant'].str.startswith('A')].mean().mean()
    mean_row['Participant'] = 'Amputees'
    mean_row['Mean Correlation'] = mean_row.loc[targets].mean()
    mean_row['Mean Transformed Correlation'] = np.tanh(mean_transform)

    mean_row_ = df_pivot.loc[df_pivot['Participant'].str.startswith('P'), targets].mean()
    mean_transform_ = transformed_corr.loc[df_pivot['Participant'].str.startswith('P')].mean().mean()
    mean_row_['Participant'] = 'Intact'
    mean_row_['Mean Correlation'] = mean_row_.loc[targets].mean()
    mean_row_['Mean Transformed Correlation'] = np.tanh(mean_transform_)

    df_pivot = pd.concat([df_pivot, mean_row.to_frame().T, mean_row_.to_frame().T], ignore_index=True)

    return df_pivot
# print(df_non_perturbed_before_pivot)

df_non_perturbed_before_pivot = summaryTable(df_non_perturbed_before)
df_perturbed_after_pivot = summaryTable(df_perturbed_after)
df_non_perturbed_after_pivot = summaryTable(df_non_perturbed_after)
df_perturbed_before_pivot = summaryTable(df_perturbed_before)

# Print the summary tables
print("Non Perturbed Before:")
print(df_non_perturbed_before_pivot)
print("\nPerturbed After:")
print(df_perturbed_after_pivot)
print("\nNon Perturbed After:")
print(df_non_perturbed_after_pivot)
print("\nPerturbed Before:")
print(df_perturbed_before_pivot)

# convert to latex
# print("\nNon Perturbed Before LaTeX:")  
# print(df_non_perturbed_before_pivot.to_latex(index=False, float_format="%.3f", escape=False))
# print("\nPerturbed After LaTeX:")
# print(df_perturbed_after_pivot.to_latex(index=False, float_format="%.3f", escape=False))
# print("\nNon Perturbed After LaTeX:")
# print(df_non_perturbed_after_pivot.to_latex(index=False, float_format="%.3f", escape=False))
# print("\nPerturbed Before LaTeX:")
# print(df_perturbed_before_pivot.to_latex(index=False, float_format="%.3f", escape=False))


Non Perturbed Before:
   Participant  indexAng    midAng  pinkyAng   ringAng thumbInPlaneAng thumbOutPlaneAng wristFlex Mean Correlation Mean Transformed Correlation
0           A1  0.432817  0.783062  0.772695     0.798        0.587512         0.696648  0.834737         0.700782                     0.721464
1           A2  0.564108  0.840717  0.876979  0.842683        0.389431         0.605249   0.67636         0.685075                     0.722517
2           P1  0.590285  0.871499  0.878547  0.880554         0.32172         0.597532  0.721669         0.694544                     0.742933
3           P2  0.562743  0.797293  0.770678  0.769205        0.506427         0.595382   0.78507         0.683828                     0.699969
4           P3  0.699111  0.877848   0.87653  0.874538        0.413075         0.547442  0.774866         0.723344                     0.763445
5           P4  0.499135  0.783116  0.784181  0.783314        0.397502         0.456353   0.85927         0.651839

In [74]:
def df_to_latex(df, caption, label):
    col_names = list(df.columns)
    # col_names.replace('Mean Correlation', 'Mean Corr.')
    # col_names.replace('Mean Transformed Correlation', 'Mean Transformed Corr.')
    # Create LaTeX column format string (e.g., "|l|ccc|")
    col_format = "|l|" + "c" * (len(col_names) - 1) + "|"

    # Create header row
    header_row = " & ".join(col_names) + " \\\\"

    # we want to go in ascending order of participants
    # df = df.sort_values(by='Participant')

    # Create data rows
    data_rows = "\n".join(
        " & ".join([str(row[0])] + [f"{val:.3f}" for val in row[1:]]) + " \\\\"
        for index, row in df.iterrows()
    )

    latex_str = f"""\\begin{{table}}[ht]
    \\caption{{{caption}}}
    \\label{{{label}}}
    \\centering
    \\resizebox{{\\textwidth}}{{!}}{{%
    \\begin{{tabular}}{{{col_format}}}
        \\hline
        {header_row}
        \\hline
        {data_rows}
        \\hline
    \\end{{tabular}}
    }}
\\end{{table}}"""

    return latex_str


def df_to_latex_split(df, caption, label):
    # Split main movement columns from mean summary columns
    df = df.sort_values(by='Participant')
    mean_cols = [col for col in df.columns if 'Mean' in col]

    # Format the main table
    header_main = " & ".join(['\\textbf{Participant}'] + movements) + " \\\\"
    rows_main = "\n".join(
        " & ".join([str(row['Participant'])] + [f"{row[col]:.3f}" for col in movements]) + " \\\\"
        for _, row in df.iterrows()
    )

    # Format the smaller table
    header_summary = " & ".join(['\\textbf{Participant}'] + [f"\\textbf{{{col}}}" for col in mean_cols]) + " \\\\"
    rows_summary = "\n".join(
        " & ".join([str(row['Participant'])] + [f"{row[col]:.3f}" for col in mean_cols]) + " \\\\"
        for _, row in df.iterrows()
    )

    return f"""
\\begin{{table}}[ht!]
    \\caption{{{caption}}}
    \\label{{{label}}}
    \\centering
    \\resizebox{{\\textwidth}}{{!}}{{%
    \\begin{{tabular}}{{|l|{'c' * len(movements)}|}}
        \\hline
        {header_main}
        \\hline
        {rows_main}
        \\hline
    \\end{{tabular}}
    }}\\\\[1em]
    \\begin{{scriptsize}}
    \\begin{{tabular}}{{|l|{'c' * len(mean_cols)}|}}
        \\hline
        {header_summary}
        \\hline
        {rows_summary}
        \\hline
    \\end{{tabular}}
    \\end{{scriptsize}}
\\end{{table}}
"""

# MSE
# dfs = [df_non_perturbed_before_pivot, df_perturbed_after_pivot, df_perturbed_before_pivot, df_non_perturbed_after_pivot]
# captions = [f'MSE for each movement ({cap})' for cap in ['initial training', 'after online training', 'before online training, perturbed input', 'after online training, perturbed input']]
# labels = [f'tab:mse_{label}' for label in ['initial', 'online', 'after_perturbed', 'before_perturbed']]

# Correlation
dfs = [df_non_perturbed_before_pivot, df_perturbed_after_pivot, df_perturbed_before_pivot, df_non_perturbed_after_pivot]
captions = [f'Known Movements Correlation Results ({cap})' for cap in ['initial training', 'after online training', 'before online training, perturbed input', 'after online training, perturbed input']]
labels = [f'tab:corr_known_{label}' for label in ['initial', 'online', 'after_perturbed', 'before_perturbed']]

for df, caption, label in zip(dfs, captions, labels):
    # latex_str = df_to_latex_split(df, caption, label)
    latex_str = df_to_latex(df, caption, label)
    print(latex_str)
    print("\n")  # Add a newline for better readability between tables


\begin{table}[ht]
    \caption{Known Movements Correlation Results (initial training)}
    \label{tab:corr_known_initial}
    \centering
    \resizebox{\textwidth}{!}{%
    \begin{tabular}{|l|ccccccccc|}
        \hline
        Participant & indexAng & midAng & pinkyAng & ringAng & thumbInPlaneAng & thumbOutPlaneAng & wristFlex & Mean Correlation & Mean Transformed Correlation \\
        \hline
        A1 & 0.433 & 0.783 & 0.773 & 0.798 & 0.588 & 0.697 & 0.835 & 0.701 & 0.721 \\
A2 & 0.564 & 0.841 & 0.877 & 0.843 & 0.389 & 0.605 & 0.676 & 0.685 & 0.723 \\
P1 & 0.590 & 0.871 & 0.879 & 0.881 & 0.322 & 0.598 & 0.722 & 0.695 & 0.743 \\
P2 & 0.563 & 0.797 & 0.771 & 0.769 & 0.506 & 0.595 & 0.785 & 0.684 & 0.700 \\
P3 & 0.699 & 0.878 & 0.877 & 0.875 & 0.413 & 0.547 & 0.775 & 0.723 & 0.763 \\
P4 & 0.499 & 0.783 & 0.784 & 0.783 & 0.398 & 0.456 & 0.859 & 0.652 & 0.688 \\
P5 & 0.835 & 0.879 & 0.898 & 0.884 & 0.521 & 0.723 & 0.790 & 0.790 & 0.815 \\
P6 & 0.460 & 0.841 & 0.878 & 0.862 & 0.748 & 0.66

In [None]:
# we also want to perform some statistical tests to compare the MSE between conditions
from scipy.stats import wilcoxon

# we want to perfrom these on the means between different conditions 
def perform_wilcoxon_test(df1, df2):
    # Perform Wilcoxon signed-rank test
    stat, p_value = wilcoxon(df1, df2)
    return stat, p_value

# Perform all tests and collect output
results = {}

# Pure online training (non-perturbed, before vs after)
results['Online Training - ALL'] = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean All Movements'], df_non_perturbed_after_pivot['Mean All Movements'])
results['Online Training - KNOWN'] = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean Known Movements'], df_non_perturbed_after_pivot['Mean Known Movements'])
results['Online Training - NEW'] = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean New Movements'], df_non_perturbed_after_pivot['Mean New Movements'])

# Online learning, perturbation study (perturbed before vs after)
results['Perturbed Study - ALL'] = perform_wilcoxon_test(df_perturbed_before_pivot['Mean All Movements'], df_perturbed_after_pivot['Mean All Movements'])
results['Perturbed Study - KNOWN'] = perform_wilcoxon_test(df_perturbed_before_pivot['Mean Known Movements'], df_perturbed_after_pivot['Mean Known Movements'])
results['Perturbed Study - NEW'] = perform_wilcoxon_test(df_perturbed_before_pivot['Mean New Movements'], df_perturbed_after_pivot['Mean New Movements'])

# Overall learning: pre-online vs post-perturbed
results['Overall Learning - ALL'] = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean All Movements'], df_perturbed_after_pivot['Mean All Movements'])
results['Overall Learning - KNOWN'] = perform_wilcoxon_test(df_non_perturbed_after_pivot['Mean Known Movements'], df_perturbed_before_pivot['Mean Known Movements'])
results['Overall Learning - NEW'] = perform_wilcoxon_test(df_perturbed_after_pivot['Mean New Movements'], df_non_perturbed_before_pivot['Mean New Movements'])

# Perturbation breakdown: non-perturbed before vs perturbed before
results['Perturbation Breakdown - ALL'] = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean All Movements'], df_perturbed_before_pivot['Mean All Movements'])
results['Perturbation Breakdown - KNOWN'] = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean Known Movements'], df_perturbed_before_pivot['Mean Known Movements'])
results['Perturbation Breakdown - NEW'] = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean New Movements'], df_perturbed_before_pivot['Mean New Movements'])

#  Build a DataFrame from the Wilcoxon results
summary_rows = []
for label, (stat, p_val) in results.items():
    group, metric = label.split(" - ")
    summary_rows.append({
        "Test Group": group,
        "Subset": metric,
        "Wilcoxon Statistic": stat,
        "p-value": p_val
    })

df_stats = pd.DataFrame(summary_rows)

# Sort by logical groupings
df_stats['Group Order'] = df_stats['Test Group'].map({
    'Online Training': 0,
    'Perturbed Study': 1,
    'Overall Learning': 2,
    'Perturbation Breakdown': 3
})
df_stats['Subset Order'] = df_stats['Subset'].map({'ALL': 0, 'KNOWN': 1, 'NEW': 2})
df_stats.sort_values(by=['Group Order', 'Subset Order'], inplace=True)
df_stats.drop(columns=['Group Order', 'Subset Order'], inplace=True)
print(df_stats)

# # pure online training for new movements
# stat, p_value = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean All Movements'], df_non_perturbed_after_pivot['Mean All Movements'])
# print(f"Wilcoxon tests for online training:\n\tALL  : Statistic={stat}, p-value={p_value}")
# stat, p_value = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean Known Movements'], df_non_perturbed_after_pivot['Mean Known Movements'])
# print(f"\tKNOWN: Statistic={stat}, p-value={p_value}")
# stat, p_value = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean New Movements'], df_non_perturbed_after_pivot['Mean New Movements'])
# print(f"\tNEW  : Statistic={stat}, p-value={p_value}")

# # online learning, perturbation study
# stat, p_value = perform_wilcoxon_test(df_perturbed_after_pivot['Mean All Movements'], df_perturbed_before_pivot['Mean All Movements'])
# print(f"\nWilcoxon tests for online learning, perturbation:\n\tALL  : Statistic={stat}, p-value={p_value}")
# stat, p_value = perform_wilcoxon_test(df_perturbed_after_pivot['Mean Known Movements'], df_perturbed_before_pivot['Mean Known Movements'])
# print(f"\tKNOWN: Statistic={stat}, p-value={p_value}")
# stat, p_value = perform_wilcoxon_test(df_perturbed_after_pivot['Mean New Movements'], df_perturbed_before_pivot['Mean New Movements'])
# print(f"\tNEW  : Statistic={stat}, p-value={p_value}")

# # for all learning
# stat, p_value = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean All Movements'], df_perturbed_after_pivot['Mean All Movements'])
# print(f"\nWilcoxon test for overall learning:\n\tALL  : Statistic={stat}, p-value={p_value}")
# stat, p_value = perform_wilcoxon_test(df_non_perturbed_after_pivot['Mean Known Movements'], df_perturbed_before_pivot['Mean Known Movements'])
# print(f"\tKNOWN: Statistic={stat}, p-value={p_value}")
# stat, p_value = perform_wilcoxon_test(df_perturbed_after_pivot['Mean New Movements'], df_non_perturbed_before_pivot['Mean New Movements'])
# print(f"\tNEW  : Statistic={stat}, p-value={p_value}")

# # validate that perturbation breaks down the MSE
# stat, p_value = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean All Movements'], df_perturbed_before_pivot['Mean All Movements'])
# print(f"\nWilcoxon test for perturbation breakdown:\n\tALL  : Statistic={stat}, p-value={p_value}")
# stat, p_value = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean Known Movements'], df_perturbed_before_pivot['Mean Known Movements'])
# print(f"\tKNOWN: Statistic={stat}, p-value={p_value}")
# stat, p_value = perform_wilcoxon_test(df_non_perturbed_before_pivot['Mean New Movements'], df_perturbed_before_pivot['Mean New Movements'])
# print(f"\tNEW  : Statistic={stat}, p-value={p_value}")

                Test Group Subset  Wilcoxon Statistic   p-value
0          Online Training    ALL                12.0  0.250000
1          Online Training  KNOWN                 3.0  0.019531
2          Online Training    NEW                 0.0  0.003906
3          Perturbed Study    ALL                 0.0  0.003906
4          Perturbed Study  KNOWN                 0.0  0.003906
5          Perturbed Study    NEW                 0.0  0.003906
6         Overall Learning    ALL                17.0  0.570312
7         Overall Learning  KNOWN                 0.0  0.003906
8         Overall Learning    NEW                 0.0  0.003906
9   Perturbation Breakdown    ALL                 0.0  0.003906
10  Perturbation Breakdown  KNOWN                 0.0  0.003906
11  Perturbation Breakdown    NEW                 2.0  0.011719


In [53]:
import numpy as np
from scipy.stats import rankdata

# Manual rank-biserial effect size computation for paired samples
def rank_biserial(pre, post):
    diffs = np.array(pre) - np.array(post)
    non_zero = diffs != 0
    diffs = diffs[non_zero]
    signs = np.sign(diffs)
    ranks = rankdata(abs(diffs))
    effect_size = np.sum(signs * ranks) / np.sum(ranks)
    return effect_size

# Recompute all effect sizes manually
effect_sizes_manual = {}

# Pure online training (non-perturbed, before vs after)
effect_sizes_manual['Online Training - ALL'] = rank_biserial(df_non_perturbed_before_pivot['Mean All Movements'], df_non_perturbed_after_pivot['Mean All Movements'])
effect_sizes_manual['Online Training - KNOWN'] = rank_biserial(df_non_perturbed_before_pivot['Mean Known Movements'], df_non_perturbed_after_pivot['Mean Known Movements'])
effect_sizes_manual['Online Training - NEW'] = rank_biserial(df_non_perturbed_before_pivot['Mean New Movements'], df_non_perturbed_after_pivot['Mean New Movements'])

# Online learning, perturbation study (perturbed before vs after)
effect_sizes_manual['Perturbed Study - ALL'] = rank_biserial(df_perturbed_before_pivot['Mean All Movements'], df_perturbed_after_pivot['Mean All Movements'])
effect_sizes_manual['Perturbed Study - KNOWN'] = rank_biserial(df_perturbed_before_pivot['Mean Known Movements'], df_perturbed_after_pivot['Mean Known Movements'])
effect_sizes_manual['Perturbed Study - NEW'] = rank_biserial(df_perturbed_before_pivot['Mean New Movements'], df_perturbed_after_pivot['Mean New Movements'])

# Overall learning: pre-online vs post-perturbed
effect_sizes_manual['Overall Learning - ALL'] = rank_biserial(df_non_perturbed_before_pivot['Mean All Movements'], df_perturbed_after_pivot['Mean All Movements'])
effect_sizes_manual['Overall Learning - KNOWN'] = rank_biserial(df_non_perturbed_after_pivot['Mean Known Movements'], df_perturbed_before_pivot['Mean Known Movements'])
effect_sizes_manual['Overall Learning - NEW'] = rank_biserial(df_perturbed_after_pivot['Mean New Movements'], df_non_perturbed_before_pivot['Mean New Movements'])

# Perturbation breakdown: non-perturbed before vs perturbed before
effect_sizes_manual['Perturbation Breakdown - ALL'] = rank_biserial(df_non_perturbed_before_pivot['Mean All Movements'], df_perturbed_before_pivot['Mean All Movements'])
effect_sizes_manual['Perturbation Breakdown - KNOWN'] = rank_biserial(df_non_perturbed_before_pivot['Mean Known Movements'], df_perturbed_before_pivot['Mean Known Movements'])
effect_sizes_manual['Perturbation Breakdown - NEW'] = rank_biserial(df_non_perturbed_before_pivot['Mean New Movements'], df_perturbed_before_pivot['Mean New Movements'])

# Build DataFrame
df_effects_manual = pd.DataFrame([
    {"Test Group": label.split(" - ")[0], "Subset": label.split(" - ")[1], "Effect Size (r)": r}
    for label, r in effect_sizes_manual.items()
])

# Sort like before
df_effects_manual['Group Order'] = df_effects_manual['Test Group'].map({
    'Online Training': 0,
    'Perturbed Study': 1,
    'Overall Learning': 2,
    'Perturbation Breakdown': 3
})
df_effects_manual['Subset Order'] = df_effects_manual['Subset'].map({'ALL': 0, 'KNOWN': 1, 'NEW': 2})
df_effects_manual.sort_values(by=['Group Order', 'Subset Order'], inplace=True)
df_effects_manual.drop(columns=['Group Order', 'Subset Order'], inplace=True)

print(df_effects_manual)

# tools.display_dataframe_to_user(name="Wilcoxon Effect Sizes (Manual)", dataframe=df_effects_manual)


                Test Group Subset  Effect Size (r)
0          Online Training    ALL         0.466667
1          Online Training  KNOWN        -0.866667
2          Online Training    NEW         1.000000
3          Perturbed Study    ALL         1.000000
4          Perturbed Study  KNOWN         1.000000
5          Perturbed Study    NEW         1.000000
6         Overall Learning    ALL         0.244444
7         Overall Learning  KNOWN        -1.000000
8         Overall Learning    NEW        -1.000000
9   Perturbation Breakdown    ALL        -1.000000
10  Perturbation Breakdown  KNOWN        -1.000000
11  Perturbation Breakdown    NEW        -0.911111


In [54]:
# Merge p-values and effect sizes into one summary table
df_merged = pd.merge(df_stats, df_effects_manual, on=["Test Group", "Subset"])

# Reorder columns for clarity
df_merged = df_merged[[
    "Test Group", "Subset", "Wilcoxon Statistic", "p-value", "Effect Size (r)"
]]

# Save to LaTeX
latex_combined = df_merged.to_latex(index=False, float_format="%.5f", caption="Wilcoxon test results with rank-biserial effect sizes for MSE comparisons across conditions.", label="tab:wilcoxon_full_results")
print(latex_combined)

\begin{table}
\caption{Wilcoxon test results with rank-biserial effect sizes for MSE comparisons across conditions.}
\label{tab:wilcoxon_full_results}
\begin{tabular}{llrrr}
\toprule
Test Group & Subset & Wilcoxon Statistic & p-value & Effect Size (r) \\
\midrule
Online Training & ALL & 12.00000 & 0.25000 & 0.46667 \\
Online Training & KNOWN & 3.00000 & 0.01953 & -0.86667 \\
Online Training & NEW & 0.00000 & 0.00391 & 1.00000 \\
Perturbed Study & ALL & 0.00000 & 0.00391 & 1.00000 \\
Perturbed Study & KNOWN & 0.00000 & 0.00391 & 1.00000 \\
Perturbed Study & NEW & 0.00000 & 0.00391 & 1.00000 \\
Overall Learning & ALL & 17.00000 & 0.57031 & 0.24444 \\
Overall Learning & KNOWN & 0.00000 & 0.00391 & -1.00000 \\
Overall Learning & NEW & 0.00000 & 0.00391 & -1.00000 \\
Perturbation Breakdown & ALL & 0.00000 & 0.00391 & -1.00000 \\
Perturbation Breakdown & KNOWN & 0.00000 & 0.00391 & -1.00000 \\
Perturbation Breakdown & NEW & 2.00000 & 0.01172 & -0.91111 \\
\bottomrule
\end{tabular}
\end{table

In [15]:
%matplotlib qt
# WHAT DO WE WANT TO CALCULATE:
# 1. MSE for each movement, by participant, for each perturbation condition (before/after)
# 2. Pearson correlation for each movement for each target, by participant, for each perturbation condition (before/after)
# 3. RSS and TSS for each movement for each target, by participant, for each perturbation condition (before/after)
# 4. Fisher's Z transformation to aggregate the correlations
import numpy as np
import pandas as pd
from pathlib import Path

from numpy import arctanh, tanh

# Setup
data_folder = Path('/home/haptix/haptix/biomech_PCP/paper_utils/paper_data/trajectories')
results = []  # Will hold all row dicts

# Loop
# for perturb in [True, False]:
#     for condition in ['before', 'after']:
#         for participant in online_participants:
#             for movement in movements
online_participants = [part for part in participants if 'P6' not in part]
for participant in online_participants:
    for perturb in [False, True]:
        for condition in ['after', 'before']:
            for movement in movements:
                gt = np.load(data_folder / participant / movement / 'GT/pred.npy')        # (T, D)
                pred = np.load(data_folder / participant / movement / f'perturb_{str(perturb)}/{condition}_online/pred.npy')


                rss = np.sum((gt - pred)**2, axis=0)  # shape (D,)
                tss = np.sum((gt - np.mean(gt, axis=0))**2, axis=0)  # shape (D,)
                mse = np.mean((gt - pred)**2, axis=0)  # shape (D,)
                corrs = np.array([np.corrcoef(gt[:, i], pred[:, i])[0, 1] for i in range(gt.shape[1])])  # (D,)
                R2 = np.where(tss > 1e-12, 1 - rss / tss, np.nan)  # shape (D,)

                for i, target in enumerate(targets):
                    results.append({
                        'Participant': participant,
                        'Movement': movement,
                        'Perturb': perturb,
                        'Condition': condition,
                        'Joint': target,
                        'MSE': mse[i],
                        'RSS': rss[i],
                        'TSS': tss[i],
                        'R2': R2[i],
                        'Corr': corrs[i]
                    })

            #     # get the all joints value
            #     results.append({
            #         'Participant': participant,
            #         'Movement': movement,
            #         'Perturb': perturb,
            #         'Condition': condition,
            #         'Joint': 'All',
            #         'MSE': np.mean(mse),
            #         'RSS': np.sum(rss),
            #         'TSS': np.sum(tss),
            #         'R2': 1 - np.sum(rss) / np.sum(tss) if np.sum(tss) > 1e-12 else np.nan,
            #         'Corr': np.NaN # we can't just aggregate!
            #     })
            
            # # then we also want to get all of the above for the participant and perturbation condition
            # results.append({
            #     'Participant': participant,
            #     'Movement': 'All',
            #     'Perturb': perturb,
            #     'Condition': condition,
            #     'Joint': 'All',
            #     'MSE': np.mean([res['MSE'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition]),
            #     'RSS': np.sum([res['RSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition]),
            #     'TSS': np.sum([res['TSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition]),
            #     'R2': 1 - np.sum([res['RSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition]) / 
            #             np.sum([res['TSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition]) 
            #             if np.sum([res['TSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition]) > 1e-12 else np.nan,
            #     'Corr': np.NaN  # we can't just aggregate!
            # })

            # # and also broken down by each joint across all movements like this
            # for target in targets:
            #     results.append({
            #         'Participant': participant,
            #         'Movement': 'All',
            #         'Perturb': perturb,
            #         'Condition': condition,
            #         'Joint': target,
            #         'MSE': np.mean([res['MSE'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition and res['Joint'] == target]),
            #         'RSS': np.sum([res['RSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition and res['Joint'] == target]),
            #         'TSS': np.sum([res['TSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition and res['Joint'] == target]),
            #         'R2': 1 - np.sum([res['RSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition and res['Joint'] == target]) / 
            #                 np.sum([res['TSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition and res['Joint'] == target]) 
            #                 if np.sum([res['TSS'] for res in results if res['Participant'] == participant and res['Perturb'] == perturb and res['Condition'] == condition and res['Joint'] == target]) > 1e-12 else np.nan,
            #         'Corr': np.NaN  # we can't just aggregate!
            #     })

# Convert to dataframe
df = pd.DataFrame(results)

# Save per-target metrics
df.to_csv('all_performance_metrics.csv', index=False)

# Replace extremely small TSS values to avoid divide-by-zero
df['TSS_safe'] = df['TSS'].where(df['TSS'] > 1e-12, np.nan)

# Compute participant × condition × perturb × joint aggregates (across movements)
agg_joint = df[df['Movement'] != 'All'].groupby(['Participant', 'Perturb', 'Condition', 'Joint']).agg({
    'MSE': 'mean',
    'RSS': 'sum',
    'TSS_safe': 'sum'
}).reset_index()

agg_joint['R2'] = 1 - agg_joint['RSS'] / agg_joint['TSS_safe']
agg_joint['Movement'] = 'All'
agg_joint['Corr'] = np.nan  # can't aggregate correlation directly

# Compute participant × condition × perturb aggregates across all joints/movements
agg_all = df[df['Movement'] != 'All'].groupby(['Participant', 'Perturb', 'Condition']).agg({
    'MSE': 'mean',
    'RSS': 'sum',
    'TSS_safe': 'sum'
}).reset_index()

agg_all['R2'] = 1 - agg_all['RSS'] / agg_all['TSS_safe']
agg_all['Movement'] = 'All'
agg_all['Joint'] = 'All'
agg_all['Corr'] = np.nan  # can't aggregate correlation
agg_all = agg_all[['Participant', 'Perturb', 'Condition', 'Movement', 'Joint', 'MSE', 'RSS', 'TSS_safe', 'R2', 'Corr']]

# Fix original df column to match new aggregated ones
df['TSS_safe'] = df['TSS']
df = df[['Participant', 'Perturb', 'Condition', 'Movement', 'Joint', 'MSE', 'RSS', 'TSS_safe', 'R2', 'Corr']]

# Combine all
df_final = pd.concat([df, agg_joint, agg_all], ignore_index=True)

# Optional: save to CSV
df_final.to_csv('aggregated_model_metrics.csv', index=False)


# now we want to do some aggregations
# Example aggregation: mean MSE per participant (can group by anything else too)
# agg_mse = df.groupby(['Participant', 'Perturb', 'Condition']) \
#            .apply(lambda g: np.mean(g['MSE'])) \
#            .reset_index(name='Mean_MSE')
# agg_mse.to_csv('mse_participant.csv', index=False)

# need to get a bit more clever for correlations and R^2 because 


# # Compute Fisher-Z aggregated correlation
# df['Z'] = arctanh(df['Corr'])  # Fisher Z-transform

# # Example aggregation: mean z per participant (can group by anything else too)
# agg_corr = df.groupby(['Participant', 'Perturb', 'Condition']) \
#              .apply(lambda g: tanh(np.average(g['Z']))) \
#              .reset_index(name='FisherZ_Aggregated_Corr')

# agg_corr.to_csv('aggregated_correlations.csv', index=False)

# # Example aggregation: R^2 per participant (sum RSS/TSS)
# agg_r2 = df.groupby(['Participant', 'Perturb', 'Condition']) \
#            .apply(lambda g: 1 - g['RSS'].sum() / g['TSS'].sum()) \
#            .reset_index(name='Global_R2')

# agg_r2.to_csv('aggregated_r2.csv', index=False)


  R2 = np.where(tss > 1e-12, 1 - rss / tss, np.nan)  # shape (D,)


In [54]:
import numpy as np
import pandas as pd
from pathlib import Path
from numpy import arctanh, tanh

rows = []

online_participants = [p for p in participants if 'P6' not in p]

for perturb in [True, False]:
    for condition in ['before', 'after']:
        for participant in online_participants:
            gt_cat = []
            pred_cat = []

            for movement in movements:
                gt = np.load(data_folder / participant / movement / 'GT/pred.npy')
                pred = np.load(data_folder / participant / movement / f'perturb_{str(perturb)}/{condition}_online/pred.npy')
                gt_cat.append(gt)
                pred_cat.append(pred)

            gt_cat = np.concatenate(gt_cat, axis=0)
            pred_cat = np.concatenate(pred_cat, axis=0)

            # Calculate per-target Pearson r and apply Fisher z
            r_vals = np.array([np.corrcoef(gt_cat[:, i], pred_cat[:, i])[0, 1] for i in range(len(targets))])
            z_vals = arctanh(r_vals)  # Fisher transform

            # No aggregation here — just transform back
            r_fisher_corrected = tanh(z_vals)

            row = {
                'Participant': participant,
                'Perturb': perturb,
                'Condition': condition
            }
            for target, r in zip(targets, r_fisher_corrected):
                row[target] = r

            rows.append(row)

# Final DataFrame: wide-form with one row per (participant, perturb, condition)
df_corr_fisher = pd.DataFrame(rows)

# Optional: save to CSV
df_corr_fisher.to_csv('corr_fisher_corrected_by_joint.csv', index=False)
