In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
from utils import (
    participants,
    recordings,
    targets,
    test_recordings,
)

# Notebook-wide pandas display options: print frames in full instead of
# eliding columns, cell contents or rows.
pd.set_option("display.width", 200)  # Total width of the display
pd.set_option("display.max_columns", None)  # Show all columns
pd.set_option("display.max_colwidth", None)  # Don't truncate column contents
pd.set_option("display.max_rows", None)  # Show all rows (no row truncation)

In [3]:
# Every movement that gets evaluated: `recordings` followed by
# `test_recordings`. The metric cell below reports these two groups as the
# "Known" and "New" movement sets respectively.
movements = recordings + test_recordings
# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this directory exists. Consider making it configurable.
# Alternative input folder (trajectories instead of predictions), kept for reference:
# data_folder = Path('/home/haptix/haptix/biomech_PCP/paper_utils/paper_data/trajectories')
data_folder = Path("/home/haptix/haptix/biomech_PCP/paper_utils/paper_data/predictions")

# Maps raw participant IDs (presumably the entries of `participants` -- verify
# against utils) to the short labels used in the tables. Labels starting with
# "P" are later averaged into the "Intact" row and labels starting with "A"
# into the "Amputees" row of the correlation tables.
ptcID = {
    "P_149": "P1",
    "P_238": "P2",
    "P_407": "P3",
    "P_426": "P4",
    "P_577": "P5",
    "P_668": "P6",
    "P_711": "P7",
    "P_950": "P8",
    "P7_453": "A1",
    "P6_820": "A2",
}

In [None]:
from pathlib import Path

import pandas as pd

# Long-form accumulators: one dict per output row.
corr_rows = []
mse_rows = []

# Movement subsets for which a correlation is reported, in output order.
subset_names = ("Combined", "Known", "New")

for perturb in [True, False]:
    for condition in ["before", "after"]:
        for participant in participants:
            participant_dir = data_folder / participant
            # Ground-truth / prediction frames collected per movement subset.
            gt_by_subset = {name: [] for name in subset_names}
            pred_by_subset = {name: [] for name in subset_names}

            for movement in movements:
                pred = pd.read_parquet(
                    participant_dir
                    / f"perturbed_{perturb}-pred_{movement}_{condition}.parquet"
                )
                gt = pd.read_parquet(
                    participant_dir / f"target_{movement}_{condition}.parquet"
                )

                # Mean squared error per column, then averaged over columns.
                # NOTE(review): `gt - pred` aligns on index and column labels;
                # this assumes both files share them -- confirm.
                column_mse = np.mean((gt - pred) ** 2, axis=0)
                mse_rows.append(
                    {
                        "Participant": participant,
                        "Perturb": perturb,
                        "Condition": condition,
                        "Movement": movement,
                        "MSE": np.mean(column_mse),
                    }
                )

                # Every movement counts towards "Combined"; additionally it is
                # "Known" if it is in `recordings`, else "New" if it is in
                # `test_recordings`.
                memberships = ["Combined"]
                if movement in recordings:
                    memberships.append("Known")
                elif movement in test_recordings:
                    memberships.append("New")
                for name in memberships:
                    gt_by_subset[name].append(gt)
                    pred_by_subset[name].append(pred)

            # Stack all movements of a subset row-wise so that one correlation
            # per target is computed over the whole subset.
            gt_stacked = {
                name: np.concatenate(frames, axis=0)
                for name, frames in gt_by_subset.items()
            }
            pred_stacked = {
                name: np.concatenate(frames, axis=0)
                for name, frames in pred_by_subset.items()
            }

            # Column i of the stacked arrays is paired with targets[i].
            for i, target in enumerate(targets):
                for name in subset_names:
                    corr_rows.append(
                        {
                            "Participant": participant,
                            "Perturb": perturb,
                            "Condition": condition,
                            "Target": target,
                            "Type": name,
                            "Correlation": np.corrcoef(
                                gt_stacked[name][:, i], pred_stacked[name][:, i]
                            )[0, 1],
                        }
                    )


# Create final long-form dataframes
df_corr = pd.DataFrame(corr_rows)
df_mse = pd.DataFrame(mse_rows)

# Persist the results; the table-building cells read these CSV files back in.
df_corr.to_csv("all_correlation_results.csv", index=False)
df_mse.to_csv("all_mse_results.csv", index=False)

In [None]:
# for correlation tables!
def df_to_latex(df, caption, label):
    """Render ``df`` as the source of a LaTeX ``table`` float.

    The column names form the header row. In every data row the first column
    is written via ``str`` and all remaining columns are formatted with three
    decimal places. Rows are emitted in the order they appear in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render. Column names must be strings; every column after the
        first must hold values that accept the ``.3f`` format.
    caption : str
        Text placed in the table caption.
    label : str
        Text placed in the table label.

    Returns
    -------
    str
        LaTeX source: one ``tabular`` wrapped in a ``resizebox`` inside a
        ``table`` environment.
    """
    col_names = list(df.columns)
    # LaTeX column format string: left-aligned first column, centred others
    # (e.g. "|l|ccc|").
    col_format = "|l|" + "c" * (len(col_names) - 1) + "|"

    header_row = " & ".join(col_names) + " \\\\"

    # Use .iloc for positional access: integer keys passed to Series[...] on a
    # string-labelled row are deprecated in pandas and will stop being treated
    # as positions.
    data_rows = "\n".join(
        " & ".join(
            [str(row.iloc[0])] + [f"{val:.3f}" for val in row.iloc[1:]]
        )
        + " \\\\"
        for _, row in df.iterrows()
    )

    latex_str = f"""\\begin{{table}}[ht]
    \\caption{{{caption}}}
    \\label{{{label}}}
    \\centering
    \\resizebox{{\\textwidth}}{{!}}{{%
    \\begin{{tabular}}{{{col_format}}}
        \\hline
        {header_row}
        \\hline
        {data_rows}
        \\hline
    \\end{{tabular}}
    }}
\\end{{table}}"""

    return latex_str


# for MSE tables!
def df_to_latex_split(df, caption, label, movement_cols=None):
    """Render an MSE table as two stacked LaTeX tabulars in one ``table`` float.

    The first tabular holds one column per movement; the second, smaller one
    holds every column whose name contains ``"Mean"``. Rows are sorted by the
    ``"Participant"`` column and all values are formatted with three decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``"Participant"`` column, one column per entry of
        ``movement_cols`` and, optionally, summary columns with ``"Mean"`` in
        their name. Column names must be strings.
    caption : str
        Text placed in the table caption.
    label : str
        Text placed in the table label.
    movement_cols : sequence of str, optional
        Columns shown in the main tabular, in order. Defaults to the
        notebook-level ``movements`` list, which keeps existing calls working
        unchanged.

    Returns
    -------
    str
        LaTeX source, with a leading and a trailing newline.
    """
    if movement_cols is None:
        # Fall back to the notebook-level list; looked up only when needed.
        movement_cols = movements
    movement_cols = list(movement_cols)

    df = df.sort_values(by="Participant")
    mean_cols = [col for col in df.columns if "Mean" in col]

    def _format_rows(value_cols):
        # One "participant & v1 & v2 ... \\" line per row of the sorted frame.
        return "\n".join(
            " & ".join(
                [str(row["Participant"])]
                + [f"{row[col]:.3f}" for col in value_cols]
            )
            + " \\\\"
            for _, row in df.iterrows()
        )

    # Main table: one column per movement.
    header_main = " & ".join(["\\textbf{Participant}"] + movement_cols) + " \\\\"
    rows_main = _format_rows(movement_cols)
    main_format = "c" * len(movement_cols)

    # Smaller summary table: the "Mean ..." columns.
    header_summary = (
        " & ".join(
            ["\\textbf{Participant}"] + [f"\\textbf{{{col}}}" for col in mean_cols]
        )
        + " \\\\"
    )
    rows_summary = _format_rows(mean_cols)
    summary_format = "c" * len(mean_cols)

    return f"""
\\begin{{table}}[ht!]
    \\caption{{{caption}}}
    \\label{{{label}}}
    \\centering
    \\resizebox{{\\textwidth}}{{!}}{{%
    \\begin{{tabular}}{{|l|{main_format}|}}
        \\hline
        {header_main}
        \\hline
        {rows_main}
        \\hline
    \\end{{tabular}}
    }}\\\\[1em]
    \\begin{{scriptsize}}
    \\begin{{tabular}}{{|l|{summary_format}|}}
        \\hline
        {header_summary}
        \\hline
        {rows_summary}
        \\hline
    \\end{{tabular}}
    \\end{{scriptsize}}
\\end{{table}}
"""

In [35]:
# Build one wide MSE table (participants x movements) per training stage, save
# it as CSV and append its LaTeX rendering to `mse_str`.
df_mse = pd.read_csv("all_mse_results.csv")

mse_str = ""

# (perturb, condition) pairs in the order the tables should appear.
desiredOrder = [(False, "before"), (False, "after"), (True, "before"), (True, "after")]

# Caption per (perturb, condition) pair.
captions = {
    (True, "before"): "MSE for each movement (With Perturbations)",
    (True, "after"): "MSE for each movement (After Learning Perturbations)",
    (False, "before"): "MSE for each movement (After Initial Training)",
    (False, "after"): "MSE for each movement (After Online Training)",
}

# Summary column name -> movement columns it averages over.
mean_groups = {
    "Mean Combined Movements": movements,
    "Mean Known Movements": recordings,
    "Mean New Movements": test_recordings,
}

for perturb, condition in desiredOrder:
    selected = (df_mse["Perturb"] == perturb) & (df_mse["Condition"] == condition)
    df = (
        df_mse[selected]
        .pivot(index="Participant", columns="Movement", values="MSE")
        .reset_index()
        .rename_axis(None, axis=1)  # drop the "Movement" name of the columns index
    )
    # Replace raw participant IDs by their short labels; unmapped IDs are kept.
    df["Participant"] = df["Participant"].map(ptcID).fillna(df["Participant"])
    df = df.sort_values(by="Participant")

    # Row-wise means over all / known / new movements.
    for mean_col, movement_cols in mean_groups.items():
        df[mean_col] = df.loc[:, movement_cols].mean(axis=1)

    # Saved per stage; the statistics cell reads these files back in.
    df.to_csv(f"mse_results_{perturb}_{condition}.csv", index=False)

    label = f"tab:mse_{perturb}_{condition}"
    table_tex = df_to_latex_split(df, captions[(perturb, condition)], label)
    mse_str += table_tex + "\n\n"

In [None]:
# Show the accumulated LaTeX source of all MSE tables.
print(mse_str)

In [32]:
# Build one correlation table (participants x targets) per movement type and
# training stage, save it as CSV and append its LaTeX rendering to `corr_str`.
df_corr = pd.read_csv("all_correlation_results.csv")

corr_str = ""

stage_order = [(False, "before"), (False, "after"), (True, "before"), (True, "after")]
movementTypes = ["Combined", "Known", "New"]
# All stages for the first movement type, then all stages for the next, ...
desiredOrder = [
    (movementType, perturb, condition)
    for movementType in movementTypes
    for perturb, condition in stage_order
]

# Participant-label prefix -> name of the group summary row, in output order.
group_labels = (("A", "Amputees"), ("P", "Intact"))

for movementType, perturb, condition in desiredOrder:
    selected = (
        (df_corr["Perturb"] == perturb)
        & (df_corr["Condition"] == condition)
        & (df_corr["Type"] == movementType)
    )
    df = (
        df_corr[selected]
        .pivot(index="Participant", columns="Target", values="Correlation")
        .reset_index()
        .rename_axis(None, axis=1)  # drop the "Target" name of the columns index
    )
    # Replace raw participant IDs by their short labels; unmapped IDs are kept.
    df["Participant"] = df["Participant"].map(ptcID).fillna(df["Participant"])
    df = df.sort_values(by="Participant")

    # Average correlations in Fisher z-space (arctanh), then map the mean back
    # to correlation space with tanh.
    transformed_corr = np.arctanh(df.loc[:, targets])
    df["Mean Correlation"] = np.tanh(transformed_corr.mean(axis=1))

    # One summary row per participant group. The per-target entries are plain
    # arithmetic means of the correlations; only "Mean Correlation" is averaged
    # in Fisher z-space.
    group_rows = []
    for prefix, group_name in group_labels:
        in_group = df["Participant"].str.startswith(prefix)
        group_row = df.loc[in_group, targets].mean()
        group_z_mean = transformed_corr.loc[in_group].mean().mean()
        group_row["Participant"] = group_name
        group_row["Mean Correlation"] = np.tanh(group_z_mean)
        group_rows.append(group_row.to_frame().T)

    df = pd.concat([df, *group_rows], ignore_index=True)

    df.to_csv(
        f"correlation_results_{perturb}_{condition}_{movementType}.csv", index=False
    )

    # Caption per (perturb, condition) pair for the current movement type.
    captions = {
        (True, "before"): (
            f"{movementType} Movements Correlation Results (With Perturbations)"
        ),
        (True, "after"): (
            f"{movementType} Movements Correlation Results (After Learning Perturbations)"
        ),
        (False, "before"): (
            f"{movementType} Movements Correlation Results (After Initial Training)"
        ),
        (False, "after"): (
            f"{movementType} Movements Correlation Results (After Online Training)"
        ),
    }

    label = f"tab:corr_{movementType}_{perturb}_{condition}"
    table_tex = df_to_latex(df, captions[(perturb, condition)], label)
    corr_str += table_tex + "\n\n"

In [None]:
# Show the accumulated LaTeX source of all correlation tables.
print(corr_str)

In [43]:
# we also want to perform some statistical tests to compare the MSE between conditions
from scipy.stats import wilcoxon


# we want to perform these on the means between different conditions
def perform_wilcoxon_test(df1, df2):
    """Run a paired Wilcoxon signed-rank test and compute its effect size.

    Parameters
    ----------
    df1, df2 : array-like of numbers
        Paired samples of equal length; element i of ``df1`` is paired with
        element i of ``df2`` (by position, as ``scipy.stats.wilcoxon`` does).

    Returns
    -------
    stat : float
        Test statistic returned by ``scipy.stats.wilcoxon(df1, df2)``.
    p_value : float
        p-value returned by the same call.
    r_rb : float
        Matched-pairs rank-biserial correlation of ``df2 - df1``:
        ``(W_pos - W_neg) / (W_pos + W_neg)``. It lies in [-1, 1] and is
        positive when ``df2`` tends to be larger than ``df1``. NaN if all
        differences are zero.
    """
    # Perform Wilcoxon signed-rank test
    stat, p_value = wilcoxon(df1, df2)

    # Pair by position so the effect size uses exactly the pairs scipy tested,
    # even if the inputs are Series with different indexes.
    diffs = np.asarray(df2, dtype=float) - np.asarray(df1, dtype=float)

    # wilcoxon() is called with its default zero handling, which discards zero
    # differences before ranking; do the same here so zeros neither shift the
    # ranks of the other pairs nor inflate the denominator.
    nonzero = diffs[diffs != 0]
    if nonzero.size == 0:
        return stat, p_value, float("nan")

    ranks = pd.Series(np.abs(nonzero)).rank().to_numpy()  # average ranks for ties
    W_pos = ranks[nonzero > 0].sum()
    W_neg = ranks[nonzero < 0].sum()
    # W_pos + W_neg is the total rank sum, i.e. n * (n + 1) / 2 for the n
    # non-zero pairs.
    r_rb = (W_pos - W_neg) / (W_pos + W_neg)
    return stat, p_value, r_rb


# Perform all tests and collect output
results = []

# Comparisons made for each of the three test sets (Combined, Known, New), in
# the order their rows are appended:
# 1) After Initial Training vs. After Online Training
# 2) With Perturbations vs. After Learning Perturbations
# 3) After Initial Training vs. After Learning Perturbations
# 4) After Initial Training vs. With Perturbations

# Per-stage MSE tables written by the MSE table cell. Rows are paired by
# position across files; each file was sorted by participant label before being
# saved, so this assumes all four contain the same participants.
df_AIT = pd.read_csv("mse_results_False_before.csv")  # After Initial Training
df_AOT = pd.read_csv("mse_results_False_after.csv")  # After Online Training
df_WP = pd.read_csv("mse_results_True_before.csv")  # With Perturbations
df_ALP = pd.read_csv("mse_results_True_after.csv")  # After Learning Perturbations

# (row label, first sample, second sample). The effect size is computed on
# second - first, so it is positive when the MSE is larger in the second table.
comparisons = [
    ("Online Training", df_AIT, df_AOT),
    ("Learning Perturbations", df_WP, df_ALP),
    ("Pre-Perturbation Baseline", df_AIT, df_ALP),
    ("Perturbation Breakdown", df_AIT, df_WP),
]

for movement_type in [
    "Mean Combined Movements",
    "Mean Known Movements",
    "Mean New Movements",
]:
    # "Mean Known Movements" -> "Known", etc. (named `test_set` rather than
    # `type` so the builtin is not shadowed for the rest of the session).
    test_set = movement_type.replace("Mean ", "").replace(" Movements", "")
    for comparison_name, df_first, df_second in comparisons:
        stat, p_value, effect_size = perform_wilcoxon_test(
            df_first[movement_type], df_second[movement_type]
        )
        results.append(
            {
                "Condition": comparison_name,
                "Test Set": test_set,
                "Statistic": stat,
                "p-value": p_value,
                "Effect Size": effect_size,
            }
        )


# Create a DataFrame from the results
df_results = pd.DataFrame(results)

# Group the rows by comparison, in the order the comparisons are listed above,
# and alphabetically by test set within each comparison.
df_results["Condition"] = pd.Categorical(
    df_results["Condition"],
    categories=[name for name, _, _ in comparisons],
    ordered=True,
)
df_results = df_results.sort_values(by=["Condition", "Test Set"])
print(df_results)
# Save the (sorted) results to a CSV file
df_results.to_csv("wilcoxon_results.csv", index=False)

latex_results = df_results.to_latex(
    index=False,
    caption="Wilcoxon Signed-Rank Test Results for MSE Comparisons",
    label="tab:wilcoxon_results",
    column_format="|l l c c c|",
    escape=False,
    float_format="%.3f",
)
# Print the LaTeX rendering of the results table
print(latex_results)

                    Condition  Test Set  Statistic   p-value  Effect Size
0             Online Training  Combined        0.0  0.001953    -1.000000
4             Online Training     Known       23.0  0.695312     0.163636
8             Online Training       New        0.0  0.001953    -1.000000
1      Learning Perturbations  Combined        0.0  0.001953    -1.000000
5      Learning Perturbations     Known        0.0  0.001953    -1.000000
9      Learning Perturbations       New        0.0  0.001953    -1.000000
2   Pre-Perturbation Baseline  Combined        9.0  0.064453    -0.672727
6   Pre-Perturbation Baseline     Known        4.0  0.013672     0.854545
10  Pre-Perturbation Baseline       New        0.0  0.001953    -1.000000
3      Perturbation Breakdowm  Combined        0.0  0.001953     1.000000
7      Perturbation Breakdowm     Known        0.0  0.001953     1.000000
11     Perturbation Breakdowm       New        4.0  0.013672     0.854545
                    Condition  Test Se