## Question 1: Synergy Similarity between Phases 

### Best Matching Pairs with Hungarian Algorithm

In [None]:
"""
Cosine Similarity and Pearson Correlation between 
Reach & Grasp (Phase 1) and Lift & Hold (Phase 2)
with Best Matching (Hungarian Algorithm) and One-Sample T-Tests
Creates a single annotated bar plot of synergy pairs.
"""
    
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, ttest_1samp
from scipy.optimize import linear_sum_assignment
import seaborn as sns

# ------------------------------------------------------------------------------------------
# 0) Configuration
# ------------------------------------------------------------------------------------------
# Root folder of the experimental data; participant sub-folders named "P(<id>)"
# are looked up inside it.
BASE_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Experimental Data"
# Participant IDs to process.
PARTICIPANTS = [1, 2, 3, 4, 5, 6, 7, 8]
# Phase tokens used in the synergy file names (index 0 = Phase 1, index 1 = Phase 2).
PHASES = ["phase1", "phase2"]
# Destination folder for every CSV and PNG written by this cell.
OUTPUT_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q1 Synergy Similarity Between Phases\Best Matching Hungarian"
# Create the output folder up front; exist_ok=True keeps re-runs from failing.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ------------------------------------------------------------------------------------------
# 1) Helper Functions
# ------------------------------------------------------------------------------------------
def cosine_similarity(vec_a, vec_b):
    """
    Return the cosine of the angle between two vectors.

    The result is NaN whenever the product of the two Euclidean norms is
    not greater than 1e-12 (for instance when one vector is all zeros).
    """
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if norm_product > 1e-12:
        return np.dot(vec_a, vec_b) / norm_product
    return np.nan

def load_synergy_factors(participant_dir, trial_idx, phase_name):
    """
    Read the synergy matrix W of one participant / trial / phase.

    The file is expected at
    ``<participant_dir>/Extracted Synergies/trial_<NN>_<phase_name>_W.npy``
    where ``<NN>`` is the trial index zero-padded to two digits.

    Returns the loaded array, or None when the file does not exist.
    """
    w_file = os.path.join(
        participant_dir,
        "Extracted Synergies",
        f"trial_{trial_idx:02d}_{phase_name}" + "_W.npy",
    )
    return np.load(w_file) if os.path.exists(w_file) else None

def match_synergies_best(W1, W2):
    """
    Pair the columns of W1 with the columns of W2 so that the summed
    cosine similarity of the pairs is maximal (Hungarian algorithm).

    Similarities that come out as NaN contribute a cost of 0.
    Returns a list of (i1, i2) column-index pairs.
    """
    n_first, n_second = W1.shape[1], W2.shape[1]

    # Full similarity table; the reshape keeps the 2-D shape even when one
    # of the matrices has no columns.
    sims = np.array(
        [
            [cosine_similarity(W1[:, r], W2[:, c]) for c in range(n_second)]
            for r in range(n_first)
        ],
        dtype=float,
    ).reshape(n_first, n_second)

    # linear_sum_assignment minimises, so similarities are negated.
    cost = np.where(np.isnan(sims), 0.0, -sims)
    rows, cols = linear_sum_assignment(cost)
    return list(zip(rows, cols))

def sig_label(p):
    """
    Map a p-value to its significance marker.

    '***' for p < 0.001, '**' for p < 0.01, '*' for p < 0.05, otherwise 'ns'.
    """
    for cutoff, stars in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p < cutoff:
            return stars
    return 'ns'

# ------------------------------------------------------------------------------------------
# 2) Main Analysis Function
# ------------------------------------------------------------------------------------------
def synergy_similarity_analysis():
    """
    Compare Phase 1 and Phase 2 synergies for every participant and trial.

    For each trial (1-24) whose two W matrices exist and have the same number
    of columns, the synergies are paired with ``match_synergies_best`` and the
    Pearson correlation and cosine similarity of every pair are recorded.
    The following files are written to ``OUTPUT_DIR``:

    * ``best_matching_synergy_raw.csv``          - one row per matched pair
    * ``best_matching_synergy_summary.csv``      - mean / std per pair number
    * ``best_matching_synergy_ttest.csv``        - one-sample t-tests vs. 0,
      one-sided (H1: mean > 0)
    * ``annotated_similarity_synergy_pairs.png`` - bar plot with significance stars
    * ``summary_table_synergy_pairs.png``        - merged summary / t-test table

    Returns None. When no synergy pair could be collected, only the (empty)
    raw CSV is written and the function stops early.
    """
    similarity_rows = []

    for pid in PARTICIPANTS:
        participant_str = f"P({pid})"
        participant_dir = os.path.join(BASE_DIR, participant_str)
        if not os.path.isdir(participant_dir):
            print(f"[WARN] Missing folder for {participant_str}. Skipping.")
            continue

        for trial_idx in range(1, 25):
            # Load Phase 1 & Phase 2
            W1 = load_synergy_factors(participant_dir, trial_idx, PHASES[0])
            W2 = load_synergy_factors(participant_dir, trial_idx, PHASES[1])
            if W1 is None or W2 is None:
                print(f"[INFO] Missing synergy data for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
            if W1.shape[1] != W2.shape[1]:
                print(f"[WARN] Mismatch in number of synergies for Trial {trial_idx} of {participant_str}. Skipping.")
                continue

            # Best matching
            pairs = match_synergies_best(W1, W2)
            for pair_num, (i1, i2) in enumerate(pairs, start=1):
                vec1, vec2 = W1[:, i1], W2[:, i2]
                pear, _ = pearsonr(vec1, vec2)
                cosi = cosine_similarity(vec1, vec2)
                similarity_rows.append({
                    'synergy_pair': pair_num,
                    'pearson_corr': pear,
                    'cosine_sim': cosi
                })

    # Create DataFrame
    synergy_df = pd.DataFrame(similarity_rows)
    csv_raw = os.path.join(OUTPUT_DIR, "best_matching_synergy_raw.csv")
    synergy_df.to_csv(csv_raw, index=False)
    print(f"[INFO] Saved raw synergy data to {csv_raw}")

    # Without any rows the frame has no 'synergy_pair' column and the
    # groupby below would raise a KeyError, so stop here instead.
    if synergy_df.empty:
        print("[INFO] No synergy data found at all. Stopping here.")
        return

    # Summary Statistics
    summary_df = synergy_df.groupby('synergy_pair').agg(
        avg_pearson=('pearson_corr', 'mean'),
        std_pearson=('pearson_corr', 'std'),
        avg_cosine=('cosine_sim', 'mean'),
        std_cosine=('cosine_sim', 'std')
    ).reset_index()

    # Round to three decimal places
    summary_df = summary_df.round({
        'avg_pearson': 3,
        'std_pearson': 3,
        'avg_cosine': 3,
        'std_cosine': 3
    })

    csv_summary = os.path.join(OUTPUT_DIR, "best_matching_synergy_summary.csv")
    summary_df.to_csv(csv_summary, index=False)
    print(f"[INFO] Saved synergy summary to {csv_summary}")

    # One-Sample T-Tests (vs. 0, one-sided: H1 mean > 0)
    ttest_rows = []
    pearson_stars = {}
    cosine_stars = {}
    for s_pair in summary_df['synergy_pair']:
        sub_df = synergy_df[synergy_df['synergy_pair'] == s_pair]
        pear_vals = sub_df['pearson_corr'].dropna()
        cos_vals = sub_df['cosine_sim'].dropna()

        # Pearson: convert the two-sided p-value to a one-sided one
        t_p, p_two = ttest_1samp(pear_vals, 0, nan_policy='omit')
        p_pear = p_two / 2 if t_p > 0 else 1 - p_two / 2

        # Cosine
        t_c, c_two = ttest_1samp(cos_vals, 0, nan_policy='omit')
        p_cos = c_two / 2 if t_c > 0 else 1 - c_two / 2

        # Stars are taken from the exact p-values: after rounding to three
        # decimals a value such as 0.0496 would become 0.05 and lose its star.
        pearson_stars[s_pair] = sig_label(p_pear)
        cosine_stars[s_pair] = sig_label(p_cos)

        ttest_rows.append({
            'synergy_pair': s_pair,
            't_pear': t_p,
            'p_pear': p_pear,
            't_cos': t_c,
            'p_cos': p_cos
        })

    ttest_df = pd.DataFrame(ttest_rows)

    # Round to three decimal places (for the CSV and the table only)
    ttest_df = ttest_df.round({
        't_pear': 3,
        'p_pear': 3,
        't_cos': 3,
        'p_cos': 3
    })

    csv_ttest = os.path.join(OUTPUT_DIR, "best_matching_synergy_ttest.csv")
    ttest_df.to_csv(csv_ttest, index=False)
    print(f"[INFO] Saved synergy T-test results to {csv_ttest}")

    # Merge for Plotting
    merged = summary_df.merge(ttest_df, on='synergy_pair')
    merged['pearson_sig'] = merged['synergy_pair'].map(pearson_stars)
    merged['cosine_sig'] = merged['synergy_pair'].map(cosine_stars)

    # Plotting
    plt.figure(figsize=(14, 8))
    sns.set(style="whitegrid")
    bar_width = 0.35
    x = np.arange(len(merged))

    # One bar group per metric: (mean column, std column, stars column,
    # horizontal offset, legend label, colour)
    bar_groups = [
        ('avg_pearson', 'std_pearson', 'pearson_sig', -bar_width/2, 'Pearson Correlation', 'skyblue'),
        ('avg_cosine', 'std_cosine', 'cosine_sig', bar_width/2, 'Cosine Similarity', 'lightgreen'),
    ]
    for avg_col, std_col, sig_col, offset, label, color in bar_groups:
        plt.bar(
            x + offset,
            merged[avg_col],
            bar_width,
            yerr=merged[std_col],
            capsize=5,
            label=label,
            color=color
        )
        # Annotate significance stars just above the error bar
        for xpos, avg, std, stars in zip(x + offset, merged[avg_col], merged[std_col], merged[sig_col]):
            plt.text(
                xpos,
                avg + std + 0.05,
                stars,
                ha='center',
                va='bottom',
                fontsize=14,
                color='black'
            )

    # Labels and Title
    plt.xlabel('Synergy Pair', fontsize=12)
    plt.ylabel('Similarity Metrics', fontsize=12)
    plt.title('Average Pearson Correlation and Cosine Similarity per Synergy Pair\n(Phase 1 vs Phase 2)', fontsize=16)
    plt.xticks(x, [f"Pair {int(s)}" for s in merged['synergy_pair']], fontsize=12)

    # Calculate maximum y-value for ylim (leave head-room for the stars)
    max_p = (merged['avg_pearson'] + merged['std_pearson']).max()
    max_c = (merged['avg_cosine'] + merged['std_cosine']).max()
    max_value = max(max_p, max_c) + 0.2
    plt.ylim(0, max_value)  # Start y-axis at 0

    plt.legend()
    plt.tight_layout()

    # Save Plot
    plot_path = os.path.join(OUTPUT_DIR, "annotated_similarity_synergy_pairs.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"[INFO] Saved annotated bar plot to {plot_path}")

    # Save Summary Table as PNG
    fig, ax = plt.subplots(figsize=(14, 2 + 0.5 * len(merged)))
    ax.axis('off')

    # Column titles; the dict order is also the display order of the table
    column_titles = {
        'synergy_pair': 'Synergy Pair',
        'avg_pearson': 'Avg Pearson Corr',
        'std_pearson': 'Std Pearson Corr',
        't_pear': 'Pearson t',
        'p_pear': 'Pearson p (1-sided)',
        'pearson_sig': 'Pearson Sig',
        'avg_cosine': 'Avg Cosine Sim',
        'std_cosine': 'Std Cosine Sim',
        't_cos': 'Cosine t',
        'p_cos': 'Cosine p (1-sided)',
        'cosine_sig': 'Cosine Sig'
    }
    display_columns = list(column_titles.values())
    table_df = merged.rename(columns=column_titles)[display_columns]

    table_display = ax.table(
        cellText=table_df.values,
        colLabels=display_columns,
        cellLoc='center',
        loc='center'
    )

    # Customize table appearance
    table_display.auto_set_font_size(False)
    table_display.set_fontsize(10)
    table_display.scale(1.2, 1.2)

    # Add title
    plt.title("Synergy Pair Similarity: One-Sample T-tests vs Zero", fontsize=14, pad=20)
    plt.tight_layout()

    # Save Table as PNG
    table_png = os.path.join(OUTPUT_DIR, "summary_table_synergy_pairs.png")
    plt.savefig(table_png, bbox_inches='tight', dpi=300)
    plt.close()
    print(f"[INFO] Saved summary table as PNG to {table_png}")

# ------------------------------------------------------------------------------------------
# 3) Execute Analysis
# ------------------------------------------------------------------------------------------
def main():
    """Run the Phase 1 vs. Phase 2 synergy similarity analysis (Hungarian matching)."""
    synergy_similarity_analysis()

# Only start the analysis when this code is executed as the main program.
if __name__ == "__main__":
    main()


### Best Matching Pairs with Greedy Algorithm

In [17]:
"""
Cosine Similarity and Pearson Correlation between 
Reach & Grasp (Phase 1) and Lift & Hold (Phase 2)
using Greedy Best Matching and a One-Sample T-Test vs. 0.7

We keep all synergy pairs (no threshold) and test:
H0: mean correlation <= 0.7  vs  H1: mean correlation > 0.7
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, ttest_1samp
import seaborn as sns

# ------------------------------------------------------------------------------------------
# 0) Configuration
# ------------------------------------------------------------------------------------------
# Root folder of the experimental data; participant sub-folders named "P(<id>)"
# are looked up inside it.
BASE_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Experimental Data"
# Participant IDs to process.
PARTICIPANTS = [1, 2, 3, 4, 5, 6, 7, 8]
# Phase tokens used in the synergy file names (index 0 = Phase 1, index 1 = Phase 2).
PHASES = ["phase1", "phase2"]

# Destination folder for every CSV and PNG written by this cell.
OUTPUT_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q1 Synergy Similarity Between Phases\Best Matching Greedy"
# Create the output folder up front; exist_ok=True keeps re-runs from failing.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ------------------------------------------------------------------------------------------
# 1) Helper Functions
# ------------------------------------------------------------------------------------------
def cosine_similarity(vec_a, vec_b):
    """
    Cosine similarity of two vectors.

    NaN is returned unless the product of both Euclidean norms exceeds 1e-12.
    """
    scale = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if not scale > 1e-12:
        return np.nan
    return np.dot(vec_a, vec_b) / scale

def load_synergy_factors(participant_dir, trial_idx, phase_name):
    """
    Fetch the synergy matrix W for a participant, trial and phase.

    Looks for ``trial_<NN>_<phase_name>_W.npy`` (``<NN>`` = two-digit trial
    index) inside the participant's "Extracted Synergies" folder.

    Returns the array stored in that file, or None if the file is absent.
    """
    file_name = f"trial_{trial_idx:02d}_{phase_name}" + "_W.npy"
    w_file = os.path.join(participant_dir, "Extracted Synergies", file_name)
    if os.path.exists(w_file):
        return np.load(w_file)
    return None

def match_synergies_greedy(W1, W2):
    """
    Greedy matching of synergies to maximize cosine similarity.

    The cosine similarity of every (column of W1, column of W2) combination
    is computed once. The highest remaining similarity is then picked
    repeatedly, and the two columns involved are removed from further
    consideration. Ties are resolved in favour of the lowest W1 index, then
    the lowest W2 index. Combinations whose norm product is not greater than
    1e-12, or whose similarity is NaN, are never matched.

    Parameters:
        W1, W2: 2-D arrays whose columns are the synergy vectors; both must
            have the same number of rows.

    Returns:
        A list of (i1, i2) index pairs in the order they were picked
        (best match first). The list is shorter than the number of
        synergies when no valid combination is left.
    """
    n_s1 = W1.shape[1]
    n_s2 = W2.shape[1]
    if n_s1 == 0 or n_s2 == 0:
        return []

    # Similarity table for all column combinations, computed a single time
    # instead of once per greedy round. Entries that cannot be matched are
    # stored as -inf so that argmax never prefers them.
    denom = np.outer(np.linalg.norm(W1, axis=0), np.linalg.norm(W2, axis=0))
    sims = np.full((n_s1, n_s2), -np.inf)
    np.divide(W1.T @ W2, denom, out=sims, where=denom > 1e-12)
    sims[np.isnan(sims)] = -np.inf

    matched_pairs = []
    for _ in range(min(n_s1, n_s2)):
        # argmax on the flattened table returns the first maximum in
        # row-major order, i.e. lowest i first, then lowest j.
        i, j = divmod(int(np.argmax(sims)), n_s2)

        # If we cannot find a valid match, stop
        if sims[i, j] == -np.inf:
            break

        # Record this match and block its row and column
        matched_pairs.append((i, j))
        sims[i, :] = -np.inf
        sims[:, j] = -np.inf

    return matched_pairs

def sig_label(p_one_sided):
    """
    Translate a one-sided p-value into a significance marker.

    Below 0.001 -> '***', below 0.01 -> '**', below 0.05 -> '*', else 'ns'.
    """
    # Count how many of the nested thresholds the p-value undercuts.
    n_passed = sum(p_one_sided < cutoff for cutoff in (0.05, 0.01, 0.001))
    return ('ns', '*', '**', '***')[n_passed]

# ------------------------------------------------------------------------------------------
# 2) Main Analysis Function
# ------------------------------------------------------------------------------------------
def synergy_similarity_analysis():
    """
    Compare Phase 1 and Phase 2 synergies using greedy best matching.

    For each trial (1-24) whose two W matrices exist and have the same number
    of columns, the synergies are paired with ``match_synergies_greedy`` and
    the Pearson correlation and cosine similarity of every pair are recorded
    (all pairs are kept). The following files are written to ``OUTPUT_DIR``:

    * ``best_matching_synergy_raw.csv``          - one row per matched pair
    * ``best_matching_synergy_summary.csv``      - mean / std per pair number
    * ``best_matching_synergy_ttest.csv``        - one-sample t-tests vs. 0.7,
      one-sided (H0: mean <= 0.7, H1: mean > 0.7)
    * ``annotated_similarity_synergy_pairs.png`` - bar plot with significance stars
    * ``summary_table_synergy_pairs.png``        - merged summary / t-test table

    Returns None. When no synergy pair could be collected, only the (empty)
    raw CSV is written and the function stops early.
    """
    similarity_rows = []

    for pid in PARTICIPANTS:
        participant_str = f"P({pid})"
        participant_dir = os.path.join(BASE_DIR, participant_str)
        if not os.path.isdir(participant_dir):
            print(f"[WARN] Missing folder for {participant_str}. Skipping.")
            continue

        for trial_idx in range(1, 25):
            # Load Phase 1 & Phase 2
            W1 = load_synergy_factors(participant_dir, trial_idx, PHASES[0])
            W2 = load_synergy_factors(participant_dir, trial_idx, PHASES[1])
            if W1 is None or W2 is None:
                print(f"[INFO] Missing synergy data for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
            if W1.shape[1] != W2.shape[1]:
                print(f"[WARN] Mismatch in number of synergies for Trial {trial_idx} of {participant_str}. Skipping.")
                continue

            # Best matching using Greedy approach
            pairs = match_synergies_greedy(W1, W2)
            for pair_num, (i1, i2) in enumerate(pairs, start=1):
                vec1, vec2 = W1[:, i1], W2[:, i2]

                pear, _ = pearsonr(vec1, vec2)
                cosi = cosine_similarity(vec1, vec2)

                # Keep all pairs
                similarity_rows.append({
                    'synergy_pair': pair_num,
                    'pearson_corr': pear,
                    'cosine_sim': cosi
                })

    # Create DataFrame of all synergy pairs
    synergy_df = pd.DataFrame(similarity_rows)
    csv_raw = os.path.join(OUTPUT_DIR, "best_matching_synergy_raw.csv")
    synergy_df.to_csv(csv_raw, index=False)
    print(f"[INFO] Saved raw synergy data to {csv_raw}")

    if synergy_df.empty:
        print("[INFO] No synergy data found at all. Stopping here.")
        return

    # Summary Statistics
    summary_df = synergy_df.groupby('synergy_pair').agg(
        avg_pearson=('pearson_corr', 'mean'),
        std_pearson=('pearson_corr', 'std'),
        avg_cosine=('cosine_sim', 'mean'),
        std_cosine=('cosine_sim', 'std')
    ).reset_index()

    # Round to three decimal places
    summary_df = summary_df.round({
        'avg_pearson': 3,
        'std_pearson': 3,
        'avg_cosine': 3,
        'std_cosine': 3
    })

    csv_summary = os.path.join(OUTPUT_DIR, "best_matching_synergy_summary.csv")
    summary_df.to_csv(csv_summary, index=False)
    print(f"[INFO] Saved synergy summary to {csv_summary}")

    # ----------------------------------------------------------------------
    # One-Sample T-Tests (Against 0.7, one-sided)
    # H0: mean correlation <= 0.7
    # H1: mean correlation > 0.7
    # ----------------------------------------------------------------------
    ttest_rows = []
    pearson_stars = {}
    cosine_stars = {}
    for s_pair in summary_df['synergy_pair']:
        sub_df = synergy_df[synergy_df['synergy_pair'] == s_pair]
        pear_vals = sub_df['pearson_corr'].dropna()
        cos_vals = sub_df['cosine_sim'].dropna()

        # T-test vs 0.7 for Pearson
        t_p, p_two_pear = ttest_1samp(pear_vals, 0.7, nan_policy='omit')
        # If t > 0, then mean > 0.7 -> p = p_two / 2, else p = 1 - p_two/2
        p_pear = p_two_pear / 2.0 if t_p > 0 else 1.0 - (p_two_pear / 2.0)

        # T-test vs 0.7 for Cosine (same one-sided logic)
        t_c, p_two_cos = ttest_1samp(cos_vals, 0.7, nan_policy='omit')
        p_cos = p_two_cos / 2.0 if t_c > 0 else 1.0 - (p_two_cos / 2.0)

        # Stars are taken from the exact p-values: after rounding to three
        # decimals a value such as 0.0496 would become 0.05 and lose its star.
        pearson_stars[s_pair] = sig_label(p_pear)
        cosine_stars[s_pair] = sig_label(p_cos)

        ttest_rows.append({
            'synergy_pair': s_pair,
            't_pear': t_p,
            'p_pear': p_pear,
            't_cos': t_c,
            'p_cos': p_cos
        })

    ttest_df = pd.DataFrame(ttest_rows)
    # Round the T-test results (for the CSV and the table only)
    ttest_df = ttest_df.round({
        't_pear': 3, 'p_pear': 3, 't_cos': 3, 'p_cos': 3
    })

    csv_ttest = os.path.join(OUTPUT_DIR, "best_matching_synergy_ttest.csv")
    ttest_df.to_csv(csv_ttest, index=False)
    print(f"[INFO] Saved synergy T-test (vs 0.7) results to {csv_ttest}")

    # Merge results for Plotting
    merged = summary_df.merge(ttest_df, on='synergy_pair')
    merged['pearson_sig'] = merged['synergy_pair'].map(pearson_stars)
    merged['cosine_sig'] = merged['synergy_pair'].map(cosine_stars)

    # ---------------------------------------------------------------------------------------
    # Plotting
    # ---------------------------------------------------------------------------------------
    plt.figure(figsize=(14, 8))
    sns.set(style="whitegrid")
    bar_width = 0.35
    x = np.arange(len(merged))
    ax = plt.gca()

    # One bar group per metric: (mean column, std column, stars column,
    # horizontal offset, legend label, colour)
    bar_groups = [
        ('avg_pearson', 'std_pearson', 'pearson_sig', -bar_width/2, 'Pearson Correlation', 'skyblue'),
        ('avg_cosine', 'std_cosine', 'cosine_sig', bar_width/2, 'Cosine Similarity', 'lightgreen'),
    ]
    for avg_col, std_col, sig_col, offset, label, color in bar_groups:
        plt.bar(
            x + offset,
            merged[avg_col],
            bar_width,
            yerr=merged[std_col],
            capsize=5,
            label=label,
            color=color
        )
        # Annotate significance stars just above the error bar
        for xpos, avg, std, stars in zip(x + offset, merged[avg_col], merged[std_col], merged[sig_col]):
            plt.text(
                xpos,
                avg + std + 0.02,
                stars,
                ha='center',
                va='bottom',
                fontsize=21,
                color='black'
            )

    # Labels and Title
    plt.xlabel('Synergy Pairs', fontsize=21)
    plt.ylabel('Similarity Metrics', fontsize=21)
    plt.title(
        'Average Pearson Correlation and Cosine Similarity\n'
        'Between Reach & Grasp Phase and Lift & Hold Phase',
        fontsize=22
    )
    plt.xticks(x, [f"Pair {int(s)}" for s in merged['synergy_pair']], fontsize=16)
    plt.yticks(fontsize=16)

    # Add annotation about H₀
    plt.text(
        0.01, 0.98,
        "One-sample t-test: H0: mean <= 0.7 vs. H1: mean > 0.7",
        transform=ax.transAxes,
        ha='left',
        va='top',
        fontsize=15,
        bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="black", alpha=0.5)
    )

    # Calculate maximum y-value for ylim (leave head-room for the stars)
    max_p = (merged['avg_pearson'] + merged['std_pearson']).max()
    max_c = (merged['avg_cosine'] + merged['std_cosine']).max()
    max_value = max(max_p, max_c) + 0.2
    plt.ylim(0, max_value)

    plt.legend(loc='upper right', fontsize=17)
    plt.tight_layout()

    # Save Plot
    plot_path = os.path.join(OUTPUT_DIR, "annotated_similarity_synergy_pairs.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"[INFO] Saved annotated bar plot to {plot_path}")

    # ---------------------------------------------------------------------------------------
    # Save Summary Table as PNG
    # ---------------------------------------------------------------------------------------
    fig, ax = plt.subplots(figsize=(14, 2 + 0.5 * len(merged)))
    ax.axis('off')

    # Column titles; the dict order is also the display order of the table
    column_titles = {
        'synergy_pair': 'Synergy Pair',
        'avg_pearson': 'Avg Pearson Corr',
        'std_pearson': 'Std Pearson Corr',
        't_pear': 'Pearson t',
        'p_pear': 'Pearson p (1-sided)',
        'pearson_sig': 'Pearson Sig',
        'avg_cosine': 'Avg Cosine Sim',
        'std_cosine': 'Std Cosine Sim',
        't_cos': 'Cosine t',
        'p_cos': 'Cosine p (1-sided)',
        'cosine_sig': 'Cosine Sig'
    }
    display_columns = list(column_titles.values())
    table_df = merged.rename(columns=column_titles)[display_columns]

    table_display = ax.table(
        cellText=table_df.values,
        colLabels=display_columns,
        cellLoc='center',
        loc='center'
    )

    # Customize table appearance
    table_display.auto_set_font_size(False)
    table_display.set_fontsize(10)
    table_display.scale(1.2, 1.2)

    # Add title
    plt.title(
        "Synergy Pair Similarity (Greedy): One-Sample T-tests vs 0.7\n"
        "H₀: mean <= 0.7, H₁: mean > 0.7",
        fontsize=14, pad=20
    )
    plt.tight_layout()

    # Save Table as PNG
    table_png = os.path.join(OUTPUT_DIR, "summary_table_synergy_pairs.png")
    plt.savefig(table_png, bbox_inches='tight', dpi=300)
    plt.close()
    print(f"[INFO] Saved summary table as PNG to {table_png}")

# ------------------------------------------------------------------------------------------
# 3) Execute Analysis
# ------------------------------------------------------------------------------------------
def main():
    """Run the Phase 1 vs. Phase 2 synergy similarity analysis (greedy matching)."""
    synergy_similarity_analysis()

# Only start the analysis when this code is executed as the main program.
if __name__ == "__main__":
    main()


[INFO] Missing synergy data for Trial 20 of P(7). Skipping.
[INFO] Missing synergy data for Trial 22 of P(7). Skipping.
[INFO] Saved raw synergy data to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q1 Synergy Similarity Between Phases\Best Matching Greedy\best_matching_synergy_raw.csv
[INFO] Saved synergy summary to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q1 Synergy Similarity Between Phases\Best Matching Greedy\best_matching_synergy_summary.csv
[INFO] Saved synergy T-test (vs 0.7) results to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q1 Synergy Similarity Between Phases\Best Matching Greedy\best_matching_synergy_ttest.csv
[INFO] Saved annotated bar plot to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q1 Synergy Similarity Between Phases\Best Matching Greedy\annotated_similarity_synergy_pairs.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(table_png, bbox_inches='tight', dpi=300)
  plt.savefig(table_png, bbox_inches='tight', dpi=300)


[INFO] Saved summary table as PNG to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q1 Synergy Similarity Between Phases\Best Matching Greedy\summary_table_synergy_pairs.png


## Question 2: Synergy Similarity between Phases by Known and Unknown Conditions

### Best Matching Pairs with Hungarian Algorithm

In [None]:
"""
Impact of Preplanning and Task Knowledge on Extracted Synergies
Compare "Unknown Weight Distribution" vs "Known Weight Distribution" conditions
Applies Best Matching (Hungarian Algorithm) before calculating Pearson Correlation
Excludes specified trials for Participants 7 and 8
Creates annotated bar plots and summary tables with rounded numerical values.
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, ttest_rel
from scipy.optimize import linear_sum_assignment
import seaborn as sns

# ------------------------------------------------------------------------------------------
# 0) Global Configuration
# ------------------------------------------------------------------------------------------
# Root folder of the experimental data; participant sub-folders named "P(<id>)"
# are looked up inside it.
BASE_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Experimental Data"
# Participant IDs to process.
PARTICIPANTS = [1, 2, 3, 4, 5, 6, 7, 8]
# Phase tokens used in the synergy file names (index 0 = Phase 1, index 1 = Phase 2).
PHASES = ["phase1", "phase2"]  # "Reach & Grasp" vs. "Lift & Hold"

# Destination folder for the results of this analysis.
OUTPUT_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q2 Synergy Similarity By Knowledge Condition"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # Create the directory if it doesn't exist

# ------------------------------------------------------------------------------------------
# 1) Helper Functions
# ------------------------------------------------------------------------------------------
def trial_info(trial_number):
    """
    Look up the protocol entry of a trial.

    Returns a dict with the keys 'grasp_type', 'handle_type', 'weight_kg',
    'lever_side' and 'knowledge' ('Yes' => Known weight distribution,
    'No' => Unknown), or None when the trial number is not part of the
    protocol (trials 1-24 are defined).
    """
    field_names = ('grasp_type', 'handle_type', 'weight_kg', 'lever_side', 'knowledge')
    protocol = {
        1:  ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Left Lever",  "No"),
        2:  ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Left Lever",  "Yes"),
        3:  ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Right Lever", "No"),
        4:  ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Right Lever", "Yes"),
        5:  ("Ball Grasp", "Ball Handle", 0.50, "Left Lever", "No"),
        6:  ("Ball Grasp", "Ball Handle", 0.50, "Left Lever", "Yes"),
        7:  ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Front Lever", "No"),
        8:  ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Front Lever", "Yes"),
        9:  ("Disc Grip", "Disc Handle", 0.50, "Back Lever", "No"),
        10: ("Disc Grip", "Disc Handle", 0.50, "Back Lever", "Yes"),
        11: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Front Lever", "No"),
        12: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Front Lever", "Yes"),
        13: ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Front Lever", "No"),
        14: ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Front Lever", "Yes"),
        15: ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Back Lever", "No"),
        16: ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Back Lever", "Yes"),
        17: ("Ball Grasp", "Ball Handle", 0.50, "Front Lever", "No"),
        18: ("Ball Grasp", "Ball Handle", 0.50, "Front Lever", "Yes"),
        19: ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Back Lever", "No"),
        20: ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Back Lever", "Yes"),
        21: ("Disc Grip", "Disc Handle", 0.50, "Left Lever", "No"),
        22: ("Disc Grip", "Disc Handle", 0.50, "Left Lever", "Yes"),
        23: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Right Lever", "No"),
        24: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Right Lever", "Yes"),
    }
    entry = protocol.get(trial_number)
    if entry is None:
        return None
    # Pair each tuple position with its field name, in protocol order.
    return dict(zip(field_names, entry))

def cosine_similarity(vec_a, vec_b):
    """
    Cosine similarity between two 1D vectors.

    NaN is returned when the product of the two norms is below 1e-12.
    """
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    return np.nan if norm_product < 1e-12 else np.dot(vec_a, vec_b) / norm_product

def load_synergy_factors(participant_dir, trial_idx, phase_name):
    """
    Read the W and H arrays of one participant / trial / phase from the
    participant's "Extracted Synergies" folder.

    The files are named ``trial_<NN>_<phase_name>_W.npy`` and
    ``trial_<NN>_<phase_name>_H.npy`` (``<NN>`` = two-digit trial index).

    Returns:
        (W, H) as loaded numpy arrays, or (None, None) unless both files exist.
    """
    synergy_dir = os.path.join(participant_dir, "Extracted Synergies")
    stem = f"trial_{trial_idx:02d}_{phase_name}"
    w_file, h_file = (
        os.path.join(synergy_dir, stem + suffix) for suffix in ("_W.npy", "_H.npy")
    )
    if not os.path.exists(w_file) or not os.path.exists(h_file):
        return None, None
    return np.load(w_file), np.load(h_file)

def match_synergies_best(W1, W2):
    """
    Pair the synergies (columns) of W1 and W2 with the Hungarian algorithm
    so that the total cosine similarity of the pairs is maximal.

    Returns a list of (i1, i2) column-index pairs, or an empty list (after
    printing a warning) when the two matrices differ in their number of
    columns.
    """
    n_synergies = W1.shape[1]
    if n_synergies != W2.shape[1]:
        print("[WARN] Number of synergies in Phase 1 and Phase 2 do not match. Skipping matching.")
        return []

    # Cost = negated similarity (the solver minimises); cells whose
    # similarity is NaN keep the initial cost of 0.
    cost = np.zeros((n_synergies, n_synergies))
    for r, c in np.ndindex(n_synergies, n_synergies):
        similarity = cosine_similarity(W1[:, r], W2[:, c])
        if not np.isnan(similarity):
            cost[r, c] = -similarity

    rows, cols = linear_sum_assignment(cost)
    return list(zip(rows, cols))

def sig_label(p):
    """
    Convert a p-value into significance stars.

    p < 0.001 -> '***', p < 0.01 -> '**', p < 0.05 -> '*', anything else -> 'ns'.
    """
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    return '*' if p < 0.05 else 'ns'

# ------------------------------------------------------------------------------------------
# 2) Main Analysis Function
# ------------------------------------------------------------------------------------------
def synergy_similarity_analysis_repeated():
    """
    Repeated-measures (within-subjects) comparison of Phase 1 vs. Phase 2
    synergy similarity between the "Known" and "Unknown" weight distribution
    conditions, using best matching (Hungarian algorithm).

    Steps:
        1. Iterate over participants and trials 1-24 (trials 5-8 of
           participant 7 are excluded).
        2. Label each trial "Known" or "Unknown" from ``trial_info``.
        3. Match Phase 1 and Phase 2 synergies with ``match_synergies_best``.
        4. Compute the Pearson correlation of every matched pair.
        5. Average the correlations per participant, condition and synergy pair.
        6. Run a paired t-test (Known vs. Unknown) for each synergy pair.
        7. Write CSV files, an annotated bar plot, a boxplot and two PNG
           tables to ``OUTPUT_DIR``.

    Returns:
        None. Results are written to ``OUTPUT_DIR`` and progress is printed.
        If no synergy data could be collected, the function returns right
        after writing the (empty) raw CSV.
    """
    similarity_rows = []

    # ---- Gather synergy data from all participants & trials ----
    for pid in PARTICIPANTS:
        participant_str = f"P({pid})"
        participant_dir = os.path.join(BASE_DIR, participant_str)
        if not os.path.isdir(participant_dir):
            print(f"[WARN] Missing folder for {participant_str}. Skipping.")
            continue

        print(f"\n=== Analyzing {participant_str} ===")

        for trial_idx in range(1, 25):
            # Exclude specified trials for participant 7
            if pid == 7 and trial_idx in [5, 6, 7, 8]:
                print(f"[INFO] Skipping Participant {pid}, Trial {trial_idx} as per exclusion criteria.")
                continue

            meta = trial_info(trial_idx)
            if meta is None:
                print(f"[INFO] Trial {trial_idx} has no metadata. Skipping.")
                continue

            knowledge_flag = meta['knowledge']  # "Yes" => Known, "No" => Unknown
            condition = "Known" if knowledge_flag == "Yes" else "Unknown"

            W_phase1, _ = load_synergy_factors(participant_dir, trial_idx, PHASES[0])
            W_phase2, _ = load_synergy_factors(participant_dir, trial_idx, PHASES[1])
            if W_phase1 is None or W_phase2 is None:
                print(f"[INFO] Missing synergy data for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
            if W_phase1.shape[1] != W_phase2.shape[1]:
                print(f"[WARN] Synergy count mismatch for Trial {trial_idx} of {participant_str}. Skipping.")
                continue

            # Best matching
            matched_pairs = match_synergies_best(W_phase1, W_phase2)
            if not matched_pairs:
                print(f"[WARN] No matched synergies for Trial {trial_idx} of {participant_str}. Skipping.")
                continue

            for pair_num, (i1, i2) in enumerate(matched_pairs, start=1):
                vec1 = W_phase1[:, i1]
                vec2 = W_phase2[:, i2]

                pear, _ = pearsonr(vec1, vec2)

                # Save row
                similarity_rows.append({
                    'participant': pid,
                    'trial_idx': trial_idx,
                    'condition': condition,      # 'Known' or 'Unknown'
                    'synergy_idx': pair_num,     # Synergy pair number within the trial
                    'pearson_corr': pear,
                })

    # ---- Convert to DataFrame & Save Raw Data ----
    synergy_df = pd.DataFrame(similarity_rows)
    raw_csv = os.path.join(OUTPUT_DIR, "repeated_synergy_data_raw.csv")
    synergy_df.to_csv(raw_csv, index=False)
    print(f"\n[INFO] Saved raw synergy data to {raw_csv}")

    # Without any rows the frame has no columns, so the groupby below would
    # fail with a KeyError; stop here instead.
    if synergy_df.empty:
        print("[INFO] No synergy data collected. Exiting.")
        return

    # ---- Aggregation: Average per Participant, Condition, Synergy ----
    grouped = synergy_df.groupby(['participant', 'condition', 'synergy_idx']).agg(
        pearson_mean=('pearson_corr', 'mean'),
    ).reset_index()
    agg_csv = os.path.join(OUTPUT_DIR, "repeated_synergy_data_agg.csv")
    grouped.to_csv(agg_csv, index=False)
    print(f"[INFO] Saved aggregated synergy data to {agg_csv}")

    # ---- Pivot for Paired T-Tests ----
    # One row per (participant, synergy) with 'Known' and 'Unknown' side by side
    pivot_pearson = grouped.pivot_table(index=['participant', 'synergy_idx'],
                                        columns='condition',
                                        values='pearson_mean').reset_index()
    pivot_pearson.columns.name = None

    # ---- Perform Paired T-Tests ----
    ttest_rows = []
    synergy_list = sorted(grouped['synergy_idx'].unique())
    for s_idx in synergy_list:
        pearson_subset = pivot_pearson[pivot_pearson['synergy_idx'] == s_idx]
        known_vals_pearson = pearson_subset['Known'].dropna()
        unknown_vals_pearson = pearson_subset['Unknown'].dropna()

        # Ensure matching participants: after dropping NaNs both series must
        # still refer to the same rows, otherwise the samples are not paired.
        if not known_vals_pearson.index.equals(unknown_vals_pearson.index):
            print(f"[WARN] Participant mismatch for synergy {s_idx}. Skipping.")
            continue

        # Paired t-test for Pearson correlation
        t_p, p_p = ttest_rel(known_vals_pearson, unknown_vals_pearson, nan_policy='omit')

        ttest_rows.append({
            'synergy_idx': s_idx,
            'pearson_t': t_p,
            'pearson_p': p_p,
        })

    # ---- Convert T-Test Results to DataFrame ----
    ttest_df = pd.DataFrame(ttest_rows)
    ttest_csv = os.path.join(OUTPUT_DIR, "paired_ttest_synergy.csv")
    ttest_df.to_csv(ttest_csv, index=False)
    print(f"[INFO] Paired t-test results saved to {ttest_csv}\n")

    # ---- Print T-Test Results to Console ----
    print("Paired T-Test Results for Each Synergy:")
    print(ttest_df.to_string(index=False))

    # ---- Significance Labeling ----
    ttest_df['pearson_sig'] = ttest_df['pearson_p'].apply(sig_label)

    enhanced_csv = os.path.join(OUTPUT_DIR, "paired_ttest_synergy_enhanced.csv")
    ttest_df.to_csv(enhanced_csv, index=False)
    print(f"[INFO] Enhanced t-test results with significance labels saved to {enhanced_csv}")

    # ------------------------------------------------------------------------------------------
    # 3) Annotated Bar Plots
    # ------------------------------------------------------------------------------------------
    # Mean and standard deviation across participants, per condition and synergy
    summary_pearson = grouped.groupby(['condition', 'synergy_idx']).agg(
        avg_pearson=('pearson_mean', 'mean'),
        std_pearson=('pearson_mean', 'std')
    ).reset_index()

    # Pivot for plotting
    pivot_plot_pearson = summary_pearson.pivot(index='synergy_idx', columns='condition', values='avg_pearson').reset_index()
    pivot_plot_pearson_std = summary_pearson.pivot(index='synergy_idx', columns='condition', values='std_pearson').reset_index()

    # Plotting Annotated Bar Plot for Pearson Correlation
    plt.figure(figsize=(12, 8))
    sns.set(style="whitegrid")
    bar_width = 0.35
    indices = np.arange(1, len(synergy_list) + 1)  # x position of each synergy pair

    # Bars for Known and Unknown
    known_means_pearson = pivot_plot_pearson[pivot_plot_pearson['synergy_idx'].isin(synergy_list)]['Known']
    known_stds_pearson = pivot_plot_pearson_std[pivot_plot_pearson_std['synergy_idx'].isin(synergy_list)]['Known']
    unknown_means_pearson = pivot_plot_pearson[pivot_plot_pearson['synergy_idx'].isin(synergy_list)]['Unknown']
    unknown_stds_pearson = pivot_plot_pearson_std[pivot_plot_pearson_std['synergy_idx'].isin(synergy_list)]['Unknown']

    plt.bar(indices - bar_width/2, known_means_pearson,
            width=bar_width, yerr=known_stds_pearson,
            capsize=5, label='Known', color='skyblue')

    plt.bar(indices + bar_width/2, unknown_means_pearson,
            width=bar_width, yerr=unknown_stds_pearson,
            capsize=5, label='Unknown', color='lightgreen')

    # Add significance stars based on t-tests
    for _, row in ttest_df.iterrows():
        synergy = row['synergy_idx']
        label = row['pearson_sig']
        # Look the bar up by synergy label: ttest_df has fewer rows than
        # synergy_list whenever a synergy was skipped in the t-test loop, so
        # its row index is not a valid bar position.
        synergy_position = synergy_list.index(synergy)
        km = known_means_pearson.iloc[synergy_position]
        ks = known_stds_pearson.iloc[synergy_position]
        um = unknown_means_pearson.iloc[synergy_position]
        us = unknown_stds_pearson.iloc[synergy_position]
        # Position the stars slightly above the higher bar (including its error bar)
        y_max = max(km + ks, um + us)
        plt.text(s=label, x=indices[synergy_position],
                 y=y_max + 0.02,
                 ha='center', va='bottom', color='black', fontsize=14)

    plt.xlabel('Synergy Pair', fontsize=14)
    plt.ylabel('Average Pearson Correlation', fontsize=14)
    plt.title('Average Pearson Correlation by Condition per Synergy Pair', fontsize=16)
    plt.xticks(indices, [f"Synergy Pair {i}" for i in synergy_list], fontsize=12)

    # Calculate maximum y-value for ylim (extra headroom for the star labels)
    max_p = (known_means_pearson + known_stds_pearson).max()
    max_u = (unknown_means_pearson + unknown_stds_pearson).max()
    max_value = max(max_p, max_u) + 0.2
    plt.ylim(0, max_value)  # Start y-axis at 0

    # Adjust legend position to lower right
    plt.legend(title='Condition', loc='lower right', fontsize=12)

    plt.tight_layout()
    # Save the plot
    annotated_pearson_plot_path = os.path.join(OUTPUT_DIR, "annotated_pearson_correlation_by_condition.png")
    plt.savefig(annotated_pearson_plot_path, dpi=300)
    plt.close()
    print(f"[INFO] Annotated Pearson correlation by condition plot saved to: {annotated_pearson_plot_path}")

    # ------------------------------------------------------------------------------------------
    # 4) Boxplots
    # ------------------------------------------------------------------------------------------
    # Boxplot for Pearson Correlation by Condition (uses the raw per-trial values)
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='condition', y='pearson_corr', data=synergy_df, palette='Blues')
    plt.title('Pearson Correlation by Condition', fontsize=14)
    plt.xlabel('Condition', fontsize=12)
    plt.ylabel('Pearson Correlation', fontsize=12)
    plt.tight_layout()
    # Save the boxplot
    pearson_boxplot_condition_path = os.path.join(OUTPUT_DIR, "boxplot_pearson_by_condition.png")
    plt.savefig(pearson_boxplot_condition_path, dpi=300)
    plt.close()
    print(f"[INFO] Boxplot of Pearson correlation saved to {pearson_boxplot_condition_path}")

    # ------------------------------------------------------------------------------------------
    # 5) Summary Statistics Table
    # ------------------------------------------------------------------------------------------
    # ---- Create Summary Statistics ----
    # Group by synergy index and condition to compute mean and std
    summary_stats = grouped.groupby(['synergy_idx', 'condition']).agg(
        Avg_Pearson_Corr=('pearson_mean', 'mean'),
        Std_Pearson_Corr=('pearson_mean', 'std')
    ).reset_index()

    # ---- Pivot the Table ----
    # Pivot to have 'Known' and 'Unknown' conditions side by side for each synergy
    summary_pivot = summary_stats.pivot(index='synergy_idx', columns='condition', values=['Avg_Pearson_Corr', 'Std_Pearson_Corr']).reset_index()

    # Flatten the MultiIndex columns into "<stat>_<condition>" names
    summary_pivot.columns = ['Synergy'] + [f"{stat}_{cond}" for stat, cond in summary_pivot.columns[1:]]

    # ---- Merge with T-Test Results ----
    # Select relevant columns from ttest_df
    ttest_df_subset = ttest_df[['synergy_idx', 'pearson_t', 'pearson_p']]

    # Merge with the pivoted summary statistics
    summary_pivot = summary_pivot.merge(ttest_df_subset, left_on='Synergy', right_on='synergy_idx')

    # Drop the redundant 'synergy_idx' column after merge
    summary_pivot = summary_pivot.drop('synergy_idx', axis=1)

    # ---- Round Numerical Values to Three Decimal Places ----
    summary_pivot = summary_pivot.round({
        'Avg_Pearson_Corr_Known': 3,
        'Std_Pearson_Corr_Known': 3,
        'Avg_Pearson_Corr_Unknown': 3,
        'Std_Pearson_Corr_Unknown': 3,
        'pearson_t': 3,
        'pearson_p': 3,
    })

    # ---- Select and Rename Columns for Clarity ----
    summary_pivot = summary_pivot[['Synergy',
                                   'Avg_Pearson_Corr_Known', 'Avg_Pearson_Corr_Unknown',
                                   'pearson_t', 'pearson_p']]

    # Reassign instead of renaming in place: the frame above is a column
    # selection of another frame.
    summary_pivot = summary_pivot.rename(columns={
        'Avg_Pearson_Corr_Known': 'Pearson Known',
        'Avg_Pearson_Corr_Unknown': 'Pearson Unknown',
        'pearson_t': 'Pearson t',
        'pearson_p': 'Pearson p',
    })

    # ---- Create and Save the Table as PNG ----
    # Initialize a matplotlib figure
    fig, ax = plt.subplots(figsize=(14, 1 + 0.5 * len(summary_pivot)))  # Adjust height based on number of synergies
    ax.axis('off')  # Hide the axes

    # Create the table
    table = ax.table(cellText=summary_pivot.values,
                     colLabels=summary_pivot.columns,
                     cellLoc='center',
                     loc='center')

    # Customize table appearance
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1.2, 1.2)  # Adjust as needed for better fit

    # Add a title
    plt.title('Synergy Correlation Results by Condition', fontsize=16, pad=20)

    # Adjust layout to ensure headers fit
    plt.tight_layout()

    # Define the path to save the table PNG
    summary_table_png = os.path.join(OUTPUT_DIR, "synergy_correlation_results_table.png")

    # Save the table as a PNG image
    plt.savefig(summary_table_png, bbox_inches='tight', dpi=300)

    # Close the plot to free memory
    plt.close()

    print(f"[INFO] Synergy correlation results table saved as PNG to: {summary_table_png}")

    # ------------------------------------------------------------------------------------------
    # 6) Annotated Summary Statistics Table with Significance
    # ------------------------------------------------------------------------------------------
    ttest_df_pearson = ttest_df[['synergy_idx', 'pearson_sig']].rename(columns={'pearson_sig': 'Pearson_Sig'})

    summary_stats = summary_stats.merge(ttest_df_pearson, on='synergy_idx')

    # ---- Pivot again with significance ----
    summary_pivot_sig = summary_stats.pivot(index='synergy_idx', columns='condition', values=['Avg_Pearson_Corr', 'Std_Pearson_Corr',
                                                                                               'Pearson_Sig']).reset_index()
    # Flatten the MultiIndex columns. The significance label is per synergy,
    # not per condition, so the pivot yields it once per condition; the last
    # column (presumably the 'Unknown' copy) is relabelled plain 'Pearson_Sig'.
    summary_pivot_sig.columns = ['Synergy'] + [f"{stat}_{cond}" for stat, cond in summary_pivot_sig.columns[1:-1]] + ['Pearson_Sig']

    # Sort by Synergy index
    summary_pivot_sig.sort_values('Synergy', inplace=True)

    # Create a figure for the annotated table
    fig, ax = plt.subplots(figsize=(18, 2 + 0.6 * len(summary_pivot_sig)))  # Adjust height based on number of synergies
    ax.axis('off')  # Hide the axes

    # Prepare table data with significance annotations
    table_data = summary_pivot_sig.values
    column_labels = summary_pivot_sig.columns.tolist()

    # Create the table
    table = ax.table(
        cellText=table_data,
        colLabels=column_labels,
        cellLoc='center',
        loc='center'
    )

    # Customize table appearance
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1.2, 1.2)

    # Add title
    plt.title("Summary Statistics of Synergy Similarities by Condition with Significance", fontsize=16, pad=20)

    # Adjust layout to ensure headers fit
    plt.tight_layout()

    # Save the annotated table as PNG
    summary_table_sig_png = os.path.join(OUTPUT_DIR, "summary_statistics_synergy_with_significance.png")
    plt.savefig(summary_table_sig_png, bbox_inches='tight', dpi=300)
    plt.close()
    print(f"[INFO] Summary statistics table with significance saved as PNG to: {summary_table_sig_png}")

# ------------------------------------------------------------------------------------------
# 3) Execute Analysis
# ------------------------------------------------------------------------------------------
def main():
    """Entry point: run synergy_similarity_analysis_repeated()."""
    synergy_similarity_analysis_repeated()

# Start the analysis only when this code runs as the main program.
if __name__ == "__main__":
    main()


### Best Matching Pairs with Greedy Algorithm 

In [None]:
"""
Impact of Preplanning and Task Knowledge on Extracted Synergies
Compare "Unknown Weight Distribution" vs "Known Weight Distribution" conditions
Applies Best Matching (Greedy Algorithm) before calculating Pearson Correlation
Creates annotated bar plots and summary tables with rounded numerical values.
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, ttest_rel
import seaborn as sns

# ------------------------------------------------------------------------------------------
# 0) Global Configuration
# ------------------------------------------------------------------------------------------
# Root folder of the experimental data; the analysis looks for one
# sub-folder per participant named "P(<id>)" inside it.
BASE_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Experimental Data"
# Participant IDs that are iterated by the analysis.
PARTICIPANTS = [1, 2, 3, 4, 5, 6, 7, 8]
# Phase names as they appear in the synergy file names
# (trial_<NN>_<phase>_W.npy / _H.npy).
PHASES = ["phase1", "phase2"]  # "Reach & Grasp" vs. "Lift & Hold"

# NEW OUTPUT DIRECTORY FOR GREEDY MATCHING
# All CSV files and figures produced by this cell are written here.
OUTPUT_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q2 Synergy Similarity By Knowledge Condition\Best Matching Greedy"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # Create the directory if it doesn't exist

# ------------------------------------------------------------------------------------------
# 1) Helper Functions
# ------------------------------------------------------------------------------------------
def trial_info(trial_number):
    """
    Look up the protocol entry of a trial.

    Returns:
        A dict with the keys 'grasp_type', 'handle_type', 'weight_kg',
        'lever_side' and 'knowledge' ('Yes' => Known weight distribution,
        'No' => Unknown), or None if the trial number is not part of the
        protocol (valid numbers are 1-24).
    """
    field_names = ('grasp_type', 'handle_type', 'weight_kg', 'lever_side', 'knowledge')

    # trial number -> (grasp type, handle type, weight in kg, lever side, knowledge)
    protocol = {
        1:  ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Left Lever",  "No"),
        2:  ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Left Lever",  "Yes"),
        3:  ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Right Lever", "No"),
        4:  ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Right Lever", "Yes"),
        5:  ("Ball Grasp", "Ball Handle", 0.50, "Left Lever", "No"),
        6:  ("Ball Grasp", "Ball Handle", 0.50, "Left Lever", "Yes"),
        7:  ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Front Lever", "No"),
        8:  ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Front Lever", "Yes"),
        9:  ("Disc Grip", "Disc Handle", 0.50, "Back Lever", "No"),
        10: ("Disc Grip", "Disc Handle", 0.50, "Back Lever", "Yes"),
        11: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Front Lever", "No"),
        12: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Front Lever", "Yes"),
        13: ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Front Lever", "No"),
        14: ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Front Lever", "Yes"),
        15: ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Back Lever", "No"),
        16: ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Back Lever", "Yes"),
        17: ("Ball Grasp", "Ball Handle", 0.50, "Front Lever", "No"),
        18: ("Ball Grasp", "Ball Handle", 0.50, "Front Lever", "Yes"),
        19: ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Back Lever", "No"),
        20: ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Back Lever", "Yes"),
        21: ("Disc Grip", "Disc Handle", 0.50, "Left Lever", "No"),
        22: ("Disc Grip", "Disc Handle", 0.50, "Left Lever", "Yes"),
        23: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Right Lever", "No"),
        24: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Right Lever", "Yes"),
    }

    record = protocol.get(trial_number)
    if record is None:
        return None
    return dict(zip(field_names, record))

def cosine_similarity(vec_a, vec_b):
    """
    Cosine of the angle between two 1D vectors.

    Returns NaN when the product of the two norms is below 1e-12, i.e. when
    at least one vector is (numerically) zero and the angle is undefined.
    """
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    return np.nan if norm_product < 1e-12 else np.dot(vec_a, vec_b) / norm_product

def load_synergy_factors(participant_dir, trial_idx, phase_name):
    """
    Fetch the synergy factor (W) and synergy activation (H) arrays stored for
    one participant, trial and phase in the "Extracted Synergies" folder.

    Returns:
        (W, H) as loaded by ``np.load``; W is documented as
        (n_features, n_synergies) and H as (n_synergies, n_samples).
        (None, None) when the W file or the H file is not on disk.
    """
    folder = os.path.join(participant_dir, "Extracted Synergies")
    base_name = f"trial_{trial_idx:02d}_{phase_name}"
    w_path = os.path.join(folder, base_name + "_W.npy")
    h_path = os.path.join(folder, base_name + "_H.npy")

    # Only a complete W/H pair counts as available data.
    if not os.path.exists(w_path) or not os.path.exists(h_path):
        return None, None

    return np.load(w_path), np.load(h_path)

def match_synergies_greedy(W1, W2):
    """
    Greedy matching of synergies to maximize cosine similarity.

    Repeatedly picks the most similar pair among the still-unmatched columns
    of W1 and W2 and removes both columns from further consideration. Ties
    go to the pair with the lowest W1 index, then the lowest W2 index. Pairs
    whose similarity is NaN are never selected; matching stops early if only
    such pairs remain.

    Args:
        W1: 2D array whose columns are the Phase 1 synergies.
        W2: 2D array whose columns are the Phase 2 synergies.

    Returns:
        A list of (i1, i2) column-index pairs in the order they were picked
        (most similar first), or an empty list (after printing a warning)
        when W1 and W2 have a different number of columns.
    """
    n_s1 = W1.shape[1]
    n_s2 = W2.shape[1]

    if n_s1 != n_s2:
        print("[WARN] Number of synergies in Phase 1 and Phase 2 do not match. Skipping matching.")
        return []

    # The similarities do not change while matching, so compute each one
    # exactly once. -inf marks entries that must never be selected: NaN
    # similarities now, and already-matched rows/columns later.
    sims = np.full((n_s1, n_s2), -np.inf)
    for i in range(n_s1):
        for j in range(n_s2):
            sim = cosine_similarity(W1[:, i], W2[:, j])
            if not np.isnan(sim):
                sims[i, j] = sim

    matched_pairs = []
    for _ in range(n_s1):
        # argmax returns the first maximum in row-major order, which is the
        # lowest-index tie-breaking described in the docstring.
        i, j = divmod(int(np.argmax(sims)), n_s2)
        if sims[i, j] == -np.inf:
            # No valid match found among the remaining synergies
            break

        matched_pairs.append((i, j))
        # Remove the matched indices from further consideration
        sims[i, :] = -np.inf
        sims[:, j] = -np.inf

    return matched_pairs

def sig_label(p):
    """
    Translate a p-value into significance stars.

    '***' if p < 0.001, '**' if p < 0.01, '*' if p < 0.05, else 'ns'.
    A NaN p-value satisfies none of the comparisons and therefore gives 'ns'.
    """
    star_levels = [(0.001, '***'), (0.01, '**'), (0.05, '*')]
    # First (i.e. strictest) threshold that p falls below wins.
    return next((stars for limit, stars in star_levels if p < limit), 'ns')

# ------------------------------------------------------------------------------------------
# 2) Main Analysis Function
# ------------------------------------------------------------------------------------------
def synergy_similarity_analysis_repeated():
    """
    Performs a repeated-measures (within-subjects) analysis comparing synergy similarities
    between "Known" and "Unknown" weight distribution conditions using best matching (Greedy).
    
    Steps:
        1. Iterates over participants and trials.
        2. Groups trials by condition (Known vs. Unknown).
        3. Applies best matching between Phase 1 & Phase 2 synergies (Greedy).
        4. Computes Pearson correlation between matched synergies.
        5. Aggregates data for each participant and synergy.
        6. Performs paired t-tests for each synergy.
        7. Generates and saves visualizations, including annotated plots and summary tables.
    """
    similarity_rows = []
    
    # ---- Gather synergy data from all participants & trials ----
    for pid in PARTICIPANTS:
        participant_str = f"P({pid})"
        participant_dir = os.path.join(BASE_DIR, participant_str)
        if not os.path.isdir(participant_dir):
            print(f"[WARN] Missing folder for {participant_str}. Skipping.")
            continue

        print(f"\n=== Analyzing {participant_str} ===")
    
        for trial_idx in range(1, 25):
            # Exclude specified trials for participant 7
            if pid == 7 and trial_idx in [5, 6, 7, 8]:
                print(f"[INFO] Skipping Participant {pid}, Trial {trial_idx} as per exclusion criteria.")
                continue

            meta = trial_info(trial_idx)
            if meta is None:
                print(f"[INFO] Trial {trial_idx} has no metadata. Skipping.")
                continue
    
            knowledge_flag = meta['knowledge']  # "Yes" => Known, "No" => Unknown
            condition = "Known" if knowledge_flag == "Yes" else "Unknown"
    
            W_phase1, _ = load_synergy_factors(participant_dir, trial_idx, PHASES[0])
            W_phase2, _ = load_synergy_factors(participant_dir, trial_idx, PHASES[1])
            if W_phase1 is None or W_phase2 is None:
                print(f"[INFO] Missing synergy data for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
            if W_phase1.shape[1] != W_phase2.shape[1]:
                print(f"[WARN] Synergy count mismatch for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
    
            # Best matching (GREEDY)
            matched_pairs = match_synergies_greedy(W_phase1, W_phase2)
            if not matched_pairs:
                print(f"[WARN] No matched synergies for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
    
            for pair_num, (i1, i2) in enumerate(matched_pairs, start=1):
                vec1 = W_phase1[:, i1]
                vec2 = W_phase2[:, i2]
    
                pear, _ = pearsonr(vec1, vec2)
    
                # Save row
                similarity_rows.append({
                    'participant': pid,
                    'trial_idx': trial_idx,
                    'condition': condition,      # 'Known' or 'Unknown'
                    'synergy_idx': pair_num,     # Synergy pair number within the trial
                    'pearson_corr': pear,
                })
    
    # ---- Convert to DataFrame & Save Raw Data ----
    synergy_df = pd.DataFrame(similarity_rows)
    raw_csv = os.path.join(OUTPUT_DIR, "repeated_synergy_data_raw.csv")
    synergy_df.to_csv(raw_csv, index=False)
    print(f"\n[INFO] Saved raw synergy data to {raw_csv}")
    
    if synergy_df.empty:
        print("[INFO] No synergy data collected. Exiting.")
        return

    # ---- Aggregation: Average per Participant, Condition, Synergy ----
    grouped = synergy_df.groupby(['participant','condition','synergy_idx']).agg(
        pearson_mean=('pearson_corr','mean'),
    ).reset_index()
    agg_csv = os.path.join(OUTPUT_DIR, "repeated_synergy_data_agg.csv")
    grouped.to_csv(agg_csv, index=False)
    print(f"[INFO] Saved aggregated synergy data to {agg_csv}")
    
    # ---- Pivot for Paired T-Tests ----
    pivot_pearson = grouped.pivot_table(index=['participant','synergy_idx'],
                                        columns='condition',
                                        values='pearson_mean').reset_index()
    pivot_pearson.columns.name = None  # Remove multi-level column name
    
    # ---- Perform Paired T-Tests ----
    ttest_rows = []
    synergy_list = sorted(grouped['synergy_idx'].unique())
    for s_idx in synergy_list:
        pearson_subset = pivot_pearson[pivot_pearson['synergy_idx'] == s_idx]
        known_vals_pearson = pearson_subset['Known'].dropna()
        unknown_vals_pearson = pearson_subset['Unknown'].dropna()
    
        # Ensure matching participants
        if not known_vals_pearson.index.equals(unknown_vals_pearson.index):
            print(f"[WARN] Participant mismatch for synergy {s_idx}. Skipping.")
            continue
    
        # Paired t-test for Pearson correlation
        t_p, p_p = ttest_rel(known_vals_pearson, unknown_vals_pearson, nan_policy='omit')
    
        ttest_rows.append({
            'synergy_idx': s_idx,
            'pearson_t': t_p,
            'pearson_p': p_p,
        })
    
    # ---- Convert T-Test Results to DataFrame ----
    ttest_df = pd.DataFrame(ttest_rows)
    ttest_csv = os.path.join(OUTPUT_DIR, "paired_ttest_synergy.csv")
    ttest_df.to_csv(ttest_csv, index=False)
    print(f"[INFO] Paired t-test results saved to {ttest_csv}\n")
    
    # ---- Print T-Test Results to Console ----
    print("Paired T-Test Results for Each Synergy:")
    print(ttest_df.to_string(index=False))
    
    # ---- Significance Labeling ----
    ttest_df['pearson_sig'] = ttest_df['pearson_p'].apply(sig_label)
    
    enhanced_csv = os.path.join(OUTPUT_DIR, "paired_ttest_synergy_enhanced.csv")
    ttest_df.to_csv(enhanced_csv, index=False)
    print(f"[INFO] Enhanced t-test results with significance labels saved to {enhanced_csv}")
    
    # ------------------------------------------------------------------------------------------
    # 3) Annotated Bar Plots (Pearson Correlation: Known vs. Unknown)
    # ------------------------------------------------------------------------------------------
    # Prepare data for plotting Pearson correlations
    summary_pearson = grouped.groupby(['condition','synergy_idx']).agg(
        avg_pearson=('pearson_mean','mean'),
        std_pearson=('pearson_mean','std')
    ).reset_index()
    
    # Pivot for plotting
    pivot_plot_pearson = summary_pearson.pivot(index='synergy_idx', columns='condition', values='avg_pearson').reset_index()
    pivot_plot_pearson_std = summary_pearson.pivot(index='synergy_idx', columns='condition', values='std_pearson').reset_index()
    
    # Plotting Annotated Bar Plot for Pearson Correlation
    plt.figure(figsize=(12, 8))
    sns.set(style="whitegrid")
    bar_width = 0.35
    indices = np.arange(1, len(synergy_list)+1)  # Synergy indices
    
    # Bars for Known and Unknown
    known_means_pearson = pivot_plot_pearson[pivot_plot_pearson['synergy_idx'].isin(synergy_list)]['Known']
    known_stds_pearson = pivot_plot_pearson_std[pivot_plot_pearson_std['synergy_idx'].isin(synergy_list)]['Known']
    unknown_means_pearson = pivot_plot_pearson[pivot_plot_pearson['synergy_idx'].isin(synergy_list)]['Unknown']
    unknown_stds_pearson = pivot_plot_pearson_std[pivot_plot_pearson_std['synergy_idx'].isin(synergy_list)]['Unknown']
    
    plt.bar(indices - bar_width/2, known_means_pearson, 
            width=bar_width, yerr=known_stds_pearson,
            capsize=5, label='Known', color='#F08080')
    
    plt.bar(indices + bar_width/2, unknown_means_pearson, 
            width=bar_width, yerr=unknown_stds_pearson,
            capsize=5, label='Unknown', color='#F0E68C')
    
    for idx, row in ttest_df.iterrows():
        synergy = row['synergy_idx']
        label = row['pearson_sig']
        # Convert synergy_idx to the index in synergy_list
        synergy_position = synergy_list.index(synergy)  
        km = known_means_pearson.iloc[synergy_position]
        ks = known_stds_pearson.iloc[synergy_position]
        um = unknown_means_pearson.iloc[synergy_position]
        us = unknown_stds_pearson.iloc[synergy_position]
        y_max = max(km + ks, um + us)
        
        plt.text(s=label, 
                 x=indices[synergy_position], 
                 y=y_max + 0.02, 
                 ha='center', va='bottom', color='black', fontsize=21)  # 1.5× original
    
    plt.xlabel('Synergy Pairs', fontsize=21)   # 1.5× original
    plt.ylabel('Average Pearson Correlation', fontsize=21)  # 1.5× original
    plt.title('Average Pearson Correlation between Synergy Pairs for Known & Unknown', fontsize=22)  # 1.5× original
    plt.xticks(indices, [f"{s}" for s in synergy_list], fontsize=16)  # 1.5× original
    plt.yticks(fontsize=16)

    # Calculate maximum y-value for ylim
    max_p = (known_means_pearson + known_stds_pearson).max()
    max_u = (unknown_means_pearson + unknown_stds_pearson).max()
    max_value = max(max_p, max_u) + 0.2
    plt.ylim(0, max_value)  # Start y-axis at 0
    
    # Adjust legend position
    plt.legend(title='Condition', loc='upper right', fontsize=17)  # 1.5× original
    
    plt.tight_layout()
    # Save the plot
    annotated_pearson_plot_path = os.path.join(OUTPUT_DIR, "annotated_pearson_correlation_by_condition.png")
    plt.savefig(annotated_pearson_plot_path, dpi=300)
    plt.close()
    print(f"[INFO] Annotated Pearson correlation by condition plot saved to: {annotated_pearson_plot_path}")
    
    # ------------------------------------------------------------------------------------------
    # 4) Boxplots
    # ------------------------------------------------------------------------------------------
    # Boxplot for Pearson Correlation by Condition
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='condition', y='pearson_corr', data=synergy_df, palette='Blues')
    plt.title('Pearson Correlation by Condition', fontsize=21)  # 1.5× original
    plt.xlabel('Condition', fontsize=18)  # 1.5× original
    plt.ylabel('Pearson Correlation', fontsize=18)  # 1.5× original
    plt.tight_layout()
    # Save the boxplot
    pearson_boxplot_condition_path = os.path.join(OUTPUT_DIR, "boxplot_pearson_by_condition.png")
    plt.savefig(pearson_boxplot_condition_path, dpi=300)
    plt.close()
    print(f"[INFO] Boxplot of Pearson correlation saved to {pearson_boxplot_condition_path}")
    
    # ------------------------------------------------------------------------------------------
    # 5) Summary Statistics Table Adjusted
    # ------------------------------------------------------------------------------------------
    # Group by synergy index and condition to compute mean and std
    summary_stats = grouped.groupby(['synergy_idx', 'condition']).agg(
        Avg_Pearson_Corr=('pearson_mean', 'mean'),
        Std_Pearson_Corr=('pearson_mean', 'std')
    ).reset_index()

    # Pivot to have 'Known' and 'Unknown' side by side
    summary_pivot = summary_stats.pivot(
        index='synergy_idx', 
        columns='condition', 
        values=['Avg_Pearson_Corr', 'Std_Pearson_Corr']
    ).reset_index()

    # Flatten the MultiIndex columns
    summary_pivot.columns = ['Synergy'] + [
        f"{stat}_{cond}" for stat, cond in summary_pivot.columns[1:]
    ]

    # Merge with T-Test Results
    ttest_df_subset = ttest_df[['synergy_idx', 'pearson_t', 'pearson_p']]
    summary_pivot = summary_pivot.merge(ttest_df_subset, left_on='Synergy', right_on='synergy_idx')
    summary_pivot = summary_pivot.drop('synergy_idx', axis=1)

    # Round
    summary_pivot = summary_pivot.round({
        'Avg_Pearson_Corr_Known': 3,
        'Std_Pearson_Corr_Known': 3,
        'Avg_Pearson_Corr_Unknown': 3,
        'Std_Pearson_Corr_Unknown': 3,
        'pearson_t': 3,
        'pearson_p': 3,
    })

    # Select and rename columns
    summary_pivot = summary_pivot[[
        'Synergy',
        'Avg_Pearson_Corr_Known', 'Avg_Pearson_Corr_Unknown',
        'pearson_t', 'pearson_p'
    ]]
    summary_pivot.rename(columns={
        'Avg_Pearson_Corr_Known': 'Pearson Known',
        'Avg_Pearson_Corr_Unknown': 'Pearson Unknown',
        'pearson_t': 'Pearson t',
        'pearson_p': 'Pearson p',
    }, inplace=True)

    # ---- Create and Save the Table as PNG ----
    fig, ax = plt.subplots(figsize=(14, 1 + 0.5 * len(summary_pivot)))
    ax.axis('off')

    table = ax.table(cellText=summary_pivot.values,
                     colLabels=summary_pivot.columns,
                     cellLoc='center',
                     loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(18)  # 1.5× original
    table.scale(1.2, 1.2)

    plt.title('Synergy Correlation Results by Condition (Greedy Matching)', fontsize=24, pad=30)  # 1.5× original
    plt.tight_layout()

    summary_table_png = os.path.join(OUTPUT_DIR, "synergy_correlation_results_table.png")
    plt.savefig(summary_table_png, bbox_inches='tight', dpi=300)
    plt.close()

    print(f"[INFO] Synergy correlation results table saved as PNG to: {summary_table_png}")

    # ------------------------------------------------------------------------------------------
    # 6) Annotated Summary Statistics Table with Significance
    # ------------------------------------------------------------------------------------------
    # Merge significance labels
    ttest_df_pearson = ttest_df[['synergy_idx', 'pearson_sig']].rename(columns={'pearson_sig': 'Pearson_Sig'})
    summary_stats = summary_stats.merge(ttest_df_pearson, on='synergy_idx', how='left')

    # Pivot again with significance
    summary_pivot_sig = summary_stats.pivot(
        index='synergy_idx', 
        columns='condition', 
        values=['Avg_Pearson_Corr', 'Std_Pearson_Corr', 'Pearson_Sig']
    ).reset_index()

    # Flatten columns
    col_names = summary_pivot_sig.columns
    new_cols = []
    for c1, c2 in col_names:
        if c1 == 'synergy_idx':
            new_cols.append("Synergy")
        else:
            new_cols.append(f"{c1}_{c2}")
    summary_pivot_sig.columns = new_cols

    # Sort by synergy
    summary_pivot_sig.sort_values('Synergy', inplace=True)

    fig, ax = plt.subplots(figsize=(18, 2 + 0.6 * len(summary_pivot_sig)))
    ax.axis('off')

    table_data = summary_pivot_sig.values
    column_labels = summary_pivot_sig.columns.tolist()

    table = ax.table(
        cellText=table_data,
        colLabels=column_labels,
        cellLoc='center',
        loc='center'
    )
    table.auto_set_font_size(False)
    table.set_fontsize(18)  # 1.5× original
    table.scale(1.2, 1.2)

    plt.title("Summary Statistics of Synergy Similarities by Condition (Greedy) with Significance", 
              fontsize=24, pad=30)  # 1.5× original
    plt.tight_layout()

    summary_table_sig_png = os.path.join(OUTPUT_DIR, "summary_statistics_synergy_with_significance.png")
    plt.savefig(summary_table_sig_png, bbox_inches='tight', dpi=300)
    plt.close()
    print(f"[INFO] Summary statistics table with significance saved as PNG to {summary_table_sig_png}")

# ------------------------------------------------------------------------------------------
# 7) Execute Analysis
# ------------------------------------------------------------------------------------------
def main():
    """Entry point of this cell: run synergy_similarity_analysis_repeated()."""
    synergy_similarity_analysis_repeated()

# Run the analysis only when executed directly (script or notebook cell), not on import.
if __name__ == "__main__":
    main()


### Phase Correlation around Lift-Onset

In [19]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, ttest_rel
import seaborn as sns

# ------------------------------------------------------------------------------------------
# 0) Global Configuration
# ------------------------------------------------------------------------------------------
# Root folder of the experimental data; each participant has a "P(<id>)" sub-folder.
BASE_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Experimental Data"
# Participant IDs 1 through 8.
PARTICIPANTS = list(range(1, 9))
# Phase tags used in the synergy file names.
PHASES = ["phase1", "phase2"]

# Output directory for the greedy-matching analysis on lift-onset synergies
# (created if it does not exist yet).
OUTPUT_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q2 Synergy Similarity By Knowledge Condition\Best Matching Greedy\LiftOnset"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ------------------------------------------------------------------------------------------
# 1) Helper Functions
# ------------------------------------------------------------------------------------------
def trial_info(trial_number):
    """
    Look up the protocol entry of a single trial.

    Trials come in consecutive pairs that share grasp, handle, weight and lever;
    the odd trial of each pair has knowledge "No", the even trial "Yes".

    Returns a dict with the keys 'grasp_type', 'handle_type', 'weight_kg',
    'lever_side' and 'knowledge', or None if trial_number is not in the protocol
    (trials 1-24).
    """
    four_fingers = "Precision Grasp (Four Fingers and Thumb)"
    thumb_index = "Precision Grasp (Thumb and Index)"

    # One row per trial pair: (grasp, handle, weight, lever).
    paired_setups = [
        (four_fingers, "Precision Handle", 0.25, "Left Lever"),                  # trials 1-2
        ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Right Lever"),    # trials 3-4
        ("Ball Grasp", "Ball Handle", 0.50, "Left Lever"),                       # trials 5-6
        (thumb_index, "Precision Handle", 0.25, "Front Lever"),                  # trials 7-8
        ("Disc Grip", "Disc Handle", 0.50, "Back Lever"),                        # trials 9-10
        ("Power Bar Grasp", "Power Bar Handle", 0.50, "Front Lever"),            # trials 11-12
        (four_fingers, "Precision Handle", 0.25, "Front Lever"),                 # trials 13-14
        ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Back Lever"),     # trials 15-16
        ("Ball Grasp", "Ball Handle", 0.50, "Front Lever"),                      # trials 17-18
        (thumb_index, "Precision Handle", 0.25, "Back Lever"),                   # trials 19-20
        ("Disc Grip", "Disc Handle", 0.50, "Left Lever"),                        # trials 21-22
        ("Power Bar Grasp", "Power Bar Handle", 0.50, "Right Lever"),            # trials 23-24
    ]

    protocol = {}
    for pair_no, setup in enumerate(paired_setups):
        protocol[2 * pair_no + 1] = setup + ("No",)
        protocol[2 * pair_no + 2] = setup + ("Yes",)

    entry = protocol.get(trial_number)
    if entry is None:
        return None

    field_names = ('grasp_type', 'handle_type', 'weight_kg', 'lever_side', 'knowledge')
    return dict(zip(field_names, entry))

def cosine_similarity(vec_a, vec_b):
    """
    Cosine similarity of two vectors.

    Returns NaN when the product of the two norms is below 1e-12, so that a
    (near-)zero vector does not cause a division by zero.
    """
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    return np.nan if norm_product < 1e-12 else np.dot(vec_a, vec_b) / norm_product

def load_synergy_factors(participant_dir, trial_idx, phase_name):
    """
    Load the lift-onset synergy matrices W and H of one trial and phase.

    The files are looked up as
    <participant_dir>/Synergies at Lift-Onset/trial_XX_<phase_name>_liftonset_W.npy
    and the matching ..._H.npy (XX = zero-padded trial index).

    Returns (W, H) as loaded by np.load, or (None, None) if either file is missing.
    """
    folder = os.path.join(participant_dir, "Synergies at Lift-Onset")
    tag = f"trial_{trial_idx:02d}_{phase_name}_liftonset"
    w_path, h_path = (os.path.join(folder, f"{tag}_{kind}.npy") for kind in ("W", "H"))

    # Both factor files must be present; a lone W or H is treated as missing data.
    if not os.path.exists(w_path) or not os.path.exists(h_path):
        return None, None
    return np.load(w_path), np.load(h_path)

def match_synergies_greedy(W1, W2):
    """
    Greedily pair the columns of W1 with the columns of W2 by cosine similarity.

    The most similar still-unpaired (i, j) combination is taken first, then the
    next most similar, and so on. Ties go to the lower i, then the lower j.
    Combinations whose similarity is NaN are never paired.

    Returns a list of (i, j) column-index tuples in the order they were chosen,
    or [] (after printing a warning) if W1 and W2 have different column counts.
    """
    n_cols = W1.shape[1]
    if n_cols != W2.shape[1]:
        print("[WARN] Number of synergies do not match. Skipping matching.")
        return []

    # Score every combination once, in row-major order. The comparison drops
    # NaN (and -inf) scores, which can never win a greedy round.
    scored = []
    for i in range(n_cols):
        for j in range(n_cols):
            score = cosine_similarity(W1[:, i], W2[:, j])
            if score > -np.inf:
                scored.append((score, i, j))

    # Stable sort: combinations with equal scores keep their row-major order.
    scored.sort(key=lambda item: item[0], reverse=True)

    taken_rows, taken_cols = set(), set()
    matched_pairs = []
    for _, i, j in scored:
        if i in taken_rows or j in taken_cols:
            continue
        matched_pairs.append((i, j))
        taken_rows.add(i)
        taken_cols.add(j)

    return matched_pairs

def sig_label(p):
    """
    Map a p-value to a significance marker:
    '***' (p < 0.001), '**' (p < 0.01), '*' (p < 0.05), otherwise 'ns'.
    """
    for cutoff, stars in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p < cutoff:
            return stars
    return 'ns'

# ------------------------------------------------------------------------------------------
# 2) Main Analysis Function
# ------------------------------------------------------------------------------------------
def synergy_similarity_analysis_repeated():
    """
    Compare Phase 1 vs. Phase 2 synergy similarity at lift onset between the
    Known and Unknown conditions.

    Steps:
      1. For every participant and trial, load the lift-onset W matrices of both
         phases, pair their columns with match_synergies_greedy and record the
         Pearson correlation of each matched pair.
      2. Average per participant, condition and synergy pair, then run a paired
         t-test (Known vs. Unknown) for each synergy pair.
      3. Write CSV files, an annotated bar plot, a boxplot and two summary
         tables (PNG) to OUTPUT_DIR.

    Returns None. Exits early when no synergy data was collected or when no
    paired t-test could be computed (the CSV files written so far are kept).
    """
    similarity_rows = []
    
    for pid in PARTICIPANTS:
        participant_str = f"P({pid})"
        participant_dir = os.path.join(BASE_DIR, participant_str)
        if not os.path.isdir(participant_dir):
            print(f"[WARN] Missing folder for {participant_str}. Skipping.")
            continue

        print(f"\n=== Analyzing {participant_str} ===")
    
        for trial_idx in range(1, 25):
            # Trials 5-8 of participant 7 are excluded from the analysis.
            if pid == 7 and trial_idx in [5, 6, 7, 8]:
                print(f"[INFO] Skipping Participant {pid}, Trial {trial_idx}.")
                continue

            meta = trial_info(trial_idx)
            if meta is None:
                print(f"[INFO] Trial {trial_idx} has no metadata. Skipping.")
                continue
    
            knowledge_flag = meta['knowledge']
            condition = "Known" if knowledge_flag == "Yes" else "Unknown"
    
            # Only the W matrices are compared; H is loaded but not used here.
            W_phase1, _ = load_synergy_factors(participant_dir, trial_idx, PHASES[0])
            W_phase2, _ = load_synergy_factors(participant_dir, trial_idx, PHASES[1])
            if W_phase1 is None or W_phase2 is None:
                print(f"[INFO] Missing synergy data for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
            if W_phase1.shape[1] != W_phase2.shape[1]:
                print(f"[WARN] Synergy count mismatch for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
    
            matched_pairs = match_synergies_greedy(W_phase1, W_phase2)
            if not matched_pairs:
                print(f"[WARN] No matched synergies for Trial {trial_idx} of {participant_str}. Skipping.")
                continue
    
            # synergy_idx is the rank of the pair in matching order (1 = first pair chosen).
            for pair_num, (i1, i2) in enumerate(matched_pairs, start=1):
                vec1 = W_phase1[:, i1]
                vec2 = W_phase2[:, i2]
    
                pear, _ = pearsonr(vec1, vec2)
    
                similarity_rows.append({
                    'participant': pid,
                    'trial_idx': trial_idx,
                    'condition': condition,
                    'synergy_idx': pair_num,
                    'pearson_corr': pear,
                })
    
    synergy_df = pd.DataFrame(similarity_rows)
    raw_csv = os.path.join(OUTPUT_DIR, "repeated_synergy_data_raw_liftonset.csv")
    synergy_df.to_csv(raw_csv, index=False)
    print(f"\n[INFO] Saved raw synergy data to {raw_csv}")
    
    if synergy_df.empty:
        print("[INFO] No synergy data collected. Exiting.")
        return

    # One mean correlation per participant x condition x synergy pair.
    grouped = synergy_df.groupby(['participant','condition','synergy_idx']).agg(
        pearson_mean=('pearson_corr','mean'),
    ).reset_index()
    agg_csv = os.path.join(OUTPUT_DIR, "repeated_synergy_data_agg_liftonset.csv")
    grouped.to_csv(agg_csv, index=False)
    print(f"[INFO] Saved aggregated synergy data to {agg_csv}")
    
    # Wide layout: one row per participant/synergy with 'Known' and 'Unknown' columns.
    pivot_pearson = grouped.pivot_table(index=['participant','synergy_idx'],
                                        columns='condition',
                                        values='pearson_mean').reset_index()
    pivot_pearson.columns.name = None
    
    ttest_rows = []
    synergy_list = sorted(grouped['synergy_idx'].unique())
    for s_idx in synergy_list:
        pearson_subset = pivot_pearson[pivot_pearson['synergy_idx'] == s_idx]
        known_vals_pearson = pearson_subset['Known'].dropna()
        unknown_vals_pearson = pearson_subset['Unknown'].dropna()
    
        # A paired test needs the same participants in both conditions.
        if not known_vals_pearson.index.equals(unknown_vals_pearson.index):
            print(f"[WARN] Participant mismatch for synergy {s_idx}. Skipping.")
            continue
    
        t_p, p_p = ttest_rel(known_vals_pearson, unknown_vals_pearson, nan_policy='omit')
    
        ttest_rows.append({
            'synergy_idx': s_idx,
            'pearson_t': t_p,
            'pearson_p': p_p,
        })
    
    ttest_df = pd.DataFrame(ttest_rows)
    ttest_csv = os.path.join(OUTPUT_DIR, "paired_ttest_synergy_liftonset.csv")
    ttest_df.to_csv(ttest_csv, index=False)
    print(f"[INFO] Paired t-test results saved to {ttest_csv}\n")
    
    print("Paired T-Test Results for Each Synergy:")
    print(ttest_df.to_string(index=False))
    
    if ttest_df.empty:
        # Every synergy was skipped above, so ttest_df has no columns; the
        # significance labels, plot annotations and tables below would fail.
        print("[WARN] No paired t-test results available. Exiting.")
        return

    ttest_df['pearson_sig'] = ttest_df['pearson_p'].apply(sig_label)
    
    enhanced_csv = os.path.join(OUTPUT_DIR, "paired_ttest_synergy_enhanced_liftonset.csv")
    ttest_df.to_csv(enhanced_csv, index=False)
    print(f"[INFO] Enhanced t-test results saved to {enhanced_csv}")
    
    # ------------------------------------------------------------------------------------------
    # 3) Annotated Bar Plots (Pearson Correlation: Known vs. Unknown) using Lift-Onset Synergies
    # ------------------------------------------------------------------------------------------
    # Mean and standard deviation across participants of the per-participant means.
    summary_pearson = grouped.groupby(['condition','synergy_idx']).agg(
        avg_pearson=('pearson_mean','mean'),
        std_pearson=('pearson_mean','std')
    ).reset_index()
    
    pivot_plot_pearson = summary_pearson.pivot(index='synergy_idx', columns='condition', values='avg_pearson').reset_index()
    pivot_plot_pearson_std = summary_pearson.pivot(index='synergy_idx', columns='condition', values='std_pearson').reset_index()
    
    plt.figure(figsize=(12, 8))
    sns.set(style="whitegrid")
    bar_width = 0.35
    indices = np.arange(1, len(synergy_list)+1)
    
    known_means_pearson = pivot_plot_pearson[pivot_plot_pearson['synergy_idx'].isin(synergy_list)]['Known']
    known_stds_pearson = pivot_plot_pearson_std[pivot_plot_pearson_std['synergy_idx'].isin(synergy_list)]['Known']
    unknown_means_pearson = pivot_plot_pearson[pivot_plot_pearson['synergy_idx'].isin(synergy_list)]['Unknown']
    unknown_stds_pearson = pivot_plot_pearson_std[pivot_plot_pearson_std['synergy_idx'].isin(synergy_list)]['Unknown']
    
    plt.bar(indices - bar_width/2, known_means_pearson, 
            width=bar_width, yerr=known_stds_pearson,
            capsize=5, label='Known', color='#F08080')
    
    plt.bar(indices + bar_width/2, unknown_means_pearson, 
            width=bar_width, yerr=unknown_stds_pearson,
            capsize=5, label='Unknown', color='#F0E68C')
    
    # Put the significance marker just above the taller of the two error bars.
    for _, row in ttest_df.iterrows():
        synergy = row['synergy_idx']
        label = row['pearson_sig']
        synergy_position = synergy_list.index(synergy)
        km = known_means_pearson.iloc[synergy_position]
        ks = known_stds_pearson.iloc[synergy_position]
        um = unknown_means_pearson.iloc[synergy_position]
        us = unknown_stds_pearson.iloc[synergy_position]
        y_max = max(km + ks, um + us)
        
        plt.text(s=label, 
                 x=indices[synergy_position], 
                 y=y_max + 0.02, 
                 ha='center', va='bottom', color='black', fontsize=21)
    
    plt.xlabel('Synergy Pairs', fontsize=21)
    plt.ylabel('Average Pearson Correlation', fontsize=21)
    plt.title('Average Pearson Correlation between Synergy Pairs for Known & Unknown (Lift-Onset)', fontsize=22)
    plt.xticks(indices, [f"{s}" for s in synergy_list], fontsize=16)
    plt.yticks(fontsize=16)
    
    # Leave headroom above the tallest bar + error bar for the markers and legend.
    max_p = (known_means_pearson + known_stds_pearson).max()
    max_u = (unknown_means_pearson + unknown_stds_pearson).max()
    max_value = max(max_p, max_u) + 0.2
    plt.ylim(0, max_value)
    
    plt.legend(title='Condition', loc='upper right', fontsize=17)
    
    plt.tight_layout()
    annotated_pearson_plot_path = os.path.join(OUTPUT_DIR, "annotated_pearson_correlation_by_condition_liftonset.png")
    plt.savefig(annotated_pearson_plot_path, dpi=300)
    plt.close()
    print(f"[INFO] Annotated Pearson correlation by condition plot saved to: {annotated_pearson_plot_path}")
    
    # ------------------------------------------------------------------------------------------
    # 4) Boxplots
    # ------------------------------------------------------------------------------------------
    plt.figure(figsize=(10, 6))
    # Passing `palette` without `hue` is deprecated in seaborn (removal announced
    # for v0.14); assigning the x variable to hue with legend=False is the
    # replacement seaborn recommends for the same appearance.
    sns.boxplot(x='condition', y='pearson_corr', hue='condition', data=synergy_df,
                palette='Blues', legend=False)
    plt.title('Pearson Correlation by Condition (Lift-Onset)', fontsize=21)
    plt.xlabel('Condition', fontsize=18)
    plt.ylabel('Pearson Correlation', fontsize=18)
    plt.tight_layout()
    pearson_boxplot_condition_path = os.path.join(OUTPUT_DIR, "boxplot_pearson_by_condition_liftonset.png")
    plt.savefig(pearson_boxplot_condition_path, dpi=300)
    plt.close()
    print(f"[INFO] Boxplot of Pearson correlation saved to {pearson_boxplot_condition_path}")
    
    # ------------------------------------------------------------------------------------------
    # 5) Summary Statistics Table Adjusted
    # ------------------------------------------------------------------------------------------
    summary_stats = grouped.groupby(['synergy_idx', 'condition']).agg(
        Avg_Pearson_Corr=('pearson_mean', 'mean'),
        Std_Pearson_Corr=('pearson_mean', 'std')
    ).reset_index()
    
    # 'Known' and 'Unknown' side by side, one row per synergy pair.
    summary_pivot = summary_stats.pivot(
        index='synergy_idx', 
        columns='condition', 
        values=['Avg_Pearson_Corr', 'Std_Pearson_Corr']
    ).reset_index()
    
    # Flatten the two-level column index into names such as "Avg_Pearson_Corr_Known".
    summary_pivot.columns = ['Synergy'] + [
        f"{stat}_{cond}" for stat, cond in summary_pivot.columns[1:]
    ]
    
    ttest_df_subset = ttest_df[['synergy_idx', 'pearson_t', 'pearson_p']]
    summary_pivot = summary_pivot.merge(ttest_df_subset, left_on='Synergy', right_on='synergy_idx')
    summary_pivot = summary_pivot.drop('synergy_idx', axis=1)
    
    summary_pivot = summary_pivot.round({
        'Avg_Pearson_Corr_Known': 3,
        'Std_Pearson_Corr_Known': 3,
        'Avg_Pearson_Corr_Unknown': 3,
        'Std_Pearson_Corr_Unknown': 3,
        'pearson_t': 3,
        'pearson_p': 3,
    })
    
    # The rendered table shows the means and the test statistics only (no std columns).
    summary_pivot = summary_pivot[[
        'Synergy',
        'Avg_Pearson_Corr_Known', 'Avg_Pearson_Corr_Unknown',
        'pearson_t', 'pearson_p'
    ]]
    summary_pivot = summary_pivot.rename(columns={
        'Avg_Pearson_Corr_Known': 'Pearson Known',
        'Avg_Pearson_Corr_Unknown': 'Pearson Unknown',
        'pearson_t': 'Pearson t',
        'pearson_p': 'Pearson p',
    })
    
    fig, ax = plt.subplots(figsize=(14, 1 + 0.5 * len(summary_pivot)))
    ax.axis('off')
    
    table = ax.table(cellText=summary_pivot.values,
                     colLabels=summary_pivot.columns,
                     cellLoc='center',
                     loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(18)
    table.scale(1.2, 1.2)
    
    plt.title('Synergy Correlation Results by Condition (Greedy Matching, Lift-Onset)', fontsize=24, pad=30)
    plt.tight_layout()
    
    summary_table_png = os.path.join(OUTPUT_DIR, "synergy_correlation_results_table_liftonset.png")
    plt.savefig(summary_table_png, bbox_inches='tight', dpi=300)
    plt.close()
    
    print(f"[INFO] Synergy correlation results table saved as PNG to: {summary_table_png}")
    
    # ------------------------------------------------------------------------------------------
    # 6) Annotated Summary Statistics Table with Significance
    # ------------------------------------------------------------------------------------------
    ttest_df_pearson = ttest_df[['synergy_idx', 'pearson_sig']].rename(columns={'pearson_sig': 'Pearson_Sig'})
    summary_stats = summary_stats.merge(ttest_df_pearson, on='synergy_idx', how='left')
    
    summary_pivot_sig = summary_stats.pivot(
        index='synergy_idx', 
        columns='condition', 
        values=['Avg_Pearson_Corr', 'Std_Pearson_Corr', 'Pearson_Sig']
    ).reset_index()
    
    # Flatten the two-level column index; the index column becomes "Synergy".
    col_names = summary_pivot_sig.columns
    new_cols = []
    for c1, c2 in col_names:
        if c1 == 'synergy_idx':
            new_cols.append("Synergy")
        else:
            new_cols.append(f"{c1}_{c2}")
    summary_pivot_sig.columns = new_cols
    
    summary_pivot_sig.sort_values('Synergy', inplace=True)
    
    fig, ax = plt.subplots(figsize=(18, 2 + 0.6 * len(summary_pivot_sig)))
    ax.axis('off')
    
    table_data = summary_pivot_sig.values
    column_labels = summary_pivot_sig.columns.tolist()
    
    table = ax.table(
        cellText=table_data,
        colLabels=column_labels,
        cellLoc='center',
        loc='center'
    )
    table.auto_set_font_size(False)
    table.set_fontsize(18)
    table.scale(1.2, 1.2)
    
    plt.title("Summary Statistics of Synergy Similarities by Condition (Greedy, Lift-Onset)", 
              fontsize=24, pad=30)
    plt.tight_layout()
    
    summary_table_sig_png = os.path.join(OUTPUT_DIR, "summary_statistics_synergy_with_significance_liftonset.png")
    plt.savefig(summary_table_sig_png, bbox_inches='tight', dpi=300)
    plt.close()
    print(f"[INFO] Summary statistics table with significance saved as PNG to {summary_table_sig_png}")

# ------------------------------------------------------------------------------------------
# 7) Execute Analysis
# ------------------------------------------------------------------------------------------
def main():
    """Entry point of this cell: run the lift-onset synergy similarity analysis."""
    synergy_similarity_analysis_repeated()

# Run the analysis only when executed directly (script or notebook cell), not on import.
if __name__ == "__main__":
    main()



=== Analyzing P(1) ===

=== Analyzing P(2) ===

=== Analyzing P(3) ===

=== Analyzing P(4) ===

=== Analyzing P(5) ===

=== Analyzing P(6) ===

=== Analyzing P(7) ===
[INFO] Skipping Participant 7, Trial 5.
[INFO] Skipping Participant 7, Trial 6.
[INFO] Skipping Participant 7, Trial 7.
[INFO] Skipping Participant 7, Trial 8.
[INFO] Missing synergy data for Trial 20 of P(7). Skipping.
[INFO] Missing synergy data for Trial 22 of P(7). Skipping.

=== Analyzing P(8) ===

[INFO] Saved raw synergy data to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q2 Synergy Similarity By Knowledge Condition\Best Matching Greedy\LiftOnset\repeated_synergy_data_raw_liftonset.csv
[INFO] Saved aggregated synergy data to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q2 Synergy Similarity By Knowledge Condition\Best Matching Greedy\LiftOnset\repeated_synergy_data_agg_liftonset.csv
[INFO] Paired t-test results saved to C:


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='condition', y='pearson_corr', data=synergy_df, palette='Blues')


[INFO] Boxplot of Pearson correlation saved to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q2 Synergy Similarity By Knowledge Condition\Best Matching Greedy\LiftOnset\boxplot_pearson_by_condition_liftonset.png
[INFO] Synergy correlation results table saved as PNG to: C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q2 Synergy Similarity By Knowledge Condition\Best Matching Greedy\LiftOnset\synergy_correlation_results_table_liftonset.png
[INFO] Summary statistics table with significance saved as PNG to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q2 Synergy Similarity By Knowledge Condition\Best Matching Greedy\LiftOnset\summary_statistics_synergy_with_significance_liftonset.png


## Question 3: Anticipatory Information during Lift Onset

### Similarity Across Synergies computed around the lift onset

In [31]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr, ttest_rel

# ------------------------------------------------------------------------------------------
# 0) Global Configuration
# ------------------------------------------------------------------------------------------
# Root folder of the experimental data; each participant has a "P(<id>)" sub-folder.
BASE_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Experimental Data"
# Participant IDs 1 through 8.
PARTICIPANTS = list(range(1, 9))
# Phase tags used in the synergy file names, e.g., "Reach & Grasp" vs. "Lift & Hold".
PHASES = ["phase1", "phase2"]

# All results of this cell are written here (created if it does not exist yet).
OUTPUT_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q3 Anticipatory Information at Lift Onset"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ------------------------------------------------------------------------------------------
# 1) Helper Functions
# ------------------------------------------------------------------------------------------
def trial_info(trial_number):
    """
    Return the metadata of one trial as a dict, or None for a trial number that
    is not part of the protocol (trials 1-24).

    The 'knowledge' field tells whether the weight distribution was
    Known ('Yes') or Unknown ('No').
    """
    prec_4f = "Precision Grasp (Four Fingers and Thumb)"
    prec_ti = "Precision Grasp (Thumb and Index)"
    prec_h = "Precision Handle"
    pinch, pinch_h = "Lateral Pinch Grasp", "Lateral Pinch Handle"
    ball, ball_h = "Ball Grasp", "Ball Handle"
    disc, disc_h = "Disc Grip", "Disc Handle"
    bar, bar_h = "Power Bar Grasp", "Power Bar Handle"

    # trial number -> (grasp, handle, weight, lever, knowledge)
    protocol = {
        1:  (prec_4f, prec_h, 0.25, "Left Lever", "No"),
        2:  (prec_4f, prec_h, 0.25, "Left Lever", "Yes"),
        3:  (pinch, pinch_h, 0.25, "Right Lever", "No"),
        4:  (pinch, pinch_h, 0.25, "Right Lever", "Yes"),
        5:  (ball, ball_h, 0.50, "Left Lever", "No"),
        6:  (ball, ball_h, 0.50, "Left Lever", "Yes"),
        7:  (prec_ti, prec_h, 0.25, "Front Lever", "No"),
        8:  (prec_ti, prec_h, 0.25, "Front Lever", "Yes"),
        9:  (disc, disc_h, 0.50, "Back Lever", "No"),
        10: (disc, disc_h, 0.50, "Back Lever", "Yes"),
        11: (bar, bar_h, 0.50, "Front Lever", "No"),
        12: (bar, bar_h, 0.50, "Front Lever", "Yes"),
        13: (prec_4f, prec_h, 0.25, "Front Lever", "No"),
        14: (prec_4f, prec_h, 0.25, "Front Lever", "Yes"),
        15: (pinch, pinch_h, 0.25, "Back Lever", "No"),
        16: (pinch, pinch_h, 0.25, "Back Lever", "Yes"),
        17: (ball, ball_h, 0.50, "Front Lever", "No"),
        18: (ball, ball_h, 0.50, "Front Lever", "Yes"),
        19: (prec_ti, prec_h, 0.25, "Back Lever", "No"),
        20: (prec_ti, prec_h, 0.25, "Back Lever", "Yes"),
        21: (disc, disc_h, 0.50, "Left Lever", "No"),
        22: (disc, disc_h, 0.50, "Left Lever", "Yes"),
        23: (bar, bar_h, 0.50, "Right Lever", "No"),
        24: (bar, bar_h, 0.50, "Right Lever", "Yes"),
    }

    try:
        row = protocol[trial_number]
    except KeyError:
        return None

    labels = ('grasp_type', 'handle_type', 'weight_kg', 'lever_side', 'knowledge')
    return {label: value for label, value in zip(labels, row)}

def cosine_similarity(vec_a, vec_b):
    """
    Return the cosine of the angle between vec_a and vec_b, or NaN when the
    product of their norms is below 1e-12 (a (near-)zero vector).
    """
    length_a = np.linalg.norm(vec_a)
    length_b = np.linalg.norm(vec_b)
    scale = length_a * length_b
    if scale < 1e-12:
        return np.nan
    inner = np.dot(vec_a, vec_b)
    return inner / scale

def load_synergy_factors_full(participant_dir, trial_idx, phase_name):
    """
    Load the synergy matrices W and H of one trial and phase from the
    participant's "Extracted Synergies" folder.

    Files: trial_XX_<phase_name>_W.npy and trial_XX_<phase_name>_H.npy
    (XX = zero-padded trial index).

    Returns (W, H) as loaded by np.load, or (None, None) if either file is missing.
    """
    folder = os.path.join(participant_dir, "Extracted Synergies")
    tag = f"trial_{trial_idx:02d}_{phase_name}"
    w_path, h_path = (os.path.join(folder, f"{tag}_{kind}.npy") for kind in ("W", "H"))

    # Both factor files must be present; a lone W or H is treated as missing data.
    if not os.path.exists(w_path) or not os.path.exists(h_path):
        return None, None
    return np.load(w_path), np.load(h_path)

def load_synergy_factors_liftonset(participant_dir, trial_idx, phase_name):
    """
    Load the lift-onset synergy matrices W and H of one trial and phase from the
    participant's "Synergies at Lift-Onset" folder.

    Files: trial_XX_<phase_name>_liftonset_W.npy and ..._liftonset_H.npy
    (XX = zero-padded trial index).

    Returns (W, H) as loaded by np.load, or (None, None) if either file is missing.
    """
    folder = os.path.join(participant_dir, "Synergies at Lift-Onset")
    tag = f"trial_{trial_idx:02d}_{phase_name}_liftonset"
    paths = [os.path.join(folder, tag + suffix) for suffix in ("_W.npy", "_H.npy")]

    # Check W first, then H; stop at the first missing file.
    for path in paths:
        if not os.path.exists(path):
            return None, None

    w_matrix = np.load(paths[0])
    h_matrix = np.load(paths[1])
    return w_matrix, h_matrix

def match_synergies_greedy(W1, W2):
    """
    Greedily pair the columns of W1 with the columns of W2 by cosine similarity.

    In every round the most similar combination of still-unpaired columns is
    chosen. Ties go to the lower W1 column, then the lower W2 column.
    Combinations whose similarity is NaN are never chosen.

    Returns the chosen (i, j) column-index tuples in selection order, or [] if
    W1 and W2 have different numbers of columns.
    """
    count1 = W1.shape[1]
    count2 = W2.shape[1]
    if count1 != count2:
        return []

    # The similarities do not change between rounds, so compute them once.
    scores = {
        (i, j): cosine_similarity(W1[:, i], W2[:, j])
        for i in range(count1)
        for j in range(count2)
    }

    free_rows = list(range(count1))
    free_cols = list(range(count2))
    matched_pairs = []

    while free_rows and free_cols:
        # NaN (and -inf) scores fail this comparison and are not candidates.
        candidates = [
            (i, j)
            for i in free_rows
            for j in free_cols
            if scores[(i, j)] > -np.inf
        ]
        if not candidates:
            break
        # max() returns the first maximal item, i.e. the row-major tie-break.
        winner = max(candidates, key=scores.get)
        matched_pairs.append(winner)
        free_rows.remove(winner[0])
        free_cols.remove(winner[1])

    return matched_pairs

def sig_label(p):
    """Map a p-value to a significance marker.

    Returns ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for
    p < 0.05 and ``'ns'`` otherwise.
    """
    thresholds = ((0.001, '***'), (0.01, '**'), (0.05, '*'))
    for upper_bound, stars in thresholds:
        if p < upper_bound:
            return stars
    return 'ns'

# ------------------------------------------------------------------------------------------
# 2) Main Analysis: Compare Full Phase Correlation vs. Lift-Onset Correlation
# ------------------------------------------------------------------------------------------
def _mean_matched_correlation(W_a, W_b, pairs):
    """Return the mean Pearson r over matched synergy columns.

    ``pairs`` is a list of ``(col_in_W_a, col_in_W_b)`` index tuples. An
    empty list yields ``np.nan``.
    """
    corr_vals = []
    for idx_a, idx_b in pairs:
        r, _ = pearsonr(W_a[:, idx_a], W_b[:, idx_b])
        corr_vals.append(r)
    return np.mean(corr_vals) if corr_vals else np.nan


def compare_phase_correlation():
    """Compare phase1/phase2 synergy correlation: full phase vs. lift-onset.

    For every participant in ``PARTICIPANTS`` and every trial 1-24 the W
    matrices of both phases are loaded twice (full-phase synergies and
    lift-onset synergies). Within each variant the synergies of the two
    phases are paired greedily by cosine similarity and the mean Pearson
    correlation over the matched pairs is stored.

    Side effects:
        * writes ``mean_correlation_full_vs_liftonset.csv`` to ``OUTPUT_DIR``
        * prints a paired t-test (full vs. lift-onset) in which every
          (participant, trial) row is one paired observation
        * writes ``boxplot_full_vs_liftonset.png`` to ``OUTPUT_DIR``

    Trials for which any of the four W matrices is missing, or for which
    the greedy matching returns no pairs, are skipped.
    """
    results = []

    for pid in PARTICIPANTS:
        participant_str = f"P({pid})"
        participant_dir = os.path.join(BASE_DIR, participant_str)
        if not os.path.isdir(participant_dir):
            print(f"[WARN] Missing folder for {participant_str}. Skipping.")
            continue

        for trial_idx in range(1, 25):
            # Load synergy factors for the FULL phases
            W1_full, _ = load_synergy_factors_full(participant_dir, trial_idx, PHASES[0])
            W2_full, _ = load_synergy_factors_full(participant_dir, trial_idx, PHASES[1])

            # Load synergy factors for the LIFT-ONSET
            W1_onset, _ = load_synergy_factors_liftonset(participant_dir, trial_idx, PHASES[0])
            W2_onset, _ = load_synergy_factors_liftonset(participant_dir, trial_idx, PHASES[1])

            if (W1_full is None or W2_full is None or
                    W1_onset is None or W2_onset is None):
                continue

            pairs_full = match_synergies_greedy(W1_full, W2_full)
            pairs_onset = match_synergies_greedy(W1_onset, W2_onset)

            # An empty pairing means the synergy counts differ (or nothing
            # could be matched); such trials cannot be compared.
            if not pairs_full or not pairs_onset:
                continue

            results.append({
                'participant': pid,
                'trial': trial_idx,
                'mean_corr_full': _mean_matched_correlation(W1_full, W2_full, pairs_full),
                'mean_corr_liftonset': _mean_matched_correlation(W1_onset, W2_onset, pairs_onset),
            })

    df_results = pd.DataFrame(results)
    if df_results.empty:
        print("[INFO] No data found. Exiting.")
        return

    out_csv = os.path.join(OUTPUT_DIR, "mean_correlation_full_vs_liftonset.csv")
    df_results.to_csv(out_csv, index=False)
    print(f"[INFO] Saved raw correlation comparison to {out_csv}")

    # pearsonr yields NaN for constant vectors; drop those rows pairwise.
    df_clean = df_results.dropna(subset=['mean_corr_full', 'mean_corr_liftonset'])
    if len(df_clean) < 2:
        # A paired t-test (and a meaningful plot) needs at least two pairs.
        print("[INFO] Fewer than two valid trials after removing NaNs. Exiting.")
        return

    t_stat, p_val = ttest_rel(df_clean['mean_corr_full'], df_clean['mean_corr_liftonset'])
    star_label = sig_label(p_val)

    print("=== Paired T-Test: Full Phase vs. Lift-Onset Phase ===")
    print(f"T-statistic = {t_stat:.3f}, p-value = {p_val:.6f} => {star_label}")

    df_melt = df_clean.melt(
        id_vars=['participant', 'trial'],
        value_vars=['mean_corr_full', 'mean_corr_liftonset'],
        var_name='phase_window',
        value_name='corr'
    )

    df_melt['phase_window'] = df_melt['phase_window'].map({
        'mean_corr_full': 'Full Phase Correlation',
        'mean_corr_liftonset': 'Lift-Onset Correlation'
    })

    plt.figure(figsize=(8, 6))
    # seaborn >= 0.13 deprecates `palette` without `hue`; assigning the x
    # variable to `hue` with legend=False is the documented replacement.
    ax = sns.boxplot(x='phase_window', y='corr', hue='phase_window',
                     data=df_melt, palette="Set2", legend=False)
    sns.swarmplot(x='phase_window', y='corr', data=df_melt, color='k', alpha=0.6)
    plt.title("Comparison of Synergy Correlation between Phases", fontsize=15)
    plt.ylabel("Mean Pearson Correlation", fontsize=15)
    plt.xlabel("")

    # Tick label sizes: 15 pt on the x-axis, 14 pt on the y-axis
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=14)

    # Significance marker: centred between the two boxes (x=0.5) and placed
    # ABOVE the highest data point.
    max_val = df_melt['corr'].max()
    offset = (max_val - df_melt['corr'].min()) * 0.05
    y_text = max_val + offset
    plt.text(0.5, y_text, star_label, ha='center', va='bottom', fontsize=15, color='red')

    # Text does not take part in autoscaling, so make room for it explicitly.
    ax.set_ylim(top=max(ax.get_ylim()[1], y_text + 2 * offset))

    plt.tight_layout()
    out_plot = os.path.join(OUTPUT_DIR, "boxplot_full_vs_liftonset.png")
    plt.savefig(out_plot, dpi=300)
    plt.close()
    print(f"[INFO] Saved boxplot comparing Full vs. Lift-Onset to {out_plot}")

# ------------------------------------------------------------------------------------------
# 3) Execute Analysis
# ------------------------------------------------------------------------------------------
def main():
    """Entry point: run the full-phase vs. lift-onset correlation comparison."""
    compare_phase_correlation()

if __name__ == "__main__":
    main()


[INFO] Saved raw correlation comparison to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q3 Anticipatory Information at Lift Onset\mean_correlation_full_vs_liftonset.csv
=== Paired T-Test: Full Phase vs. Lift-Onset Phase ===
T-statistic = -13.952, p-value = 0.000000 => ***



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.boxplot(x='phase_window', y='corr', data=df_melt, palette="Set2")


[INFO] Saved boxplot comparing Full vs. Lift-Onset to C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q3 Anticipatory Information at Lift Onset\boxplot_full_vs_liftonset.png


## Question 5: Mapping of Channels to Muscle and Kinematic Groups

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch

# -----------------------------------------------------------------------------
# 1) User Definitions
# -----------------------------------------------------------------------------

# Participants to process (folder names below BASE_DIR)
PARTICIPANTS = ["P(3)"]  # Processing only participant P(3)

# Define all trial numbers (1-24)
TRIAL_NUMBERS = list(range(1, 25))

# Grasp types and weight conditions derived from trial_info
GRASP_TYPES = [
    "Precision Grasp (Four Fingers and Thumb)",
    "Lateral Pinch Grasp",
    "Ball Grasp",
    "Precision Grasp (Thumb and Index)",
    "Disc Grip",
    "Power Bar Grasp"
]

WEIGHT_CONDITIONS = [0.25, 0.50]  # in kilograms

# Directories for synergy W files and output results
BASE_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Experimental Data"
# NOTE: the output folder is hard-coded to P(3); keep it in sync with PARTICIPANTS.
OUT_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q5 Biomechanical Linkages\P(3)"
os.makedirs(OUT_DIR, exist_ok=True)

# Number of rows (channels) every synergy matrix W must have to be accepted
NUM_CHANNELS = 180

# -----------------------------------------------------------------------------
# 2) Define Muscle/Finger Groups
# -----------------------------------------------------------------------------

# Define muscle and finger groups with zero-based channel indices.
# Forearm groups: 12 + 12 + 12 + 16 = 52 channels, covering 0-51 exactly once.
# The index pattern repeats every 13 channels (presumably a 4 x 13 electrode
# grid -- TODO confirm against the recording setup).
MUSCLE_MAP_FOREARM = {
    "Brachioradialis Supinator": [0, 1, 2, 13, 14, 15, 26, 27, 28, 39, 40, 41],
    "Pronator Teres": [3, 4, 5, 16, 17, 18, 29, 30, 31, 42, 43, 44],
    "Flexor Digitorum Superficialis": [6, 7, 8, 19, 20, 21, 32, 33, 34, 45, 46, 47],
    "Flexor Carpi Ulnaris": [9, 10, 11, 12, 22, 23, 24, 25, 35, 36, 37, 38, 48, 49, 50, 51],
}

# Updated Thenar Muscle Groups based on final allocation.
# 34 + 26 = 60 channels, covering 52-111 exactly once.
# (range() upper bounds are exclusive; the inline comments give inclusive spans.)
MUSCLE_MAP_THENAR = {
    "Abductor Pollicis Brevis": (
        list(range(52, 64))    # 52-63
        + list(range(66, 76))  # 66-75
        + list(range(80, 88))  # 80-87
        + list(range(96, 100)) # 96-99
    ),
    "Flexor Pollicis Brevis": (
        list(range(64, 66))    # 64-65
        + list(range(76, 80))  # 76-79
        + list(range(88, 96))  # 88-95
        + list(range(100, 112))# 100-111
    ),
}

# Single-channel groups: channels 112-116, one channel per entry
MYO_CHANNELS = {
    "ECU_myo": [112],
    "EDC_myo": [113],
    "ECR_myo": [114],
    "Biceps":  [115],
    "Triceps": [116],
}

# Corrected Kinematic Mapping: Channels 117-179 (63 channels):
# 3 wrist channels followed by 12 channels per finger.
KINEMATIC_MAP = {
    "Wrist": list(range(117, 120)),           # Channels 117,118,119
    "Thumb": list(range(120, 132)),           # Channels 120-131
    "Index Finger": list(range(132, 144)),    # Channels 132-143
    "Middle Finger": list(range(144, 156)),   # Channels 144-155
    "Ring Finger": list(range(156, 168)),     # Channels 156-167
    "Little Finger": list(range(168, 180)),   # Channels 168-179
}

# Merge the four channel maps into a single lookup. Insertion order is
# forearm -> thenar -> Myo -> kinematic, which is also the plotting order.
GROUPS = {
    **MUSCLE_MAP_FOREARM,
    **MUSCLE_MAP_THENAR,
    **MYO_CHANNELS,
    **KINEMATIC_MAP,
}

# Sanity check 1: every channel index must address a valid row of W
for group, channels in GROUPS.items():
    for ch in channels:
        if not 0 <= ch < NUM_CHANNELS:
            raise ValueError(f"Channel index {ch} in group '{group}' is out of bounds.")

# Sanity check 2: no channel may belong to more than one group
all_channels = [ch for members in GROUPS.values() for ch in members]
if len(set(all_channels)) == len(all_channels):
    print("[INFO] No overlapping channels detected.")
else:
    overlapping = {ch for ch in all_channels if all_channels.count(ch) > 1}
    raise ValueError(f"Overlapping channels detected: {overlapping}")

# Placeholder one-based names for all channels (can be customized): "Ch1" to "Ch180"
ROW_NAMES = [f"Ch{number}" for number in range(1, NUM_CHANNELS + 1)]
assert len(ROW_NAMES) == NUM_CHANNELS

# -----------------------------------------------------------------------------
# 3) Loading Synergy W for Each Participant/Trial (Phase 2 Only)
# -----------------------------------------------------------------------------

def load_synergy_w_phase2(base_dir, participant, trial_idx):
    """
    Load the phase-2 synergy matrix W of one participant and trial.

    The file is expected at
    ``<base_dir>/<participant>/Extracted Synergies/trial_<NN>_phase2_W.npy``
    (e.g. ``trial_01_phase2_W.npy``).

    Parameters:
        base_dir (str): Base directory that contains the participant folders.
        participant (str): Participant folder name (e.g., "P(1)").
        trial_idx (int): Trial number, formatted with two digits in the file name.

    Returns:
        np.ndarray or None: W with ``NUM_CHANNELS`` rows, or None if the file
        is missing, cannot be loaded, or has a different number of rows.
        A message is printed in each of the None cases.
    """
    w_path = os.path.join(
        base_dir,
        participant,
        "Extracted Synergies",
        f"trial_{trial_idx:02d}_phase2_W.npy",
    )

    if not os.path.exists(w_path):
        print(f"[WARN] Synergy W not found: {w_path}")
        return None

    try:
        W = np.load(w_path)
    except Exception as e:
        # Best effort: an unreadable file is reported and treated as missing.
        print(f"[ERROR] Failed to load {w_path}: {e}")
        return None

    if W.shape[0] == NUM_CHANNELS:
        return W  # shape => (NUM_CHANNELS, n_local_synergies)

    print(f"[WARN] Synergy mismatch shape={W.shape} in {w_path}, skipping.")
    return None

# -----------------------------------------------------------------------------
# 4) Pool Synergy Matrix by Muscle/Finger Groups (Excluding Zeroed Channels)
# -----------------------------------------------------------------------------

def pool_synergies_by_groups(W, group_dict):
    """
    Reduce a channel-level synergy matrix to one row per channel group.

    For every group the rows of W listed in ``group_dict`` are averaged per
    synergy. Rows that are zero in every synergy are left out of the average.
    For groups that are keys of ``KINEMATIC_MAP`` the absolute value is taken
    before averaging. A group whose rows are all zero contributes a zero row.
    Groups with an empty channel list are skipped.

    Parameters:
        W (np.ndarray): Synergy matrix of shape (num_channels, num_synergies).
        group_dict (dict): Maps group names to lists of zero-based row indices.

    Returns:
        tuple:
            - np.ndarray: Pooled matrix of shape (num_groups, num_synergies),
              or None if no group was pooled.
            - list: Group names in row order, or None if no group was pooled.
    """
    n_synergies = W.shape[1]
    pooled_rows, labels = [], []

    for group_name, ch_idxs in group_dict.items():
        if not ch_idxs:
            continue

        group_block = W[ch_idxs, :]  # (channels_in_group, num_synergies)
        # A channel is "active" if it is non-zero in at least one synergy.
        active = np.any(group_block != 0, axis=1)

        if not active.any():
            print(f"[WARN] All channels in group '{group_name}' are zeroed out. Setting pooled synergy to zero.")
            group_mean = np.zeros(n_synergies)
        else:
            active_block = group_block[active, :]
            if group_name in KINEMATIC_MAP:
                active_block = np.abs(active_block)
                print(f"[INFO] Applied absolute value to kinematic group '{group_name}'.")
            group_mean = np.mean(active_block, axis=0)

        # Debug output for kinematic groups only
        if group_name in KINEMATIC_MAP:
            print(f"[DEBUG] Group: {group_name}, Mean Synergy: {group_mean}")

        pooled_rows.append(group_mean)
        labels.append(group_name)

    if not pooled_rows:
        print("[WARN] No groups found to pool.")
        return None, None

    return np.vstack(pooled_rows), labels

# -----------------------------------------------------------------------------
# 5) Plot Synergy Bars - Enhanced Readability (Revised Function)
# -----------------------------------------------------------------------------

def plot_synergy_bars(W_pooled, group_labels, out_fig=None, title="", group_categories=None):
    """
    Draw one bar chart per synergy (stacked vertically, shared y-axis).

    Each subplot shows the pooled value of every group for one synergy.
    Bars are coloured by category when ``group_categories`` is given.

    Parameters:
        W_pooled (np.ndarray): Pooled synergy matrix of shape (num_groups, num_synergies).
        group_labels (list): Group names corresponding to W_pooled's rows.
        out_fig (str, optional): Path to save the figure. If None, the plot is shown.
        title (str, optional): Figure title.
        group_categories (dict, optional): Maps group names to category names.
            Groups missing from the mapping are drawn as category 'Other'.
    """
    sns.set(style="whitegrid")

    num_groups, num_syn = W_pooled.shape
    if num_syn == 0:
        # plt.subplots cannot create a grid with zero rows.
        print("[WARN] No synergies to plot.")
        return

    # Define categories and assign colors
    if group_categories:
        bar_categories = [group_categories.get(g, 'Other') for g in group_labels]
        # dict.fromkeys de-duplicates while preserving first-seen order, so the
        # category -> colour assignment is identical on every run (a plain
        # set() would reorder the categories between interpreter sessions).
        unique_categories = list(dict.fromkeys(group_categories.values()))
        if 'Other' in bar_categories and 'Other' not in unique_categories:
            # Make sure the fallback category actually has a colour.
            unique_categories.append('Other')
        palette = sns.color_palette("Set2", len(unique_categories))
        category_color_map = dict(zip(unique_categories, palette))
        group_colors = [category_color_map[cat] for cat in bar_categories]
    else:
        group_colors = ['skyblue'] * num_groups  # Default color

    # Single column, one row per synergy; the figure grows with the row count.
    fig, axes = plt.subplots(
        num_syn, 1, figsize=(18, 4 * num_syn), sharey=True, squeeze=False
    )
    axes = axes[:, 0]  # squeeze=False always returns a 2-D array

    x_positions = np.arange(num_groups)
    for i, ax in enumerate(axes):
        ax.bar(x_positions, W_pooled[:, i], color=group_colors, alpha=0.9)
        ax.set_title(f"Synergy {i+1}", fontsize=24)

        # Only the bottom subplot carries the group names
        if i == num_syn - 1:
            ax.set_xticks(x_positions)
            ax.set_xticklabels(group_labels, rotation=45, ha='right', fontsize=20)
            ax.set_xlabel("Muscle Groups", fontsize=22)
        else:
            ax.set_xticks([])

        if i == 0:
            ax.set_ylabel("Mean Activation", fontsize=18)

        ax.tick_params(axis='y', labelsize=21)
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)

    # Create legend if group categories are provided
    if group_categories:
        legend_elements = [
            Patch(facecolor=category_color_map[cat], label=cat)
            for cat in unique_categories
        ]
        fig.legend(handles=legend_elements, loc='upper left', title='Muscle Categories',
                   fontsize=20, title_fontsize=22, frameon=True)

    plt.suptitle(title, fontsize=26, y=1.02)
    plt.tight_layout(rect=[0, 0, 1, 0.95])

    if out_fig:
        plt.savefig(out_fig, dpi=700, bbox_inches='tight')
        plt.close()
        print(f"[INFO] Saved synergy bar chart => {out_fig}")
    else:
        plt.show()

# -----------------------------------------------------------------------------
# 6) Verification Functions
# -----------------------------------------------------------------------------

def check_synergy_signs(W_pooled, group_labels):
    """
    Print, for every group, which signs occur among its pooled synergy values.

    Parameters:
        W_pooled (np.ndarray): Pooled synergy matrix of shape (num_groups, num_synergies).
        group_labels (list): Group names corresponding to W_pooled's rows.
    """
    # Keyed by (has_negative, has_positive)
    status_by_signs = {
        (True, True): "Mixed (Both Positive and Negative)",
        (True, False): "Exclusively Negative",
        (False, True): "Exclusively Positive",
        (False, False): "All Zero",
    }

    for idx, group in enumerate(group_labels):
        row = W_pooled[idx]
        signs = (bool(np.any(row < 0)), bool(np.any(row > 0)))
        print(f"[INFO] Group: {group}, Synergy Sign Status: {status_by_signs[signs]}")

def plot_synergy_distributions(W_pooled, group_labels, title="Synergy Distributions"):
    """
    Show one histogram (with KDE) of the pooled synergy values per group.

    Parameters:
        W_pooled (np.ndarray): Pooled synergy matrix of shape (num_groups, num_synergies).
        group_labels (list): Group names corresponding to W_pooled's rows.
        title (str, optional): Figure-level title drawn above every histogram.
            Pass an empty string to omit it.
    """
    for idx, group in enumerate(group_labels):
        synergy = W_pooled[idx]
        fig = plt.figure(figsize=(8, 4))
        sns.histplot(synergy, bins=30, kde=True, color='skyblue')
        plt.title(f"Distribution of Synergy Values for {group}")
        plt.xlabel("Synergy Activation")
        plt.ylabel("Frequency")
        # Reference line at zero to make the sign of the values easy to read
        plt.axvline(0, color='red', linestyle='--', linewidth=1)
        if title:
            # The `title` argument was previously accepted but never drawn.
            fig.suptitle(title)
            # Reserve the top strip so the suptitle does not cover the axes title.
            fig.tight_layout(rect=[0, 0, 1, 0.95])
        plt.show()

# -----------------------------------------------------------------------------
# 7) Trial Information
# -----------------------------------------------------------------------------

def trial_info(trial_number):
    """
    Look up the experimental protocol entry of a trial.

    The 24 trials consist of 12 conditions, each run twice in a row: the
    odd-numbered trial with 'knowledge' == 'No' (weight distribution unknown)
    and the following even-numbered trial with 'knowledge' == 'Yes' (known).

    Parameters:
        trial_number (int): Trial number (1-24).

    Returns:
        dict or None: Keys 'grasp_type', 'handle_type', 'weight_kg',
        'lever_side' and 'knowledge'; None for an unknown trial number.
    """
    # (grasp type, handle type, weight in kg, lever side), in protocol order
    conditions = [
        ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Left Lever"),
        ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Right Lever"),
        ("Ball Grasp", "Ball Handle", 0.50, "Left Lever"),
        ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Front Lever"),
        ("Disc Grip", "Disc Handle", 0.50, "Back Lever"),
        ("Power Bar Grasp", "Power Bar Handle", 0.50, "Front Lever"),
        ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Front Lever"),
        ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Back Lever"),
        ("Ball Grasp", "Ball Handle", 0.50, "Front Lever"),
        ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Back Lever"),
        ("Disc Grip", "Disc Handle", 0.50, "Left Lever"),
        ("Power Bar Grasp", "Power Bar Handle", 0.50, "Right Lever"),
    ]

    # Expand to trial numbers: condition k -> trials 2k+1 ("No") and 2k+2 ("Yes")
    protocol = {}
    for pair_idx, condition in enumerate(conditions):
        protocol[2 * pair_idx + 1] = condition + ("No",)
        protocol[2 * pair_idx + 2] = condition + ("Yes",)

    entry = protocol.get(trial_number)
    if entry is None:
        return None

    fields = ('grasp_type', 'handle_type', 'weight_kg', 'lever_side', 'knowledge')
    return dict(zip(fields, entry))

# -----------------------------------------------------------------------------
# 8) Main Script: Gather Synergy Columns for Participant 1, Map, Average, Plot
# -----------------------------------------------------------------------------

def main():
    """
    Plot pooled phase-2 synergies for every experimental condition.

    Steps:
        1. Group the trials in ``TRIAL_NUMBERS`` by condition (grasp type,
           handle type, weight, lever side), ignoring the 'knowledge' flag.
        2. For each condition, load the phase-2 W matrices of all trials and
           all entries in ``PARTICIPANTS``.
        3. Zero-pad the matrices to a common number of synergies and average
           them element-wise.
        4. Pool the averaged matrix by channel group, print the sign status
           of each group and save one bar-chart figure to ``OUT_DIR``.
    """
    # Category of every group, used for colour-coding the bars
    group_categories_map = {
        group: category
        for category, channel_map in (
            ('Forearm Muscles', MUSCLE_MAP_FOREARM),
            ('Thenar Muscles', MUSCLE_MAP_THENAR),
            ('Myo Channels', MYO_CHANNELS),
            ('Kinematic Groups', KINEMATIC_MAP),
        )
        for group in channel_map
    }

    # Step 1: Group trials by condition (excluding 'knowledge')
    conditions = {}
    for trial_num in TRIAL_NUMBERS:
        info = trial_info(trial_num)
        if not info:
            print(f"[WARN] Trial information missing for trial {trial_num}, skipping.")
            continue
        condition_key = (
            info['grasp_type'],
            info['handle_type'],
            info['weight_kg'],
            info['lever_side']
        )
        conditions.setdefault(condition_key, []).append(trial_num)

    print(f"[INFO] Found {len(conditions)} unique conditions.")

    # Step 2: Process each condition
    for condition_key, trial_numbers in conditions.items():
        grasp_type, _handle_type, weight_kg, lever_side = condition_key
        condition_name = (
            f"{grasp_type.replace(' ', '_').replace('(', '').replace(')', '').replace('/', '_')}_"
            f"{weight_kg}kg_"
            f"{lever_side.replace(' ', '_')}"
        )
        print(f"\n----- Processing Condition: {condition_name} -----")

        # 1) Gather the phase-2 synergy matrices of all configured participants
        #    for the trials of this condition
        pooled_W_matrices = []
        for p in PARTICIPANTS:
            for trial_num in trial_numbers:
                W = load_synergy_w_phase2(BASE_DIR, p, trial_num)
                if W is None:
                    print(f"[WARN] Missing synergy data for participant {p} in trial {trial_num}, skipping.")
                    continue
                pooled_W_matrices.append(W)  # list of (NUM_CHANNELS, n_syn)

        if not pooled_W_matrices:
            print("[WARN] No synergy data found for this condition. Skipping.")
            continue

        # 2) Aggregate synergy matrices across trials by averaging.
        #    Matrices with fewer synergies are zero-padded on the right so all
        #    share the column count of the widest one.
        # NOTE(review): columns are averaged by position; this presumes that
        # synergy k means the same thing in every trial -- confirm that the
        # extraction step orders/matches synergies accordingly.
        max_synergies = max(W.shape[1] for W in pooled_W_matrices)
        padded_W_matrices = []
        for W in pooled_W_matrices:
            missing = max_synergies - W.shape[1]
            if missing > 0:
                W = np.hstack((W, np.zeros((NUM_CHANNELS, missing))))
            padded_W_matrices.append(W)

        # Stack along a third axis and average over it
        W_stack = np.stack(padded_W_matrices, axis=2)  # (NUM_CHANNELS, max_synergies, num_trials)
        W_mean = np.mean(W_stack, axis=2)  # (NUM_CHANNELS, max_synergies)
        print(f"[INFO] Averaged synergy matrix shape: {W_mean.shape}")

        # 3) Pool at muscle and kinematic group level
        W_pooled, pooled_labels = pool_synergies_by_groups(W_mean, GROUPS)
        if W_pooled is None:
            print("[WARN] Pooling failed. Skipping plotting.")
            continue

        # 4) Assign categories to pooled groups
        pooled_categories = {group: group_categories_map.get(group, 'Other') for group in pooled_labels}

        # 5) Verify Synergy Signs
        check_synergy_signs(W_pooled, pooled_labels)

        # 6) Plot Using the Enhanced Bar Plot
        out_name = f"{condition_name}_pooledSynergies_bars.png"
        out_path = os.path.join(OUT_DIR, out_name)
        # lever_side already ends in "Lever" (e.g. "Left Lever"), so it is
        # used as-is; appending " Lever" again produced "Left Lever Lever".
        plot_title = (
            f"{grasp_type} / {weight_kg}kg / {lever_side}\n"
            f"=> Pooled Synergies (Phase 2)"
        )
        plot_synergy_bars(
            W_pooled,
            pooled_labels,
            out_fig=out_path,
            title=plot_title,
            group_categories=pooled_categories
        )

    print("\n----- Done. Check synergy bar charts in:", OUT_DIR, "-----")

if __name__ == "__main__":
    main()


## Question 6: Task Efficiency and Overall Muscle Activation between Known and Unknown Conditions


In [None]:
# -------------------------------------------------------------------
# Analysis of Phase 2 EMG and OTB Data: Comparing Muscle Activation Between
# "Known" and "Unknown" Task Knowledge Conditions
# -------------------------------------------------------------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_rel

# -------------------------------------------------------------------
# 0) Global Configuration
# -------------------------------------------------------------------
# Root folder that contains one sub-folder per participant, named "P(<id>)"
BASE_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Experimental Data"
PARTICIPANTS = [1, 2, 3, 4, 5, 6, 7, 8]  # Participant ids (used to build the "P(<id>)" folder names)
TRIALS = range(1, 25)  # Trials 1 to 24
# Focus only on Phase 2.
# NOTE: load_emg_data / load_otb_data hard-code "phase2" in their file names
# and do not read this constant.
PHASE = "phase2"

# Output directory for the results of this analysis
OUTPUT_DIR = r"C:\Users\schmi\Documents\Studium\TUM\5. Semester\Masterthesis\Results Statistical Analysis\Q6 Task Efficiency by Muscle Activation"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # Create the directory if it doesn't exist

# -------------------------------------------------------------------
# 1) Helper: trial_info
# -------------------------------------------------------------------
def trial_info(trial_number):
    """
    Return the protocol metadata of a trial as a dict, or None if the trial
    number is not part of the protocol (valid numbers are 1-24).

    The 'knowledge' field tells whether the weight distribution was Known
    ('Yes') or Unknown ('No') to the participant.
    """
    field_names = ('grasp_type', 'handle_type', 'weight_kg', 'lever_side', 'knowledge')
    # trial number -> (grasp type, handle type, weight [kg], lever side, knowledge)
    protocol = {
        1:  ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Left Lever",  "No"),
        2:  ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Left Lever",  "Yes"),
        3:  ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Right Lever", "No"),
        4:  ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Right Lever", "Yes"),
        5:  ("Ball Grasp", "Ball Handle", 0.50, "Left Lever", "No"),
        6:  ("Ball Grasp", "Ball Handle", 0.50, "Left Lever", "Yes"),
        7:  ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Front Lever", "No"),
        8:  ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Front Lever", "Yes"),
        9:  ("Disc Grip", "Disc Handle", 0.50, "Back Lever", "No"),
        10: ("Disc Grip", "Disc Handle", 0.50, "Back Lever", "Yes"),
        11: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Front Lever", "No"),
        12: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Front Lever", "Yes"),
        13: ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Front Lever", "No"),
        14: ("Precision Grasp (Four Fingers and Thumb)", "Precision Handle", 0.25, "Front Lever", "Yes"),
        15: ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Back Lever", "No"),
        16: ("Lateral Pinch Grasp", "Lateral Pinch Handle", 0.25, "Back Lever", "Yes"),
        17: ("Ball Grasp", "Ball Handle", 0.50, "Front Lever", "No"),
        18: ("Ball Grasp", "Ball Handle", 0.50, "Front Lever", "Yes"),
        19: ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Back Lever", "No"),
        20: ("Precision Grasp (Thumb and Index)", "Precision Handle", 0.25, "Back Lever", "Yes"),
        21: ("Disc Grip", "Disc Handle", 0.50, "Left Lever", "No"),
        22: ("Disc Grip", "Disc Handle", 0.50, "Left Lever", "Yes"),
        23: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Right Lever", "No"),
        24: ("Power Bar Grasp", "Power Bar Handle", 0.50, "Right Lever", "Yes"),
    }

    row = protocol.get(trial_number)
    if row is None:
        return None
    return dict(zip(field_names, row))

# -------------------------------------------------------------------
# 2) Helper: Load EMG Data
# -------------------------------------------------------------------
def load_emg_data(participant_dir, trial_idx):
    """
    Load the phase-2 Myo EMG recording of one trial.

    The file is read from
    ``<participant_dir>/Synchronized Data split in Phases/match_<NN>_phase2_myo.npy``.

    Returns:
        numpy.ndarray or None: The array stored in the file, or None if the
        file does not exist. Per the original notes the array is expected to
        be 2D with shape (time_points, 5 muscle_channels); this is not
        checked here.
    """
    emg_file = os.path.join(
        participant_dir,
        "Synchronized Data split in Phases",
        f"match_{trial_idx:02d}_phase2_myo.npy",
    )
    return np.load(emg_file) if os.path.exists(emg_file) else None

# -------------------------------------------------------------------
# 3) Helper: Load OTB Data
# -------------------------------------------------------------------
def load_otb_data(participant_dir, trial_idx):
    """
    Load the Phase 2 OTB recording of one trial.

    The file is looked up at
    '<participant_dir>/Synchronized Data split in Phases/match_{trial_idx:02d}_phase2_otb.npy'.

    Args:
        participant_dir: Path of the participant's data folder.
        trial_idx: Trial number; zero-padded to two digits in the file name.

    Returns:
        The array stored in the file, exactly as returned by ``np.load``, or
        None if the file does not exist. The shape is not validated here; the
        analysis code expects (time_points, 112).
    """
    file_name = f"match_{trial_idx:02d}_phase2_otb.npy"
    otb_path = os.path.join(participant_dir, "Synchronized Data split in Phases", file_name)

    # Guard clause: a missing recording is signalled with None, not an exception.
    if not os.path.exists(otb_path):
        return None
    return np.load(otb_path)

# -------------------------------------------------------------------
# 4) Main Analysis: Phase 2 EMG and OTB Data Comparison
# -------------------------------------------------------------------
def _collect_phase2_activation_rows():
    """
    Gather one mean-activation record per usable participant/trial.

    Trials are skipped (with a log line) when metadata or a data file is
    missing, when an array does not have the expected shape, or when EMG and
    OTB have a different number of time points.

    Returns:
        list[dict]: Records with keys 'participant', 'trial_idx',
        'condition' ('Known' / 'Unknown') and 'avg_activation'.
    """
    rows = []
    for pid in PARTICIPANTS:
        participant_str = f"P({pid})"
        participant_dir = os.path.join(BASE_DIR, participant_str)
        if not os.path.isdir(participant_dir):
            print(f"[WARN] Missing folder for {participant_str}. Skipping.")
            continue

        print(f"\n=== Analyzing {participant_str} ===")

        for trial_idx in TRIALS:
            meta = trial_info(trial_idx)
            if meta is None:
                print(f"[INFO] Trial {trial_idx} has no metadata. Skipping.")
                continue

            # "Yes" => Known, anything else => Unknown
            condition = "Known" if meta['knowledge'] == "Yes" else "Unknown"

            emg_data = load_emg_data(participant_dir, trial_idx)
            if emg_data is None:
                print(f"[INFO] Missing EMG data for Trial {trial_idx}. Skipping.")
                continue

            otb_data = load_otb_data(participant_dir, trial_idx)
            if otb_data is None:
                print(f"[INFO] Missing OTB data for Trial {trial_idx}. Skipping.")
                continue

            if emg_data.ndim != 2 or emg_data.shape[1] != 5:
                print(f"[WARN] Unexpected EMG data shape for Trial {trial_idx}: {emg_data.shape}. Expected (time_points, 5). Skipping.")
                continue

            if otb_data.ndim != 2 or otb_data.shape[1] != 112:
                print(f"[WARN] Unexpected OTB data shape for Trial {trial_idx}: {otb_data.shape}. Expected (time_points, 112). Skipping.")
                continue

            # Concatenation along the channel axis requires equal time lengths.
            if emg_data.shape[0] != otb_data.shape[0]:
                print(f"[WARN] Mismatched time points for Trial {trial_idx}: EMG {emg_data.shape[0]}, OTB {otb_data.shape[0]}. Skipping.")
                continue

            # (time_points, 117). The scalar mean weights every channel
            # equally, so the 112 OTB channels dominate the 5 EMG channels.
            combined_data = np.concatenate((emg_data, otb_data), axis=1)

            rows.append({
                'participant': pid,
                'trial_idx': trial_idx,
                'condition': condition,
                'avg_activation': combined_data.mean(),
            })
    return rows


def _p_value_stars(p):
    """Map a p-value to the star notation used in the plots ('***', '**', '*', 'ns')."""
    if p <= 0.001:
        return '***'
    if p <= 0.01:
        return '**'
    if p <= 0.05:
        return '*'
    return 'ns'


def _save_summary_table_png(table_df, title, figsize, png_path):
    """Render *table_df* as a centered matplotlib table with *title* and save it to *png_path*."""
    fig, ax = plt.subplots(figsize=figsize)
    ax.axis('off')  # Only the table should be visible

    table = ax.table(cellText=table_df.values,
                     colLabels=table_df.columns,
                     cellLoc='center',
                     loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 1.5)

    ax.set_title(title, fontsize=12, pad=20)
    fig.tight_layout()
    fig.savefig(png_path, bbox_inches='tight', dpi=300)
    plt.close(fig)


def emg_otb_activation_analysis():
    """
    Analyzes Phase 2 EMG and OTB data to determine if task knowledge leads to higher muscle efficiency,
    i.e., lower muscle activation during the manipulation task.

    Steps:
        1. Iterates over participants and trials and loads EMG and OTB data for Phase 2.
        2. Computes, per trial, the mean over time and over all 117 concatenated channels.
        3. Averages the trial values per participant and condition.
        4. Performs a paired t-test (Known vs. Unknown) on participants that have both conditions.
        5. Calculates the percentage decrease in activation, (Unknown - Known) / Unknown * 100.
        6. Saves raw data, aggregated data, t-test results and percentage decrease as CSV files.
        7. Saves two box plots and two summary tables as PNG files.

    All files are written to OUTPUT_DIR. The function returns None.

    The analysis stops early with a [WARN] message (instead of raising) when
    no trial could be loaded, when one of the two conditions has no data at
    all, or when fewer than two participants have both conditions, because a
    paired t-test is undefined in those cases.
    """
    activation_rows = _collect_phase2_activation_rows()
    if not activation_rows:
        # An empty DataFrame has no columns, so the groupby below would raise KeyError.
        print("[WARN] No usable Phase 2 EMG/OTB trials were found. Nothing to analyze.")
        return

    # ---- Convert to DataFrame & Save Raw Data ----
    emg_otb_df_raw = pd.DataFrame(activation_rows)
    raw_csv = os.path.join(OUTPUT_DIR, "phase2_emg_otb_activation_raw.csv")
    emg_otb_df_raw.to_csv(raw_csv, index=False)
    print(f"\n[INFO] Saved raw EMG and OTB activation data to {raw_csv}")

    # ---- Aggregation: Average Activation per Participant and Condition ----
    aggregated = emg_otb_df_raw.groupby(['participant', 'condition']).agg(
        avg_activation=('avg_activation', 'mean')
    ).reset_index()
    agg_csv = os.path.join(OUTPUT_DIR, "phase2_emg_otb_activation_agg.csv")
    aggregated.to_csv(agg_csv, index=False)
    print(f"[INFO] Saved aggregated EMG and OTB activation data to {agg_csv}")

    # ---- Pivot for Paired T-Tests ----
    pivot = aggregated.pivot(index='participant', columns='condition', values='avg_activation').reset_index()

    missing_conditions = [c for c in ('Known', 'Unknown') if c not in pivot.columns]
    if missing_conditions:
        print(f"[WARN] No data for condition(s) {missing_conditions}. Cannot run the paired t-test.")
        return

    # Keep only participants for which both conditions are present
    pivot = pivot.dropna()
    if len(pivot) < 2:
        print(f"[WARN] Only {len(pivot)} participant(s) with both conditions. "
              "A paired t-test needs at least two. Stopping.")
        return

    # ---- Perform Paired T-Tests ----
    known = pivot['Known']
    unknown = pivot['Unknown']
    t_stat, p_val = ttest_rel(known, unknown)
    print("\nPaired T-Test Results:")
    print(f"t-statistic: {t_stat:.3f}, p-value: {p_val:.3f}")

    sig_label = _p_value_stars(p_val)

    # ---- Calculate Percentage Decrease ----
    # Percentage Decrease = ((Unknown - Known) / Unknown) * 100
    percentage_decrease = ((unknown - known) / unknown) * 100
    pivot['percentage_decrease'] = percentage_decrease

    mean_decrease = percentage_decrease.mean()
    std_decrease = percentage_decrease.std()

    print(f"\nPercentage Decrease in Activation (Known vs. Unknown): {mean_decrease:.2f}% ± {std_decrease:.2f}%")

    # ---- Save T-Test Results ----
    ttest_results = pd.DataFrame({
        'Statistic': ['t-statistic', 'p-value'],
        'Value': [f"{t_stat:.3f}", f"{p_val:.3f}"],
        'Significance': [sig_label, sig_label]
    })
    ttest_csv = os.path.join(OUTPUT_DIR, "phase2_emg_otb_paired_ttest.csv")
    ttest_results.to_csv(ttest_csv, index=False)
    print(f"[INFO] Saved paired t-test results to {ttest_csv}")

    # ---- Save Percentage Decrease Data ----
    percentage_decrease_df = pivot[['participant', 'Known', 'Unknown', 'percentage_decrease']]
    percentage_decrease_csv = os.path.join(OUTPUT_DIR, "phase2_emg_otb_percentage_decrease.csv")
    percentage_decrease_df.to_csv(percentage_decrease_csv, index=False)
    print(f"[INFO] Saved percentage decrease data to {percentage_decrease_csv}")

    # ---- Visualization: Box Plot ----
    plt.figure(figsize=(8, 6))
    sns.boxplot(x='condition', y='avg_activation', data=aggregated, palette='Set2')
    sns.stripplot(x='condition', y='avg_activation', data=aggregated, color='black', alpha=0.5)
    plt.title('Average EMG and OTB Activation by Condition (Phase 2)')
    plt.xlabel('Condition')
    plt.ylabel('Average Activation')

    # Significance label centered between the two boxes, 5 % above the largest value
    max_val = aggregated['avg_activation'].max()
    plt.text(0.5, max_val + 0.05*(max_val), sig_label, ha='center', va='bottom', color='red', fontsize=14)

    plt.tight_layout()
    plot_path = os.path.join(OUTPUT_DIR, "phase2_emg_otb_activation_boxplot.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"[INFO] Saved EMG and OTB activation box plot to {plot_path}")

    # ---- Visualization: Percentage Decrease Box Plot ----
    plt.figure(figsize=(6, 6))
    sns.boxplot(y='percentage_decrease', data=pivot, palette='Set3')
    sns.stripplot(y='percentage_decrease', data=pivot, color='black', alpha=0.5)
    plt.title('Percentage Decrease in Activation (Known vs. Unknown)')
    plt.ylabel('Percentage Decrease (%)')
    plt.xlabel('')

    # Significance label 5 percentage points above the largest value
    plt.text(0.0, pivot['percentage_decrease'].max() + 5, sig_label, ha='center', va='bottom', color='red', fontsize=14)

    plt.tight_layout()
    percentage_plot_path = os.path.join(OUTPUT_DIR, "phase2_emg_otb_percentage_decrease_boxplot.png")
    plt.savefig(percentage_plot_path, dpi=300)
    plt.close()
    print(f"[INFO] Saved percentage decrease box plot to {percentage_plot_path}")

    # ---- Summary Statistics Table and Save as PNG ----
    # Mean and std of the per-participant averages for each condition
    summary_stats = aggregated.groupby('condition').agg(
        Mean_Activation=('avg_activation', 'mean'),
        Std_Activation=('avg_activation', 'std')
    ).reset_index()
    summary_stats = summary_stats.round(3)

    # Short column names keep the rendered table compact
    summary_stats.rename(columns={
        'condition': 'Condition',
        'Mean_Activation': 'Mean Act.',
        'Std_Activation': 'Std Act.'
    }, inplace=True)

    summary_table_png = os.path.join(OUTPUT_DIR, "phase2_emg_otb_summary_statistics.png")
    _save_summary_table_png(
        summary_stats,
        'Summary Statistics of EMG and OTB Activation (Phase 2)',
        (6, 3),
        summary_table_png,
    )
    print(f"[INFO] Summary statistics table saved as PNG to: {summary_table_png}")

    # ---- Summary Statistics for Percentage Decrease ----
    percentage_summary = pd.DataFrame({
        'Mean % Decr.': [mean_decrease],
        'Std % Decr.': [std_decrease]
    })
    percentage_summary = percentage_summary.round(2)

    percentage_summary_png = os.path.join(OUTPUT_DIR, "phase2_emg_otb_percentage_decrease_summary.png")
    _save_summary_table_png(
        percentage_summary,
        'Percentage Decrease in Activation',
        (4, 2),
        percentage_summary_png,
    )
    print(f"[INFO] Percentage decrease summary table saved as PNG to: {percentage_summary_png}")

# -------------------------------------------------------------------
# 5) Main Execution
# -------------------------------------------------------------------
def main():
    """Run the Phase 2 EMG/OTB activation analysis."""
    emg_otb_activation_analysis()

# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
