In [1]:
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'

import os
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
from scipy.stats import ttest_rel
import seaborn as sns

from coordination.common.config import settings
from coordination.inference.inference_data import InferenceData

In [2]:
# Read scores
finger_tapping_data_path = "/space/paulosoares/data/coordination/2024/datasets/finger_tapping_fnirs_f1.csv"
ping_pong_data_path = "/space/paulosoares/data/coordination/2024/datasets/ping_pong_cooperative_fnirs_f1.csv"

finger_tapping_data_df = pd.read_csv(finger_tapping_data_path, index_col="experiment_id")[["final_sync_proportion"]].rename(columns={"final_sync_proportion": "final_team_score"})
ping_pong_data_df = pd.read_csv(ping_pong_data_path, index_col="experiment_id")[["final_team_score", "final_ai_score"]]

In [5]:
def get_coordination_df(run_id: str) -> pd.DataFrame:
    run_dir = f"{settings.inferences_dir}{run_id}"
    group_sessions = sorted([d for d in os.listdir(run_dir) if os.path.isdir(f"{run_dir}/{d}")])
    data = []
    for group_session in tqdm(group_sessions):       
        idata_filepath = f"{run_dir}/{group_session}/inference_data.pkl"
        if not os.path.exists(idata_filepath):
            continue
            
        with open(idata_filepath, "rb") as f:
            idata = InferenceData(pickle.load(f))
            
        data.append(
            {
                "experiment_id": group_session,
                "coordination": idata.average_posterior_samples(variable_name="coordination", return_std=False).to_numpy()
            }
        )

    return pd.DataFrame(data).set_index("experiment_id")

def signal_to_noise(x: np.ndarray) -> float:
    return x.mean() / x.std()

def p_value_star(p_value: float) -> str:
    star = (
        "***"
        if p_value < 0.001
        else "**"
        if p_value < 0.01
        else "*"
        if p_value < 0.05
        else "?"
        if p_value < 0.1
        else ""
    )

    return f"{p_value:.3f}{star}"

def correlate_all(task_df: pd.DataFrame) -> np.ndarray:
    expanded_df = pd.DataFrame(task_df['coordination'].tolist()).transpose()
    expanded_df.columns = finger_tapping_df.index
    return expanded_df.corr().values

# RQ1. Is coordination different between tasks where participants where interacting versus tasks they were not?

In [9]:
run_id_rest_state = "2024.01.18--15.52.15"
run_id_finger_tapping = "2024.01.18--10.59.47"
run_id_ping_pong = "2024.01.18--15.51.11"

rest_state_df = get_coordination_df(run_id_rest_state)
finger_tapping_df = get_coordination_df(run_id_finger_tapping)
ping_pong_df = get_coordination_df(run_id_ping_pong)

df = pd.concat([rest_state_df, finger_tapping_df, ping_pong_df], axis=1).dropna(axis=0)
df.columns = ["rest_state_coordination", "finger_tapping_coordination", "ping_pong_coordination"]
df.head()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:04<00:00,  4.76it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 36.22it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:01<00:00, 19.49it/s]


Unnamed: 0_level_0,rest_state_coordination,finger_tapping_coordination,ping_pong_coordination
experiment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
exp_2022_09_30_10,"[0.2220217255942336, 0.051553410877073734, 0.0...","[0.17756892445860226, 0.1028554085499791, 0.07...","[0.24606100022102217, 0.16694310918122102, 0.0..."
exp_2022_10_04_09,"[0.7143987626164525, 0.8160629424457705, 0.786...","[0.45108732237293725, 0.4353692378037841, 0.44...","[0.6452405452572341, 0.690141483278611, 0.6879..."
exp_2022_10_14_10,"[0.2694692724858083, 0.09605443559835694, 0.05...","[0.2293717671960898, 0.18193422440334014, 0.13...","[0.280902729770211, 0.23841556902016772, 0.240..."
exp_2022_10_18_10,"[0.368133623613364, 0.2401161660501671, 0.4516...","[0.18149327357293907, 0.12661960233887318, 0.1...","[0.4590513344668553, 0.46056742161675157, 0.38..."
exp_2022_10_28_10,"[0.5348355174045827, 0.579935525488207, 0.4702...","[0.4290073115676535, 0.4149361582839343, 0.426...","[0.5481089877347783, 0.5953445946782795, 0.416..."


In [12]:
# Compute mean, std and signal to noise ratio for each coordination series and task
data = []
for i, row in df.iterrows():
    data.append(
        {
            "rest_state_mean": row["rest_state_coordination"].mean(),
            "rest_state_median": np.median(row["rest_state_coordination"]),
            "rest_state_std": row["rest_state_coordination"].std(),
            "rest_state_s2n": signal_to_noise(row["rest_state_coordination"]),
            "finger_tapping_mean": row["finger_tapping_coordination"].mean(),
            "finger_tapping_median": np.median(row["finger_tapping_coordination"]),
            "finger_tapping_std": row["finger_tapping_coordination"].std(),
            "finger_tapping_s2n": signal_to_noise(row["finger_tapping_coordination"]),
            "ping_pong_mean": row["ping_pong_coordination"].mean(),
            "ping_pong_median": np.median(row["ping_pong_coordination"]),
            "ping_pong_std": row["ping_pong_coordination"].std(),
            "ping_pong_s2n": signal_to_noise(row["ping_pong_coordination"]),
        }
    )
summaries_df = pd.DataFrame(data)
summaries_df.head()

Unnamed: 0,rest_state_mean,rest_state_median,rest_state_std,rest_state_s2n,finger_tapping_mean,finger_tapping_median,finger_tapping_std,finger_tapping_s2n,ping_pong_mean,ping_pong_median,ping_pong_std,ping_pong_s2n
0,0.138398,0.032319,0.191317,0.723393,0.168431,0.128029,0.093549,1.80047,0.099515,0.034907,0.110799,0.89816
1,0.699363,0.851535,0.344686,2.028988,0.465573,0.458231,0.018444,25.241937,0.293702,0.318913,0.199495,1.472225
2,0.066637,0.005568,0.130848,0.509268,0.274149,0.34611,0.146578,1.870324,0.131431,0.054776,0.159021,0.8265
3,0.066968,0.001531,0.147358,0.45446,0.094168,0.087085,0.05251,1.793341,0.108184,0.070087,0.11189,0.966877
4,0.088361,0.046488,0.101014,0.874741,0.390078,0.376729,0.031091,12.546462,0.078045,0.014379,0.129257,0.603799


In [13]:
data = []
for task in ["finger_tapping", "ping_pong"]:
    data.append(
        {
            "task": task,
            "p_val_mean_diff": p_value_star(ttest_rel(summaries_df["rest_state_mean"], summaries_df[f"{task}_mean"]).pvalue),
            "p_val_mean_greater": p_value_star(ttest_rel(summaries_df["rest_state_mean"], summaries_df[f"{task}_mean"], alternative="less").pvalue),
            "p_val_median_diff": p_value_star(ttest_rel(summaries_df["rest_state_median"], summaries_df[f"{task}_median"]).pvalue),
            "p_val_median_greater": p_value_star(ttest_rel(summaries_df["rest_state_median"], summaries_df[f"{task}_median"], alternative="less").pvalue),
            "p_val_std_diff": p_value_star(ttest_rel(summaries_df["rest_state_std"], summaries_df[f"{task}_std"]).pvalue),
            "p_val_std_less": p_value_star(ttest_rel(summaries_df["rest_state_std"], summaries_df[f"{task}_std"], alternative="greater").pvalue),
            "p_val_s2n_diff": p_value_star(ttest_rel(summaries_df["rest_state_s2n"], summaries_df[f"{task}_s2n"]).pvalue),
            "p_val_s2n_greater": p_value_star(ttest_rel(summaries_df["rest_state_s2n"], summaries_df[f"{task}_s2n"], alternative="less").pvalue),
        }
    )
stats_df = pd.DataFrame(data)
stats_df.head(30)

Unnamed: 0,task,p_val_mean_diff,p_val_mean_greater,p_val_median_diff,p_val_median_greater,p_val_std_diff,p_val_std_less,p_val_s2n_diff,p_val_s2n_greater
0,finger_tapping,0.184,0.092?,0.075?,0.037*,0.009**,0.004**,0.058?,0.029*
1,ping_pong,0.212,0.894,0.580,0.710,0.023*,0.011*,0.896,0.448


# RQ2. Is coordination similar in shape across teams in the same task?

In [14]:
run_id_rest_state = "2024.01.18--15.52.15"
run_id_finger_tapping = "2024.01.18--10.59.47"
run_id_ping_pong = "2024.01.18--15.51.11"

finger_tapping_df = get_coordination_df(run_id_finger_tapping)
ping_pong_df = get_coordination_df(run_id_ping_pong)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 34.87it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:01<00:00, 19.26it/s]


In [15]:
rest_state_corr = np.array(correlate_all(rest_state_df)[np.triu_indices(3, k=1)])
finger_tapping_corr = np.array(correlate_all(finger_tapping_df)[np.triu_indices(3, k=1)])
ping_pong_corr = np.array(correlate_all(ping_pong_df)[np.triu_indices(3, k=1)])
corr_df = pd.DataFrame({"Rest State": rest_state_corr, "Finger Tapping": finger_tapping_corr, "Ping-Pong": ping_pong_corr})

sns.boxplot(pd.melt(corr_df, id_vars=[], var_name='Task', value_name='Correlation'), x="Task", y="Correlation")

ValueError: Length mismatch: Expected axis has 20 elements, new values have 23 elements

# RQ3. Is coordination similar in shape across teams in the same task with similar performance?

In [None]:
finger_tapping

corr_and_performance_df = corr_df.copy()
corr_and_performance_df["Performance"] = 