# Behavioral analyses

Extract behaviorally relevant data from the `events.tsv` files and provide them in Pandas dataframe format.

In [None]:
%matplotlib inline

In [None]:
import itertools
import os
import os.path as op
import shutil
from tempfile import mkdtemp

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pingouin
import scipy
import seaborn as sns
from scipy import stats

from utils import (
    BIDS_ROOT,
    TASK_NAME_MAP,
    add_binned_outcomes_to_df,
    events_to_behav_data,
    get_df_bnt,
    task_not_present_for_subject,
)

# Get sample description

In [None]:
# Read the participants table and summarize sex and age of the sample
participants_path = op.join(BIDS_ROOT, "participants.tsv")
participants_tsv = pd.read_csv(participants_path, sep="\t")

age_col = participants_tsv["age"]
is_female = participants_tsv["sex"] == "f"

nsubjs = len(participants_tsv)
nfemale = int(is_female.sum())
mean_age = age_col.mean().round(1)
sd_age = age_col.std().round(1)
age_range = (age_col.min(), age_col.max())

print(f"{nsubjs} participants, {nfemale} female")
print(
    f"mean age {mean_age} ± {sd_age} years, range {age_range[0]}-{age_range[1]} years"
)

# Get behavioral data

- Need to specify `BIDS_ROOT` in `utils.py`, a path to the bids directory with the participant data
- Will collect all `events.tsv` files and save them into a temporary directory
- Will read the `events.tsv` files, prune them, calculate some summaries, and clean error rows
- Will append all data and save as a single dataframe


In [None]:
# Subject numbers 1 through 40
subjects = [*range(1, 41)]

# Task labels as used in the events.tsv file names: all combinations of
# sampling (Active/Yoked) and stopping (Fixed/Variable), plus "description"
tasks = [
    f"{sampling}{stopping}"
    for sampling in ("Active", "Yoked")
    for stopping in ("Fixed", "Variable")
] + ["description"]

# Where to save the behavioral data
beh_fname = "behavioral_data.csv"
beh_fpath = op.join(BIDS_ROOT, "code", beh_fname)

In [None]:
# Build the combined behavioral data file once; skip if it already exists
if op.exists(beh_fpath):
    print(f'Data already exists at "{beh_fpath}"\nSkipping the "get" procedure ...')
else:
    # where to temporarily save behavioral data
    # (mkdtemp already creates the directory, no extra makedirs needed)
    tmp_beh_dir = mkdtemp()

    # Make sure the temporary directory is removed again, even if one of
    # the steps below raises
    try:
        # copy all events.tsv files to the new behavioral dir
        for root, _dirs, files in os.walk(BIDS_ROOT):
            if "sourcedata" in root or tmp_beh_dir in root:
                continue
            for file in files:
                if "events.tsv" in file:
                    print(file)
                    shutil.copyfile(op.join(root, file), op.join(tmp_beh_dir, file))

        # Append all dataframes into a single one
        leading_cols = ["subject", "task", "n_samples"]
        dfs = []
        for subj, task in itertools.product(subjects, tasks):

            # If this combination of subject and task is not present,
            # skip to the next iteration of the loop
            if task_not_present_for_subject(subj, task):
                continue

            # Load df
            fname = f"sub-{subj:02}_task-{task}_events.tsv"
            df = events_to_behav_data(op.join(tmp_beh_dir, fname))

            # Add subjects and task column
            df["subject"] = subj
            df["task"] = TASK_NAME_MAP[task]

            # Reorder columns: subject, task, n_samples first, then the rest
            # in their original order
            cols = leading_cols + [col for col in df if col not in leading_cols]
            dfs.append(df.loc[:, cols])

        df = pd.concat(dfs, ignore_index=True)

        # Add BNT data; the assertion checks that the merge neither dropped
        # nor reordered any of the existing rows/columns
        df_bnt = get_df_bnt(BIDS_ROOT)
        tmp = df.merge(df_bnt[["subject", "bnt_quartile"]], on="subject")
        pd.testing.assert_frame_equal(df, tmp.loc[:, "subject":"timestamp_outcome"])
        df = tmp.copy()

        # Add binned outcomes and orth binned outcomes
        df = add_binned_outcomes_to_df(df)

        # Save DF
        df.to_csv(beh_fpath, na_rep="n/a", index=False)
    finally:
        # Delete the temporary behavioral data dir
        shutil.rmtree(tmp_beh_dir)

In [None]:
# Load the combined behavioral data from `beh_fpath`
df = pd.read_csv(beh_fpath)

# Difficulty of active versus yoked sequences

- One argument against the yoking scheme is based on half of participants being "yoked-to-self", and the other half being "yoked-to-other"
- For yoked-to-self, "active" versus "yoked" sequences are matched in terms of difficulty
- However, how is this for yoked-to-other?



In [None]:
# Flag participants that were yoked to their own sampling sequences, and
# carry that flag over to the overall behavioral data
self_yoked_mask = participants_tsv["participant_id"] == participants_tsv["yoked_to"]
participants_tsv["yoked_to_self"] = self_yoked_mask

# Subject numbers: the participant_id with its first four characters removed
yoked_to_self = [
    int(pid[4:]) for pid in participants_tsv.loc[self_yoked_mask, "participant_id"]
]

df["yoked_to_self"] = df["subject"].isin(yoked_to_self)

In [None]:
# One row per trial for the yoked-to-other participants in the sampling tasks
yoked_to_other_rows = ~df["yoked_to_self"] & df["task"].isin(["AF", "AV", "YF", "YV"])
tmpdf = df.loc[
    yoked_to_other_rows, ["subject", "task", "trial", "exp_ev0", "exp_ev1"]
].drop_duplicates()

# Absolute difference between the experienced expected values of both
# options, averaged over trials per subject and task
tmpdf["exp_ev_diff"] = (tmpdf["exp_ev0"] - tmpdf["exp_ev1"]).abs()
tmpdf = tmpdf.groupby(["subject", "task"], as_index=False)["exp_ev_diff"].mean()

# Split the task label into its sampling and stopping factors
task_to_sampling = {"AF": "active", "AV": "active", "YF": "yoked", "YV": "yoked"}
task_to_stopping = {"AF": "fixed", "AV": "variable", "YF": "fixed", "YV": "variable"}
tmpdf["sampling"] = tmpdf["task"].map(task_to_sampling)
tmpdf["stopping"] = tmpdf["task"].map(task_to_stopping)

tmpdf.head()

In [None]:
# Plot the mean expected value difference per subject (swarm) together with
# the group summary (points), split by sampling (x) and stopping (hue)
fig, ax = plt.subplots()

order = ["active", "yoked"]
hue = "stopping"
hue_order = ["fixed", "variable"]

# single-subject values
sns.swarmplot(
    x="sampling",
    y="exp_ev_diff",
    data=tmpdf,
    order=order,
    hue=hue,
    hue_order=hue_order,
    ax=ax,
    dodge=True,
)
# group summary with 68% confidence intervals, drawn on the same axes
sns.pointplot(
    x="sampling",
    y="exp_ev_diff",
    data=tmpdf,
    order=order,
    hue=hue,
    hue_order=hue_order,
    ax=ax,
    ci=68,
    join=True,
)
ax.set(ylabel="Mean expected value difference", title="Error bars are SEM")
# Both plot calls add legend entries; keep only the entries from index 2 on
handles, labels = ax.get_legend_handles_labels()
lh = plt.legend(handles[2:], labels[2:])
lh.set_title(hue)

fig

In [None]:
# Mixed ANOVA on the mean expected value difference with sampling as the
# within-subject factor and stopping as the between-subject factor
fstats = pingouin.mixed_anova(
    data=tmpdf,
    dv="exp_ev_diff",
    within="sampling",
    between="stopping",
    subject="subject",
)
fstats

# bonus money earned

see also:
- https://github.com/sappelhoff/sp_experiment/blob/1c750597fbc7556d503f86eaaa440efae15091ab/sp_experiment/define_settings.py#L127
- https://github.com/sappelhoff/sp_experiment/blob/1c750597fbc7556d503f86eaaa440efae15091ab/sp_experiment/utils.py#L67-L141

do:

- for each subj:
- calculate points per task
- multiply with `exchange_rate` and round up to integer
- do for all tasks, and pay out the sum

In [None]:
# Factor by which the points earned in a task are multiplied to obtain money
exchange_rate = 0.005

In [None]:
# Reduce the data to one row per trial, keeping only the final outcome
trial_id_cols = ["subject", "task", "trial"]
tmp = df.drop_duplicates(subset=trial_id_cols)

n_expected_trials = 40 * 3 * 100  # 40 subjs, 3 tasks, 100 trials each
assert len(tmp) == n_expected_trials

tmp = tmp.loc[:, trial_id_cols + ["fin_outcome"]]
tmp.head()

In [None]:
# Payoff per subject: for each task, sum the points, convert them to money
# and round up to an integer; then add up the money over tasks
all_payoffs = []
for subj in tmp["subject"].unique():
    subj_rows = tmp.loc[tmp["subject"] == subj]

    money_per_task = []
    for task in subj_rows["task"].unique():
        task_rows = subj_rows.loc[subj_rows["task"] == task]
        assert len(task_rows) == 100

        task_points = task_rows["fin_outcome"].sum()
        money_per_task.append(int(np.ceil(task_points * exchange_rate)))

    all_payoffs.append(np.sum(money_per_task))

assert len(all_payoffs) == 40

In [None]:
# Report mean and standard deviation (np.std default) of the payoffs
mean_payoff = np.mean(all_payoffs)
sd_payoff = np.std(all_payoffs)
print(f"Participants earned {mean_payoff:.2f} ± {sd_payoff:.2f} € on average")

# Descriptives about "error trials"

In [None]:
# One row per trial with its error count, excluding the DESC task
tmp = (
    df.loc[:, ["subject", "task", "trial", "nerrors"]]
    .drop_duplicates(subset=["subject", "task", "trial"])
    .loc[lambda trials: trials["task"] != "DESC"]
)

# Sum the errors per subject, then summarize over subjects
nerrors_per_subject = tmp.groupby(["subject"])["nerrors"].sum()
nerrors_per_subject.agg(["mean", "median", "sem"]).round(1)

In [None]:
# Mean number of errors per participant, computed from the data instead of
# being copied by hand from the summary above (so it cannot go stale)
_error_trials = df.loc[
    df["task"] != "DESC", ["subject", "task", "trial", "nerrors"]
].drop_duplicates(subset=["subject", "task", "trial"])
mean_nerrors = _error_trials.groupby("subject")["nerrors"].sum().mean()

# in percent out of 100 active and 100 yoked trials
n_trials_per_subject = 200
perc_restarted = np.round((mean_nerrors / n_trials_per_subject) * 100)
print(
    f"On average {perc_restarted}% of trials per participant were restarted due to lack of fixation,\n"
    f"or failure to draw a sample within 3 seconds"
)

# What was the average number of samples?

NOTE: The "sample" column is 0-indexed, so the number of samples in a trial is the largest "sample" value + 1. The `n_samples` column used below is a count.

In [None]:
# Number of samples per trial in the "AV" task: one row per subject and trial
df_nsamples = (
    df.loc[df["task"] == "AV"]
    .drop_duplicates(["subject", "trial"])
    .loc[:, ["subject", "task", "trial", "n_samples"]]
    .reset_index(drop=True)
)
df_nsamples.head()

In [None]:
# Histogram (with kernel density estimate) of the number of samples per trial
with sns.plotting_context("talk"):
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(
        data=df_nsamples, x="n_samples", ax=ax, bins=19, discrete=True, kde=True
    )
    ax.set(xticks=np.arange(20), xlabel="Number of samples", ylabel="Count")

    sns.despine(fig)

In [None]:
# Descriptive statistics of the number of samples per trial
n_samples_col = df_nsamples["n_samples"]

mean_samples = n_samples_col.mean()
print(
    f"mean {mean_samples:.1f}, SD {n_samples_col.std():.1f}",
)
print(f"median {np.median(n_samples_col):.1f}")

# Series.mode() returns the modes sorted in ascending order, so the first
# entry is the smallest mode (the same tie-breaking as scipy.stats.mode).
# This avoids indexing the scipy result with [0][0], which only works for
# SciPy versions that return arrays rather than scalars.
print(f"mode {n_samples_col.mode().iloc[0]:.1f}")

# What were the experienced and true accuracies in the conditions?

- experienced accuracy = accuracy judged according to what was seen
- true accuracy = accuracy judged based on the underlying, true, distributions

NOTE: Need to remove "ambiguous trials", where one of the following conditions was met:

- only one option was sampled
- the expected values for both options were equal

## Calculate accuracies

In [None]:
# Calculate an accuracy dataframe for each accuracy type ... to eventually concatenate them

# Columns with the expected values that each accuracy type is judged against
ev_columns_by_type = {
    "experienced": ("exp_ev0", "exp_ev1"),
    "true": ("true_ev0", "true_ev1"),
}
merge_keys = ["subject", "trial", "task"]

dfs = list()
for accuracy_type in ["experienced", "true"]:

    tmp_df = df.copy()
    ev_to_base_on0, ev_to_base_on1 = ev_columns_by_type[accuracy_type]

    # ambiguous rows, where the experienced EVs are equal
    # (the experienced EVs are used here for both accuracy types)
    ev_equal_rows = (tmp_df["exp_ev0"] == tmp_df["exp_ev1"]).to_numpy()
    tmp_df["evs_equal"] = ev_equal_rows

    # ambiguous rows, where only one option was sampled: the mean action of
    # a trial is then exactly 0 (only left) or exactly 1 (only right)
    # NOTE: this will produce wrong data for DESC task
    # (see https://github.com/sappelhoff/sp_experiment/issues/8)
    mean_action = tmp_df.groupby(merge_keys)["action"].mean()
    single_option_sampled = (mean_action == 0) | (mean_action == 1)

    # merge the single option rows into tmp_df
    single_option_sampled = single_option_sampled.reset_index()
    single_option_sampled = single_option_sampled.rename(
        {"action": "single_option_sampled"}, axis=1
    )
    tmp_df = pd.merge(
        tmp_df,
        single_option_sampled,
        on=merge_keys,
        validate="many_to_one",
    )

    # NOTE: For description task, trials are never ambiguous for cases where only a single option was sampled
    # because these cases were replaced with descriptions of true underlying distributions
    # rather than descriptions of the experienced distributions
    # Still, we mark them as ambiguous to prevent analyzing data from two different sources (experienced vs true)
    # For each subject, overwrite "single_option_sampled" column for DESC task with the
    # corresponding data from the active task (either AV or AF, depending on subject)
    for subj in range(1, 41):
        idx_active = (tmp_df["subject"] == subj) & (
            ~tmp_df["task"].isin(["DESC", "YV", "YF"])
        )
        idx_desc = (tmp_df["subject"] == subj) & (tmp_df["task"] == "DESC")
        data = (
            tmp_df[idx_active]
            .drop_duplicates(subset="trial")["single_option_sampled"]
            .to_numpy()
        )
        tmp_df.loc[idx_desc, "single_option_sampled"] = data

    # ambiguous rows overall (equal EVs, or single option sampled)
    tmp_df["ambiguous_trial"] = np.logical_or(
        tmp_df["evs_equal"], tmp_df["single_option_sampled"]
    ).to_numpy()

    # Group data by subject, task, and trial ... taking the mean over samples
    # --> the mean should leave "fin_action", "ambiguous_trial" and EVs unchanged, ...
    # because they are the same over samples
    tmp_df = (
        tmp_df.groupby(["subject", "task", "trial"])[
            [ev_to_base_on0, ev_to_base_on1, "ambiguous_trial", "fin_action"]
        ]
        .mean()
        .reset_index()
    )

    # Remove ambiguous trials
    # The mean of a boolean column can come back as float (0.0 / 1.0), which
    # cannot be inverted with "~", so cast back to bool explicitly
    tmp_df = tmp_df[~tmp_df["ambiguous_trial"].astype(bool)]

    # Add column when correct choice was made
    right_better = (tmp_df[ev_to_base_on0] < tmp_df[ev_to_base_on1]).to_numpy()
    right_selected = (tmp_df["fin_action"] == 1).to_numpy()
    tmp_df["correct_choice"] = right_better == right_selected

    # Calculate accuracy as mean correct choices over trials per subject and task
    accuracy_df = (
        tmp_df.groupby(["subject", "task"])["correct_choice"].mean().reset_index()
    )
    accuracy_df["accuracy_type"] = accuracy_type
    dfs.append(accuracy_df)

accuracy_df = pd.concat(dfs, ignore_index=True)
accuracy_df.head()

In [None]:
# Save the accuracies (both accuracy types) as CSV, without the index
fname_beh_acc = op.join(BIDS_ROOT, "code", "beh_accs.csv")
accuracy_df.to_csv(fname_beh_acc, index=False)

## Sanity check for yoking scheme: Comparing "yoked to self" and "yoked to other"

in "yoked", watching the stream of a different person may be different from watching our own stream

... are the accuracies different between these groups?

In [None]:
# Subject numbers (last two characters of the participant_id) of all
# participants who were yoked to themselves
self_yoked_rows = participants_tsv["participant_id"] == participants_tsv["yoked_to"]
yoked_to_self = [
    int(pid[-2:]) for pid in participants_tsv.loc[self_yoked_rows, "participant_id"]
]

In [None]:
# Label every row of the accuracy df by yoking type: "self" if the subject
# was yoked to themselves, "other" otherwise
is_self_yoked = accuracy_df["subject"].isin(yoked_to_self)
accuracy_df["yoked_to"] = np.where(is_self_yoked, "self", "other")

In [None]:
# print out a summary
accuracy_df.groupby(["task", "yoked_to", "accuracy_type"])["correct_choice"].describe()

In [None]:
# visualize as 2x2x2 catplot (sampling x stopping x yoked_to), using the
# experienced accuracies from the sampling tasks only
is_sampling_task = accuracy_df["task"] != "DESC"
is_experienced = accuracy_df["accuracy_type"] == "experienced"
tmp = accuracy_df[is_sampling_task & is_experienced]

# derive the two factors from the two letters of the task label
sampling_labels = np.where(tmp["task"].str[0] == "A", "active", "yoked")
stopping_labels = np.where(tmp["task"].str[1] == "F", "fixed", "variable")
tmp.insert(2, "sampling", sampling_labels)
tmp.insert(3, "stopping", stopping_labels)

grid = sns.catplot(
    data=tmp,
    kind="point",
    x="sampling",
    y="correct_choice",
    hue="stopping",
    col="yoked_to",
    ci=68,
    dodge=True,
)

In [None]:
# Descriptive statistics of the accuracy per yoking type
tmp.groupby("yoked_to")["correct_choice"].describe()

In [None]:
# Independent-samples t-test comparing the accuracies of the "yoked to self"
# and the "yoked to other" group
xname = "self"
yname = "other"
x = tmp.loc[tmp["yoked_to"] == xname, "correct_choice"].to_numpy()
y = tmp.loc[tmp["yoked_to"] == yname, "correct_choice"].to_numpy()
model = pingouin.ttest(x, y, paired=False)

# The result is a one-row table; select that row by position with .iloc
# instead of relying on integer keys being interpreted as positions
dof = model["dof"].iloc[0]
t_value = model["T"].iloc[0]
p_value = model["p-val"].iloc[0]

print(
    f"ttest\nMean accuracies\n{xname}: {np.round(x.mean(), 2)}, {yname}: {np.round(y.mean(),2)}"
)
print(f"t({dof})={np.round(t_value, 3)}, p={np.round(p_value, 3)}")
display(model)

### Use rpy2 for mixed anova

see: https://www.marsja.se/r-from-python-rpy2-tutorial/

In [None]:
import rpy2.robjects as ro
import rpy2.robjects.packages as rpackages
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

In [None]:
# convert pandas DF to R data.frame, using the pandas converter in addition
# to rpy2's default converter
with localconverter(ro.default_converter + pandas2ri.converter):
    r_from_pd_df = ro.conversion.py2rpy(tmp)

# show the first rows of the converted object
r_from_pd_df.head()

In [None]:
afex = rpackages.importr("afex")

In [None]:
## see: https://cran.r-project.org/web/packages/afex/afex.pdf
# ANOVA on the accuracy with sampling as within-subject factor, and stopping
# and yoking type as between-subject factors
model = afex.aov_ez(
    id="subject",
    dv="correct_choice",
    between=["stopping", "yoked_to"],
    within="sampling",
    data=r_from_pd_df,
    check_contrasts=True,  # 3-way interaction is unaffected by this, "True" is recommended
    type=3,  # using so-called type 3 sums of squares
    print_formula=True,
)

print(model)

## Overview plot

In [None]:
# add sampling and stopping cols to DF
sampstop_data = np.array([[i[0], i[1]] for i in accuracy_df["task"].to_list()])
sampstop_data[(sampstop_data == "D") | (sampstop_data == "E")] = ""
accuracy_df["sampling"] = sampstop_data[:, 0]
accuracy_df["stopping"] = sampstop_data[:, 1]

In [None]:
# save data for publication plots
plotdir = op.join(BIDS_ROOT, "code", "publication_plots")
os.makedirs(plotdir, exist_ok=True)

fname = op.join(plotdir, "beh_accuracies.csv")

# Experienced accuracies from the sampling tasks only.
# Work on an explicit copy, so that relabeling the columns below does not
# assign into a filtered view of `accuracy_df`
is_plot_row = (accuracy_df["accuracy_type"] == "experienced") & (
    accuracy_df["task"] != "DESC"
)
tmp = accuracy_df[is_plot_row].copy()

# spell out the factor levels for plotting
tmp["sampling"] = tmp["sampling"].map({"A": "Active", "Y": "Yoked"})
tmp["stopping"] = tmp["stopping"].map({"V": "Variable", "F": "Fixed"})

tmp.to_csv(fname, index=False)

In [None]:
# Overview plot of the accuracies: group summary (pointplot), single-subject
# values (swarmplot), and lines connecting swarm points across stopping
# conditions
with sns.plotting_context("notebook"):
    fig, ax = plt.subplots()

    data = tmp
    order = ["Active", "Yoked"]
    hue_order = ["Fixed", "Variable"]

    # group summary with 68% confidence intervals
    sns.pointplot(
        x="sampling",
        y="correct_choice",
        hue="stopping",
        data=data,
        ci=68,
        dodge=0.2,
        order=order,
        hue_order=hue_order,
        ax=ax,
        markers="o",
    )

    # single-subject values
    sns.swarmplot(
        x="sampling",
        y="correct_choice",
        hue="stopping",
        data=data,
        order=order,
        hue_order=hue_order,
        ax=ax,
        dodge=True,
        size=4,
    )

    # add legend
    # https://matplotlib.org/3.1.1/gallery/text_labels_and_annotations/custom_legends.html
    from matplotlib.lines import Line2D

    legend_elements = [
        Line2D([0], [0], color=sns.color_palette()[0], marker="o", label="Fixed"),
        Line2D([0], [0], color=sns.color_palette()[1], marker="o", label="Variable"),
    ]

    ax.legend(
        handles=legend_elements,
        loc="lower left",
        framealpha=1,
        bbox_to_anchor=(0, 1),
        ncol=2,
        title="Stopping",
    )

    ax.set_ylim((0.5, 1.0))
    ax.set_ylabel("accuracy")

    # https://stackoverflow.com/a/51157346/5201771
    # 2-4 3-5
    # NOTE(review): the indices into ax.get_children() are hard-coded and
    # presumably select the swarm point collections of the two stopping
    # levels for each sampling level -- confirm if the plotting calls change
    for i_dots, (idx0, idx1) in enumerate([(2, 4), (3, 5)]):
        locs1 = ax.get_children()[idx0].get_offsets()
        locs2 = ax.get_children()[idx1].get_offsets()

        # Need to sort locs, so data corresponds
        sort_idxs_list = []
        sampling = order[i_dots]
        for stopping in hue_order:
            arr = data[(data["sampling"] == sampling) & (data["stopping"] == stopping)][
                "correct_choice"
            ].to_numpy()
            sort_idxs_list += [np.argsort(arr)]

        locs2_sorted = locs2[sort_idxs_list[1].argsort()][sort_idxs_list[0]]

        # connect the i-th point of the first collection with the i-th point
        # of the reordered second collection
        for i in range(locs1.shape[0]):
            _x = [locs1[i, 0], locs2_sorted[i, 0]]
            _y = [locs1[i, 1], locs2_sorted[i, 1]]
            ax.plot(_x, _y, color="black", alpha=0.1)

    # ax.set_yticks(np.arange(0.6, 0.95, 0.02))
    # ax.grid('on')
    ax.set_xlabel("Sampling")
    # this overrides the lowercase y-label set further above
    ax.set_ylabel("Accuracy")

## statistical testing

In [None]:
# Summary statistics
# Descriptive statistics of the accuracy per task and accuracy type
accuracy_df.groupby(["task", "accuracy_type"])["correct_choice"].describe()

In [None]:
# Mean +- SEM for specific selection


def sns_ci(a):
    """Return the mean distance of the bootstrapped 68% ci bounds from the mean of `a`.

    Helper to get the seaborn 68% ci (~SEM): the bounds are obtained from
    seaborn's bootstrap of `a`, and the absolute distances of the lower and
    the upper bound from ``a.mean()`` are averaged.
    """
    lower, upper = sns.utils.ci(sns.algorithms.bootstrap(a), 68)[:2]
    center = a.mean()
    distances = [np.abs(center - lower), np.abs(center - upper)]
    return np.mean(distances)


# Mean, standard error of the mean, and seaborn-style 68% ci of the
# experienced accuracy for each sampling task (DESC excluded), rounded to
# three decimals
accuracy_df[
    (accuracy_df["accuracy_type"] == "experienced") & (accuracy_df["task"] != "DESC")
].groupby(["task", "accuracy_type"]).agg(
    {"correct_choice": [np.mean, scipy.stats.sem, sns_ci]}
).round(
    3
).reset_index()

### mixed anova

In [None]:
# Mixed ANOVA on the accuracy (within: sampling, between: stopping),
# followed by Bonferroni-corrected pairwise t-tests
accuracy_type = "experienced"

tt = accuracy_df.loc[
    (accuracy_df["task"] != "DESC") & (accuracy_df["accuracy_type"] == accuracy_type),
    :,
]

# calculate model
model = pingouin.mixed_anova(
    data=tt,
    dv="correct_choice",
    within="sampling",
    between="stopping",
    subject="subject",
).round(3)

display(model)

# calculate posthocs
# NOTE: the result is deliberately not called `stats`, because that name
# would shadow the `stats` module imported from scipy at the top
posthocs = pingouin.pairwise_ttests(
    data=tt,
    padjust="bonf",
    dv="correct_choice",
    within="sampling",
    between="stopping",
    subject="subject",
    within_first=False,
    effsize="cohen",
).round(3)

display(posthocs)

## single subj scatterplot

with **active** task on x-axis and **yoked** task on y-axis

In [None]:
# Collapse df to wide format: one row per subject and accuracy type, one
# column of accuracies per task
dd = accuracy_df.pivot_table(
    values="correct_choice",
    index=["subject", "accuracy_type"],
    columns="task",
).reset_index()

In [None]:
def _label_point(x, y, val, ax):
    """Write `val` (as integer text) at each (x, y) position on `ax`.

    Adapted from https://stackoverflow.com/a/46028674/5201771.

    Parameters
    ----------
    x, y, val : pandas.Series
        Positions and label values; they are aligned on their index first.
    ax : matplotlib axes
        The axes whose ``text`` method is called once per point.
    """
    points = pd.concat({"x": x, "y": y, "val": val}, axis=1)
    for x_pos, y_pos, label in zip(points["x"], points["y"], points["val"]):
        ax.text(x_pos, y_pos, str(int(label)), fontsize=10)

In [None]:
# idxs for FIXED vs VARIABLE tasks: rows of subjects 2, 4, ..., 40 versus
# rows of subjects 1, 3, ..., 39
subject_numbers = dd["subject"].to_numpy()
variable_idx = np.isin(subject_numbers, np.arange(2, 41, 2))
fixed_idx = np.isin(subject_numbers, np.arange(1, 41, 2))

In [None]:
# Plot single subject accuracies
# One panel per stopping condition: active task on the x-axis, yoked task on
# the y-axis, colored by accuracy type
fig, axs = plt.subplots(1, 2, figsize=(10, 6), sharex=True, sharey=False)

for ax, taskset, xyidx in zip(
    axs, [["AF", "YF"], ["AV", "YV"]], [fixed_idx, variable_idx]
):
    x, y = taskset
    # only the first panel gets a legend
    legend = "brief" if x == "AF" else False
    sns.scatterplot(
        x=x,
        y=y,
        hue="accuracy_type",
        data=dd,
        ax=ax,
        legend=legend,
        palette=sns.color_palette()[2:4],
        markers=".",
        hue_order=["true", "experienced"],
    )

    # annotate each point with its subject number
    _label_point(dd.loc[xyidx, x], dd.loc[xyidx, y], dd.loc[xyidx, "subject"], ax)

    if legend:
        ax.legend(loc=2)


# the x-axis is shared between panels, the y-axis is not
axs.flat[0].set_xlim((0.5, 1))
axs.flat[0].set_ylim((0.5, 1))
axs.flat[1].set_ylim((0.5, 1))


fig.tight_layout()
plt.suptitle("Single subject accuracies", y=1.02)

# Is "numeracy" correlated with n_samples or accuracy?

- According to Peters2012 --> more numeracy, more n_samples

scoring according to:
- Cokely, E. T., Galesic, M., Schulz, E., Ghazal, S., & Garcia-Retamero, R. (2012). Measuring Risk Literacy: The Berlin Numeracy Test. Judgment and Decision Making. https://doi.org/10.1037/t45862-000

Dividing sample into 4 quartiles through "adaptive scoring":

- quartile 1 --> got 1 wrong, 2a wrong 
- quartile 2 --> got 1 wrong, 2a right
- quartile 3 --> got 1 right, 2b wrong, 3 wrong
- quartile 4 --> got 1 right, 2b right ... OR 2b wrong, but 3 right

The 4th quartile has the highest numeracy skill


The adaptive scoring questions 1, 2a, 2b, and 3 correspond to q1, q4, q2, q3 in our data.


In [None]:
# Get the BNT data via the project helper; the cells below use its
# "subject", "bnt_quartile", and "bnt_n_correct" columns
df_bnt = get_df_bnt(BIDS_ROOT)

In [None]:
# sanity check: high quartiles should also have an overall higher number
# of correctly answered questions
fig, ax = plt.subplots()
sns.pointplot(x="bnt_quartile", y="bnt_n_correct", data=df_bnt, ci=68, ax=ax)
ax.set_title("Number of correctly answered questions for each scoring quartile")

In [None]:
# Sanity check 2: quartiles 1+2 should be around 50% of all cases if this
# sample is comparable to the sample the original BNT study was based on
in_lower_quartiles = df_bnt["bnt_quartile"] <= 2
in_lower_quartiles.sum() / len(df_bnt)

In [None]:
# Histogram of the BNT quartiles in the sample
fig, ax = plt.subplots()
sns.histplot(x="bnt_quartile", data=df_bnt, ax=ax)

## numeracy and n_samples

In [None]:
# Attach each subject's BNT quartile to the trial-level number of samples.
# (A per-subject average that used to be computed here was overwritten
# immediately and never used, so it has been removed.)
tmp = df_nsamples.merge(df_bnt[["subject", "bnt_quartile"]], on="subject")
tmp.head()

In [None]:
# Number of samples per BNT quartile: mean and median (with 68% confidence
# intervals) on top of the single values
fig, ax = plt.subplots()
# mean in blue
sns.pointplot(
    x="bnt_quartile",
    y="n_samples",
    data=tmp,
    ci=68,
    ax=ax,
    estimator=np.mean,
    color="blue",
)
# median in red
sns.pointplot(
    x="bnt_quartile",
    y="n_samples",
    data=tmp,
    ci=68,
    ax=ax,
    estimator=np.median,
    color="red",
)
# one small black dot per row of `tmp`
sns.swarmplot(x="bnt_quartile", y="n_samples", data=tmp, ax=ax, size=0.5, color="black")
ax.set_title("Mean (blue) and Median (red)")

## numeracy and accuracy

In [None]:
# Attach each subject's BNT quartile to the accuracies and drop the DESC task
bnt_quartiles = df_bnt[["subject", "bnt_quartile"]]
tmp = accuracy_df.merge(bnt_quartiles, on="subject")
tmp = tmp.loc[tmp["task"] != "DESC"]

In [None]:
# Accuracy per BNT quartile and task, one panel per accuracy type
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), sharex=True, sharey=True)

for ax, acctype in zip([ax1, ax2], ["experienced", "true"]):

    # The first two tasks (AV, AF) share the first palette color, the last
    # two (YV, YF) the second; the variable tasks (AV, YV) use "o" markers
    # with solid lines, the fixed tasks (AF, YF) "x" markers with dashed lines
    sns.pointplot(
        x="bnt_quartile",
        y="correct_choice",
        data=tmp[tmp["accuracy_type"] == acctype],
        ci=68,
        dodge=True,
        hue="task",
        hue_order=["AV", "AF", "YV", "YF"],
        markers=["o", "x", "o", "x"],
        linestyles=["-", "--", "-", "--"],
        palette=[
            sns.color_palette()[0],
            sns.color_palette()[0],
            sns.color_palette()[1],
            sns.color_palette()[1],
        ],
        ax=ax,
    )
    ax.set_title(acctype)

In [None]:
# Correlate accuracy (averaged over active/yoked per subj) and BNT
# NOTE(review): `tmp` is only filtered by task in the cell that creates it,
# so it presumably still contains both accuracy types ("experienced" and
# "true") and the mean below also averages over them -- confirm this is
# intended
bnt_acc_corr_df = (
    tmp.groupby("subject")[["correct_choice", "bnt_quartile"]].mean().reset_index()
)

# one-sided Kendall correlation, rounded to three decimals
pingouin.correlation.corr(
    bnt_acc_corr_df["correct_choice"],
    bnt_acc_corr_df["bnt_quartile"],
    tail="one-sided",
    method="kendall",
).round(3)

### Use rpy2 for mixed anova

see: https://www.marsja.se/r-from-python-rpy2-tutorial/

In [None]:
# Accuracies of one accuracy type from the sampling tasks (DESC excluded),
# with each subject's BNT quartile attached
acctype = "experienced"

tmp = accuracy_df.merge(df_bnt[["subject", "bnt_quartile"]], on="subject")
rows_to_keep = (tmp["task"] != "DESC") & (tmp["accuracy_type"] == acctype)
tmp = tmp[rows_to_keep]

tmp.head()

In [None]:
# Convert the factor columns to pandas categoricals
factor_cols = ["stopping", "sampling", "task", "accuracy_type"]
tmp = tmp.astype({col: "category" for col in factor_cols})

In [None]:
# convert pandas DF to R data.frame, using the pandas converter in addition
# to rpy2's default converter
with localconverter(ro.default_converter + pandas2ri.converter):
    r_from_pd_df = ro.conversion.py2rpy(tmp)

In [None]:
r_from_pd_df.head()

In [None]:
afex = rpackages.importr("afex")

In [None]:
# ANOVA on the accuracy with sampling as within-subject factor, and stopping
# and BNT quartile as between-subject factors
model = afex.aov_ez(
    id="subject",
    dv="correct_choice",
    between=["stopping", "bnt_quartile"],
    within="sampling",
    data=r_from_pd_df,
)
print("acc type: {}".format(acctype))
print(model)

# find stereotypic sampling pattern

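- for example, "piecewise": `a b a b a b` --> if there are sequence_length - 1 switches in the sequence (i.e., a switch at every sample) and the sequence is at least 4 samples long
- or, "comprehensive": `a a a a b b b b` --> if there is a single switch in the sequence, the sequence is at least 4 samples long, and both options were sampled about equally often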


In [None]:
# Calculate switches using the "action" column
# A switch happens when *within* a trial, the participant
# changes from sampling one option to the other option
# The first sample at the "other option" is considered the
# switch trial.
#
# A trial is identified by subject, task, AND trial number: the trial number
# alone repeats across subjects and tasks, so using only the trial number
# could count a switch across a subject/task boundary whenever two adjacent
# blocks of rows happen to carry the same trial number.
trial_ids = zip(
    df["subject"].to_numpy(), df["task"].to_numpy(), df["trial"].to_numpy()
)

# initialize the "previous" state for preventing
# counting switches across trial boundaries
prev_trial_id = None
prev_act = None

# Collect booleans for each sample whether it was a switch
switches = list()
for trial_id, act in zip(trial_ids, df["action"].to_numpy()):
    # if we go on to a new trial in the data, an action is
    # never counted as a switch
    if trial_id != prev_trial_id:
        prev_act = act
        prev_trial_id = trial_id
    # Else, an action is a switch if the action is different
    # from the action before
    switches.append(act != prev_act)
    prev_act = act

df["switch"] = switches

In [None]:
# Sample-level data from the two active tasks, reduced to the columns
# needed for classifying the sampling patterns
pattern_cols = ["subject", "task", "trial", "sample", "action", "switch"]
_df = df.loc[df["task"].isin(["AF", "AV"]), pattern_cols]

In [None]:
# classify each trial into "0=comprehensive" (aaabbb), "1=piecewise"(ababab), ...
# "np.nan=invalid"(seq length <4, or only one option), or "2=other"
#
# The <4 requirement is important to distinguish "ab" trials: these are
# ambiguous: comprehensive or piecewise.
#
# when strict=True, the mean of actions must be 0.5, so only aabb, aaabbb, ...
# and abab, ababab, ... are valid

comprehensive_tolerance = 2
strict = False


def _classify_sampling_sequence(actions, n_switches, tolerance, strict):
    """Classify a single sampling sequence.

    Parameters
    ----------
    actions : pandas.Series
        The actions of all samples in the trial.
    n_switches : int
        How many samples in the trial were switches.
    tolerance : int
        Maximum allowed difference between how often each of the two options
        was sampled for a single-switch sequence to count as comprehensive.
    strict : bool
        If True, the mean of `actions` must be 0.5 for a sequence to count as
        comprehensive or piecewise.

    Returns
    -------
    classi : float | int
        np.nan (invalid), 0 (comprehensive), 1 (piecewise), or 2 (other).
    """
    n_samples = len(actions)

    # too short sequences, and sequences without exactly two distinct
    # actions, are invalid
    if len(actions.unique()) != 2 or n_samples < 4:
        return np.nan

    unevenly_sampled = not np.allclose(actions.mean(), 0.5)

    # exactly one switch means "comprehensive" ...
    if n_switches == 1:
        # ... unless the options were sampled too unequally (to exclude
        # patterns like aaaaaaaab), or not evenly in the strict case
        diff_0_1 = np.abs(np.diff(actions.value_counts().to_numpy()))
        if diff_0_1 > tolerance or (strict and unevenly_sampled):
            return 2
        return 0

    # switching each time means "piecewise" (if evenly sampled in the
    # strict case)
    if n_switches == (n_samples - 1):
        return 2 if (strict and unevenly_sampled) else 1

    # all other are "other" (mixed)
    return 2


classification = dict(subject=[], task=[], trial=[], classification=[])
for (subj, task, trl), grp in _df.groupby(["subject", "task", "trial"]):
    classi = _classify_sampling_sequence(
        grp["action"], grp["switch"].sum(), comprehensive_tolerance, strict
    )
    classification["subject"].append(subj)
    classification["task"].append(task)
    classification["trial"].append(trl)
    classification["classification"].append(classi)

# make DF
classification_df = pd.DataFrame(classification)
assert len(classification_df) == 4000

In [None]:
# Count and drop the trials that contain a missing value; a copy that
# still includes them is kept as `classification_df_na`
has_missing = classification_df.isna().any(axis=1)
n_na = int(has_missing.sum())
print(f"Need to drop {n_na} trials for different reasons:")
print(
    "\nonly one option chosen, or only one sample taken"
    "\n(strictly speaking the latter reason is the same as the former)"
)

classification_df_na = classification_df.copy()
classification_df = classification_df.loc[~has_missing]

In [None]:
# Summarize the proportion of used strategies per subject
# The position of a style in this list is its classification code (0, 1, 2)
styles = ["comprehensive", "piecewise", "other"]

data = {"subject": [], "task": [], "comprehensive": [], "piecewise": [], "other": []}
for (subj, task), grp in classification_df.groupby(["subject", "task"]):
    proportions = grp["classification"].value_counts() / len(grp)

    for code, style in enumerate(styles):
        # a style that never occurred has a proportion of zero
        try:
            proportion = proportions[code]
        except KeyError:
            proportion = 0.0
        data[style].append(proportion)

    data["subject"].append(subj)
    data["task"].append(task)


class_summary_df = pd.DataFrame(data)

# stereotypic sampling is either comprehensive or piecewise
class_summary_df["stereotypic"] = (
    class_summary_df["comprehensive"] + class_summary_df["piecewise"]
)

In [None]:
summary_stat_to_use = np.median  # np.median or np.mean

# NOTE: np.median might give warnings for the cases where all subjs are excluded

# plot tradeoff: What % of subj did use stereotypic sampling on **at least** % of trials?
# One panel per partition of the tasks (AF only, AV only, both)
fig, axs = plt.subplots(1, 3, figsize=(10, 5), sharey=True)
n = 100
# cutoffs for the proportion of "other" sampling, from 1 down to 0
cutoffs = np.linspace(1, 0, n)
# lookup table mapping a number of subjects (0-40) to a proportion (0-1)
nsubjs_norm = np.interp(np.arange(0, 41), (0, 40), (0, 1))

for ipartition, partition in enumerate([["AF"], ["AV"], ["AF", "AV"]]):

    _df_to_work_on = class_summary_df[class_summary_df["task"].isin(partition)]
    ax = axs.flat[ipartition]

    # rows: number of retained rows, minimum stereotypic proportion, and
    # summary statistic of the stereotypic proportion ... for each cutoff
    plot_data = np.full((3, n), np.nan)
    for i, cutoff in enumerate(cutoffs):

        # retain only rows with a proportion of "other" below the cutoff
        nsubjs_in_data = len(_df_to_work_on[_df_to_work_on["other"] < cutoff])
        min_stereo = _df_to_work_on[_df_to_work_on["other"] < cutoff][
            "stereotypic"
        ].min()

        summary_stereo = summary_stat_to_use(
            _df_to_work_on[_df_to_work_on["other"] < cutoff]["stereotypic"]
        )

        plot_data[0, i] = nsubjs_in_data
        plot_data[1, i] = min_stereo
        plot_data[2, i] = summary_stereo

    # plot
    ax.plot(
        cutoffs,
        nsubjs_norm[plot_data[0, :].astype(int)],
        color="r",
        label="subjects retained",
    )
    ax.plot(cutoffs, plot_data[1, :], color="k", label="minimum stereotypic sampling")
    ax.plot(
        cutoffs,
        plot_data[2, :],
        color="m",
        label=f"{summary_stat_to_use.__name__} stereotypic sampling",
    )

    # only the middle panel carries the long explanation in its x-label
    xlab = "cutoff 'other'"
    if ipartition == 1:
        xlab += "\n(subjects need to have less than this proportion of 'other' sampling styles,"
        xlab += "\ni.e., NOT 'comprehensive' or 'piecewise', to be retained)"
    ax.set(xlabel=xlab, ylabel="proportion", title=partition)
    ax.grid("on")

    # only the first panel gets a legend
    if ipartition == 0:
        leg = ax.legend()


fig.suptitle(
    "How many subject sample stereotypically?\n(i.e., 'comprehensive':aaabbb or 'piecewise':ababab)",
    y=1.05,
)
fig.tight_layout()

In [None]:
# Report the stereotypic sampling at one chosen cutoff per partition
cutoffs = [0.3, 0.3, 0.4]
stat_name = summary_stat_to_use.__name__

for ipartition, partition in enumerate([["AF"], ["AV"], ["AF", "AV"]]):

    cutoff = cutoffs[ipartition]
    _df_to_work_on = class_summary_df[class_summary_df["task"].isin(partition)]

    # stereotypic proportions of the rows with less "other" sampling than
    # the cutoff
    retained_stereo = _df_to_work_on.loc[_df_to_work_on["other"] < cutoff, "stereotypic"]

    nsubjs_in_data = len(retained_stereo)
    min_stereo = retained_stereo.min()
    summary_stereo = summary_stat_to_use(retained_stereo)

    print(f"\n\n{partition}")
    print(
        f"{nsubjs_in_data} subjects sampled at least {min_stereo*100:.2f}% of trials in stereotypic form."
    )
    print(
        f"{nsubjs_in_data} subjects sampled {summary_stereo*100:.2f}% of trials in stereotypic form on average ({stat_name})."
    )

In [None]:
# Summary statistic of the stereotypic sampling (in percent) without
# excluding any subjects: per active task, and over both tasks
stereo_percent = class_summary_df["stereotypic"] * 100
task_labels = class_summary_df["task"]

all_av_stereo = summary_stat_to_use(stereo_percent[task_labels == "AV"]).round(2)
all_af_stereo = summary_stat_to_use(stereo_percent[task_labels == "AF"]).round(2)
all_stereo = summary_stat_to_use(stereo_percent).round(2)

print(
    f"Over all subjects,\n{all_av_stereo}% in AV,\n{all_af_stereo}% in AF,\n{all_stereo}% overall,"
    f"\nsampled in stereotypic form ..."
)

# Decision weights via logistic regression

Two vectors are needed:
- `X`: the sample outcomes from both options (one option "flipped" such that 1=9, 2=8, ..., 9=1)
    - this "flip" is equivalent to first mean-centering the vector of outcomes, and then flipping the signs
    - e.g., 1, 2, 3, 4, 5, 6, 7, 8, 9 --> (mean-center) --> -4, -3, -2, -1, 0, 1, 2, 3, 4 --> (flip)  --> 4, 3, 2, 1, 0, -1, -2, -3, -4
- `y`: the final choices for options (left: 0, right: 1)

Obviously, `y` will have fewer entries than `X`. 
We need to repeat the `y` value of each trial for each sample in that trial.
Finally, we do this for early, mid, and late samples separately


In [None]:
def classify_timing(df, first_last_n):
    """Add a new column 'timing' to the `df` for early/mid/late samples.

    Parameters
    ----------
    df : pandas.DataFrame
        The behavioral data containing `n_samples` and `sample` column.
    first_last_n : int
        Controls the N *first* samples in a sequence and N *last* samples in a
        sequence to be classified into "early" and "late" respectively. The
        samples in between are *mid* samples.
        Sequences that are 2*first_last_n samples long or shorter have
        *mixed* timing and can later be dropped from analysis.

    Returns
    -------
    df : pandas.DataFrame
        A copy of the data with a new column "timing" that holds one of the
        strings "early", "mid", "late", or "mixed" for each row.

    Notes
    -----
    `first_last_n` should be picked low enough such that not too many trials
    will end up with each sample classified as "mixed". At the same time,
    `first_last_n` should be high enough to make the number of "early", "mid",
    and "late" samples per trial as balanced as possible.

    """
    df = df.copy()

    n_samples = df["n_samples"].to_numpy()
    sample = df["sample"].to_numpy()

    # NOTE: nth "sample" is 0-indexed ... n_samples is a count
    # "how many steps is *this* sample away from the sequence end?" ...
    # `1` is the end, due to zero-indexing
    sample_diff = n_samples - sample

    # "how large does the `sample_diff` have to be so that the sample can be
    # classified as early?"
    early_thresh = (n_samples + 1) - first_last_n

    # If there are `first_last_n` or less steps until the sequence end, we have
    # a late sample
    is_late = sample_diff <= first_last_n

    # If there are many steps left, we have an early sample
    is_early = sample_diff >= early_thresh

    # Sampling sequences that are too short to be divided into early, mid, and
    # late
    is_mixed = n_samples <= first_last_n * 2

    # Build the labels in an object array and assign the column once. Creating
    # a float NaN column first and then writing strings into it relies on an
    # implicit dtype change that pandas has deprecated.
    # Later assignments win: "mixed" overrides "early", which overrides "late";
    # every row not matched by any mask stays "mid".
    timing = np.full(len(df), "mid", dtype=object)
    timing[is_late] = "late"
    timing[is_early] = "early"
    timing[is_mixed] = "mixed"
    df["timing"] = timing

    return df

In [None]:
def get_log_reg_coef(df, subject, flip, selection):
    """Regress the final choice on the (side-flipped) sampled outcomes.

    One logistic regression is fitted per data subset requested through
    `selection`, using the data of a single subject.

    Parameters
    ----------
    df : pandas.DataFrame
        The data, with columns [subject, sampling, stopping, trial, sample,
        timing, action, outcome, fin_action], and no "DESC" task.
    subject : int
        The subject id.
    flip : 0 | 1
        The "side" to flip (0=left 1=right). Outcomes sampled on this side
        are mirrored for the regression such that 1 becomes 9, 2 becomes 8,
        ... and 9 becomes 1.
    selection : all | timing | task | timing_task
        Which subsets of the data to fit. "all" fits a single regression on
        all data. "timing" fits three regressions (early, mid, late). "task"
        fits two regressions (active, yoked). "timing_task" fits six
        regressions: early, mid, and late, each for active and for yoked.

    Returns
    -------
    coef : dict
        Maps the name of each fitted subset (for example "all", "early", or
        "active/early") to the coefficient of the outcome predictor.

    Raises
    ------
    ValueError
        If `selection` is not one of the supported values.

    Notes
    -----
    There is no "stopping" option for `selection` because "stopping" was a
    between factor and is therefore implicit in the subject: for example, the
    data from sub-01 is always "fixed", sub-02 always "variable", and so on.
    You can use df[["subject", "task"]] to see the mapping.

    """
    # restrict to the requested subject, with a fresh 0..n-1 index
    subj_df = df[df["subject"] == subject].reset_index(drop=True)

    raw_outcome = subj_df["outcome"].to_numpy()
    is_flipped = (subj_df["action"] == flip).to_numpy()

    # "10 - outcome" mirrors the outcome so that 1=9, 2=8, ... 9=1; rows that
    # were not sampled on the `flip` side keep their outcome
    subj_df.insert(
        len(subj_df.columns),
        "outcome_flipped",
        np.where(is_flipped, 10 - raw_outcome, raw_outcome),
    )

    # sanity check: outcomes on the non-flipped side are unchanged
    pair_cols = ["outcome", "outcome_flipped"]
    untouched = subj_df.loc[~is_flipped, pair_cols].to_numpy()
    np.testing.assert_array_equal(untouched[:, 0], untouched[:, 1])

    # sanity check: flipped outcome + original outcome == 10
    mirrored = subj_df.loc[is_flipped, pair_cols].to_numpy()
    np.testing.assert_allclose(mirrored[:, 0] + mirrored[:, 1], 10)

    # row selectors, one per regression to fit
    timing_levels = ("early", "mid", "late")
    sampling_levels = ("active", "yoked")
    if selection == "all":
        sel = {"all": np.arange(0, subj_df.shape[0])}
    elif selection == "timing":
        sel = {timing: subj_df["timing"] == timing for timing in timing_levels}
    elif selection == "task":
        sel = {
            sampling: subj_df["sampling"] == sampling for sampling in sampling_levels
        }
    elif selection == "timing_task":
        sel = {
            f"{sampling}/{timing}": (subj_df["sampling"] == sampling)
            & (subj_df["timing"] == timing)
            for sampling in sampling_levels
            for timing in timing_levels
        }
    else:
        raise ValueError(f"What is {selection}?")

    coef = {}
    for sel_name, rows in sel.items():
        # predictor (as a single-column 2D array) and outcome of the
        # logistic regression
        X = subj_df.loc[rows, "outcome_flipped"].to_numpy().reshape(-1, 1)
        y = subj_df.loc[rows, "fin_action"].to_numpy()

        # with `coef_only=True` the first returned value is the coefficient of
        # the intercept, the second one belongs to the predictor
        _intercept, predictor_coef = pingouin.logistic_regression(
            X, y, coef_only=True, random_state=42
        )
        coef[sel_name] = predictor_coef

    return coef

In [None]:
# prep df: label every sample as early/mid/late/mixed
first_last_n = 2
tmp_logreg = classify_timing(df, first_last_n=first_last_n)

# the description task is not part of this analysis
tmp_logreg = tmp_logreg[tmp_logreg["task"] != "DESC"]

# Derive the two factors from the task code: a leading "A" marks active
# sampling (otherwise yoked), a trailing "F" marks fixed stopping (otherwise
# variable)
_is_active = tmp_logreg["task"].str.startswith("A").to_numpy(dtype=int).astype(bool)
_is_fixed = tmp_logreg["task"].str.endswith("F").to_numpy(dtype=int).astype(bool)
tmp_logreg = tmp_logreg.assign(
    sampling=np.where(_is_active, "active", "yoked"),
    stopping=np.where(_is_fixed, "fixed", "variable"),
)

# keep only the columns needed for the logistic regression, in this order
cols = [
    "subject",
    "sampling",
    "stopping",
    "trial",
    "sample",
    "timing",
    "action",
    "outcome",
    "fin_action",
]
tmp_logreg = tmp_logreg[cols]
tmp_logreg.head()

## early mid late ... WITH sampling/stopping division


In [None]:
flip = 0
selection = "timing_task"

# Name(s) of the factor column(s) encoded in the coefficient keys. This only
# depends on `selection`, so it is looked up once before the loop.
colnames = {
    "all": ["data_used"],
    "timing": ["timing"],
    "task": ["sampling"],
    "timing_task": ["sampling", "timing"],
}[selection]

subjdfs = []
for subj in range(1, 41):
    # calculate
    slope = get_log_reg_coef(tmp_logreg, subj, flip, selection)

    # "save": one row per coefficient; keys such as "active/early" are split
    # at "/" into one value per factor column
    factor_levels = np.array([key.split("/") for key in slope])
    tmp_logreg_subjdf = pd.DataFrame(factor_levels, columns=colnames).assign(
        slope=np.array(list(slope.values())),
        subject=subj,
    )[["subject", *colnames, "slope"]]
    subjdfs.append(tmp_logreg_subjdf)

decision_weight_df = pd.concat(subjdfs)
decision_weight_df

In [None]:
# merge the stopping factor on to decision_weight_df
# The stopping condition is read off the active task of each subject. Only
# "AF"/"AV" rows are considered, so the result does not depend on which task
# happens to come first for a subject in `df` (any other task code in the
# first row would otherwise be mapped to NaN).
stopping_by_task = {"AF": "fixed", "AV": "variable"}
tmp_stopping = (
    df.loc[df["task"].isin(list(stopping_by_task)), ["subject", "task"]]
    .drop_duplicates(subset=["subject"], keep="first")
    .reset_index(drop=True)
)
tmp_stopping["stopping"] = tmp_stopping["task"].map(stopping_by_task)

decision_weight_df = decision_weight_df.merge(
    tmp_stopping[["subject", "stopping"]], on=["subject"]
)

# fix the column order
decision_weight_df = decision_weight_df[
    ["subject", "sampling", "stopping", "timing", "slope"]
]
decision_weight_df.head()

In [None]:
# save data for publication plots
# (the output directory is created if it does not exist yet)
plotdir = op.join(BIDS_ROOT, "code", "publication_plots")
os.makedirs(plotdir, exist_ok=True)

fname = op.join(plotdir, "decision_weights_logreg_tasks.csv")
decision_weight_df.to_csv(fname, index=False)

In [None]:
# `Line2D` is needed for the custom legend handles below. Importing it here
# keeps this cell runnable without depending on an import made in another cell.
from matplotlib.lines import Line2D

with sns.plotting_context("talk", font_scale=1):
    fig, ax = plt.subplots()

    # decision weight over sample position: color encodes the stopping
    # condition, line style encodes the sampling condition
    sns.lineplot(
        x="timing",
        y="slope",
        hue="stopping",
        hue_order=["fixed", "variable"],
        style="sampling",
        style_order=["active", "yoked"],
        data=decision_weight_df,
        ci=68,
        ax=ax,
        legend=False,
    )

    ax.axhline(0, color="black")
    ax.set_ylabel("decision weight")
    ax.set_xlabel("sample position")

    # add legend
    # https://matplotlib.org/3.1.1/gallery/text_labels_and_annotations/custom_legends.html
    # One handle per (line style, color) combination: solid lines carry the two
    # control labels, both dashed lines are labelled "yoked". The colors are
    # the first two of the current palette (presumably the ones seaborn used
    # for the two hue levels above).
    palette = sns.color_palette()
    legend_spec = [
        ("-", ["partial control", "full control"]),
        ("--", ["yoked", "yoked"]),
    ]
    legend_elements = [
        Line2D([0], [0], color=color, marker=None, label=label, linestyle=linestyle)
        for linestyle, labels in legend_spec
        for color, label in zip(palette, labels)
    ]

    legend1 = fig.legend(
        handles=legend_elements,
        loc="lower right",
        bbox_to_anchor=(0.95, 0.175),
        framealpha=1,
        frameon=False,
        ncol=2,
        title="sampling",
    )

## `late - early`

In [None]:
flip = 0
selection = "timing_task"

# factor column(s) encoded in the coefficient keys for this `selection`
colnames = {
    "all": ["data_used"],
    "timing": ["timing"],
    "task": ["sampling"],
    "timing_task": ["sampling", "timing"],
}[selection]

subjdfs = []
for subj in range(1, 41):
    # calculate
    coef = get_log_reg_coef(tmp_logreg, subj, flip, selection)

    # "save": factor levels from the "/"-separated keys, framed by the subject
    # id (first column) and the coefficient (last column)
    tmp_subjdf = pd.DataFrame(
        np.array([key.split("/") for key in coef]), columns=colnames
    )
    tmp_subjdf.insert(0, "subject", subj)
    tmp_subjdf["slope"] = np.array(list(coef.values()))
    subjdfs.append(tmp_subjdf)

decision_weight_df = pd.concat(subjdfs)

# sampling column must be capitalized to match with "tmp" df that we want
# to merge on this df
decision_weight_df["sampling"] = decision_weight_df["sampling"].str.capitalize()

decision_weight_df.head()

In [None]:
# get stopping and task factors back
_participants = pd.read_csv(op.join(BIDS_ROOT, "participants.tsv"), sep="\t")

# subject number = last two characters of the participant id; stopping is
# capitalized like the "sampling" column of decision_weight_df
tmp = pd.DataFrame(
    {
        "subject": _participants["participant_id"].map(lambda pid: int(pid[-2:])),
        "stopping": _participants["stopping"].str.capitalize(),
    }
)

decision_weight_df = decision_weight_df.merge(tmp, on=["subject"])

# task code = first letter of sampling + first letter of stopping, upper-cased
_task_initials = (
    decision_weight_df["sampling"].str[0] + decision_weight_df["stopping"].str[0]
)
decision_weight_df["task"] = _task_initials.str.upper()


decision_weight_df.head()

In [None]:
# subtract second from first --> first - second
first = "late"
second = "early"
colname = f"slope_{first}_minus_{second}"

# put the `first` and `second` rows of each subject/task side by side; all
# other shared columns receive a "_<timing>" suffix
_first_rows = decision_weight_df[decision_weight_df["timing"] == first]
_second_rows = decision_weight_df[decision_weight_df["timing"] == second]
tmp_dwdf = pd.merge(
    _first_rows,
    _second_rows,
    on=["subject", "task"],
    suffixes=(f"_{first}", f"_{second}"),
)

# the difference becomes the first column
_slope_difference = tmp_dwdf[f"slope_{first}"] - tmp_dwdf[f"slope_{second}"]
tmp_dwdf.insert(0, colname, _slope_difference)

# re-derive un-suffixed factor columns from the task code
tmp_dwdf["sampling"] = tmp_dwdf["task"].map(
    lambda task: "Active" if task.startswith("A") else "Yoked"
)
tmp_dwdf["stopping"] = tmp_dwdf["task"].map(
    lambda task: "Fixed" if task.endswith("F") else "Variable"
)

tmp_dwdf.head()

### correlate recency with numeracy

In [None]:
# mean slope difference per subject, attached to the numeracy data and then
# renamed to "recency"
_mean_recency = tmp_dwdf.groupby(["subject"])[colname].mean().reset_index()
bnt_recency_corr_df = bnt_acc_corr_df.merge(_mean_recency, on="subject").rename(
    columns={colname: "recency"}
)

In [None]:
# one-sided Kendall correlation between recency and the BNT quartile
_recency = bnt_recency_corr_df["recency"]
_bnt_quartile = bnt_recency_corr_df["bnt_quartile"]
_recency_bnt_corr = pingouin.correlation.corr(
    _recency, _bnt_quartile, tail="one-sided", method="kendall"
)
_recency_bnt_corr.round(3)

In [None]:
# save data for publication plots
# (the output directory is created if it does not exist yet)
plotdir = op.join(BIDS_ROOT, "code", "publication_plots")
os.makedirs(plotdir, exist_ok=True)

fname = op.join(plotdir, "beh_recency_logreg.csv")
tmp_dwdf.to_csv(fname, index=False)

In [None]:
# Plot the slope difference (`colname`) per sampling condition, split by
# stopping condition: point estimates, individual data points, and faint lines
# that connect data points across the two stopping conditions.
with sns.plotting_context("notebook", font_scale=1.3):
    fig, ax = plt.subplots()

    data = tmp_dwdf
    order = ["Active", "Yoked"]
    hue_order = ["Fixed", "Variable"]

    # point estimates with error bars (`ci=68`)
    sns.pointplot(
        x="sampling",
        y=colname,
        hue="stopping",
        data=data,
        dodge=True,
        ci=68,
        order=order,
        hue_order=hue_order,
        ax=ax,
        markers="o",
    )

    # individual data points, drawn on the same axes with the same ordering
    sns.swarmplot(
        x="sampling",
        y=colname,
        hue="stopping",
        data=data,
        order=order,
        hue_order=hue_order,
        ax=ax,
        dodge=True,
        size=3,
    )

    # add legend
    # https://matplotlib.org/3.1.1/gallery/text_labels_and_annotations/custom_legends.html
    from matplotlib.lines import Line2D

    # custom handles replace the legend entries of the two seaborn calls
    legend_elements = [
        Line2D([0], [0], color=sns.color_palette()[0], marker="o", label="Fixed"),
        Line2D([0], [0], color=sns.color_palette()[1], marker="o", label="Variable"),
    ]

    ax.legend(
        handles=legend_elements,
        loc="upper left",
        prop={"size": 13},
        framealpha=1,
    )

    ax.set_ylabel(f"slope difference\n{first}-{second}")

    # https://stackoverflow.com/a/51157346/5201771
    # 2-4 3-5
    # Connect the dots of two artists per sampling condition. The artists are
    # addressed by their position in `ax.get_children()`.
    # NOTE(review): indices 2-5 are presumably the swarmplot dot collections
    # (2 and 4 for order[0], 3 and 5 for order[1]); this depends on the order
    # in which the artists were added above - confirm when changing the plot.
    for i_dots, (idx0, idx1) in enumerate([(2, 4), (3, 5)]):
        # x/y positions of the drawn dots of both artists
        locs1 = ax.get_children()[idx0].get_offsets()
        locs2 = ax.get_children()[idx1].get_offsets()

        # Need to sort locs, so data corresponds
        # one argsort of the plotted values per stopping level, in `hue_order`
        sort_idxs_list = []
        sampling = order[i_dots]
        for stopping in hue_order:
            arr = data[(data["sampling"] == sampling) & (data["stopping"] == stopping)][
                colname
            ].to_numpy()
            sort_idxs_list += [np.argsort(arr)]

        # Reorder locs2 in two steps: indexing with the argsort of the second
        # argsort, then with the first argsort.
        # NOTE(review): this assumes the dot offsets are stored in ascending
        # value order, so that row i of locs1 and locs2_sorted stem from the
        # same row position within their groups - TODO confirm. The notes in
        # `get_log_reg_coef` describe stopping as a between factor, so the two
        # connected dots would belong to different subjects - confirm that
        # this pairing is intended.
        locs2_sorted = locs2[sort_idxs_list[1].argsort()][sort_idxs_list[0]]

        # one faint line per pair of dots
        for i in range(locs1.shape[0]):
            _x = [locs1[i, 0], locs2_sorted[i, 0]]
            _y = [locs1[i, 1], locs2_sorted[i, 1]]
            ax.plot(_x, _y, color="black", alpha=0.1)

    # reference line at a slope difference of zero
    ax.axhline(0, color="black")
    ax.set_title("Regression slopes over weights")

In [None]:
# mixed ANOVA on the slope difference: "sampling" as within factor and
# "stopping" as between factor
_anova_design = dict(
    dv=colname,
    within="sampling",
    between="stopping",
    subject="subject",
)
model = pingouin.mixed_anova(data=tmp_dwdf, **_anova_design)
model.round(3)

In [None]:
# Pairwise t-tests on the slope difference, Bonferroni-adjusted, with Cohen's
# d as the effect size.
# NOTE(review): the name `stats` is also bound by `from scipy import stats` at
# the top of the notebook; after this cell it refers to the results table
# instead - confirm that no later cell expects the scipy module under this
# name.
_posthoc_options = dict(
    dv=colname,
    within="sampling",
    between="stopping",
    subject="subject",
    padjust="bonf",
    within_first=False,
    effsize="cohen",
)
stats = pingouin.pairwise_ttests(data=tmp_dwdf, **_posthoc_options)

display(stats.round(3))