## Set Up Dependencies and Data


In [None]:
import joblib
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm


In [None]:
# Download the two experimental tables (slip and baseline) straight from osf.io.
# Both calls hit the network, so this cell needs connectivity on a fresh run.
slip_df = pd.read_csv(filepath_or_buffer="https://osf.io/ftb8m/download")
baseline_df = pd.read_csv(filepath_or_buffer="https://osf.io/zfkvm/download")


## Reproducibility


In [None]:
# Load the `watermark` IPython extension and print an environment report so the
# notebook's outputs can be tied to the software that produced them.
# NOTE(review): flag meanings below are taken from watermark's documentation --
# confirm against the installed version: -i ISO timestamp, -w watermark version,
# -b git branch, -m machine info, -u last-updated label, -v Python/IPython
# versions, -g git hash, -iv versions of the packages imported in this notebook.
%load_ext watermark
%watermark -iwbmuvg -iv


In [None]:
# Display a content hash of the freshly loaded slip table; comparing it across
# runs reveals whether the downloaded data has changed.
joblib.hash(slip_df)


In [None]:
# Display a content hash of the freshly loaded baseline table; comparing it
# across runs reveals whether the downloaded data has changed.
joblib.hash(baseline_df)


## Preprocess Data


In [None]:
# Tag every row with the experimental condition it came from, then stack the
# two tables row-wise. Row labels are carried over from the inputs as-is (no
# re-indexing), so the combined index contains repeated labels.
slip_df = slip_df.assign(condition="slip")
baseline_df = baseline_df.assign(condition="baseline")

df = pd.concat(objs=[slip_df, baseline_df])


In [None]:
def _parse_task_coding_sites(value):
    """Turn one serialized "Task Coding Sites" entry into a Python object.

    Strings are evaluated as Python expressions. Any other value is returned
    unchanged, so re-running this cell after the column has already been
    parsed is a no-op instead of a ``TypeError`` raised by ``eval``.
    """
    # NOTE(review): eval() will execute arbitrary code embedded in the
    # downloaded CSV. If the column only ever holds plain literals,
    # ast.literal_eval would be a safer drop-in -- confirm the serialized
    # format before switching.
    return eval(value) if isinstance(value, str) else value


df["Task Coding Sites"] = df["Task Coding Sites"].apply(_parse_task_coding_sites)


In [None]:
# The unnamed first CSV column holds comma-separated text; its first field is
# stored as the run identifier and its second field as the task.
label_parts = df["Unnamed: 0"].apply(lambda label: label.split(","))
df["Run ID"] = label_parts.apply(lambda parts: parts[0])
df["Task"] = label_parts.apply(lambda parts: parts[1])


In [None]:
# Break each genome into a list of its individual elements (one per site) so
# the column can be exploded into one row per site.
df["Genome Site"] = df["Genome"].apply(lambda genome: list(genome))


In [None]:
# Long ("tidy") layout: one row per genome site, with every other column
# repeated alongside it. Exploded rows keep the row label of their source row.
tidy_df = df.explode(column="Genome Site")
tidy_df


## Absolute Instruction Abundances


In [None]:
# Raw number of rows per genome-site value, with one bar per condition.
sns.countplot(
    x="Genome Site",
    hue="condition",
    data=tidy_df,
)


In [None]:
# Count how often each genome-site value occurs within every (run, condition)
# pair. The counts are laid out wide (one column per value) with fill_value=0
# so that a value absent from a run is recorded as an explicit zero. Without
# those zeros, any per-value average taken across runs would silently skip the
# runs where the value never occurs and come out too high.
site_counts = (
    tidy_df.groupby(["Run ID", "condition", "Genome Site"])
    .size()
    .unstack("Genome Site", fill_value=0)
)

# Convert counts to fractions of each run's total, so every row sums to 1.
site_fractions = site_counts.div(site_counts.sum(axis=1), axis=0)

# Back to long form. The value column keeps its label, the integer 0, so
# existing consumers of count_df (e.g. plots using y=0) keep working.
count_df = site_fractions.stack().reset_index()
count_df


## Relative Instruction Abundances


In [None]:
# Per-run fraction of each genome-site value (stored in the column labelled 0),
# aggregated across runs by seaborn and split by condition.
sns.barplot(
    x="Genome Site",
    y=0,
    hue="condition",
    data=count_df,
)
