In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import mannwhitneyu as mwu
import seaborn as sns


In [2]:
# Measurements table; later cells select rows from it via its "id" column.
data = pd.read_csv("anon-data/participants.csv")

# Group membership files. Passing `names=["id"]` means the first line of each
# file is read as data (not as a header) and the single column is called "id".
control, experimental = (
    pd.read_csv(group_file, names=["id"])
    for group_file in ("anon-data/control.txt", "anon-data/experimental.txt")
)

In [3]:
# Flatten the "id" column of each membership frame into a plain Python list,
# ready to be passed to `Series.isin` when splitting the measurements.
control_list = control["id"].tolist()
experimental_list = experimental["id"].tolist()

In [4]:
# Boolean masks marking which rows of `data` belong to each group.
in_control = data["id"].isin(control_list)
in_experimental = data["id"].isin(experimental_list)

# Keep only the measurement columns; the id is no longer needed after the split.
data_control = data.loc[in_control].drop(columns="id")
data_experimental = data.loc[in_experimental].drop(columns="id")

In [5]:
def get_mean_std_mwu(ctrl, exp, colname):
    """Summarise one column for both groups and compare them with Mann-Whitney U.

    Parameters
    ----------
    ctrl, exp : pandas.DataFrame
        Control and experimental measurements; both must contain `colname`.
    colname : str
        Name of the column to summarise.

    Returns
    -------
    tuple
        ``(mean_ctrl, std_ctrl, mean_exp, std_exp, u_statistic, p_value)``.
        Means and standard deviations come from ``Series.mean`` /
        ``Series.std``; the last two entries are the two values returned by
        ``scipy.stats.mannwhitneyu(ctrl[colname], exp[colname])`` called with
        its default options.
    """
    control_col, experimental_col = ctrl[colname], exp[colname]

    # The result unpacks as (statistic, p-value), in that order.
    u_statistic, p_value = mwu(control_col, experimental_col)

    return (
        control_col.mean(),
        control_col.std(),
        experimental_col.mean(),
        experimental_col.std(),
        u_statistic,
        p_value,
    )


In [6]:
# Columns to summarise: total duration and context switches for each task.
colnames = [
    "Total Duration (Task 1)",
    "Total Duration (Task 2)",
    "Total Duration (Task 3)",
    "Context Switches (Task 1)",
    "Context Switches (Task 2)",
    "Context Switches (Task 3)",
]

# One line per column:
# (mean_ctrl, std_ctrl, mean_exp, std_exp, U statistic, p-value)
for colname in colnames:
    summary = get_mean_std_mwu(data_control, data_experimental, colname)
    print(summary)

(863.3333333333334, 518.226141113446, 653.0, 232.11419603290102, 6.0, 0.7)
(703.3333333333334, 432.9079963841432, 373.3333333333333, 110.86177579911542, 8.0, 0.2)
(328.3333333333333, 223.05231075542198, 328.6666666666667, 176.14293438379335, 5.0, 1.0)
(36.333333333333336, 24.337899115029078, 9.0, 6.0, 8.0, 0.2)
(25.666666666666668, 13.428824718989125, 7.333333333333333, 3.214550253664318, 9.0, 0.1)
(17.0, 12.0, 4.0, 2.0, 8.0, 0.2)


In [7]:
def plot_context(ctrl, exp, context, value_label=None):
    """Show a grouped box plot of one measurement across the three tasks.

    For the given `context`, the columns ``"<context> (Task 1)"``,
    ``"<context> (Task 2)"`` and ``"<context> (Task 3)"`` are taken from both
    frames, reshaped to long format, and drawn with task number on the x-axis
    and one box per group ("control" / "experimental") within each task.

    Parameters
    ----------
    ctrl, exp : pandas.DataFrame
        Control and experimental measurements. Both must contain the three
        task columns for `context`; a missing column raises ``KeyError``.
    context : str
        Column-name prefix of the measurement to plot, e.g. ``"VSCode"``.
    value_label : str, optional
        Label for the y-axis. When omitted, ``"Count"`` is used for
        ``"Context Switches"`` and ``"Time (seconds)"`` for every other
        context.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    if value_label is None:
        # NOTE(review): "Context Switches" values look like counts rather than
        # durations, so "Time (seconds)" would mislabel that plot. Confirm the
        # units of the other contexts against the data source.
        value_label = "Count" if context == "Context Switches" else "Time (seconds)"

    # Wide column name -> task number used on the x-axis.
    task_by_column = {f"{context} (Task {task})": task for task in (1, 2, 3)}

    # Tag every row with its group so both groups can share one frame.
    merged_data = pd.concat(
        [ctrl.assign(group="control"), exp.assign(group="experimental")]
    )

    # Wide -> long: one row per (participant, task) instead of looping over
    # rows and appending three records at a time.
    long_df = merged_data[[*task_by_column, "group"]].melt(
        id_vars=["group"], var_name="Task", value_name=value_label
    )
    long_df["Task"] = long_df["Task"].map(task_by_column)
    long_df = long_df.rename(columns={"group": "Group"})

    ax = sns.boxplot(y=value_label, x="Task", data=long_df, hue="Group")
    ax.set_title(f"In {context}")
    plt.show()

In [9]:
# Measurements to visualise; each entry is the prefix of three per-task columns.
contexts = [
    "VSCode",
    "Terminal",
    "Firefox",
    "Reading Instructions",
    "Context Switches",
    "Total Duration",
]

# One box-plot figure per measurement, control vs. experimental.
for context_name in contexts:
    plot_context(data_control, data_experimental, context_name)

Unnamed: 0,Context Switches (Task 1),Total Duration (Task 1),Reading Instructions (Task 1),Firefox (Task 1),Terminal (Task 1),VSCode (Task 1),File Manager (Task 1),File Editor (Task 1),Excluded (Task 1),Context Switches (Task 2),...,Excluded (Task 2),Context Switches (Task 3),Total Duration (Task 3),Reading Instructions (Task 3),Firefox (Task 3),Terminal (Task 3),VSCode (Task 3),File Manager (Task 3),File Editor (Task 3),Excluded (Task 3)
2,15,921,70,0,292,559,0,0,0,5,...,0,4,203,39,0,3,155,6,0,0
3,3,516,85,0,0,415,16,0,57,6,...,0,2,530,41,0,0,489,0,0,0
5,9,522,94,0,0,428,0,0,0,11,...,0,6,253,65,0,0,188,0,0,0
