In [99]:
import pandas as pd
import numpy as np
import altair as alt
import os
from os.path import join
import json
from altair_saver import save

In [132]:
## Demographics

# Read the pre-survey answers, expose the row number as a "user" column and give
# the two long experience questions short column names.
dem_df = (
    pd.read_csv(join("..", "User study pre-survey.csv"))
    .reset_index()
    .rename(columns={
        "index": "user",
        "How much experience do you have using Doodle?": "doodle_exp",
        "How much experience do you have using When2meet?": "when2meet_exp",
    })
)
# Turn the 0-based row number into a participant label: 0 -> "P1", 1 -> "P2", ...
dem_df["user"] = "P" + (dem_df["user"] + 1).astype(str)
dem_df

Unnamed: 0,user,Timestamp,What is your age?,What is your highest level of education?,How many meetings per week do you attend (on average)?,doodle_exp,when2meet_exp
0,P1,2022/11/04 7:07:26 PM EST,23,college,2,"Familiar, have filled out many polls","Familiar, have filled out many polls"
1,P2,2022/11/09 7:42:05 PM EST,25,college,3,"A little, have filled out a few polls","A little, have filled out a few polls"
2,P3,2022/11/21 7:10:17 PM EST,27,college,3,"Very familiar, have filled out and created man...","Very familiar, have filled out and created man..."
3,P4,2022/11/26 7:43:04 PM EST,22,college,5+,"None, have never heard of it","None, have never heard of it"
4,P5,2022/11/26 7:53:57 PM EST,56,graduate school,5+,"Very familiar, have filled out and created man...","None, have never heard of it"


In [157]:
# Familiarity answers ordered from least to most experienced. The same domain is
# used for every size and color channel so both panels encode a given answer
# identically.
exp_scale = alt.Scale(domain=[
    "None, have never heard of it",
    "A little, have filled out a few polls",
    "Familiar, have filled out many polls",
    "Very familiar, have filled out and created many polls",
])

# One circle per participant; size and color both encode Doodle familiarity.
doodle_chart = alt.Chart(dem_df).mark_circle().encode(
    y=alt.Y("user:N", title="Participant"),
    size=alt.Size("doodle_exp:O", scale=exp_scale, title="Familiarity level"),
    # The color channel is described with alt.Color (it was built with alt.Size).
    color=alt.Color("doodle_exp:O", scale=exp_scale, title="Familiarity level")
).properties(title="Doodle")

# Same layout for When2meet. The color channel now reads when2meet_exp; it used
# to read doodle_exp, so this panel's colors showed the Doodle answers.
when2meet_chart = alt.Chart(dem_df).mark_circle().encode(
    y=alt.Y("user:N", title=None),
    # labelLimit=500 keeps the long answer texts from being truncated in the legend.
    size=alt.Size("when2meet_exp:O", scale=exp_scale, title="Familiarity level", legend=alt.Legend(labelLimit=500)),
    color=alt.Color("when2meet_exp:O", scale=exp_scale, title="Familiarity level")
).properties(title="When2meet")

chart = (doodle_chart | when2meet_chart).properties(title="Participant familiarity")
chart.save(join("plots", "participant_familiarity.html"))
chart

In [33]:
# Participant identifiers P1..P5; each one is also a folder name passed to load_file.
users = [f"P{number}" for number in range(1, 6)]

In [34]:
# Directory that contains one sub-folder per participant (see load_file).
DATA_DIR = ".."

In [249]:
def load_file(u, app, task):
    """Load a participant's JSON log whose file name starts with ``{app}_{task}``.

    Parameters
    ----------
    u : str
        Name of the participant's folder inside ``DATA_DIR``.
    app : str
        First part of the file-name prefix (the callers pass 'when2meet' or 'doodle').
    task : str
        Second part of the file-name prefix (the callers pass 'T1' or 'T2').

    Returns
    -------
    The decoded JSON content of the first matching file, in sorted name order.

    Raises
    ------
    FileNotFoundError
        If the folder contains no file with the requested prefix. Previously the
        function fell through and returned None, which only surfaced later as an
        unrelated TypeError in the caller.
    """
    user_dir = join(DATA_DIR, u)
    prefix = f"{app}_{task}"
    # os.listdir gives entries in arbitrary order; sorting makes the choice
    # deterministic when several files share the prefix.
    for file_name in sorted(os.listdir(user_dir)):
        if file_name.startswith(prefix):
            # JSON is UTF-8 by specification; do not rely on the platform default.
            with open(join(user_dir, file_name), encoding="utf-8") as f:
                return json.load(f)
    raise FileNotFoundError(f"No file starting with {prefix!r} found in {user_dir!r}")

In [250]:
def append_data(df, u, d, acc):
    """Return ``df`` with one extra row describing the task log ``d``.

    ``DataFrame.append`` is deprecated and was removed in pandas 2.0, so the row
    is added with ``pd.concat`` instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Results collected so far; it is not modified.
    u : str
        Participant identifier stored in the 'user' column.
    d : dict
        Decoded task log. Reads the keys 'app', 'task', 'time', 'timeUnit' and
        'timeString', plus whatever ``get_num_selections`` reads.
    acc : number
        Accuracy value stored in the 'accuracy' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index.
    """
    row = pd.DataFrame([{
        'user': u,
        'app': d['app'],
        # Task strings look like "T1_WA": first two characters are the task,
        # everything after the separator is the week code.
        'task': d['task'][:2],
        'week': d['task'][3:],
        'time': d['time'],
        'timeUnit': d['timeUnit'],
        'timeString': d['timeString'],
        'accuracy': acc,
        'num_selections': get_num_selections(d)
    }])
    if len(df.index) == 0:
        # Concatenating with an empty frame triggers a dtype FutureWarning in
        # recent pandas; start from the row itself, keeping df's column order
        # first and any new columns after it (the column union append produced).
        new_columns = [c for c in row.columns if c not in df.columns]
        return row.reindex(columns=list(df.columns) + new_columns)
    return pd.concat([df, row], ignore_index=True)

In [251]:
# Expected Doodle answers, one entry per poll option in option order.
# compare_doodle checks entry i against selections[i]["you"]; get_num_selections
# counts a value of 1 as a selected option.
doodle_w1_ground_truth = [1, 1, 1, 1, 0, 1]
doodle_w2_ground_truth = [1, 0, 1, 0, 0, 1]

# Expected When2meet slots as timestamp strings in half-hour steps.
# NOTE(review): the 2023-01-03 slots (14:00-21:30) are listed twice below. The
# duplicates do not change the score because compare_when2meet converts the list
# to a set, but confirm the second run was not meant to be different slots.
when2meet_w1_ground_truth = ["2023-01-02T14:00:00.000Z","2023-01-02T14:30:00.000Z","2023-01-02T20:00:00.000Z","2023-01-02T20:30:00.000Z","2023-01-02T21:00:00.000Z","2023-01-02T21:30:00.000Z","2023-01-03T14:00:00.000Z","2023-01-03T14:30:00.000Z","2023-01-03T15:00:00.000Z","2023-01-03T15:30:00.000Z","2023-01-03T16:00:00.000Z","2023-01-03T16:30:00.000Z","2023-01-03T17:00:00.000Z","2023-01-03T17:30:00.000Z","2023-01-03T18:00:00.000Z","2023-01-03T18:30:00.000Z","2023-01-03T19:00:00.000Z","2023-01-03T19:30:00.000Z","2023-01-03T20:00:00.000Z","2023-01-03T20:30:00.000Z","2023-01-03T21:00:00.000Z","2023-01-03T21:30:00.000Z","2023-01-03T14:00:00.000Z","2023-01-03T14:30:00.000Z","2023-01-03T15:00:00.000Z","2023-01-03T15:30:00.000Z","2023-01-03T16:00:00.000Z","2023-01-03T16:30:00.000Z","2023-01-03T17:00:00.000Z","2023-01-03T17:30:00.000Z","2023-01-03T18:00:00.000Z","2023-01-03T18:30:00.000Z","2023-01-03T19:00:00.000Z","2023-01-03T19:30:00.000Z","2023-01-03T20:00:00.000Z","2023-01-03T20:30:00.000Z","2023-01-03T21:00:00.000Z","2023-01-03T21:30:00.000Z","2023-01-04T14:00:00.000Z","2023-01-04T14:30:00.000Z","2023-01-04T15:00:00.000Z","2023-01-04T15:30:00.000Z","2023-01-04T16:00:00.000Z","2023-01-04T16:30:00.000Z","2023-01-04T17:00:00.000Z","2023-01-04T17:30:00.000Z","2023-01-04T18:00:00.000Z","2023-01-04T18:30:00.000Z","2023-01-04T19:00:00.000Z","2023-01-04T19:30:00.000Z","2023-01-04T20:00:00.000Z","2023-01-04T20:30:00.000Z","2023-01-04T21:00:00.000Z","2023-01-04T21:30:00.000Z","2023-01-05T14:00:00.000Z","2023-01-05T14:30:00.000Z","2023-01-05T15:00:00.000Z","2023-01-05T15:30:00.000Z","2023-01-05T18:00:00.000Z","2023-01-05T18:30:00.000Z","2023-01-05T19:00:00.000Z","2023-01-05T19:30:00.000Z","2023-01-05T20:00:00.000Z","2023-01-05T20:30:00.000Z","2023-01-05T21:00:00.000Z","2023-01-05T21:30:00.000Z","2023-01-06T14:00:00.000Z","2023-01-06T14:30:00.000Z","2023-01-06T15:00:00.000Z","2023-01-06T15:30:00.000Z","2023-01-06T20:00:00.000Z","2023-01-06T20:30:00.000Z","2023-01-06T21:00:00.000Z",
"2023-01-06T21:30:00.000Z"]
when2meet_w2_ground_truth = ["2023-01-09T15:00:00.000Z","2023-01-09T15:30:00.000Z","2023-01-09T16:00:00.000Z","2023-01-09T16:30:00.000Z","2023-01-09T17:00:00.000Z","2023-01-09T17:30:00.000Z","2023-01-09T18:00:00.000Z","2023-01-09T18:30:00.000Z","2023-01-09T19:00:00.000Z","2023-01-09T19:30:00.000Z","2023-01-09T20:00:00.000Z","2023-01-09T20:30:00.000Z","2023-01-09T21:00:00.000Z","2023-01-09T21:30:00.000Z","2023-01-10T14:00:00.000Z","2023-01-10T14:30:00.000Z","2023-01-10T19:00:00.000Z","2023-01-10T19:30:00.000Z","2023-01-10T20:00:00.000Z","2023-01-10T20:30:00.000Z","2023-01-10T21:00:00.000Z","2023-01-10T21:30:00.000Z","2023-01-11T16:30:00.000Z","2023-01-11T17:00:00.000Z","2023-01-11T17:30:00.000Z","2023-01-11T18:00:00.000Z","2023-01-11T18:30:00.000Z","2023-01-11T19:00:00.000Z","2023-01-11T19:30:00.000Z","2023-01-11T20:00:00.000Z","2023-01-11T20:30:00.000Z","2023-01-11T21:00:00.000Z","2023-01-11T21:30:00.000Z","2023-01-12T14:00:00.000Z","2023-01-12T14:30:00.000Z","2023-01-12T15:00:00.000Z","2023-01-12T15:30:00.000Z","2023-01-12T16:00:00.000Z","2023-01-12T16:30:00.000Z","2023-01-12T17:00:00.000Z","2023-01-12T17:30:00.000Z","2023-01-12T18:00:00.000Z","2023-01-12T18:30:00.000Z","2023-01-12T21:00:00.000Z","2023-01-12T21:30:00.000Z","2023-01-13T14:00:00.000Z","2023-01-13T14:30:00.000Z","2023-01-13T15:00:00.000Z","2023-01-13T15:30:00.000Z","2023-01-13T16:00:00.000Z","2023-01-13T16:30:00.000Z","2023-01-13T17:00:00.000Z","2023-01-13T17:30:00.000Z","2023-01-13T18:00:00.000Z","2023-01-13T18:30:00.000Z","2023-01-13T19:00:00.000Z","2023-01-13T19:30:00.000Z","2023-01-13T20:00:00.000Z","2023-01-13T20:30:00.000Z","2023-01-13T21:00:00.000Z","2023-01-13T21:30:00.000Z"]

In [252]:
def get_num_selections(d):
    """Return how many selections the task log ``d`` contains.

    For a 'when2meet' log this is the length of ``d["selections"]``; for a
    'doodle' log it is the number of entries whose "you" value equals 1. Any
    other app yields 0.
    """
    app_name = d['app']
    if app_name == "when2meet":
        return len(d["selections"])
    if app_name == "doodle":
        return sum(1 for option in d["selections"] if option["you"] == 1)
    return 0

In [253]:
def get_ground_truth(d):
    """Pick the ground-truth list that matches the log's app and task.

    Only the tasks "T1_WA" and "T1_WB" of the apps "doodle" and "when2meet"
    have a ground truth; every other combination returns None.
    """
    app_name = d['app']
    if app_name == "doodle":
        task_name = d["task"]
        if task_name == "T1_WA":
            return doodle_w1_ground_truth
        if task_name == "T1_WB":
            return doodle_w2_ground_truth
    elif app_name == "when2meet":
        task_name = d["task"]
        if task_name == "T1_WA":
            return when2meet_w1_ground_truth
        if task_name == "T1_WB":
            return when2meet_w2_ground_truth
    return None

In [254]:
def compare_doodle(ud):
    """Return the fraction of ground-truth entries the Doodle log ``ud`` matches.

    Entry ``i`` of the ground truth is compared with ``ud["selections"][i]["you"]``;
    the result is the number of equal positions divided by the ground-truth length.
    """
    expected = get_ground_truth(ud)
    matches = sum(
        1
        for position, wanted in enumerate(expected)
        if ud["selections"][position]["you"] == wanted
    )
    return matches / len(expected)

In [255]:
def compare_when2meet(ud):
    """Return the Jaccard similarity between selected and ground-truth slots.

    Both lists are reduced to sets, so repeated entries count once; the score is
    the size of the intersection divided by the size of the union.
    """
    expected_slots = set(get_ground_truth(ud))
    chosen_slots = set(ud["selections"])
    return len(expected_slots & chosen_slots) / len(expected_slots | chosen_slots)

In [256]:
# Collect one row per participant, app and task from the recorded logs.
df = pd.DataFrame(
    index=[],
    data=[],
    columns=['user', 'app', 'task', 'week', 'time', 'timeUnit', 'timeString'],
)
for u in users:
    when2meet_t1 = load_file(u, 'when2meet', 'T1')
    when2meet_t2 = load_file(u, 'when2meet', 'T2')
    doodle_t1 = load_file(u, 'doodle', 'T1')
    doodle_t2 = load_file(u, 'doodle', 'T2')

    # Only the T1 logs are scored; the T2 rows get a placeholder accuracy of 0.
    when2meet_acc = compare_when2meet(when2meet_t1)
    doodle_acc = compare_doodle(doodle_t1)

    for log, acc in (
        (when2meet_t1, when2meet_acc),
        (when2meet_t2, 0),
        (doodle_t1, doodle_acc),
        (doodle_t2, 0),
    ):
        df = append_data(df, u, log, acc)

    print("doodle", doodle_acc)
    print("when2meet", when2meet_acc)

# Relabel the week code: "WA" becomes "W1", anything else becomes "W2".
df['week'] = df['week'].map(lambda code: "W1" if code == "WA" else "W2")

# Convert ms to seconds
df['time_s'] = df['time'].div(1000.0)

doodle 1.0
when2meet 1.0
doodle 1.0
when2meet 0.4482758620689655
doodle 1.0
when2meet 0.9508196721311475
doodle 1.0
when2meet 0.4482758620689655
doodle 1.0
when2meet 0.8688524590163934


  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({


In [257]:
# Split the combined results into one frame per task.
t1_df, t2_df = (df.loc[df['task'] == task_code] for task_code in ('T1', 'T2'))

In [258]:
# Display the Task 1 rows.
t1_df

Unnamed: 0,user,app,task,week,time,timeUnit,timeString,accuracy,num_selections,time_s
0,P1,when2meet,T1,W2,44858,ms,44 seconds,1.0,61.0,44.858
2,P1,doodle,T1,W1,38701,ms,38 seconds,1.0,5.0,38.701
4,P2,when2meet,T1,W1,117129,ms,117 seconds,0.448276,26.0,117.129
6,P2,doodle,T1,W2,82263,ms,82 seconds,1.0,3.0,82.263
8,P3,when2meet,T1,W2,51828,ms,51 seconds,0.95082,58.0,51.828
10,P3,doodle,T1,W1,81903,ms,81 seconds,1.0,5.0,81.903
12,P4,when2meet,T1,W1,67774,ms,67 seconds,0.448276,26.0,67.774
14,P4,doodle,T1,W2,45774,ms,45 seconds,1.0,3.0,45.774
16,P5,when2meet,T1,W2,109249,ms,109 seconds,0.868852,59.0,109.249
18,P5,doodle,T1,W1,44088,ms,44 seconds,1.0,5.0,44.088


In [259]:
# Bar chart of completion time per participant, one panel per task, colored by app.
alt.Chart(df, title='Task completion time').mark_bar().encode(
    alt.X('user:N', title='Participant'),
    alt.Y('time_s:Q', title='Time (s)'),
    alt.Column('task:N', title='Task'),
    alt.Color('app:N', title='App'),
)

  for col_name, dtype in df.dtypes.iteritems():


In [260]:
def _task_time_bars(task_df, chart_title):
    """Bar chart of completion time per participant, one panel per app."""
    return alt.Chart(task_df).mark_bar().encode(
        x=alt.X('user:N', title='Participant'),
        # Fixed 0-120 s axis so the two task charts can be compared side by side.
        y=alt.Y('time_s:Q', title='Time (s)', scale=alt.Scale(domain=[0, 120])),
        column=alt.Column('app:N', title='App'),
        color=alt.Color('user:N', title='User')
    ).properties(
        title=chart_title
    )


t1_chart = _task_time_bars(t1_df, 'Task 1 completion time')
t2_chart = _task_time_bars(t2_df, 'Task 2 completion time')
t1_chart | t2_chart

In [261]:
def _task_time_boxplot(task_df, task_label):
    """Min-max box plot of completion time, one panel per app."""
    # App labels are placed below each panel and rotated.
    app_header = alt.Header(orient='bottom', labelAngle=-80, labelAnchor='end', labelBaseline='bottom')
    return alt.Chart(task_df).mark_boxplot(extent='min-max').encode(
        # Fixed 0-120 s axis so both task panels share the same range.
        y=alt.Y('time_s:Q', title='Time (s)', scale=alt.Scale(domain=[0, 120])),
        column=alt.Column('app:N', title='App', header=app_header),
        color=alt.Color('app:N', title='App')
    ).properties(
        title=alt.TitleParams(text=task_label, anchor='middle', align='center')
    )


t1_chart = _task_time_boxplot(t1_df, 'Task 1')
t2_chart = _task_time_boxplot(t2_df, 'Task 2')
chart = (t1_chart | t2_chart).properties(title=alt.TitleParams(text='Distribution of task completion times', anchor='start'))
chart.save(join("plots", "task_time_distribution.html"))
chart

In [262]:
# Join the Task 1 and Task 2 rows of each participant (overlapping columns get
# pandas' default _x/_y suffixes).
# NOTE(review): 'user' is the only key, so every Task 1 row of a participant is
# paired with every Task 2 row of that participant - confirm this is intended.
merged_df = pd.merge(t1_df, t2_df, on='user')

In [263]:
# One line per participant connecting their times in the two apps; one panel per task.
chart = alt.Chart(
    df, title='Participant-level task completion times by task'
).mark_line(point=True).encode(
    alt.X('app:N', title='App'),
    alt.Y('time_s:Q', title='Time (s)'),
    alt.Color('user:N', title='Participant'),
    alt.Column('task:N', title=None),
)
chart.save(join("plots", "participant_task_times_by_task.html"))
chart

  for col_name, dtype in df.dtypes.iteritems():


In [264]:
# One line per participant connecting their times in the two tasks; one panel per app.
chart = alt.Chart(
    df, title='Participant-level task completion times by interface'
).mark_line(point=True).encode(
    alt.X('task:O', title='Task'),
    alt.Y('time_s:Q', title='Time (s)'),
    alt.Color('user:N', title='Participant'),
    alt.Column('app:N', title=None),
)
chart.save(join("plots", "participant_task_times_by_interface.html"))
chart

In [283]:
# Task 1 rows recorded with When2meet in week W2.
t1_df.loc[t1_df["app"].eq("when2meet") & t1_df["week"].eq("W2")]

Unnamed: 0,user,app,task,week,time,timeUnit,timeString,accuracy,num_selections,time_s
0,P1,when2meet,T1,W2,44858,ms,44 seconds,1.0,61.0,44.858
8,P3,when2meet,T1,W2,51828,ms,51 seconds,0.95082,58.0,51.828
16,P5,when2meet,T1,W2,109249,ms,109 seconds,0.868852,59.0,109.249


In [265]:
# Display the Task 1 rows (same frame as shown earlier in the notebook).
t1_df

Unnamed: 0,user,app,task,week,time,timeUnit,timeString,accuracy,num_selections,time_s
0,P1,when2meet,T1,W2,44858,ms,44 seconds,1.0,61.0,44.858
2,P1,doodle,T1,W1,38701,ms,38 seconds,1.0,5.0,38.701
4,P2,when2meet,T1,W1,117129,ms,117 seconds,0.448276,26.0,117.129
6,P2,doodle,T1,W2,82263,ms,82 seconds,1.0,3.0,82.263
8,P3,when2meet,T1,W2,51828,ms,51 seconds,0.95082,58.0,51.828
10,P3,doodle,T1,W1,81903,ms,81 seconds,1.0,5.0,81.903
12,P4,when2meet,T1,W1,67774,ms,67 seconds,0.448276,26.0,67.774
14,P4,doodle,T1,W2,45774,ms,45 seconds,1.0,3.0,45.774
16,P5,when2meet,T1,W2,109249,ms,109 seconds,0.868852,59.0,109.249
18,P5,doodle,T1,W1,44088,ms,44 seconds,1.0,5.0,44.088


In [266]:
# Task 1 accuracy per participant, one panel per app, colored by week.
chart = alt.Chart(t1_df, title="Accuracy by interface").mark_circle(size=100).encode(
    alt.X("user:N", title="Participant"),
    alt.Y("accuracy:Q", title="Accuracy"),
    alt.Column("app:N", title=None),
    alt.Color("week:N", title="Week"),
)
chart.save(join("plots", "accuracy_by_app.html"))
chart

  for col_name, dtype in df.dtypes.iteritems():


In [273]:
# Task 1 accuracy against completion time, colored by week.
chart = alt.Chart(t1_df, title="Accuracy by time").mark_circle(size=100).encode(
    alt.X("time_s:Q", title="Time (s)"),
    alt.Y("accuracy:Q", title="Accuracy"),
    alt.Color("week:N", title="Week"),
)
chart.save(join("plots", "accuracy_by_time.html"))
chart

In [276]:
# Task 2 number of selections against completion time, colored by app.
chart = alt.Chart(t2_df, title="Number of selections by time").mark_circle(size=100).encode(
    alt.X("time_s:Q", title="Time (s)"),
    alt.Y("num_selections:Q", title="Number of selections"),
    alt.Color("app:N", title="App"),
)
chart.save(join("plots", "num_selections_by_time.html"))
chart

In [267]:
# Display the Task 2 rows; their accuracy is the placeholder 0 assigned when the
# rows were built, not a measured score.
t2_df

Unnamed: 0,user,app,task,week,time,timeUnit,timeString,accuracy,num_selections,time_s
1,P1,when2meet,T2,W2,47341,ms,47 seconds,0.0,65.0,47.341
3,P1,doodle,T2,W1,36690,ms,36 seconds,0.0,2.0,36.69
5,P2,when2meet,T2,W1,57733,ms,57 seconds,0.0,10.0,57.733
7,P2,doodle,T2,W2,71448,ms,71 seconds,0.0,3.0,71.448
9,P3,when2meet,T2,W2,5467,ms,5 seconds,0.0,32.0,5.467
11,P3,doodle,T2,W1,14387,ms,14 seconds,0.0,3.0,14.387
13,P4,when2meet,T2,W1,43885,ms,43 seconds,0.0,13.0,43.885
15,P4,doodle,T2,W2,56829,ms,56 seconds,0.0,2.0,56.829
17,P5,when2meet,T2,W2,81642,ms,81 seconds,0.0,20.0,81.642
19,P5,doodle,T2,W1,41627,ms,41 seconds,0.0,2.0,41.627
