In [15]:
import pandas as pd
import json
from pathlib import Path
from datetime import datetime

from ds import Participant, GazePath

In [22]:
# Names whose logs are dropped while loading participants (exact string match).
EXCLUDE_NAMES = [
    "Amogh Mannekote",
    "Mansi Singh",
    "Anviksha Sharma",
    "kmkm",
    "ash",
    "mansi",
    "hju bjh",
    "Sankalp Mathur",
]

In [23]:
# Build one Participant per JSON record found in ../name-emails/.
# Each record is read for its "name", "email_address" and "calibration_score"
# keys; the file stem is parsed as a float timestamp and becomes the start time.
participants = []
for record_path in Path("../name-emails/").glob("*.json"):
    # JSON is UTF-8 by specification, so do not depend on the platform default
    # encoding. The handle is closed as soon as the record has been parsed.
    with open(record_path, encoding="utf-8") as f:
        name_email = json.load(f)

    # Skip records without a calibration score and names on the exclusion list.
    if "calibration_score" not in name_email or name_email["name"] in EXCLUDE_NAMES:
        continue

    participants.append(Participant(
        name=name_email["name"],
        email=name_email["email_address"],
        # Keyword spelling ("calibaration") follows the Participant definition in ds.
        calibaration_quality=int(name_email["calibration_score"]),
        # NOTE(review): utcfromtimestamp() is deprecated since Python 3.12. It is
        # kept here because it yields the naive UTC datetimes that the rest of the
        # notebook (and the pickled output) currently contain.
        start_time=datetime.utcfromtimestamp(float(record_path.stem)),
    ))

In [24]:
# Sanity check: display the first two loaded participants.
# NOTE(review): the rendered output contains real names and e-mail addresses;
# clear it before sharing the notebook.
participants[:2]

[Participant(name='John Ng', email='johnng@ufl.edu', start_time=datetime.datetime(2022, 3, 24, 20, 58, 19, 116706), calibaration_quality=3, gaze_paths=[]),
 Participant(name='Mariana Molano', email='marianamolano@ufl.edu', start_time=datetime.datetime(2022, 3, 22, 17, 5, 41, 671388), calibaration_quality=3, gaze_paths=[])]

In [25]:
# Only retain the latest log for duplicate logs from a single name.
# Participants are visited newest-first, so the first record seen for a name is
# the one to keep; older records with the same (exactly matching) name are dropped.
filtered_participants = []
seen_names = set()  # O(1) membership test instead of rebuilding a name list per iteration
for p in sorted(participants, key=lambda p: p.start_time, reverse=True):
    if p.name in seen_names:
        continue
    seen_names.add(p.name)
    filtered_participants.append(p)
print(len(filtered_participants))
[(p.name, str(p.start_time)) for p in filtered_participants]

18


[('Heting Wang', '2022-03-25 21:03:46.248357'),
 ('Raghav Gupta', '2022-03-25 19:39:57.838126'),
 ('Eric Navar', '2022-03-25 19:02:59.850827'),
 ('Jayavidhi Kumar', '2022-03-25 18:22:16.811646'),
 ('Yingbo Ma', '2022-03-25 14:14:15.168916'),
 ('Jasmine McKenzie', '2022-03-25 13:01:20.479027'),
 ('John Ng', '2022-03-24 20:58:19.116706'),
 ('Patriel Stapleton', '2022-03-24 18:59:05.612296'),
 ('Amal Hashky', '2022-03-24 17:02:48.240804'),
 ('Katarina Jurczyk', '2022-03-23 20:59:18.731983'),
 ('Nishant Agrawal', '2022-03-22 19:45:52.168187'),
 ('Monica Bhargavi Kodali', '2022-03-22 18:52:09.250428'),
 ('Mariana Molano', '2022-03-22 17:05:41.671388'),
 ('Josh Abraham', '2022-03-22 16:06:29.154850'),
 ('Shaina Murphy', '2022-03-22 14:56:39.521831'),
 ('Hengxu You', '2022-03-21 21:28:04.134762'),
 ('jahnavi Paruchuri', '2022-03-21 21:14:20.903205'),
 ('Sri Chaitanya Nulu', '2022-03-21 18:56:43.324268')]

In [26]:
# Attach the recorded gaze paths to each retained participant.
# Gaze logs are read from ../gaze-paths/<participant email>/*.json; each log is
# read for its "timestamp", "dst", "turnsTimeSeries" and "prompt" keys.
for participant in filtered_participants:
    gaze_dir = Path("../gaze-paths") / participant.email
    print(participant.name, participant.email)

    # Start from an empty list so that re-running this cell does not append the
    # same gaze paths a second time to the already-populated objects.
    participant.gaze_paths.clear()

    # glob() yields entries in arbitrary order; sort for a reproducible ordering.
    for dialogue_file in sorted(gaze_dir.glob("*.json")):
        # Context manager closes the handle (the original open() call leaked it).
        with open(dialogue_file, "r", encoding="utf-8") as f:
            gaze_log = json.load(f)

        gaze_path = GazePath(
            submit_time=gaze_log["timestamp"],
            dst=gaze_log["dst"],
            # Keys are scaled by 1/1000 (presumably milliseconds -> seconds; confirm
            # against the logger) and values are cast to int.
            turns_time_series={
                float(timestamp) / 1000.0: int(turn)
                for timestamp, turn in gaze_log["turnsTimeSeries"].items()
            },
            prompt=gaze_log["prompt"]
        )
        participant.gaze_paths.append(gaze_path)

Heting Wang heting.wang@ufl.edu
Raghav Gupta gupta.raghav@ufl.edu
Eric Navar ericnavar@ufl.edu
Jayavidhi Kumar kumar.j@ufl.edu
Yingbo Ma yingbo.ma@ufl.edu
Jasmine McKenzie jasminemckenzie@ufl.edu
John Ng johnng@ufl.edu
Patriel Stapleton pstapleton@ufl.edu
Amal Hashky ahashky@ufl.edu
Katarina Jurczyk kjurczyk@ufl.edu
Nishant Agrawal nag2965@gmail.com
Monica Bhargavi Kodali mkodali@ufl.edu
Mariana Molano marianamolano@ufl.edu
Josh Abraham joshabraham@ufl.edu
Shaina Murphy shainanmurphy@ufl.edu
Hengxu You you.h@ufl.edu
jahnavi Paruchuri paruchurijahnavi@ufl.edu
Sri Chaitanya Nulu srichaitanyanulu@gmail.com


In [27]:
import pickle

# Persist the de-duplicated participants (with their gaze paths) to
# processed/participants.pkl for later use.
output_dir = Path("processed/")
# exist_ok avoids the separate exists() check and the race it implies.
output_dir.mkdir(parents=True, exist_ok=True)
# Context manager guarantees the pickle is flushed and the handle closed.
with open(output_dir / "participants.pkl", "wb") as f:
    pickle.dump(filtered_participants, f)