We need the iso8601 module, unfortunately, to do robust date parsing.

In [2]:
import collections
import csv
import datetime
import json
import glob
import os

import iso8601

Specify the folder in which the JSON log files are kept. They will be read and concatenated.

In [3]:
folder = "./p7"
events = []

In [4]:
filenames = glob.glob(os.path.join(folder, "*.json"))
filenames.sort()

for filename in filenames:
    with open(filename) as f:
        for line in f:
            events.append(json.loads(line))

In [5]:
tasks = [event for event in events if event["0"] in ("startTask", "endTask")]

We need to synthesize an endTask for the last level, since normally this is only recorded on level transitions.

In [6]:
last_level = tasks[-1]["1"]["quest_id"]
if tasks[-1]["0"] != "endTask":
    for event in events[::-1]:
        if event["1"].get("quest_id") == last_level:
            fake_task = event.copy()
            fake_task["0"] = "endTask"
            tasks.append(fake_task)
            break

IndexError: list index out of range

In [None]:
completion_times = collections.defaultdict(list)
start_times = {}
for task in tasks:
    if task["0"] == "startTask":
        quest_id = task["1"]["quest_id"]
        if quest_id in start_times:
            print("Task", quest_id, "already has a start time.")
        start_times[quest_id] = iso8601.parse_date(task["timestamp"])
    elif task["0"] == "endTask":
        quest_id = task["1"]["quest_id"]
        start_time = start_times.get(quest_id)
        if not start_time:
            print("WARNING: Level", quest_id, "does not have a start time.")
            continue
        end_time = iso8601.parse_date(task["timestamp"])
        completion_times[quest_id].append((end_time - start_time).total_seconds())
        del start_times[quest_id]

In [None]:
total_times = {}
for level_id, times in completion_times.items():
    total_times[level_id] = sum(times)

In [None]:
total_times

In [None]:
output_path = os.path.join(folder, "output.csv")
with open(output_path, "w") as output_file:
    writer = csv.writer(output_file)
    for level_id, total_seconds in sorted(total_times.items(), key=lambda x: x[0]):
        writer.writerow((level_id, "{:.02f}".format(total_seconds)))

In [None]:
from IPython.display import FileLink
FileLink(output_path)