We need the iso8601 module, unfortunately, to do robust date parsing.

In [14]:
import collections
import csv
import datetime
import json
import glob
import os
import codecs

import iso8601

Specify the folder in which the JSON log files are kept. They will be read and concatenated.

In [15]:
# Directory that holds the JSON log files to analyse.
folder = "userlogs/jeremy"
# Accumulator for the parsed log events; starts out empty.
events = list()

In [34]:
# Collect every JSON log in the folder, in sorted order so that the events
# are concatenated deterministically.
filenames = sorted(glob.glob(os.path.join(folder, "*.json")))

# Start from an empty list: this cell appends, so without the reset every
# re-run would add a second copy of all events to the ones already loaded.
events = []

for filename in filenames:
    # "utf-8-sig" drops a leading byte-order mark when the file has one.
    with open(filename, encoding="utf-8-sig") as f:
        # Add 'static' if file was downloaded locally, not generated by the node.js server.
        # NOTE: the check looks at the whole path, so a folder whose name
        # contains "static" matches as well.
        if "static" in filename:
            # Static logs are one JSON document; items 0 and 1 of each entry
            # are stored under the string keys "0" and "1" so these events
            # can be indexed the same way as the ones read line by line.
            for e in json.load(f):
                events.append({"0": e[0], "1": e[1]})
        else:
            # Server logs hold one JSON document per line.
            for line in f:
                # A blank line (for example a trailing newline at the end of
                # the file) is not valid JSON; skip it instead of crashing.
                if line.strip():
                    events.append(json.loads(line))

In [35]:
# Keep only the events that mark the start or the end of a task.
tasks = list(filter(lambda entry: entry["0"] in ("startTask", "endTask"), events))

In [36]:
# Show which log files were picked up, in the order they were read.
print(filenames)

['userlogs/jeremy/static_log_1505995872926.json']


We need to synthesize an endTask for the last level, since normally this is only recorded on level transitions.

In [37]:
# Quest id of the most recent task event, or None when the logs contain no
# startTask/endTask events at all (indexing tasks[-1] would raise IndexError).
last_level = tasks[-1]["1"]["quest_id"] if tasks else None

# Only synthesize an endTask when the last recorded task event is not one already.
if tasks and tasks[-1]["0"] != "endTask":
    # Walk the log backwards: the first match is the latest event recorded
    # for the last level, and it stands in for the missing endTask.
    for event in reversed(events):
        payload = event.get("1")
        # Payloads that are not JSON objects cannot carry a quest_id; skip
        # them rather than failing on the .get() call.
        if isinstance(payload, dict) and payload.get("quest_id") == last_level:
            # Shallow copy: only the event name is replaced, the payload
            # object stays shared with the original event.
            fake_task = event.copy()
            fake_task["0"] = "endTask"
            tasks.append(fake_task)
            break

In [48]:
def _event_time(task):
    """Return the time at which `task` was logged, as a timezone-aware datetime.

    The top-level "timestamp" string is used when the event has one;
    otherwise the payload's "client_timestamp" is divided by 1000 and read
    as seconds since the Unix epoch.

    iso8601.parse_date returns timezone-aware datetimes (per the library's
    documentation, UTC when the string has no offset), so the client
    timestamp is converted as an aware UTC value too. Subtracting an aware
    from a naive datetime raises TypeError, and naive local times give wrong
    durations across daylight-saving changes.
    """
    if "timestamp" in task:
        return iso8601.parse_date(task["timestamp"])
    return datetime.datetime.fromtimestamp(
        task["1"]["client_timestamp"] / 1000, tz=datetime.timezone.utc
    )


# quest_id -> list of durations in seconds, one per matched start/end pair.
completion_times = collections.defaultdict(list)
# quest_id -> start time of the task that is currently open for that quest.
start_times = {}
for task in tasks:
    if task["0"] == "startTask":
        quest_id = task["1"]["quest_id"]
        if quest_id in start_times:
            # A second start without an end in between: the later start wins.
            print("Task", quest_id, "already has a start time.")
        start_times[quest_id] = _event_time(task)
    elif task["0"] == "endTask":
        quest_id = task["1"]["quest_id"]
        start_time = start_times.get(quest_id)
        if start_time is None:
            print("WARNING: Level", quest_id, "does not have a start time.")
            continue
        end_time = _event_time(task)
        completion_times[quest_id].append((end_time - start_time).total_seconds())
        # Close the task so that a later start for the same quest is not
        # reported as a duplicate.
        del start_times[quest_id]

In [49]:
# Total seconds per level: the sum of all durations recorded for that level.
total_times = {level_id: sum(times) for level_id, times in completion_times.items()}

In [52]:
# Print each level's total time in seconds, one value per line.
for seconds in total_times.values():
    print(seconds)

22.549
25.471
13.929
9.821
10.269
9.473
21.622
17.222
6.376
8.747
11.582
4.838
8.348
14.088
8.336
17.21
22.758
20.169
26.13
18.979
18.273
15.985
66.249
26.567
9.544
11.307
14.613
43.370999999999995
197.226
223.499
106.986
22.748
20.448999999999998
11.706
17.297
28.541
16.748
26.59
54.178
19.161
27.962
16.015
24.868
89.51599999999999
160.595
44.7
174.756
8.435
61.248999999999995
27.399
7.937
22.567
13.734
35.088
25.225
119.84200000000001
28.166
35.283
21.559
20.344
77.842


In [None]:
# The summary is written next to the input logs.
output_path = os.path.join(folder, "output.csv")
# newline="" lets the csv writer control line endings itself, as the csv
# module documentation requires; without it, platforms that translate "\n"
# on write (Windows) end every row with "\r\r\n".
with open(output_path, "w", newline="") as output_file:
    writer = csv.writer(output_file)
    # One row per level, ordered by level id: (level id, seconds with two decimals).
    for level_id, total_seconds in sorted(total_times.items(), key=lambda x: x[0]):
        writer.writerow((level_id, "{:.02f}".format(total_seconds)))

In [None]:
# The FileLink object is this cell's last expression, so the notebook displays
# it as the cell output; it points at the CSV written by the previous cell.
from IPython.display import FileLink
FileLink(output_path)