Unfortunately, robust date parsing requires the third-party `iso8601` module, so it must be installed before running this notebook.

In [16]:
import collections
import csv
import datetime
import json
import glob
import os
import codecs

import iso8601

Specify the folder in which the JSON log files are kept. They will be read and concatenated.

In [17]:
# Directory that is searched for the "*.json" log files.
folder = "userlogs/lauren"
# Accumulator for every event read from those files, in load order.
events = list()

In [18]:
# Collect the log files in sorted order so that events from several files
# are always concatenated in the same sequence.
filenames = sorted(glob.glob(os.path.join(folder, "*.json")))

# Start from an empty list so that re-running this cell does not append the
# same events a second time.
events = []

for filename in filenames:
    # Decide the format from the file name alone: testing the whole path would
    # misclassify every file as soon as a directory name contains "static".
    is_static = "static" in os.path.basename(filename)
    # "utf-8-sig" drops a leading byte-order mark if the file has one.
    with open(filename, encoding="utf-8-sig") as f:
        if is_static:
            # Name contains 'static': the file was downloaded locally, not
            # generated by the node.js server. It is a single JSON array whose
            # entries are indexable; only items 0 and 1 are kept, re-keyed as
            # "0" and "1" to match the shape of the server-generated events.
            for e in json.load(f):
                events.append({"0": e[0], "1": e[1]})
        else:
            # Server-generated log: one JSON document per line.
            for line in f:
                # Skip blank lines (e.g. a trailing newline at end of file),
                # which json.loads would reject.
                if line.strip():
                    events.append(json.loads(line))

In [19]:
# Keep only the events whose type (key "0") marks the start or end of a task.
tasks = list(filter(lambda logged: logged["0"] in ("startTask", "endTask"), events))

In [20]:
# Show which log files were found (and therefore loaded) as a sanity check.
print(filenames)

['userlogs/lauren/static_log_1507229018171.json']


We need to synthesize an endTask for the last level, since normally this is only recorded on level transitions.

In [21]:
# Level of the most recent task event, or None when the logs contain no
# startTask/endTask events at all (tasks[-1] would raise IndexError then).
last_level = tasks[-1]["1"]["quest_id"] if tasks else None

if tasks and tasks[-1]["0"] != "endTask":
    # The final level was started but never closed. Walk the log backwards and
    # reuse the most recent event recorded for that level as its endTask.
    for event in reversed(events):
        payload = event["1"]
        # Only dict payloads can carry a quest_id; anything else is skipped
        # instead of failing on the missing .get method.
        if isinstance(payload, dict) and payload.get("quest_id") == last_level:
            # Shallow copy: only the event type is replaced, the payload (and
            # with it the timestamp information) is shared with the original.
            fake_task = event.copy()
            fake_task["0"] = "endTask"
            tasks.append(fake_task)
            break

In [22]:
def _event_time(task):
    """Return the time at which `task` was logged as a timezone-aware datetime.

    A top-level "timestamp" entry is parsed with iso8601.parse_date. Otherwise
    the payload's "client_timestamp" is divided by 1000 and interpreted as
    seconds since the epoch (so it is presumably stored in milliseconds --
    TODO confirm against the logger).

    The fallback is built with an explicit UTC timezone so that it can be
    subtracted from an iso8601-parsed value: mixing naive and aware datetimes
    raises TypeError, and naive local times give wrong durations across a
    daylight-saving change.
    """
    if "timestamp" in task:
        return iso8601.parse_date(task["timestamp"])
    return datetime.datetime.fromtimestamp(
        task["1"]["client_timestamp"] / 1000, tz=datetime.timezone.utc
    )


# quest_id -> list of durations in seconds, one entry per start/end pair.
completion_times = collections.defaultdict(list)
# quest_id -> start time of the attempt that is currently open.
start_times = {}
for task in tasks:
    if task["0"] == "startTask":
        quest_id = task["1"]["quest_id"]
        if quest_id in start_times:
            # A second start without an end in between: report it, then let
            # the newer start time replace the older one.
            print("Task", quest_id, "already has a start time.")
        start_times[quest_id] = _event_time(task)
    elif task["0"] == "endTask":
        quest_id = task["1"]["quest_id"]
        start_time = start_times.get(quest_id)
        if start_time is None:
            print("WARNING: Level", quest_id, "does not have a start time.")
            continue
        end_time = _event_time(task)
        completion_times[quest_id].append((end_time - start_time).total_seconds())
        # Close the attempt so a later endTask without a new start is reported.
        del start_times[quest_id]

Task 0 already has a start time.


In [23]:
# Add up all recorded durations (seconds) of each level into a single total.
total_times = {
    level: sum(durations) for level, durations in completion_times.items()
}

In [24]:
# Print each level's total time in seconds, one value per line.
for seconds in total_times.values():
    print(seconds)

102.25
10.765
13.334
4.276
33.019999999999996
36.713
161.956
16.387
15.404
12.036
10.541
17.841
53.760999999999996
272.827
8.542
10.843
18.596
22.369
76.08500000000001
14.884
124.786
28.321
121.67099999999999
18.596
32.256
17.6
52.766999999999996
234.671
168.243
95.351
153.834
219.29199999999997
7.325
48.737
37.315
21.019
78.473
44.733
138.659
92.958
52.547
24.047
273.364
31.127
709.271


In [25]:
# The summary is written next to the log files it was computed from.
output_path = os.path.join(folder, "output.csv")

# newline="" is what the csv module asks for: without it the writer's own
# "\r\n" row terminator is translated again on Windows and every row is
# followed by a blank line.
with open(output_path, "w", newline="") as output_file:
    writer = csv.writer(output_file)
    # One row per level, ordered by level id: (level id, total seconds with
    # two decimals).
    for level_id in sorted(total_times):
        writer.writerow((level_id, "{:.02f}".format(total_times[level_id])))

In [26]:
from IPython.display import FileLink
# Keep this as the cell's last expression so the notebook displays the object
# created for the CSV file written above.
FileLink(output_path)