In [1]:
import json
import sqlite3
import numpy as np

In [2]:
# SQLite database file that the cells below create and query.
DB_FILE = "results.db"
# Input log, read line by line with each line parsed as one JSON object.
LOG_FILE = "results.log"

## Populate database

In [3]:
# Minimum number of picks a session needs in order to be kept; sessions below
# this are counted as "empty" and removed together with their picks.
# (Presumably the length of a complete survey run -- confirm against the
# survey code.)
MIN_PICKS_PER_SESSION = 100

# Log keys, in table-column order. Note that the log key "cbtq" is stored in
# the `cb` column of the sessions table.
SESSION_FIELDS = ("id", "ip", "ua", "cbtq", "ww", "wo")
PICK_FIELDS = ("id", "ip", "c1", "c2", "o", "dm", "sp", "cp", "np")

conn = sqlite3.connect(DB_FILE)
try:
    c = conn.cursor()

    # Rebuild both tables from scratch. The last cell of the notebook drops
    # `sessions` but leaves `picks` in DB_FILE, so a plain CREATE TABLE would
    # fail with "table picks already exists" on a second run.
    c.execute("DROP TABLE IF EXISTS sessions")
    c.execute("DROP TABLE IF EXISTS picks")
    c.execute(
        """CREATE TABLE sessions
             (id text, ip text, ua text, cb text, ww integer, wo text)"""
    )
    c.execute(
        """CREATE TABLE picks
             (id text, ip text, c1 text, c2 text, o text, dm integer, sp integer, cp integer, np integer)"""
    )

    # Load the log: every line is one JSON object whose "type" field decides
    # which table (if any) it goes into.
    sessions = picks = bad_picks = 0
    with open(LOG_FILE) as log_file:
        for raw_line in log_file:
            if not raw_line.strip():
                continue  # tolerate blank lines instead of failing in json.loads
            entry = json.loads(raw_line)
            entry_type = entry["type"]
            if entry_type == "session":
                sessions += 1
                c.execute(
                    "INSERT INTO sessions VALUES (?, ?, ?, ?, ?, ?)",
                    [entry[key] for key in SESSION_FIELDS],
                )
            elif entry_type == "pick":
                picks += 1
                c.execute(
                    "INSERT INTO picks VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    [entry[key] for key in PICK_FIELDS],
                )
            elif entry_type in ["badpick", "badmatch"]:
                # Counted for the statistics below but not stored.
                bad_picks += 1
            else:
                raise ValueError(f"Invalid log entry type!: {entry_type}")

    # Session ids must be unique, otherwise the per-session counts are wrong.
    total_rows = c.execute("SELECT count(*) FROM sessions").fetchone()[0]
    distinct_ids = c.execute("SELECT count(DISTINCT id) FROM sessions").fetchone()[0]
    if total_rows > distinct_ids:
        raise ValueError("There are duplicate sessions!")

    # One grouped query instead of one COUNT query (a full scan of `picks`)
    # per session. Sessions without any picks are simply absent from the dict.
    picks_per_session = dict(c.execute("SELECT id, count(*) FROM picks GROUP BY id"))

    # Drop sessions below the threshold, along with the picks they contain.
    ids = c.execute("SELECT id FROM sessions").fetchall()
    pick_counts = []
    empty_sessions = 0
    for row in ids:
        pick_count = picks_per_session.get(row[0], 0)
        if pick_count < MIN_PICKS_PER_SESSION:
            c.execute("DELETE FROM sessions WHERE id = ?", row)
            c.execute("DELETE FROM picks WHERE id = ?", row)
            empty_sessions += 1
        else:
            pick_counts.append(pick_count)

    session_count = sessions - empty_sessions

    conn.commit()
finally:
    # Close the connection even when loading fails, so the database file is
    # not left open (uncommitted changes are rolled back on close).
    conn.close()

## Session statistics

In [4]:
# Counters gathered while the log was loaded into the database.
for label, value in (
    ("sessions:", sessions),
    ("empty sessions:", empty_sessions),
    ("good sessions:", session_count),
    ("picks:", picks),
    ("bad picks:", bad_picks),
):
    print(label, value)

# Summary statistics over the pick counts of the sessions that were kept.
# Each statistic is computed right before it is printed, one after another.
for label, statistic in (
    ("average picks per session:", np.mean),
    ("median picks per session:", np.median),
    ("min picks per session:", np.min),
    ("max picks per session:", np.max),
):
    print(label, statistic(pick_counts))

sessions: 10
empty sessions: 3
good sessions: 7
picks: 756
bad picks: 0
average picks per session: 100.0
median picks per session: 100.0
min picks per session: 100
max picks per session: 100


## Picks per set length

The repeat survey only looked at eight-color sets.

In [5]:
# Reopen the database for the analysis cells below. `conn` and `c` are shared
# by those cells and stay open until the final cell, which drops the sessions
# table and closes the connection.
conn = sqlite3.connect(DB_FILE)
c = conn.cursor()

In [6]:
# Count the picks for each color-set size and print "<count> <size>".
# A set is recognised by the length of its `c1` string: num_colors * 7 - 1
# characters (presumably 6 characters per color plus single-character
# separators -- confirm against the log format).
for num_colors in (6, 8, 10):
    encoded_length = num_colors * 7 - 1
    matching = c.execute(
        "SELECT COUNT(c1) FROM picks WHERE length(c1) = ?", (encoded_length,)
    )
    set_count = matching.fetchone()[0]
    print(f"{set_count:5d} {num_colors:2d}")

    0  6
  700  8
    0 10


## Self-reported color-vision-deficiency information

Respondents were asked if they had a color-vision deficiency at the start of the survey.

In [7]:
# Maps the answer codes stored in the `cb` column of the sessions table to the
# human-readable answers. Insertion order is the order used when the tally is
# printed.
cvd_types = dict(
    [
        # No deficiency reported / no usable answer.
        ("na", "Not applicable"),
        ("dta", "Decline to answer"),
        ("dk", "Don't know"),
        # Reported deficiencies.
        ("dy", "Deuteranomaly (partial green deficiency) [most common]"),
        ("py", "Protanomaly (partial red deficiency)"),
        ("da", "Deuteranopia (complete green deficiency)"),
        ("pa", "Protanopia (complete red deficiency)"),
        ("ty", "Tritanomaly (partial blue deficiency) [rare]"),
        ("ta", "Tritanopia (complete blue deficiency) [rare]"),
        ("m", "Monochromasy (complete colorblindness) [rare]"),
        ("o", "Other"),
    ]
)

In [8]:
# Answer codes that do not count as a reported color-vision deficiency.
NON_CVD_ANSWERS = ("na", "dta", "dk")

# Print the number of sessions per answer and add up the answers that report
# a deficiency.
total_cvd_count = 0
for t, description in cvd_types.items():
    count = c.execute("SELECT COUNT(cb) FROM sessions WHERE cb = ?", (t,)).fetchone()[0]
    print(f"{count:4d}", description)
    if t not in NON_CVD_ANSWERS:
        total_cvd_count += count

# session_count is 0 when every session was discarded while populating the
# database; report "n/a" in that case instead of raising ZeroDivisionError.
if session_count:
    cvd_share = f"{total_cvd_count / session_count * 100:.1f}%"
else:
    cvd_share = "n/a"
print(f"{total_cvd_count:4d} CVD total: {cvd_share}")

   6 Not applicable
   0 Decline to answer
   0 Don't know
   0 Deuteranomaly (partial green deficiency) [most common]
   1 Protanomaly (partial red deficiency)
   0 Deuteranopia (complete green deficiency)
   0 Protanopia (complete red deficiency)
   0 Tritanomaly (partial blue deficiency) [rare]
   0 Tritanopia (complete blue deficiency) [rare]
   0 Monochromasy (complete colorblindness) [rare]
   0 Other
   1 CVD total: 14.3%


## Drop sessions table for privacy reasons

In [9]:
# Remove the sessions table, then VACUUM: SQLite rebuilds the database file on
# VACUUM, so pages freed by the DROP are not left behind in it.
for statement in ("DROP TABLE sessions", "VACUUM"):
    c.execute(statement)

# Persist the result and release the database file.
conn.commit()
conn.close()