In [1]:
import sqlite3
import numpy as np

In [2]:
import sys
sys.path.append('numpy-version')
from numpy_model import SetModel, CycleModel

In [3]:
# Path of the SQLite database that the cells below open with sqlite3.connect.
# The path is relative, so it is resolved against the kernel's current
# working directory.
DB_FILE = "../validation-survey-results/results.db"

## Evaluate sets and cycles using aesthetic model

In [4]:
# Instantiate both models, handing each the path of its .npz.gz file.
# NOTE(review): presumably these files hold the trained weights (going by the
# file names) — confirm in numpy_model.
set_model = SetModel("numpy-version/set_model_weights.npz.gz")
cycle_model = CycleModel("numpy-version/cycle_model_weights.npz.gz")

In [5]:
# Score both candidate sets (columns c1 and c2) for every np value 0..49
# with the set model.
conn = sqlite3.connect(DB_FILE)
try:
    c = conn.cursor()
    set_scores = []
    for i in range(50):
        # Only the first matching row is read.
        # NOTE(review): assumes all rows sharing an `np` value store the same
        # c1/c2 — confirm against the survey schema.
        sets = c.execute(
            "SELECT c1, c2 FROM picks WHERE np = ?", (i,)
        ).fetchone()
        # c1 and c2 are comma-separated strings; each is split into a list
        # before being passed to the model.
        set_scores.append(
            (set_model(sets[0].split(",")), set_model(sets[1].split(",")))
        )
finally:
    # Release the database handle even if a query or the model raises.
    conn.close()
# Transpose so that axis 0 indexes the candidate (c1, c2) and axis 1 the np
# value (assuming set_model returns a scalar score — TODO confirm).
set_scores = np.array(set_scores).T

In [6]:
# Score the four candidate orderings stored in column `o` for every np value
# 0..49 with the cycle model.
conn = sqlite3.connect(DB_FILE)
try:
    c = conn.cursor()
    cycle_scores = []
    for i in range(50):
        set1, set2, order = c.execute(
            "SELECT c1, c2, o FROM picks WHERE np = ?", (i,)
        ).fetchone()
        # Even np values use c1, odd np values use c2.
        # NOTE(review): presumably mirrors which set the survey displayed for
        # the cycle question — confirm.
        set_pick = set1 if i % 2 == 0 else set2
        set_pick = np.array(set_pick.split(","))
        order = order.split(",")
        # Each of the first four entries of `order` is read character by
        # character as single-digit indices into set_pick. Indexing with
        # range(4) keeps the IndexError if fewer than four entries exist.
        cycle_scores.append(
            tuple(
                cycle_model(set_pick[[int(o) for o in order[k]]])
                for k in range(4)
            )
        )
finally:
    # Release the database handle even if a query or the model raises.
    conn.close()
# Transpose so that axis 0 indexes the candidate ordering and axis 1 the np
# value (assuming cycle_model returns a scalar score — TODO confirm).
cycle_scores = np.array(cycle_scores).T

## Evaluate validation survey results

Self-consistency of individual subjects, overall consistency across subjects, and the accuracy of the aesthetic model are evaluated for both sets and cycles.

In [7]:
# Load every `sp` value for np = 0..49 (set_a) and for np = 50..99 (set_b),
# then report how often the two agree element-wise.
conn = sqlite3.connect(DB_FILE)
try:
    c = conn.cursor()
    set_a = []
    set_b = []
    for i in range(50):
        # NOTE(review): the queries have no ORDER BY, so the element-wise
        # comparison below assumes rows for np = i and np = i + 50 come back
        # in the same order — confirm.
        set_a.append(
            c.execute("SELECT sp FROM picks WHERE np = ?", (i,)).fetchall()
        )
        set_b.append(
            c.execute("SELECT sp FROM picks WHERE np = ?", (i + 50,)).fetchall()
        )
finally:
    # Release the database handle even if a query raises.
    conn.close()
# Collapse the one-column rows so each array is (50, rows per np value).
set_a = np.array(set_a).reshape(50, -1)
set_b = np.array(set_b).reshape(50, -1)
print("first set matches second set: {:.2f}".format(np.mean(set_a == set_b)))

first set matches second set: 0.63


In [8]:
# Stack the responses from set_a on top of those from set_b: row k and row
# k + n_subjects hold the two responses of the same subject.
set_all = np.append(set_a.T, set_b.T, axis=0)
n_subjects = set_all.shape[0] // 2
results = []
for i in range(n_subjects):
    # Second row belonging to the current subject
    j = n_subjects + i
    # Exclude the current subject (both rows) from mode to avoid bias
    others = np.delete(set_all, [i, j], axis=0)
    # Consider mode: count the remaining responses per choice (1 or 2)
    response_count = np.array([np.sum(others == c, axis=0) for c in range(1, 3)])
    max_count = np.max(response_count, axis=0)
    # If tied with mode, divide by tie count
    tie_count = np.sum(response_count == max_count, axis=0)
    # Consider both responses from current subject
    for row in (i, j):
        results.append(
            np.mean(
                (np.choose(set_all[row] - 1, response_count) == max_count)
                / tie_count
            )
        )
# NOTE(review): the label says "mean", but the value is agreement with the
# mode of the other subjects; kept as-is to match the recorded output.
print("set matches mean of other subjects: {:.2f}".format(np.mean(results)))

set matches mean of other subjects: 0.58


In [9]:
# Fraction of all individual responses that equal the candidate the set model
# scores highest. Responses are shifted by one to line up with the 0-based
# argmax index.
set_model_choice = np.argmax(set_scores, axis=0)
set_response_hits = (set_all - 1) == set_model_choice
print(
    "set prediction matches responses: {:.2f}".format(np.mean(set_response_hits))
)

set prediction matches responses: 0.59


In [10]:
# Tally, per column of set_all, how many responses picked choice 1 and choice 2
vote_count = np.array(
    [(set_all == choice).sum(axis=0) for choice in range(1, 3)]
)
top_count = vote_count.max(axis=0)
# Votes received by the candidate the set model scores highest
predicted_votes = np.choose(np.argmax(set_scores, axis=0), vote_count)
# A tie for the mode is shared evenly between the tied choices
tie_count = (vote_count == top_count).sum(axis=0)
mode_agreement = np.mean((predicted_votes == top_count) / tie_count)
print("set prediction matches mode: {:.2f}".format(mode_agreement))

set prediction matches mode: 0.70


In [11]:
# Load every `cp` value for np = 0..49 (cycle_a) and for np = 50..99
# (cycle_b), then report how often the two agree element-wise.
conn = sqlite3.connect(DB_FILE)
try:
    c = conn.cursor()
    cycle_a = []
    cycle_b = []
    for i in range(50):
        # NOTE(review): the queries have no ORDER BY, so the element-wise
        # comparison below assumes rows for np = i and np = i + 50 come back
        # in the same order — confirm.
        cycle_a.append(
            c.execute("SELECT cp FROM picks WHERE np = ?", (i,)).fetchall()
        )
        cycle_b.append(
            c.execute("SELECT cp FROM picks WHERE np = ?", (i + 50,)).fetchall()
        )
finally:
    # Release the database handle even if a query raises.
    conn.close()
# Collapse the one-column rows so each array is (50, rows per np value).
cycle_a = np.array(cycle_a).reshape(50, -1)
cycle_b = np.array(cycle_b).reshape(50, -1)
print("first cycle matches second cycle: {:.2f}".format(np.mean(cycle_a == cycle_b)))

first cycle matches second cycle: 0.43


In [12]:
# Stack the responses from cycle_a on top of those from cycle_b: row k and
# row k + n_subjects hold the two responses of the same subject.
cycle_all = np.append(cycle_a.T, cycle_b.T, axis=0)
n_subjects = cycle_all.shape[0] // 2
results = []
for i in range(n_subjects):
    # Second row belonging to the current subject
    j = n_subjects + i
    # Exclude the current subject (both rows) from mode to avoid bias
    others = np.delete(cycle_all, [i, j], axis=0)
    # Consider mode: count the remaining responses per choice (1 to 4)
    response_count = np.array(
        [np.sum(others == c, axis=0) for c in range(1, 5)]
    )
    max_count = np.max(response_count, axis=0)
    # If tied with mode, divide by tie count
    tie_count = np.sum(response_count == max_count, axis=0)
    # Consider both responses from current subject
    for row in (i, j):
        results.append(
            np.mean(
                (np.choose(cycle_all[row] - 1, response_count) == max_count)
                / tie_count
            )
        )
print("cycle matches mode of other subjects: {:.2f}".format(np.mean(results)))

cycle matches mode of other subjects: 0.31


In [13]:
# Fraction of all individual responses that equal the ordering the cycle model
# scores highest. Responses are shifted by one to line up with the 0-based
# argmax index.
cycle_model_choice = np.argmax(cycle_scores, axis=0)
cycle_response_hits = (cycle_all - 1) == cycle_model_choice
print(
    "cycle prediction matches responses: {:.2f}".format(
        np.mean(cycle_response_hits)
    )
)

cycle prediction matches responses: 0.32


In [14]:
# Tally, per column of cycle_all, how many responses picked each choice 1..4
vote_count = np.array(
    [(cycle_all == choice).sum(axis=0) for choice in range(1, 5)]
)
top_count = vote_count.max(axis=0)
# Votes received by the ordering the cycle model scores highest
predicted_votes = np.choose(np.argmax(cycle_scores, axis=0), vote_count)
# A tie for the mode is shared evenly between the tied choices
tie_count = (vote_count == top_count).sum(axis=0)
mode_agreement = np.mean((predicted_votes == top_count) / tie_count)
print("cycle prediction matches mode: {:.2f}".format(mode_agreement))

cycle prediction matches mode: 0.37
