In [1]:
import sqlite3
import numpy as np
import colorspacious
from sklearn import linear_model

In [2]:
# Color-set sizes (number of colors per set) that the cells below iterate over.
ALL_NUM_COLORS = [6, 8, 10]
# Path of the SQLite database with the survey results; the `picks` table is read from it.
DB_FILE = "../survey-results/results.db"

In [3]:
def to_rgb_jab(color):
    """
    Convert a sequence of hex color codes to sRGB255 and CAM02-UCS arrays.

    Each item of `color` is a hex string without a leading `#`; characters
    0-1, 2-3 and 4 onward are parsed as the red, green and blue components.

    Returns a tuple `(rgb, jab)` of NumPy arrays with one row per input
    color: the integer sRGB255 triples and their CAM02-UCS conversions.
    """
    rgb_rows = []
    jab_rows = []
    for code in color:
        triple = (int(code[0:2], 16), int(code[2:4], 16), int(code[4:], 16))
        rgb_rows.append(triple)
        jab_rows.append(
            colorspacious.cspace_convert(triple, "sRGB255", "CAM02-UCS")
        )
    return np.array(rgb_rows), np.array(jab_rows)

In [4]:
# Load survey data
# For every color-set size this builds, keyed by that size:
#   data_rgb / data_jab: one row per survey pick, holding both color sets
#                        flattened back to back (set 1 first, then set 2)
#   targets:             `sp - 1` for each pick
#                        (presumably sp is 1 or 2, i.e. which set was picked;
#                        verify against the survey schema)
data_rgb = {}
data_jab = {}
targets = {}
min_count = 1e10

conn = sqlite3.connect(DB_FILE)
try:
    c = conn.cursor()

    for num_colors in ALL_NUM_COLORS:
        count = 0
        rgb_rows = []
        jab_rows = []
        target_rows = []
        # A set of n colors is stored as n six-digit hex codes joined by
        # commas, i.e. 6n + (n - 1) = 7n - 1 characters. The length is bound
        # as a query parameter instead of being formatted into the SQL text.
        for row in c.execute(
            "SELECT c1, c2, sp FROM picks WHERE length(c1) = ?",
            (num_colors * 7 - 1,),
        ):
            count += 1
            # Convert to Jab [CAM02-UCS based]
            rgb1, jab1 = to_rgb_jab(row[0].split(","))
            rgb2, jab2 = to_rgb_jab(row[1].split(","))
            # Add to data arrays
            rgb_rows.append(np.array((rgb1, rgb2)).flatten())
            jab_rows.append(np.array((jab1, jab2)).flatten())
            target_rows.append(row[2] - 1)
        # Two sets x num_colors colors x 3 components per row. The explicit
        # reshape keeps the arrays two-dimensional even when no row matched,
        # so later column slicing does not fail on an empty group.
        row_width = 2 * num_colors * 3
        data_rgb[num_colors] = np.array(rgb_rows).reshape(-1, row_width)
        data_jab[num_colors] = np.array(jab_rows).reshape(-1, row_width)
        targets[num_colors] = np.array(target_rows)
        min_count = min(min_count, count)
        print(num_colors, count)
finally:
    # Release the database handle even if a query or conversion raised.
    conn.close()

6 10347
8 10371
10 1705


In [5]:
# Per-set summary statistics of chroma and lightness in CAM02-UCS.
# stats1 / stats2 collect, for the first / second color set of every survey
# row (all sizes concatenated in ALL_NUM_COLORS order), the mean, minimum and
# maximum over the set's colors. Keys are "c" + statistic name for chroma and
# "l" + statistic name for lightness, with all chroma keys first.
stats = [np.mean, np.min, np.max]
stat_keys = [prefix + stat.__name__ for prefix in ("c", "l") for stat in stats]
stats1 = {key: np.array([]) for key in stat_keys}
stats2 = {key: np.array([]) for key in stat_keys}

for nc in ALL_NUM_COLORS:
    jab_nc = data_jab[nc]
    n_rows = jab_nc.shape[0]
    # The first nc * 3 columns belong to set 1, the remainder to set 2;
    # unflatten each half to (row, color, component).
    set1 = jab_nc[:, : nc * 3].reshape((n_rows, nc, 3))
    set2 = jab_nc[:, nc * 3 :].reshape((n_rows, nc, 3))
    # Chroma is the length of the (a, b) vector; lightness is component 0.
    chroma1 = np.sqrt(set1[:, :, 1] ** 2 + set1[:, :, 2] ** 2)
    chroma2 = np.sqrt(set2[:, :, 1] ** 2 + set2[:, :, 2] ** 2)
    light1 = set1[:, :, 0]
    light2 = set2[:, :, 0]

    for stat in stats:
        chroma_key = "c" + stat.__name__
        light_key = "l" + stat.__name__

        c_stat1 = stat(chroma1, axis=1)
        c_stat2 = stat(chroma2, axis=1)
        stats1[chroma_key] = np.append(stats1[chroma_key], c_stat1)
        stats2[chroma_key] = np.append(stats2[chroma_key], c_stat2)
        # Mean of (set 1 statistic larger) XOR target for this size.
        chroma_score = np.mean((c_stat1 > c_stat2) ^ targets[nc])
        print(f"   {stat.__name__} chroma {nc:2d}: {chroma_score:.3f}")

        l_stat1 = stat(light1, axis=1)
        l_stat2 = stat(light2, axis=1)
        stats1[light_key] = np.append(stats1[light_key], l_stat1)
        stats2[light_key] = np.append(stats2[light_key], l_stat2)
        light_score = np.mean((l_stat1 > l_stat2) ^ targets[nc])
        print(f"{stat.__name__} lightness {nc:2d}: {light_score:.3f}")

   mean chroma  6: 0.517
mean lightness  6: 0.500
   amin chroma  6: 0.516
amin lightness  6: 0.497
   amax chroma  6: 0.477
amax lightness  6: 0.491
   mean chroma  8: 0.526
mean lightness  8: 0.496
   amin chroma  8: 0.527
amin lightness  8: 0.503
   amax chroma  8: 0.478
amax lightness  8: 0.494
   mean chroma 10: 0.527
mean lightness 10: 0.501
   amin chroma 10: 0.528
amin lightness 10: 0.505
   amax chroma 10: 0.480
amax lightness 10: 0.499


In [6]:
# Fit a linear regression of the targets on the set 1 minus set 2 differences
# of every collected statistic, then report each coefficient as a signed
# fraction of the summed absolute coefficients and the thresholded accuracy.
reg = linear_model.LinearRegression()

features1 = np.array(list(stats1.values()))
features2 = np.array(list(stats2.values()))
# One row per survey pick, one column per statistic (in stats1 key order).
tmp = (features1 - features2).T
all_targets = np.concatenate(list(targets.values()))

reg.fit(tmp, all_targets)

coef_sum = np.sum(np.abs(reg.coef_))
print("coefficient fractions:")
for k, coef in zip(stats1.keys(), reg.coef_):
    print(k, f"{coef / coef_sum:6.3f}")

# Threshold the regression output at 0.5 and XOR with the targets, using the
# same scoring convention as the single-statistic scores.
accuracy = np.mean((reg.predict(tmp) < 0.5) ^ all_targets)
print(f"\naccuracy:\n{accuracy:.3f}")

coefficient fractions:
cmean -0.366
camin -0.036
camax  0.316
lmean -0.086
lamin -0.027
lamax  0.170

accuracy:
0.543


In [7]:
# Accuracy of the fitted regression, reported separately per color-set size.
# The rows of `tmp` were stacked in ALL_NUM_COLORS order, so each size owns a
# consecutive row range. Deriving the ranges from the group sizes keeps this
# cell correct if ALL_NUM_COLORS changes, and avoids the `tmp[-0:]` pitfall
# (a negative-size slice of length zero selects the whole array).
start = 0
for nc in ALL_NUM_COLORS:
    stop = start + targets[nc].size
    if stop == start:
        # No survey rows for this size: nothing to predict or score.
        print(f"accuracy {nc:2d}: n/a")
    else:
        accuracy = np.mean((reg.predict(tmp[start:stop]) < 0.5) ^ targets[nc])
        print(f"accuracy {nc:2d}: {accuracy:.3f}")
    start = stop

accuracy  6: 0.539
accuracy  8: 0.548
accuracy 10: 0.546
