In [14]:
%load_ext autoreload
%autoreload 2
import joblib 
import numpy as np

import matplotlib.pyplot as plt
import pandas as pd
from colorml.utils.utils import plot_prediction_dist, read_pickle, pairwise_delta_es

%matplotlib inline
from colorml.utils.utils import get_delta_e, pairwise_delta_es

import seaborn as sns

import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

from collections import Counter
import pickle

plt.rcParams['font.family'] = 'sans-serif'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
df = pd.read_csv('../data/color_feat_merged.csv')

In [3]:
color_counter = Counter(df['color_cleaned_y'].values)

In [4]:
color_counter['cherry red']

1

In [5]:
augmentation_dict = read_pickle('../data/augment_dict.pkl')

In [6]:
color_delta_es = {}
means = []
weights = []
for k,v in augmentation_dict.items():
    colors = len(v)
    meanc = []
    for i in range(colors):
        for j in range(colors): 
            if j>i: 
                meanc.append(get_delta_e(v[i], v[j], upscaled=True))
    color_delta_es[k] = meanc
    weights.extend([color_counter[k]] * len(meanc))
    means.extend(meanc)

In [7]:
len(weights)

31228

In [8]:
color_delta_es2 = {}
means2 = []
weights = []
for k,v in augmentation_dict.items():
    colors = len(v)
    meanc = []
    for i in range(colors):
        for j in range(colors): 
            if j>i: 
                meanc.append(get_delta_e(v[i], v[j], upscaled=True))
    color_delta_es2[k] = np.mean(meanc)
    weights.append(color_counter[k] / len(df))
    means2.append(np.mean(meanc))

In [9]:
color_delta_es_from_med = {}
means_from_med = []

for k,v in augmentation_dict.items():
    colors = len(v)
    meanc = []
    median_c = np.median(v, axis=0)
    for i in range(colors):
        meanc.append(get_delta_e(v[i], median_c, upscaled=True))
    color_delta_es_from_med[k] = meanc
    means_from_med.extend(meanc)

In [10]:
fig, ax = plt.subplots(1,1, figsize=(4.5,3.5), sharex=True)

ax.hist(means2, weights=weights, bins=60, cumulative=True, alpha=.5, density=True, label='weighted ')
ax.hist(means2, bins=60, cumulative=True, label='unweighted', alpha=.5, density=True)

ax.set_xlabel(r'mean pairwise colour distance $\Delta E^*_{ab}$ for a colour name')
ax.set_ylabel(r'cumulative distribution')

ax.vlines(16, 0, 1)
# ax.vlines(10, 0, 140)
ax.spines['top'].set_color('none')
ax.spines['right'].set_color('none')
ax.spines['left'].set_smart_bounds(True)
ax.spines['bottom'].set_smart_bounds(True)
ax.legend()

fig.tight_layout()
fig.savefig('../results/delta_e.pdf', bbox_inches='tight')

The set_smart_bounds function was deprecated in Matplotlib 3.2 and will be removed two minor releases later.
  del sys.path[0]
The set_smart_bounds function was deprecated in Matplotlib 3.2 and will be removed two minor releases later.
  


In [11]:
np.mean(np.array(means) * np.array(weights)/len(df)) 

ValueError: operands could not be broadcast together with shapes (31228,) (140,) 

In [12]:
colors_to_pick = {}

for threshold in [5, 10, 16]: 
    colors_to_pick_list = []
    for k, v in color_delta_es2.items(): 
        if v < threshold: 
            colors_to_pick_list.append(k)
            
    colors_to_pick[threshold] = colors_to_pick_list

In [15]:
with open('../data/color_threshold.pkl', 'wb') as fh:
    pickle.dump(colors_to_pick, fh)