In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
from joblib import dump, load
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database

In [2]:
# Load the previously saved model that is used further down (via .predict)
# to map HSV values to paint names; presumably a fitted KNeighborsClassifier
# given the name -- confirm against the notebook that created the file.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
hsv_knn = load('ds_hsv_knn.joblib')

# Validation steps
1. Import a test image made from eight swatches of ds paints
2. Collect the HSV and shifted HSV values
3. Drop the neutral tones (pixels with low saturation or low value)
4. Cluster with K-means++ and collect the top 8 clusters
5. Match those clusters to the hsv_knn model

## Import the test image
Order of swatches in the test image:

alizarin_crimson | Quin_Rose | Lemon_Yellow | new_gamboge

french_ultramarine | Pth_bl_GS | Sap_green | Raw_umber

In [3]:
# Absolute path to the eight-swatch validation image.
test_image_path = '/Users/macbook/Box/insight_project_data/test_image/eight_swatches.jpg'
img = cv2.imread(test_image_path)
# cv2.imread reports a missing or undecodable file by returning None instead
# of raising, so fail here with a clear message rather than later in cvtColor.
if img is None:
    raise FileNotFoundError(f"Could not read test image: {test_image_path}")

In [None]:
# OpenCV loads images in BGR channel order while matplotlib expects RGB, so
# convert a copy for display; img itself stays BGR for the HSV conversion.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [4]:
# Convert the BGR image to HSV.
# NOTE(review): for 8-bit input OpenCV stores H as 0-179 and S, V as 0-255;
# this assumes img is uint8 (the imread default) -- confirm.
img_HSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

In [5]:
# Flatten the image into a float32 array with one row of three values
# (H, S, V) per pixel.
pixels = np.float32(img_HSV.reshape(-1, 3))

In [6]:
def shift_h_remove(data, v_thresh, s_thresh):
    """Drop neutral (low-value or low-saturation) pixels from HSV pixel data.

    Inputs:
        data     - sequence of pixels, one entry per pixel, each indexable as
                   [H, S, V]; S and V are compared on a 0-255 scale
        v_thresh - fraction of 255; a pixel is kept only if V > 255 * v_thresh
        s_thresh - fraction of 255; a pixel is kept only if S > 255 * s_thresh
    Outputs:
        list of [H, S, V] lists for the pixels that pass both thresholds, in
        their original order. The hue is returned unshifted: earlier versions
        computed 120/240-degree shifted hues but never returned them, so that
        dead computation has been removed.
    """
    # Loop-invariant cut-offs: compute once instead of once per pixel.
    v_cutoff = 255 * v_thresh
    s_cutoff = 255 * s_thresh

    kept_pixels = []
    for pixel in data:
        h, s, v = pixel[0], pixel[1], pixel[2]
        # Strict comparisons: a pixel sitting exactly on a cut-off is dropped.
        if v > v_cutoff and s > s_cutoff:
            kept_pixels.append([h, s, v])
    return kept_pixels

In [7]:
# Keep only pixels whose V and S both exceed 25% of 255 (arguments are
# v_thresh first, then s_thresh); the result is a list of [H, S, V] lists.
pixels_shift = shift_h_remove(pixels, .25, .25)

In [16]:
pixels_shift[0]

[178.0, 226.0, 237.0]

In [12]:
len(pixels)

90000

In [8]:
len(pixels_shift)

53568

In [9]:
# Cluster the retained HSV pixels into 20 colour groups (KMeans initialises
# with k-means++ by default). n_init is pinned to 10, the value that was the
# library default when this notebook was run, because newer scikit-learn
# releases change the default and would otherwise alter the clustering.
kmeans = KMeans(n_clusters=20, n_init=10, random_state=0).fit(pixels_shift)
# Replace every pixel by the centre of the cluster it was assigned to.
image2show = kmeans.cluster_centers_[kmeans.labels_]

In [18]:
# One row per retained pixel; the h, s, v columns hold the centre of the
# cluster that pixel was assigned to, not the pixel's own values.
kmeans_df = pd.DataFrame(image2show, columns=['h','s','v'])

In [20]:
# Attach each pixel's cluster id so clusters can be counted and filtered.
# Note: this mutates kmeans_df in place.
kmeans_df['label'] = kmeans.labels_

In [22]:
kmeans_df.label.value_counts()

0     6696
11    6696
18    4045
17    3213
14    2831
12    2785
5     2736
16    2658
4     2651
9     2527
3     2481
7     2298
1     2085
15    2070
10    1786
6     1636
13    1340
8     1210
19     935
2      889
Name: label, dtype: int64

In [33]:
# Keep only the pixels that belong to the eight most populous clusters.
# The cluster ids are derived from the cluster sizes instead of being copied
# by hand from a printed table, so the selection stays correct if a re-fit
# numbers the clusters differently.
N_TOP_CLUSTERS = 8
top_labels = kmeans_df['label'].value_counts().head(N_TOP_CLUSTERS).index
kmeans_top = kmeans_df[kmeans_df['label'].isin(top_labels)]

In [34]:
kmeans_top

Unnamed: 0,h,s,v,label
3,177.790227,234.012761,231.979770,17
7,177.790227,234.012761,231.979770,17
12,177.790227,234.012761,231.979770,17
13,177.790227,234.012761,231.979770,17
14,177.790227,234.012761,231.979770,17
...,...,...,...,...
53554,19.335009,238.381688,119.669659,12
53555,19.335009,238.381688,119.669659,12
53556,19.335009,238.381688,119.669659,12
53557,19.335009,238.381688,119.669659,12


In [36]:
# Feature matrix for the classifier: the cluster-centre h, s, v columns only
# (the label column is dropped).
X = kmeans_top[['h','s','v']]

In [38]:
# Predict a paint name for every row of X, i.e. one prediction per pixel
# based on that pixel's cluster centre.
predict_colors = hsv_knn.predict(X)

In [40]:
# Tabulate the predictions as rows of (paint name, number of pixels).
colors2 = np.array(np.unique(predict_colors, return_counts=True)).T

In [41]:
colors2

array([['Alizarin Crimson', 3213],
       ['Lemon Yellow', 6696],
       ['New Gamboge', 6696],
       ['Phthalo Blue Green Shade', 4045],
       ['Quinacridone Rose', 5567],
       ['Raw Umber', 2785],
       ['Sap Green', 2658]], dtype=object)

In [10]:
kmeans.cluster_centers_

array([[ 29.11230585, 254.91114098, 254.61320191],
       [178.11846523, 242.30263789, 224.70359712],
       [ 48.04581006, 228.47150838, 107.89608939],
       [117.34864974, 174.60137042, 207.15155179],
       [100.83477933, 251.60958129, 122.38061109],
       [172.0869883 , 209.68567251, 244.25328947],
       [ 19.57675841, 221.87706422, 127.57737003],
       [ 19.12576153, 251.80678851, 111.78677111],
       [117.34876033, 186.68347107, 195.18347107],
       [175.78986941, 225.00949743, 238.91768896],
       [ 47.26169265, 251.61581292,  97.38975501],
       [ 22.53360215, 254.21654719, 253.99626643],
       [ 19.33500898, 238.38168761, 119.66965889],
       [ 39.39984768, 235.87357197, 121.12414318],
       [172.        , 198.26245143, 249.98163193],
       [117.42415459, 164.73043478, 217.11642512],
       [ 39.66104869, 250.91273408, 113.09662921],
       [177.7902272 , 234.01276066, 231.97976969],
       [ 99.06477132, 249.41854141, 133.46526576],
       [117.52406417, 154.52620

In [13]:
# Tabulate cluster sizes as rows of (cluster label, number of pixels).
colors = np.array(np.unique(kmeans.labels_, return_counts=True)).T
print(colors)

[[   0 6696]
 [   1 2085]
 [   2  889]
 [   3 2481]
 [   4 2651]
 [   5 2736]
 [   6 1636]
 [   7 2298]
 [   8 1210]
 [   9 2527]
 [  10 1786]
 [  11 6696]
 [  12 2785]
 [  13 1340]
 [  14 2831]
 [  15 2070]
 [  16 2658]
 [  17 3213]
 [  18 4045]
 [  19  935]]


In [27]:
kmeans.labels_

array([ 9,  9,  9, ..., 12,  7,  7], dtype=int32)

In [29]:
# Classify every retained pixel directly from its own H, S, V values,
# without clustering first.
# NOTE(review): this reuses the name predict_colors that another cell assigns
# from the cluster-centre predictions, so its contents depend on which cell
# ran last.
predict_colors = hsv_knn.predict(pixels_shift)

In [30]:
predict_colors

array(['Alizarin Crimson', 'Alizarin Crimson', 'Alizarin Crimson', ...,
       'Raw Umber', 'Raw Umber', 'Raw Umber'], dtype=object)

In [32]:
# Tabulate the predictions as rows of (paint name, number of pixels).
# NOTE(review): this reuses the name colors, which another cell assigns to the
# (cluster label, count) table.
colors = np.array(np.unique(predict_colors, return_counts=True)).T

In [33]:
colors

array([['Alizarin Crimson', 6418],
       ['Aussie Red Gold', 1],
       ['Burnt Umber', 4],
       ['Cascade Green', 26],
       ['Cerulean Blue Chromium', 462],
       ['Cobalt Blue', 673],
       ['French Ultramarine', 5484],
       ['Hansa Yellow Light', 583],
       ['Hansa Yellow Medium', 1339],
       ['Indian Yellow', 1091],
       ['Lemon Yellow', 4774],
       ['New Gamboge', 4752],
       ['Nickel Azo Yellow', 853],
       ['Opera Pink', 265],
       ['Permanent Alizarin Crimson', 18],
       ['Phthalo Blue Green Shade', 6687],
       ['Phthalo Blue Red Shade', 1],
       ['Phthalo Turquoise', 9],
       ['Quinacridone Pink', 19],
       ['Quinacridone Red', 1171],
       ['Quinacridone Rose', 5501],
       ['Raw Umber', 4961],
       ['Sap Green', 6440],
       ['Sepia', 1959],
       ['Ultramarine Blue', 77]], dtype=object)

In [34]:
# Stacking the names and counts into one array makes it dtype=object, so the
# counts would otherwise land in an object column; cast them back to integers
# so sorting and arithmetic on "pixels" are numeric.
predictions = pd.DataFrame(colors, columns=["name","pixels"]).astype({"pixels": "int64"})

In [39]:
predictions.sort_values(by=['pixels'], ascending = False)

Unnamed: 0,name,pixels
15,Phthalo Blue Green Shade,6687
22,Sap Green,6440
0,Alizarin Crimson,6418
20,Quinacridone Rose,5501
6,French Ultramarine,5484
21,Raw Umber,4961
10,Lemon Yellow,4774
11,New Gamboge,4752
23,Sepia,1959
8,Hansa Yellow Medium,1339
