In [1]:
import cv2
import glob
import os

from pyspark import SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.types import FloatType
from pyspark.sql.functions import udf

from sklearn.cluster import KMeans
from colormath.color_objects import LabColor
from colormath.color_diff import delta_e_cie2000
from colormath.color_objects import XYZColor, sRGBColor
from colormath.color_conversions import convert_color

from input_output_manager import IOManager

DATAFRAME_SCHEMA = ['image',
                    'color_1_r', 'color_1_g', 'color_1_b',
                    'color_2_r', 'color_2_g', 'color_2_b',
                    'color_3_r', 'color_3_g', 'color_3_b']

PARQUET_FILE_NAME = 'images_df_sample'

REMOTE = True
spark = SparkSession.builder.master("spark://192.168.1.2:7077").config(conf=SparkConf()).getOrCreate()

def image_id(image_path):
    return os.path.basename(image_path).replace('_r.jpg', '').strip()


def find_dominant_colors(image_path, kmeans_clusters=3):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.reshape((img.shape[0] * img.shape[1], 3))
    kmeans = KMeans(n_clusters=kmeans_clusters, random_state=1)
    kmeans.fit(img)

    attributes = [image_id(image_path)]
    for d_color in kmeans.cluster_centers_.astype(int):
        for rgb in d_color:
            attributes.append(rgb.item())
    return attributes


def assign_dominant_colors():
    images = glob.glob("/home/esteban/spark_test/test/images_r_sample/*.jpg")

    images_dominant_color_rdd = spark.sparkContext.parallelize(images).map(find_dominant_colors)
    images_dataframe = images_dominant_color_rdd.toDF(DATAFRAME_SCHEMA)
    images_dataframe.show()
    #io_manager = IOManager(spark)
    #io_manager.hdfs_save_dataframe_parquet(images_dataframe, PARQUET_FILE_NAME)


def get_delta_e(r_1, g_1, b_1, r_2, g_2, b_2):
    rgb_param = sRGBColor(r_1, g_1, b_1)
    image_dominant_color = sRGBColor(r_2, g_2, b_2)

    color_param_xyz = convert_color(rgb_param, XYZColor)
    color_image_xyz = convert_color(image_dominant_color, XYZColor)

    color_param_lab = convert_color(color_param_xyz, LabColor)
    color_image_lab = convert_color(color_image_xyz, LabColor)

    return delta_e_cie2000(color_param_lab, color_image_lab)


def get_matching_images(desired_color):
    delta_e_udf = udf(lambda r, g, b: get_delta_e(r, g, b, *desired_color), FloatType())
    io_manager = IOManager(spark)

    images_df_parquet = io_manager.hdfs_load_dataframe_parquet(PARQUET_FILE_NAME)
    images_df_parquet.createOrReplaceTempView("images_df_parquet")
    full_images_df = spark.sql('select * from images_df_parquet')
    delta_e_assigned_df = full_images_df.select('image',
                                                delta_e_udf('color_1_r', 'color_1_g', 'color_1_b').alias(
                                                    'color_1_delta_e'),
                                                delta_e_udf('color_2_r', 'color_2_g', 'color_2_b').alias(
                                                    'color_2_delta_e'),
                                                delta_e_udf('color_3_r', 'color_3_g', 'color_3_b').alias(
                                                    'color_3_delta_e'))
    return delta_e_assigned_df

In [8]:
assign_dominant_colors()

+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
|    image|color_1_r|color_1_g|color_1_b|color_2_r|color_2_g|color_2_b|color_3_r|color_3_g|color_3_b|
+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| ann1217a|       96|       89|       98|      168|      148|      162|       41|       39|       36|
| opo9927d|        0|        0|        0|      208|      126|       94|      135|       97|       89|
| opo9932c|       13|       13|        1|      253|      240|      216|      187|       78|        5|
| ann1208a|      244|      153|       19|      176|       80|        9|       78|       40|       19|
| opo9929b|      209|      191|      172|       10|        8|        2|      141|      118|       78|
| ann1406a|       56|       43|       34|       93|      101|      105|      150|      170|      181|
|heic0206c|      132|       45|       47|      216|      140|      124|       63| 

In [2]:
import ipywidgets as widgets

widgets.ColorPicker(
    concise=False,
    description='Pick a color',
    value='blue',
    disabled=False
)


ColorPicker(value='blue', description='Pick a color')

In [8]:
get_matching_images((0,191,255)).show()

+---------+---------------+---------------+---------------+
|    image|color_1_delta_e|color_2_delta_e|color_3_delta_e|
+---------+---------------+---------------+---------------+
| opo9929b|       91.10425|      137.31532|      109.99007|
| ann1406a|     102.747696|      47.268314|      28.239372|
|heic0206c|      131.25316|       112.5184|      171.51913|
|potw1422a|       91.89381|       92.22175|       127.3342|
|potw1444a|       33.77612|       132.7916|      28.154665|
| ann1217a|      148.30452|      197.95705|       96.06793|
| opo9927d|      139.78716|      104.39176|     108.259514|
| opo9932c|      141.24297|      100.33872|      102.27639|
| ann1208a|       90.66206|       101.6956|     115.569534|
+---------+---------------+---------------+---------------+



In [9]:
delta_e_assigned = get_matching_images((0,191,255))
delta_e_assigned.filter(delta_e_assigned.color_1_delta_e < 50).show()


+---------+---------------+---------------+---------------+
|    image|color_1_delta_e|color_2_delta_e|color_3_delta_e|
+---------+---------------+---------------+---------------+
|potw1444a|       33.77612|       132.7916|      28.154665|
+---------+---------------+---------------+---------------+



In [33]:
delta_e_assigned.filter('color_1_delta_e < 50 or color_2_delta_e < 50 or color_3_delta_e < 50').show()

+---------+---------------+---------------+---------------+
|    image|color_1_delta_e|color_2_delta_e|color_3_delta_e|
+---------+---------------+---------------+---------------+
| ann1406a|       95.06792|      42.996983|      27.130707|
|potw1444a|      37.052086|       122.3911|      36.429565|
+---------+---------------+---------------+---------------+



In [7]:
delta_e_assigned.orderBy('color_1_delta_e', ascending=1).show()

+---------+---------------+---------------+---------------+
|    image|color_1_delta_e|color_2_delta_e|color_3_delta_e|
+---------+---------------+---------------+---------------+
|potw1444a|      37.052086|       122.3911|      36.429565|
|potw1422a|        74.3123|      86.438385|     108.851074|
| opo9929b|       77.51546|      129.31705|       91.21968|
| ann1208a|       89.45832|       91.29653|      103.51094|
| ann1406a|       95.06792|      42.996983|      27.130707|
|heic0206c|     102.665016|       90.37304|      124.97095|
| ann1217a|      121.25168|      130.32187|       88.58818|
| opo9927d|      139.76248|      89.412224|       86.33246|
| opo9932c|      143.11295|      109.03904|       90.78594|
+---------+---------------+---------------+---------------+



In [14]:
delta_e_assigned.orderBy('color_2_delta_e', ascending=1).show()

+---------+---------------+---------------+---------------+
|    image|color_1_delta_e|color_2_delta_e|color_3_delta_e|
+---------+---------------+---------------+---------------+
| ann1406a|       95.06792|      42.996983|      27.130707|
|potw1422a|        74.3123|      86.438385|     108.851074|
| opo9927d|      139.76248|      89.412224|       86.33246|
|heic0206c|     102.665016|       90.37304|      124.97095|
| ann1208a|       89.45832|       91.29653|      103.51094|
| opo9932c|      143.11295|      109.03904|       90.78594|
|potw1444a|      37.052086|       122.3911|      36.429565|
| opo9929b|       77.51546|      129.31705|       91.21968|
| ann1217a|      121.25168|      130.32187|       88.58818|
+---------+---------------+---------------+---------------+

