This notebook demonstrates primary-color analysis of a color image using the K-Means algorithm.
The output is the N primary colors and their corresponding percentages.

In [None]:
import matplotlib.pyplot as plt
import cv2
import numpy as np

import webcolors
from sklearn.metrics import mean_squared_error
import pandas as pd
import plotly.express as px
import colorgram
from colormath.color_diff import delta_e_cie2000, delta_e_cmc, delta_e_cie1976
from colormath.color_objects import sRGBColor, LabColor
from colormath.color_conversions import convert_color

from ammico.utils import get_color_table


In [None]:
def rgb2name(c, merge_color=True):
    """Map an RGB triple to a CSS3 color name.

    An exact CSS3 hex match is tried first. If there is none, the CSS3 color
    with the smallest CMC Delta-E distance (computed in Lab space) is chosen.

    Args:
        c: Indexable holding the red, green and blue channel values on the
            0-255 scale (they are formatted as two-digit hex below).
        merge_color: If True, the resulting name is additionally mapped onto
            the reduced color groups returned by ``get_color_table()``.

    Returns:
        A tuple ``(color_name, delta_e)``. ``delta_e`` is 0 when the color
        matched a CSS3 name exactly, otherwise the Delta-E distance to the
        selected CSS3 color.
    """
    h_color = "#{:02x}{:02x}{:02x}".format(int(c[0]), int(c[1]), int(c[2]))
    try:
        output_color = webcolors.hex_to_name(h_color, spec="css3")
        output_diff = 0
    except ValueError:
        # No exact CSS3 match: fall back to the perceptually nearest CSS3 color.
        css3_colors = list(webcolors.CSS3_NAMES_TO_HEX.items())

        # The channels are 0-255 values, so colormath must be told to rescale
        # them (is_upscaled=True); by default it expects floats in 0.0-1.0.
        c1 = convert_color(
            sRGBColor(c[0], c[1], c[2], is_upscaled=True), LabColor
        )

        delta_e_lst = []
        for _, css_hex in css3_colors:
            cur_clr = webcolors.hex_to_rgb(css_hex)
            c2 = convert_color(
                sRGBColor(cur_clr[0], cur_clr[1], cur_clr[2], is_upscaled=True),
                LabColor,
            )
            delta_e_lst.append(delta_e_cmc(c1, c2))

        # Index of the lowest Delta-E (first one wins on ties).
        min_diff = int(np.argmin(delta_e_lst))

        output_color = (
            str(css3_colors[min_diff][0]).lower().replace("grey", "gray")
        )
        output_diff = delta_e_lst[min_diff]

    # Match the color to the reduced list.
    if merge_color:
        for reduced_key, reduced_color_sub_list in get_color_table().items():
            # NOTE(review): this is a substring test against the string form of
            # the "ColorName" entry, so a short name such as "red" can also hit
            # a group that only lists e.g. "darkred" — confirm the structure of
            # get_color_table() and switch to an exact match if appropriate.
            if (
                str(output_color).lower()
                in str(reduced_color_sub_list["ColorName"]).lower()
            ):
                output_color = reduced_key
                break

    return output_color, output_diff

In [None]:
def merge_color_dict(old_color_dict, new_color, merge_color=True):
    """Fold a newly extracted color into an existing color entry.

    The two RGB values are averaged, weighted by their shares, truncated to
    integers and re-named via ``rgb2name``. The shares are added up.

    Args:
        old_color_dict: Existing entry; its "color_mean", "percentage" and
            "color_name" items are read.
        new_color: Object exposing ``rgb`` and ``proportion`` attributes.
        merge_color: Forwarded to ``rgb2name``.

    Returns:
        A new entry dict describing the merged color.
    """
    previous_rgb = old_color_dict["color_mean"]
    incoming_rgb = new_color.rgb
    previous_share = old_color_dict["percentage"]
    incoming_share = new_color.proportion

    # Share-weighted mean per channel, truncated to whole numbers.
    weighted_mean = np.average(
        [previous_rgb, incoming_rgb],
        axis=0,
        weights=[previous_share, incoming_share],
    )
    blended_rgb = weighted_mean.astype(int)

    name, distance = rgb2name(blended_rgb, merge_color=merge_color)
    total_share = previous_share + incoming_share

    return {
        "color_mean": blended_rgb,
        "hex_mean": webcolors.rgb_to_hex(blended_rgb),
        "label": name + f" {round(total_share,2)}",
        "color_name": name,
        "color_name_old": old_color_dict["color_name"],
        "distance_from_rgb_label": distance,
        "percentage": total_share,
    }

In [None]:
# NOTE(review): not referenced by any code visible in this notebook; presumably
# a leftover from an earlier K-Means based version — confirm before removing.
n_cluster = 20


def analyze_image(image_path, n_extract=10, merge_color=True, input_dict=None):
    """Extract the dominant colors of one image and tabulate them by name.

    Colors are extracted with ``colorgram.extract`` and named with
    ``rgb2name``. Colors that resolve to the same name are merged with
    ``merge_color_dict``.

    Args:
        image_path: Image location handed to ``colorgram.extract``.
        n_extract: Number of colors requested from ``colorgram.extract``.
        merge_color: Forwarded to ``rgb2name`` and ``merge_color_dict``.
        input_dict: Optional previous result to accumulate into. May be a
            mapping of label -> entry dict, or a DataFrame as returned by this
            function. It is copied, never modified in place.

    Returns:
        A DataFrame with one column per color label (sorted by label) and one
        row per entry field.
    """
    # Work on a plain dict copy so the caller's object is left untouched and a
    # DataFrame from a previous call behaves like the dict it was built from.
    if input_dict is None:
        output_dict = {}
    elif isinstance(input_dict, pd.DataFrame):
        output_dict = input_dict.to_dict()
    else:
        output_dict = dict(input_dict)

    for color in colorgram.extract(image_path, n_extract):
        # Skip colors that cover 1% of the image or less.
        if color.proportion <= 0.01:
            continue

        rgb_name, rgb_difference = rgb2name(color.rgb, merge_color=merge_color)
        label = f"{rgb_name}"

        if label in output_dict:
            # Same name seen before: merge the new color into the old entry,
            # using the same naming mode as for the initial lookup.
            output_dict[label] = merge_color_dict(
                output_dict[label], color, merge_color=merge_color
            )
        else:
            output_dict[label] = {
                "color_mean": color.rgb,
                "hex_mean": webcolors.rgb_to_hex(color.rgb),
                "label": label + f" {round(color.proportion,2)}",
                "color_name": rgb_name,
                "distance_from_rgb_label": rgb_difference,
                "percentage": color.proportion,
            }

    df = pd.DataFrame(output_dict)
    df = df.reindex(sorted(df.columns), axis=1)

    return df

In [None]:
def analyze_images(file_list, n_extract=10, merge_color=True):
    """Run ``analyze_image`` over several files and accumulate one color table.

    Args:
        file_list: Iterable of image paths; each is passed to ``analyze_image``.
        n_extract: Forwarded to ``analyze_image``.
        merge_color: Forwarded to ``analyze_image``.

    Returns:
        A DataFrame with one column per color label (sorted by label). It is
        empty when ``file_list`` is empty.
    """
    accumulated = {}
    for file in file_list:
        result = analyze_image(
            file, n_extract=n_extract, merge_color=merge_color, input_dict=accumulated
        )
        # analyze_image returns a DataFrame; convert it back to a plain
        # {label: {field: value}} dict so the next call receives a real dict
        # instead of relying on DataFrame item assignment semantics.
        accumulated = result.to_dict()

    df = pd.DataFrame(accumulated)
    df = df.reindex(sorted(df.columns), axis=1)
    return df

In [None]:
def show_piechart(df, n_max=-1):
    """Plot the color shares of a color table as a donut chart.

    Args:
        df: Table with one column per color and at least the rows
            "percentage", "label" and "hex_mean".
        n_max: Number of largest colors to show; -1 shows all of them.
    """
    per_color = df.T
    limit = len(per_color) if n_max == -1 else n_max

    # Largest shares first, cut off after `limit` rows.
    ranked = per_color.sort_values(by="percentage", ascending=False)
    df = ranked.head(limit)

    # Draw every slice in the mean color it represents.
    color_map = dict(zip(df["label"], df["hex_mean"]))

    fig = px.pie(
        df,
        values="percentage",
        names="label",
        title="Color analysis",
        color="label",
        color_discrete_map=color_map,
        hole=0.3,
    )
    fig.show()


In [None]:
image_path = "../../documents-export-2023-04-27/images-text/104646S_fra.png"


# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would only surface as an obscure error inside cvtColor.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads images as BGR; convert to RGB for display with matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(image)


In [None]:
# Analyse the single example image with n_extract=10 and plot the result.
df = analyze_image(image_path, n_extract=10)
show_piechart(df)


In [None]:
image_paths = [
    "../../documents-export-2023-04-27/images-text/104646S_fra.png",
    "../../france-in-pictures-beautiful-places-to-photograph-calanques-national-park.jpg",
]


def _load_rgb(path):
    """Read an image with OpenCV and return it in RGB channel order."""
    bgr = cv2.imread(path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if bgr is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    # OpenCV loads images as BGR; matplotlib expects RGB.
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


image1 = _load_rgb(image_paths[0])
image2 = _load_rgb(image_paths[1])

In [None]:
# Render the first loaded image (RGB channel order) with matplotlib.
plt.imshow(image1)


In [None]:
# Render the second loaded image (RGB channel order) with matplotlib.
plt.imshow(image2)


In [None]:
import glob

# glob returns matches in a filesystem-dependent order; sort them so the
# step-by-step merge (which truncates to integers each time) is reproducible.
file_list = sorted(glob.glob("../../documents-export-2023-04-27/images-text/*.png"))

# Accumulate the colors of all matched images into one table and plot it.
df2 = analyze_images(file_list, merge_color=True)
show_piechart(df2)

In [None]:
# Show the accumulated color table of all analysed images as the cell output.
df2


In [None]:
# Show only the "color_name" row, i.e. one name per color column.
df2.loc["color_name"]
