This code is based on https://github.com/aysebilgegunduz/DominantColor

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import pandas as pd
import os

In [1]:
def find_histogram(clt):
    """
    Create a normalised histogram of cluster membership.

    One bin is produced per cluster centre, so that entry ``i`` of the
    result always describes ``clt.cluster_centers_[i]`` -- even when some
    cluster index has no members. (Sizing the bins from the number of
    *distinct* labels instead would shift later clusters into the wrong
    bin whenever a label is missing.)

    :param clt: fitted clustering object exposing ``labels_`` and,
        normally, ``cluster_centers_``. If ``cluster_centers_`` is absent
        the number of bins falls back to ``max(labels_) + 1``.
    :return: hist, a float array of per-cluster fractions that sums to 1
        (all zeros if there are no labels).
    """
    labels = np.asarray(clt.labels_).ravel()

    # Prefer the number of centres: it is what callers zip the result with.
    centers = getattr(clt, "cluster_centers_", None)
    if centers is not None:
        n_clusters = len(centers)
    elif labels.size:
        n_clusters = int(labels.max()) + 1
    else:
        n_clusters = 0

    if n_clusters == 0:
        return np.zeros(0, dtype="float")

    # Integer edges 0..n_clusters give exactly one bin per cluster index.
    bin_edges = np.arange(0, n_clusters + 1)
    (hist, _) = np.histogram(labels, bins=bin_edges)

    hist = hist.astype("float")
    total = hist.sum()
    if total > 0:
        # Skip normalisation for empty input instead of dividing by zero.
        hist /= total

    return hist

def plot_colors2(hist, centroids):
    """
    Draw a horizontal colour bar with one segment per cluster.

    :param hist: per-cluster fractions, paired element-wise with centroids
    :param centroids: per-cluster colours; each is cast to uint8 before drawing
    :return: bar, a uint8 array of shape (50, 300, 3)
    """
    bar = np.zeros((50, 300, 3), dtype="uint8")
    left = 0

    for share, centre in zip(hist, centroids):
        # Segment width is the cluster's fraction of the 300-pixel bar.
        right = left + (share * 300)
        top_left = (int(left), 0)
        bottom_right = (int(right), 50)
        fill = centre.astype("uint8").tolist()
        # Thickness -1 asks OpenCV for a filled rectangle.
        cv2.rectangle(bar, top_left, bottom_right, fill, -1)
        left = right

    return bar

In [None]:
# Folder holding the images to analyse; replace with a real path.
directory = r"your_directory"

# Parallel lists with one entry per successfully processed image.
viz_name = []
rgb = []
frequency = []

for filename in os.listdir(directory):
    # Only files whose name ends in ".jpg" are analysed.
    if not filename.endswith(".jpg"):
        continue

    file = os.path.join(directory, filename)

    try:
        img = cv2.imread(file)
        if img is None:
            # cv2.imread reports an unreadable file by returning None
            # rather than raising, so surface it explicitly.
            raise ValueError("cv2.imread could not read the image")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Flatten to one row per pixel: (rows * columns, channels).
        img = img.reshape((img.shape[0] * img.shape[1], 3))
        clt = KMeans(n_clusters=3)  # cluster number
        clt.fit(img)

        hist = find_histogram(clt)
        bar = plot_colors2(hist, clt.cluster_centers_)

        # Distinct colours in the first pixel row of the bar, and how
        # many pixels of that row each colour occupies.
        (unique, counts) = np.unique(bar[0], axis=0, return_counts=True)

        # Turn arrays into plain lists.
        unique_list = unique.tolist()
        frequency_list = counts.tolist()
    except Exception as exc:
        # Best effort: one bad image must not abort the whole run, but
        # say which file was dropped and why instead of hiding it.
        # Catching Exception (not a bare except) lets Ctrl-C through.
        print(f"Skipping {file}: {exc}")
        continue

    # Append only after every step succeeded so the lists stay aligned.
    viz_name.append(file)
    rgb.append([unique_list])
    frequency.append(frequency_list)

In [None]:
# One row per processed image: its path, the colours found, and how many
# bar pixels each colour occupies.
columns = {
    "Filename": viz_name,
    "RGB": rgb,
    "Frequency": frequency,
}
df = pd.DataFrame(columns)

# Widen the column display so the nested colour lists are not truncated.
pd.set_option("display.max_colwidth", 200)
df