In [35]:
import glob
import os
from collections import Counter
from random import shuffle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from skimage.color import rgb2lab, deltaE_cie76
from sklearn.cluster import KMeans

%matplotlib inline

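The training data is prepared from an image categorization challenge on Kaggle; the original dataset is published there under the name "256_ObjectCategories".
Because of the local machine's hardware limits, only 10,000 images are used. They are split into two classes for a "color" vs. "grey" classifier: the first 5,000 images are converted to greyscale (label 0) and the next 5,000 are kept in color (label 1), with the first 4,000 of each class written to the training set and the remaining 1,000 to the test set.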

In [60]:
# Build the grey / color dataset on disk and keep every image in memory.
#
# Images are taken in sorted path order and assigned by their running index
# (only successfully read images are counted):
#   [0, GRAY_TRAIN_END)              -> greyscale, training_data/gray
#   [GRAY_TRAIN_END, GRAY_END)       -> greyscale, testing_data/gray
#   [GRAY_END, COLOR_TRAIN_END)      -> color,     training_data/color
#   [COLOR_TRAIN_END, TOTAL_IMAGES)  -> color,     testing_data/color
SOURCE_PATTERN = '256_ObjectCategories/*/*.jpg'  # having jpg
GRAY_TRAIN_END = 4000
GRAY_END = 5000
COLOR_TRAIN_END = 9000
TOTAL_IMAGES = 10000

# cv2.imwrite does not create missing directories (it just returns False),
# so make sure every destination exists before the loop starts.
for out_dir in ('training_data/gray', 'testing_data/gray',
                'training_data/color', 'testing_data/color'):
    os.makedirs(out_dir, exist_ok=True)

# Each entry is {"image": <BGR array>, "label": 0 for grey / 1 for color}.
image_list = []
i = 0
# glob returns paths in a filesystem-dependent order; sorting makes the
# grey/color and train/test assignment reproducible between runs.
for filename in sorted(glob.glob(SOURCE_PATTERN)):
    if i == TOTAL_IMAGES:
        break

    img = cv2.imread(filename)
    if img is None:
        # cv2.imread reports an unreadable/corrupt file by returning None.
        print('skipping unreadable image: {}'.format(filename))
        continue

    # os.path.basename handles the platform's path separator, unlike
    # splitting on a hard-coded backslash.
    # NOTE: `name` already ends in ".jpg", so the written files are named
    # "<name>.jpg" with a doubled extension; this is kept so the output
    # stays consistent with previously generated datasets.
    name = os.path.basename(filename)

    dict1 = {}
    if i < GRAY_END:
        # Drop the color information, then expand back to three identical
        # channels so grey and color samples share the same array layout.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        out_image = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
        dict1["label"] = 0
        if i < GRAY_TRAIN_END:
            out_path = 'training_data/gray/{}.jpg'.format(name)
        else:
            out_path = 'testing_data/gray/{}.jpg'.format(name)
    else:
        out_image = img
        dict1["label"] = 1
        if i < COLOR_TRAIN_END:
            out_path = 'training_data/color/{}.jpg'.format(name)
        else:
            out_path = 'testing_data/color/{}.jpg'.format(name)
    dict1["image"] = out_image

    if not cv2.imwrite(out_path, out_image):
        # imwrite signals failure through its return value, not an exception.
        print('warning: could not write {}'.format(out_path))

    image_list.append(dict1)
    i += 1

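`kmean_cluster` is used as a sanity check on the labels: it extracts the dominant colors of each image with k-means clustering and reports any image whose dominant colors contradict its label, i.e. a color-labelled image that looks grey or a grey-labelled image that contains color.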

In [20]:
def kmean_cluster(dict1, n_clusters=4, tol=1e-3):
    """Check an image's grey/color label against its dominant colors.

    The image is resized to 600x400, its pixels are clustered with k-means,
    and every populated cluster center is tested for being achromatic
    (R, G and B equal within ``tol``). The image "looks grey" when all of its
    dominant colors are achromatic; otherwise it "looks color". When that
    disagrees with ``dict1["label"]`` the mismatch is printed and the image
    is displayed.

    Parameters
    ----------
    dict1 : dict
        Must contain ``"image"`` (a 3-channel image in OpenCV's BGR channel
        order, as converted below with ``COLOR_BGR2RGB``) and ``"label"``
        (0 means grey; any other value is treated as color).
    n_clusters : int, optional
        Number of dominant colors to extract. Defaults to 4.
    tol : float, optional
        Largest allowed difference between the channels of a cluster center,
        on the 0-255 scale, for it to count as grey. The small default only
        absorbs floating-point noise in the computed centers.

    Returns
    -------
    bool
        True when the dominant colors agree with the label, False otherwise.
    """
    image = cv2.cvtColor(dict1["image"], cv2.COLOR_BGR2RGB)

    # Cluster on a fixed-size copy; cv2.resize takes (width, height).
    resized = cv2.resize(image, (600, 400), interpolation=cv2.INTER_AREA)
    pixels = resized.reshape(-1, 3)

    # random_state makes the clustering repeatable from run to run.
    clf = KMeans(n_clusters=n_clusters, random_state=0)
    labels = clf.fit_predict(pixels)

    # Only look at centers that actually received pixels: an image with fewer
    # distinct colors than n_clusters can leave some clusters unpopulated.
    populated = np.unique(labels)
    dominant_colors = clf.cluster_centers_[populated]

    # A center is grey when its largest and smallest channel values coincide.
    channel_spread = dominant_colors.max(axis=1) - dominant_colors.min(axis=1)
    looks_grey = bool(np.all(channel_spread <= tol))
    labelled_grey = dict1["label"] == 0

    if looks_grey == labelled_grey:
        return True

    message = 'error:  image labelled {} but its dominant colors look {}'.format(
        'grey' if labelled_grey else 'color',
        'grey' if looks_grey else 'color',
    )
    print(message)
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.imshow(image)
    ax.set_title(message)
    ax.axis('off')
    plt.show()
    # Release the figure; this function is called once per image in a loop.
    plt.close(fig)
    return False

In [None]:

# Run the k-means label check on every prepared image, in list order.
for labelled_image in image_list:
    kmean_cluster(labelled_image)