In [3]:
import numpy as np
import cv2
import glob
from sklearn.cluster import KMeans

try:
    from PIL import Image
except ImportError:
    import Image

In [23]:
# For each pixel create a vector containing its distance from ends of Lightness spectrum, 
# Saturation and x- and y-coordinates
def pixelVectors(hls):
    row_idx = 0
    column_idx = 0
    # Create an empty matrix for storing pixel data
    pixel_data = np.zeros([len(hls), len(hls[0]), 3])
    for row in hls:
        for pixel in row:
            # Put the vector for that pixel in the matrix
            # To create the vector, use the saturation, distance from the end of lightness spectrum and 
            # coordinates. pixel[1] contains lighntess, pixel[2] saturation
            pixel_data[row_idx][column_idx] = [pixel[2],column_idx,row_idx]
            column_idx+=1
        column_idx = 0
        row_idx+=1
    # Flatten the matrix with the pixel data into a list of vectors
    pixel_data = pixel_data.reshape(len(pixel_data)*len(pixel_data[0]), len(pixel_data[0][0]))
    return pixel_data

In [24]:
# Normalise the vectors using means and standard deviations
def normalisePixels(pixel_data):
    means = np.mean(pixel_data, axis=0)
    stds = np.std(pixel_data, axis=0)
    normed_pixels = []
    for row in pixel_data:
        normed_pixels.append((row-means)/stds)
    return normed_pixels,means,stds

In [25]:
# The cluster with the logo is the one with higher Saturation and distance from the ends of the lightness spectrum
# this chooses colourful regions over black and white
def findLogoCluster(cluster_centers):
    if (cluster_centers[0][0]) > (cluster_centers[1][0]):
        logo_cluster = 0
    else:
        logo_cluster = 1
    return logo_cluster

In [26]:
# With red points mark on the original image what points are in the cluster identified as the logo
neon_green = [57, 255, 20]
def showLogoCluster(normed_pixels, kmeans, means, stds, img):
    for point in normed_pixels:
        # Convert to the original points using the means and standard deviations
        if kmeans.predict([point]) == logo_cluster:
            if point[1]*stds[1]+means[1] > len(img)-2:
                xcoord = len(img)-1
            else:
                xcoord = point[1]*stds[1]+means[1]
            if point[2]*stds[2]+means[2] > len(img[0])-2:
                ycoord = len(img[0])-1
            else:
                ycoord = point[2]*stds[2]+means[2]
            img[int(round(ycoord)), int(round(xcoord))] = neon_green
    cv2.imshow('img',img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()    

In [35]:
files_to_test = ['51sm64mnQcL._SX342_.jpg','100082743.png','290066548.png','advance-payment-against-proforma-invoice-non-b-nw.jpg','argentina-certificate-of-origin.jpg','ARITACER1.jpg','b3bab39a63b38db79cd20f1422a5a06d.jpg','Certificate_of_origin_example_LRG.jpg','commercial-invoice (1).jpg','commercial-invoice.jpg','download.PNG','images (1).jpg','images (1).PNG','images (2).PNG','images (3).PNG']
files_to_retest = ['advance-payment-against-proforma-invoice-non-b-nw.jpg','images (3).PNG']
for filename in files_to_retest:
    img = cv2.imread(filename)
    hsl = cv2.cvtColor(img, cv2.COLOR_BGR2HLS)
    pixel_data = pixelVectors(hsl)
    normed_pixels, means, stds = normalisePixels(pixel_data)
    # Run K-means clustering on the normalised vectors
    # Assume 2 clusters - one for logo and one for the rest of the document
    kmeans = KMeans(n_clusters=2, random_state=0).fit(normed_pixels)
    cluster_centers = kmeans.cluster_centers_
    print(cluster_centers*stds+means)
    logo_cluster = findLogoCluster(cluster_centers)
    showLogoCluster(normed_pixels, kmeans, means, stds, img)

[[243.3096343  264.38606455 277.50394792]
 [  2.03236241 315.71117439 281.85717823]]


error: OpenCV(4.0.0) C:\projects\opencv-python\opencv\modules\imgproc\src\color.cpp:181: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [13]:
# Read all images into a list
image_list = []
for ext in ["jpg","gif","png"]:
    for filename in glob.glob('C:/users/szyma/Documents/Data Science projects/Logos/logo_images/*.%s' % ext) : #assuming png
        img = cv2.imread(filename)
        image_list.append(img)