In [68]:
#import packages
import matplotlib.pyplot as plt
import cv2
import numpy as np
import pandas as pd
import glob
import sqlite3
import shutil
import os
import csv
from PIL import Image
from skimage.color import rgb2lab, deltaE_ciede2000
from sklearn_extra.cluster import KMedoids

import warnings
warnings.filterwarnings("ignore")

def copy_and_overwrite(from_path, to_path):
    if os.path.exists(to_path):
        shutil.rmtree(to_path)
    shutil.copytree(from_path, to_path)


# pixels in an image and their distances to eachother in deltaE_ciede2000
class pixel_distances_within_image():
    def __init__(self,pic):
        self.pic=pic
    def smallify_image(self,n):
        imgs = Image.open(self.pic) 
        imgs.thumbnail((n, n))# n=20, m=20
        return imgs.save(self.pic)
    def reshape(self):
        image = cv2.imread(self.pic)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
        image = image.reshape((image.shape[0] * image.shape[1], 3))
        return image
    def transform2lab(self):
        return rgb2lab(self.reshape())
    def distances_between_pixel_df(self):
        dislab=pd.DataFrame()
        image=self.transform2lab()
        for i in range(0,len(image)-1):
            l = [0] * (i+1)
            lo=pd.DataFrame()
            for j in range(i+1,len(image)):
                data=deltaE_ciede2000(image[i],image[j])
                l.append(data)
            lo=lo.append(l).T
            dislab=dislab.append(lo).reset_index(drop=True)
        dislabT=dislab.T.reset_index(drop=True)
        distance_df=dislab.reset_index(drop=True).add(dislabT, fill_value=0).fillna(0)
        return distance_df

#calculate dominant colour clusters with KMedoids
class dominant_colours_via_KMedoids(): 
    def __init__(self, df):
        self.distance_df=df
    def kmedoids_labels(self,n_clusters=5):
        kmedoids = KMedoids(n_clusters,metric='precomputed').fit(self.distance_df)
        labels = kmedoids.predict(self.distance_df)
        #cc=kmedoids.cluster_centers_
        return labels
    def cluster(self, image, n_clusters=5):
        labels=self.kmedoids_labels()
        dist=self.distance_df
        dist.insert(0, 'Cluster Labels', labels)
        centroids=[]
        for n in range(0,n_clusters):
            cluster=dist.loc[dist['Cluster Labels'] == n][dist.index[dist['Cluster Labels'] == n].tolist()]
            summe=cluster.sum(axis=1)
            cluster['sum']=summe
            centroid_row=cluster.loc[cluster['sum']==min(summe)]
            centroid=centroid_row.index[0]
            centroids.append(image[centroid])
        return labels, centroids
    
#Visualisation of dominant colours
class visualise_dominant_colours():
    def __init__(self, labels, centroids):
        self.labels=labels
        self.centroids=centroids
    def centroid_histogram(self):
        numLabels = np.arange(0, len(np.unique(self.labels)) + 1)
        (hist, _) = np.histogram(self.labels, bins = numLabels)
        hist = hist.astype("float")
        hist /= hist.sum()
        return hist
    def plot_colours(self):
        bar = np.zeros((50, 300, 3), dtype = "uint8")
        startX = 0
        for (percent, color) in zip(self.centroid_histogram(), self.centroids):
            endX = startX + (percent * 300)
            cv2.rectangle(bar, (int(startX), 0), (int(endX), 50),
                color.astype("uint8").tolist(), -1)
            startX = endX
        return bar
    def plot_and_save_dominant_colours(self,name):
        bar = self.plot_colours()
        plt.figure()
        plt.axis("off")
        plt.imshow(bar)
        plt.savefig(name,dpi=400,bbox_inches='tight')
        plt.show()
    
    #transform each picture to a 1x100 image of its cluster colours
    def simplify_colours(self):
        bar = np.zeros((1, 100, 3), dtype = "uint8")
        startX = 0
        for (percent, color) in zip(self.centroid_histogram(), self.centroids):
            endX = startX + (percent * 100)
            cv2.rectangle(bar, (int(startX), 0), (int(endX), 1),
                color.astype("uint8").tolist(), -1)
            startX = endX
        return bar
def format_five_clusters(clstr):
    return str(clstr[1][0].tolist())+str(clstr[1][1].tolist())+str(clstr[1][2].tolist())+str(clstr[1][3].tolist())+str(clstr[1][4].tolist())

def write_colour_data_to_database(colour_info):
    conn = sqlite3.connect('C:\\Users\\dherschmann\\Documents\\GitHub\\Instagram-Automation\\database\\instagram.sqlite')
    cur = conn.cursor()
    for line in colour_info:

        name = line[0]
        colours = line[1]

        cur.execute('''INSERT OR IGNORE INTO IMAGE_INFO (IMAGE_NAME, IMAGE_COLOUR) VALUES (?,?)''',
        (name, colours))

    conn.commit()
    
def write_to_file(content, file):    
    with open(file, "w") as the_file:
        csv.register_dialect("custom", delimiter=",", skipinitialspace=True)
        writer = csv.writer(the_file, dialect="custom")
        for tup in content:
            writer.writerow(tup)

def read_from_file(file):            
    with open(file, newline='') as csvfile:
        already_in_db=[]
        csv.register_dialect("custom", delimiter=",", skipinitialspace=True)
        reader = csv.reader(csvfile, dialect='custom')
        for row in reader:
            if row!=[]:
                already_in_db.append(row[0])  
        return already_in_db
            
os.chdir('C:\\Users\\dherschmann\\Documents\\GitHub\\Instagram-Automation')    
    
image_names=glob.glob("colour-to-database/Images/*.jpg")

#try:
#    for image in read_from_file('database/colours.csv'):
#        image_names.remove(image) 
#except:
#    print("Does file not exist or what's happening?")
         
#scale down images too 20x20 pixels
for i in image_names[:10]:
    pixel_distances_within_image(i).smallify_image(20)

list_of_problematic_images=[]
colour_info=[]
for i in image_names[:2]:
    try:
        bsp=pixel_distances_within_image(i)
        dom=bsp.distances_between_pixel_df()
        km=dominant_colours_via_KMedoids(dom)
        clstr=km.cluster(bsp.reshape())
        i=i.replace('colour-to-database/','')
        colours=format_five_clusters(clstr)
        colour_info.append((i, colours))
    except:
        list_of_problematic_images.append(i)
         
         
write_colour_data_to_database(colour_info)
write_to_file(colour_info, "colour-to-database/colours.csv")  
write_to_file(list_of_problematic_images, "colour-to-database/has_issues_colours.csv")  

In [76]:
for image in read_from_file('database/colours.csv'):
        image_names.remove('colour-to-database/'+image) 

In [77]:
image_names

['colour-to-database/Images\\023.JPG',
 'colour-to-database/Images\\028.jpg',
 'colour-to-database/Images\\032.JPG',
 'colour-to-database/Images\\036.JPG',
 'colour-to-database/Images\\10295222_873765872664447_6139990852376371238_o.jpg',
 'colour-to-database/Images\\10608430_843944268979941_2106562515853277705_o.jpg',
 'colour-to-database/Images\\10808073_10152947091749758_938191433_n.jpg',
 'colour-to-database/Images\\10991612_870902366284131_4698480174384260508_o.jpg',
 'colour-to-database/Images\\11080717_898573953516972_7426226399171102221_o.jpg',
 'colour-to-database/Images\\130.JPG',
 'colour-to-database/Images\\134.JPG',
 'colour-to-database/Images\\151.JPG',
 'colour-to-database/Images\\167.JPG',
 'colour-to-database/Images\\168.JPG',
 'colour-to-database/Images\\191662_185181388189569_5408132_o.jpg',
 'colour-to-database/Images\\1973808_898573550183679_2250749955249535106_o.jpg',
 'colour-to-database/Images\\199295_190864547621253_5692184_n.jpg',
 'colour-to-database/Images\\2

In [74]:
read_from_file('database/colours.csv')

['Images\\00000PORTRAIT_00000_BURST20200112193145840.jpg',
 'Images\\00000PORTRAIT_00000_BURST20200123092647375.jpg',
 'Images\\00000PORTRAIT_00000_BURST20200523193045802.jpg']

In [13]:
#scale down images too 20x20 pixels
for i in image_names[:10]:
    pixel_distances_within_image(i).smallify_image(20)
 
   
list_of_problematic_images=[]
colour_info=[]
for i in image_names[:2]:
    try:
        bsp=pixel_distances_within_image(i)
        dom=bsp.distances_between_pixel_df()
        km=dominant_colours_via_KMedoids(dom)
        clstr=km.cluster(bsp.reshape())
        i=i.replace('colour-to-database/','')
        colours=format_five_clusters(clstr)
        colour_info.append((i, colours))
    except:
        list_of_problematic_images.append(i)
    
#image_names=list_of_images   
print("The following images caused troubles and are hence excluded: ", list_of_problematic_images) 

The following images caused troubles and are hence excluded:  ['colour-to-database/Images\\00000PORTRAIT_00000_BURST20200112193145840.jpg', 'colour-to-database/Images\\00000PORTRAIT_00000_BURST20200123092647375.jpg']


In [19]:
colour_info=[]
for i in image_names[:2]:

    bsp=pixel_distances_within_image(i)
    dom=bsp.distances_between_pixel_df()
    km=dominant_colours_via_KMedoids(dom)
    clstr=km.cluster(bsp.reshape())
    i=i.replace('colour-to-database/','')
    colours=format_five_clusters(clstr)
    colour_info.append((i, colours))

In [32]:
colour_info

[('Images\\00000PORTRAIT_00000_BURST20200112193145840.jpg',
  '[113, 63, 88][181, 179, 192][88, 49, 70][56, 45, 53][136, 92, 117]'),
 ('Images\\00000PORTRAIT_00000_BURST20200123092647375.jpg',
  '[147, 139, 102][127, 125, 130][87, 83, 84][193, 191, 194][106, 100, 74]')]

In [55]:
with open('database/colours.csv', newline='') as csvfile:
    already_in_db=[]
    csv.register_dialect("custom", delimiter=",", skipinitialspace=True)
    reader = csv.reader(csvfile, dialect='custom')
    for row in reader:
        if row!=[]:
            already_in_db.append(row[0])

In [58]:
def read_from_file(file):            
    with open(file, newline='') as csvfile:
        already_in_db=[]
        csv.register_dialect("custom", delimiter=",", skipinitialspace=True)
        reader = csv.reader(csvfile, dialect='custom')
        for row in reader:
            if row!=[]:
                already_in_db.append(row[0])  
        return already_in_db

In [61]:
image_names=glob.glob("colour-to-database/Images/*.jpg")

for image in read_from_file('database/colours.csv'):
    image_names.remove("colour-to-database/"+image) 

ValueError: list.remove(x): x not in list

In [62]:
read_from_file('database/colours.csv')

['Image_name',
 'Images\\00000PORTRAIT_00000_BURST20200112193145840.jpg',
 'Images\\00000PORTRAIT_00000_BURST20200123092647375.jpg',
 'Images\\00000PORTRAIT_00000_BURST20200523193045802.jpg',
 'Images\\023.JPG',
 'Images\\028.jpg',
 'Images\\032.JPG',
 'Images\\036.JPG']

In [60]:
image_names=glob.glob("colour-to-database/Images/*.jpg")
image_names

['colour-to-database/Images\\00000PORTRAIT_00000_BURST20200112193145840.jpg',
 'colour-to-database/Images\\00000PORTRAIT_00000_BURST20200123092647375.jpg',
 'colour-to-database/Images\\00000PORTRAIT_00000_BURST20200523193045802.jpg',
 'colour-to-database/Images\\023.JPG',
 'colour-to-database/Images\\028.jpg',
 'colour-to-database/Images\\032.JPG',
 'colour-to-database/Images\\036.JPG',
 'colour-to-database/Images\\10295222_873765872664447_6139990852376371238_o.jpg',
 'colour-to-database/Images\\10608430_843944268979941_2106562515853277705_o.jpg',
 'colour-to-database/Images\\10808073_10152947091749758_938191433_n.jpg',
 'colour-to-database/Images\\10991612_870902366284131_4698480174384260508_o.jpg',
 'colour-to-database/Images\\11080717_898573953516972_7426226399171102221_o.jpg',
 'colour-to-database/Images\\130.JPG',
 'colour-to-database/Images\\134.JPG',
 'colour-to-database/Images\\151.JPG',
 'colour-to-database/Images\\167.JPG',
 'colour-to-database/Images\\168.JPG',
 'colour-to-d

In [31]:
write_colour_data_to_database(colour_info)

In [35]:
with open("colour-to-database/colours.csv", "w") as the_file:
    csv.register_dialect("custom", delimiter=",", skipinitialspace=True)
    writer = csv.writer(the_file, dialect="custom")
    writer.writerow(("Image_name", "Colours"))
    for tup in colour_info:
        writer.writerow(tup)