In [1]:
# import the necessary packages
from sklearn.cluster import KMeans
from collections import Counter
import pandas as pd
import numpy as np
import urllib
import cv2
import time


In [2]:
# METHOD #1: OpenCV, NumPy, and urllib
def url_to_image(url):
    # download the image, convert it to a NumPy array, and then read
    # it into OpenCV format
    resp = urllib.request.urlopen(url)
    image = np.asarray(bytearray(resp.read()), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)

    # return the image
    return image

In [3]:
def get_dominant_color(image, k=10, image_processing_size = None):
    """
    takes an image as input
    returns the dominant color of the image as a list
    
    dominant color is found by running k means on the 
    pixels & returning the centroid of the largest cluster

    processing time is sped up by working with a smaller image; 
    this resizing can be done with the image_processing_size param 
    which takes a tuple of image dims as input

    >>> get_dominant_color(my_image, k=4, image_processing_size = (25, 25))
    [56.2423442, 34.0834233, 70.1234123]
    """
    #resize image if new dims provided
    if image_processing_size is not None:
        image = cv2.resize(image, image_processing_size, 
                            interpolation = cv2.INTER_AREA)
    
    #reshape the image to be a list of pixels
    image = image.reshape((image.shape[0] * image.shape[1], 3))

    #cluster and assign labels to the pixels 
    clt = KMeans(n_clusters = k)
    labels = clt.fit_predict(image)

    #count labels to find most popular
    label_counts = Counter(labels)

    #subset out most popular centroid
    dominant_color = clt.cluster_centers_[label_counts.most_common(1)[0][0]]

    return list(dominant_color)

In [4]:
img = url_to_image("https://ecs7.tokopedia.net/img/product-1/2019/3/13/252734311/252734311_c3ee4af0-6970-4e4c-b167-1881339113ba_1677_1677.jpg")
b = cv2.resize(img,(100,100))
r,g,b = get_dominant_color(b, k=5, image_processing_size = None)


In [5]:
c = cv2.resize(b,(1000,1000))

In [6]:
df = pd.read_csv("df_merge.csv")

In [8]:
df_head = df.head()

In [58]:
count = 0
time_start = time.time()
for index, row in df.iterrows():
    url = row['image_url']
    img = url_to_image(url)
    img_resized = cv2.resize(img,(100,100))
    r,g,b = get_dominant_color(img_resized, k=5, image_processing_size = None)
    df.loc[index,'r'] = r
    df.loc[index,'g'] = g
    df.loc[index,'b'] = b
    count += 1
    runtime = time.time() - x
    if count == 1:
        print('started at ' + str(runtime))
    if count % 100 == 0:
        print ('Data ke:' + str(count)+ ". runtime " + str(runtime))
    if count > 500:
        break
        
#     row['new'] = "link " + url
#     print(url)

started at 854.6628940105438
Data ke:1000. runtime 1348.257580757141


error: OpenCV(3.4.2) C:\Miniconda3\conda-bld\opencv-suite_1534379934306\work\modules\imgproc\src\resize.cpp:4044: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


In [21]:
x = time.time()

In [56]:
time.time() - x

21.976031064987183

In [17]:
2189180 % 10

0

In [14]:
df_head

Unnamed: 0.1,Unnamed: 0,product_id,product_name,create_time,child_cat_id,cat,product_pic_id,image_url,new
0,0,252734311,Kemeja Tunik Anak cewek,2018-01-18 12:01:16.827911,151,"[(78, Fashion Anak), (82, Pakaian Anak Perempu...",1027145252,https://ecs7.tokopedia.net/img/product-1/2019/...,link https://ecs7.tokopedia.net/img/product-1/...
1,1,252734311,Kemeja Tunik Anak cewek,2018-01-18 12:01:16.827911,151,"[(78, Fashion Anak), (82, Pakaian Anak Perempu...",1027145251,https://ecs7.tokopedia.net/img/product-1/2019/...,link https://ecs7.tokopedia.net/img/product-1/...
2,2,252734311,Kemeja Tunik Anak cewek,2018-01-18 12:01:16.827911,151,"[(78, Fashion Anak), (82, Pakaian Anak Perempu...",537467002,https://ecs7.tokopedia.net/img/product-1/2018/...,link https://ecs7.tokopedia.net/img/product-1/...
3,3,252734311,Kemeja Tunik Anak cewek,2018-01-18 12:01:16.827911,151,"[(78, Fashion Anak), (82, Pakaian Anak Perempu...",537467001,https://ecs7.tokopedia.net/img/product-1/2018/...,link https://ecs7.tokopedia.net/img/product-1/...
4,4,252734311,Kemeja Tunik Anak cewek,2018-01-18 12:01:16.827911,151,"[(78, Fashion Anak), (82, Pakaian Anak Perempu...",537467000,https://ecs7.tokopedia.net/img/product-1/2018/...,link https://ecs7.tokopedia.net/img/product-1/...
