In [1]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import scipy.misc

In [2]:
'''
출처
https://github.com/inyl/my_notebook/blob/master/open_cv/image_color_cluster.ipynb
https://www.pyimagesearch.com/2014/05/26/opencv-python-k-means-color-clustering/
'''

'\n출처\nhttps://github.com/inyl/my_notebook/blob/master/open_cv/image_color_cluster.ipynb\nhttps://www.pyimagesearch.com/2014/05/26/opencv-python-k-means-color-clustering/\n'

In [3]:
def plot_colors(hist, centroids):
    '''
    initialize the bar chart representing the relative frequency of each of the colors
    각 색의 빈도를 나타내는 바 차트를 초기화
    '''
    bar = np.zeros((50, 300, 3), dtype="uint8")
    startX = 0

    # loop over the percentage of each cluster and the color of each cluster
    for (percent, color) in zip(hist, centroids):
        # plot the relative percentage of each cluster
        endX = startX + (percent * 300)
        cv2.rectangle(bar, (int(startX), 0), (int(endX), 50),
                      color.astype("uint8").tolist(), -1)
        startX = endX

    # return the bar chart
    return bar

In [4]:
def centroid_histogram(clt):
    '''
    # grab the number of different clusters and create a histogram
    히스토그램 형식으로 색을 반환
    based on the number of pixels assigned to each cluster
    각 클러스터의 픽셀의 숫자를 기반으로 함
    '''
    numLabels = np.arange(0, len(np.unique(clt.labels_)) + 1)
    (hist, _) = np.histogram(clt.labels_, bins=numLabels)

    # normalize the histogram, such that it sums to one
    hist = hist.astype("float")
    hist /= hist.sum()  # hist = hist/hist.sum()

    # return the histogram
    return hist

In [5]:
def image_color_cluster(image_path, k = 5):
    image = cv2.imread(image_path)
    # image의 shape을 찍어보면, height, width, channel 순으로 나옴
    # channel은 RGB를 말함
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # cv에서는 RGB가 아닌 BGR 순으로 나오기 때문에 순서를 RGB로 전환
    image = image.reshape((image.shape[0] * image.shape[1], 3))
    # shape의 0,1번째 즉, height와 width를 통합시킴
    
    clt = KMeans(n_clusters = k)  # 평균 알고리즘 KMeans
    clt.fit(image)

    hist = centroid_histogram(clt)
    bar = plot_colors(hist, clt.cluster_centers_)
    return bar

In [6]:
def dec_to_hex(color):
    if color < 16:
        return '0' + str(hex(int(color)).split('x')[1])
    else:
        return str(hex(int(color)).split('x')[1])

In [36]:
def read_real_color(filename, color_rank):
    image = cv2.imread(filename, cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    image_list = [str(list(image[i][k])) for i in range(len(image)) for k in range(len(image[0]))]
    image_unique = {}
    for d in image_list:
        if d not in image_unique:
            image_unique[d] = 1
        else:
            image_unique[d] += 1

    total_color_lists = sorted(image_unique.keys(), key=lambda x:image_unique[x], reverse=True)
    color_list = total_color_lists[color_rank]
    
    color_R = int(color_list.split('[')[1].split(']')[0].split(', ')[0])
    color_G = int(color_list.split('[')[1].split(']')[0].split(', ')[1])
    color_B = int(color_list.split('[')[1].split(']')[0].split(', ')[2])
    
    color_R = dec_to_hex(color_R)
    color_G = dec_to_hex(color_G)
    color_B = dec_to_hex(color_B)
    
    return str(color_R + color_G + color_B)

In [9]:
df_cafe = pd.read_csv('final_cafe_info_with_path.csv')
# df_cafe.head()

In [10]:
df_cafe = df_cafe.drop('Unnamed: 0', axis=1)
df_cafe.head()

Unnamed: 0,지역,지역구,카페명,주소,위도,경도,파일명
0,서서울,은평,스모어,서울 은평구 연서로29길 8-8,37.618952,126.919697,ws_ep_01.png
1,서서울,은평,필라멘트카페,서울 은평구 통일로 883,37.621753,126.919462,ws_ep_02.png
2,서서울,은평,HUGA,서울 은평구 진관2로 19,37.634753,126.919774,ws_ep_03.png
3,서서울,은평,YM Coffee Project,서울 은평구 연서로29길 21-8,37.619095,126.917542,ws_ep_04.png
4,서서울,은평,카페달력,서울 은평구 연서로18길 28-2,37.61193,126.920534,ws_ep_05.png


In [44]:
for i in range(5):
    color_list = []
    for n in df_cafe.index:
        png = './cafe_color_result/' + df_cafe['파일명'][n]
        color_list.append(read_real_color(png, i))
    col_name = str(i+1) + '번 색'
    df_cafe[col_name] = color_list

In [48]:
df_cafe.head()

Unnamed: 0,지역,지역구,카페명,주소,위도,경도,파일명,1번 색,2번 색,3번 색,4번 색,5번 색
0,서서울,은평,스모어,서울 은평구 연서로29길 8-8,37.618952,126.919697,ws_ep_01.png,dbd6cc,c7b29a,231a14,ab8f73,644f3f
1,서서울,은평,필라멘트카페,서울 은평구 통일로 883,37.621753,126.919462,ws_ep_02.png,201d1e,9d7653,5f4939,cead7d,e7e4cf
2,서서울,은평,HUGA,서울 은평구 진관2로 19,37.634753,126.919774,ws_ep_03.png,36a17c,8ebda8,e5e7df,6f6c49,272b15
3,서서울,은평,YM Coffee Project,서울 은평구 연서로29길 21-8,37.619095,126.917542,ws_ep_04.png,cab9ab,e9dfd8,403126,af8b6d,84634c
4,서서울,은평,카페달력,서울 은평구 연서로18길 28-2,37.61193,126.920534,ws_ep_05.png,e8ddd1,d3c0af,b19c89,5a432c,917457


In [49]:
df_cafe.to_csv('cafe_color_tidy_data.csv')

In [5]:
df = pd.read_csv('final_cafe_color_tidy_data.csv')
df.head()

Unnamed: 0,지역,지역구,카페명,주소,위도,경도,파일명,1번 색,2번 색,3번 색,4번 색,5번 색
0,서서울,은평,스모어,서울 은평구 연서로29길 8-8,37.618952,126.919697,ws_ep_01.png,dbd6cc,c7b29a,231a14,ab8f73,644f3f
1,서서울,은평,필라멘트카페,서울 은평구 통일로 883,37.621753,126.919462,ws_ep_02.png,201d1e,9d7653,5f4939,cead7d,e7e4cf
2,서서울,은평,HUGA,서울 은평구 진관2로 19,37.634753,126.919774,ws_ep_03.png,36a17c,8ebda8,e5e7df,6f6c49,272b15
3,서서울,은평,YM Coffee Project,서울 은평구 연서로29길 21-8,37.619095,126.917542,ws_ep_04.png,cab9ab,e9dfd8,403126,af8b6d,84634c
4,서서울,은평,카페달력,서울 은평구 연서로18길 28-2,37.61193,126.920534,ws_ep_05.png,e8ddd1,d3c0af,b19c89,5a432c,917457


In [12]:
colors = 'red,red_orange,orange,orange_yellow,yellow,yellow_lime,lime,lime_green,green,green_skyblue,skyblue,skyblue_blue,blue,blue_navy,navy,navy_purple,purple,purple_pink,pink,pink_red'.split(',')
len(colors)

20

In [19]:
rgbs = list(['ff','00','00'],['fc','47','00'],['fc','66','00'],['fc','cc','00'],\
['ff','ff','00'],['cc','ff','66'],['7d','cd','00'],['36','b7','00'],\
['00','99','00'],['00','80','80'],['00','66','66'],['00','92','97'],['06','11','f2'],\
['00','3a','9a'],['00','00','7e'],['4d','00','9a'],['80','00','7f'],['77','00','55'],\
['e8','00','74'],['ff','00','66'])
len(rgbs)

20

In [21]:
color_dict = dict()
for color, rgb in zip(colors, rgbs):
    color_dict[color] = rgb
color_dict

{'red': ['ff', '00', '00'],
 'red_orange': ['fc', '47', '00'],
 'orange': ['fc', '66', '00'],
 'orange_yellow': ['fc', 'cc', '00'],
 'yellow': ['ff', 'ff', '00'],
 'yellow_lime': ['cc', 'ff', '66'],
 'lime': ['7d', 'cd', '00'],
 'lime_green': ['36', 'b7', '00'],
 'green': ['00', '99', '00'],
 'green_skyblue': ['00', '80', '80'],
 'skyblue': ['00', '66', '66'],
 'skyblue_blue': ['00', '92', '97'],
 'blue': ['06', '11', 'f2'],
 'blue_navy': ['00', '3a', '9a'],
 'navy': ['00', '00', '7e'],
 'navy_purple': ['4d', '00', '9a'],
 'purple': ['80', '00', '7f'],
 'purple_pink': ['77', '00', '55'],
 'pink': ['e8', '00', '74'],
 'pink_red': ['ff', '00', '66']}

In [29]:
temp = sorted(color_dict.items(), key=lambda x:x[1][0])
temp

[('green', ['00', '99', '00']),
 ('green_skyblue', ['00', '80', '80']),
 ('skyblue', ['00', '66', '66']),
 ('skyblue_blue', ['00', '92', '97']),
 ('blue_navy', ['00', '3a', '9a']),
 ('navy', ['00', '00', '7e']),
 ('blue', ['06', '11', 'f2']),
 ('lime_green', ['36', 'b7', '00']),
 ('navy_purple', ['4d', '00', '9a']),
 ('purple_pink', ['77', '00', '55']),
 ('lime', ['7d', 'cd', '00']),
 ('purple', ['80', '00', '7f']),
 ('yellow_lime', ['cc', 'ff', '66']),
 ('pink', ['e8', '00', '74']),
 ('red_orange', ['fc', '47', '00']),
 ('orange', ['fc', '66', '00']),
 ('orange_yellow', ['fc', 'cc', '00']),
 ('red', ['ff', '00', '00']),
 ('yellow', ['ff', 'ff', '00']),
 ('pink_red', ['ff', '00', '66'])]

In [36]:
result = sorted(temp, key=lambda x:x[1][1])
result

[('navy', ['00', '00', '7e']),
 ('navy_purple', ['4d', '00', '9a']),
 ('purple_pink', ['77', '00', '55']),
 ('purple', ['80', '00', '7f']),
 ('pink', ['e8', '00', '74']),
 ('red', ['ff', '00', '00']),
 ('pink_red', ['ff', '00', '66']),
 ('blue', ['06', '11', 'f2']),
 ('blue_navy', ['00', '3a', '9a']),
 ('red_orange', ['fc', '47', '00']),
 ('skyblue', ['00', '66', '66']),
 ('orange', ['fc', '66', '00']),
 ('green_skyblue', ['00', '80', '80']),
 ('skyblue_blue', ['00', '92', '97']),
 ('green', ['00', '99', '00']),
 ('lime_green', ['36', 'b7', '00']),
 ('orange_yellow', ['fc', 'cc', '00']),
 ('lime', ['7d', 'cd', '00']),
 ('yellow_lime', ['cc', 'ff', '66']),
 ('yellow', ['ff', 'ff', '00'])]

In [37]:
for i in range(len(result)):
    print(result[i][1])

['00', '00', '7e']
['4d', '00', '9a']
['77', '00', '55']
['80', '00', '7f']
['e8', '00', '74']
['ff', '00', '00']
['ff', '00', '66']
['06', '11', 'f2']
['00', '3a', '9a']
['fc', '47', '00']
['00', '66', '66']
['fc', '66', '00']
['00', '80', '80']
['00', '92', '97']
['00', '99', '00']
['36', 'b7', '00']
['fc', 'cc', '00']
['7d', 'cd', '00']
['cc', 'ff', '66']
['ff', 'ff', '00']


In [43]:
r = list()
for rgb in rgbs:
    s = ''
    for i in range(3):
        s += rgb[i]
    r.append(int(f'{s}', 16))
r

[16711680,
 16533248,
 16541184,
 16567296,
 16776960,
 13434726,
 8244480,
 3585792,
 39168,
 32896,
 26214,
 37527,
 397810,
 15002,
 126,
 5046426,
 8388735,
 7798869,
 15204468,
 16711782]

In [44]:
o = list()
for rgb in rgbs:
    s = ''
    for i in range(3):
        s += rgb[i]
    o.append(s)
o

['ff0000',
 'fc4700',
 'fc6600',
 'fccc00',
 'ffff00',
 'ccff66',
 '7dcd00',
 '36b700',
 '009900',
 '008080',
 '006666',
 '009297',
 '0611f2',
 '003a9a',
 '00007e',
 '4d009a',
 '80007f',
 '770055',
 'e80074',
 'ff0066']

In [45]:
colors

['red',
 'red_orange',
 'orange',
 'orange_yellow',
 'yellow',
 'yellow_lime',
 'lime',
 'lime_green',
 'green',
 'green_skyblue',
 'skyblue',
 'skyblue_blue',
 'blue',
 'blue_navy',
 'navy',
 'navy_purple',
 'purple',
 'purple_pink',
 'pink',
 'pink_red']

In [47]:
result = dict()
for color, num in zip(colors, r):
    result[color] = num
result

{'red': 16711680,
 'red_orange': 16533248,
 'orange': 16541184,
 'orange_yellow': 16567296,
 'yellow': 16776960,
 'yellow_lime': 13434726,
 'lime': 8244480,
 'lime_green': 3585792,
 'green': 39168,
 'green_skyblue': 32896,
 'skyblue': 26214,
 'skyblue_blue': 37527,
 'blue': 397810,
 'blue_navy': 15002,
 'navy': 126,
 'navy_purple': 5046426,
 'purple': 8388735,
 'purple_pink': 7798869,
 'pink': 15204468,
 'pink_red': 16711782}

In [49]:
color_ranges = sorted(result.items(), key=lambda x:x[1])
color_ranges

[('navy', 126),
 ('blue_navy', 15002),
 ('skyblue', 26214),
 ('green_skyblue', 32896),
 ('skyblue_blue', 37527),
 ('green', 39168),
 ('blue', 397810),
 ('lime_green', 3585792),
 ('navy_purple', 5046426),
 ('purple_pink', 7798869),
 ('lime', 8244480),
 ('purple', 8388735),
 ('yellow_lime', 13434726),
 ('pink', 15204468),
 ('red_orange', 16533248),
 ('orange', 16541184),
 ('orange_yellow', 16567296),
 ('red', 16711680),
 ('pink_red', 16711782),
 ('yellow', 16776960)]

In [53]:
range_num = [c[1] for c in color_ranges]
range_num.append(int('FFFFFF', 16))
range_num

[126,
 15002,
 26214,
 32896,
 37527,
 39168,
 397810,
 3585792,
 5046426,
 7798869,
 8244480,
 8388735,
 13434726,
 15204468,
 16533248,
 16541184,
 16567296,
 16711680,
 16711782,
 16776960,
 16777215]

In [55]:
len(range_num), len(colors)

(21, 20)

In [60]:
df['1번 색'].apply(lambda x: int(x,16))

0      14407372
1       2104606
2       3580284
3      13285803
4      15261137
5      10653830
6       6638906
7       9531981
8       7821637
9       3284753
10     14737118
11     11642522
12     12696233
13      1117707
14      9802911
15      2170395
16     13618634
17     14671829
18     13746347
19     14802648
20      8878178
21      3287071
22     13414777
23      1840914
24     10385475
25      9206637
26      7687207
27      3153177
28     11907755
29     13615015
         ...   
295     5394768
296     3354410
297     2432789
298     1117197
299     3220753
300     2106922
301     2758151
302     6050372
303     9011571
304     8082746
305     6511440
306    14276307
307     4537910
308     7620904
309     6049089
310     3223321
311     9936541
312    16054007
313     8680025
314     6447703
315     8421221
316     5193785
317     2627603
318    10922402
319    11514029
320     5520689
321    11643296
322     2759438
323    13021080
324     8485235
Name: 1번 색, Length: 325,

In [61]:
pd.cut(df['1번 색'].apply(lambda x: int(x,16)), range_num, labels=colors)

0               blue
1               lime
2               lime
3       skyblue_blue
4          blue_navy
5       skyblue_blue
6              green
7       skyblue_blue
8      green_skyblue
9               lime
10              blue
11      skyblue_blue
12      skyblue_blue
13              lime
14      skyblue_blue
15              lime
16              blue
17              blue
18              blue
19              blue
20      skyblue_blue
21              lime
22      skyblue_blue
23              lime
24      skyblue_blue
25      skyblue_blue
26             green
27              lime
28      skyblue_blue
29              blue
           ...      
295            green
296             lime
297             lime
298             lime
299             lime
300             lime
301             lime
302            green
303     skyblue_blue
304    green_skyblue
305            green
306             blue
307       lime_green
308            green
309            green
310             lime
311     skybl