In [None]:
# 한글폰트 사용 in colab
%matplotlib inline  

import matplotlib as mpl 
import matplotlib.pyplot as plt 
import matplotlib.font_manager as fm  

!apt-get update -qq
!apt-get install fonts-nanum* -qq

path = '/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf' 
font_name = fm.FontProperties(fname=path, size=10).get_name()
print(font_name)
plt.rc('font', family=font_name)

fm._rebuild()
mpl.rcParams['axes.unicode_minus'] = False

In [2]:

import numpy as np
import pandas as pd
import os, sys

from glob import glob
from PIL import Image
from tqdm import tqdm


import seaborn as sns

import matplotlib.pyplot as plt

from matplotlib import gridspec

import ipywidgets

In [None]:
# Attach Google Drive to the Colab runtime.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive/'
drive.mount(GDRIVE_MOUNT_POINT)

In [None]:
# Switch the working directory to the data folder on Drive and list its contents.
DATA_DIR = '/content/gdrive/My Drive/Colab Notebooks/urban-data-mining/09 Clustering/data/'
os.chdir(DATA_DIR)
os.listdir()

In [5]:

#%%

# Sort the paths: glob returns them in filesystem order, which would make the
# meaning of an index into image_list vary from one machine/run to another.
flist = sorted(glob('images/*.jpg'))

image_list = []
for f in flist:
    # The context manager closes the file handle as soon as the pixel data
    # has been copied into a numpy array.
    with Image.open(f) as img:
        image_list.append(np.array(img))

len(image_list)

6

In [None]:
def display_image(idx=0):
  """Show ``image_list[idx]`` in a figure sized from the image's pixel dimensions.

  The figure size (in inches) is the array's width and height divided by a
  nominal 80 pixels per inch.

  Args:
    idx: position of the image inside the notebook-level ``image_list``.
  """
  pixels_per_inch = 80
  picture = image_list[idx]
  n_rows, n_cols = picture.shape[0], picture.shape[1]

  plt.figure(figsize=(n_cols / pixels_per_inch, n_rows / pixels_per_inch))
  plt.imshow(picture)
  plt.show()


# The (min, max, step) slider includes its upper bound, so the last selectable
# value must be the last valid index, not len(image_list).
ipywidgets.interact(display_image, idx=(0, max(len(image_list) - 1, 0), 1) )

In [7]:

#%%
from sklearn.cluster import KMeans #kmeans
from scipy.spatial.distance import cdist

image_num = 1
raw_img = image_list[image_num]

# Flatten the pixel grid so that each row holds the channel values of one pixel.
n_pixels = raw_img.shape[0] * raw_img.shape[1]
n_channels = raw_img.shape[2]
xdata = raw_img.reshape(n_pixels, n_channels)

xdata.shape

(515200, 3)

In [None]:

# Run k-means clustering on the per-pixel colour vectors.
n_clusters = 20
# random_state fixes the centroid initialisation so cluster ids and the palette
# are the same on every run; n_init is spelled out instead of relying on the
# library default.
clt = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
clt.fit(xdata)

# Cluster centres truncated to integers. Pass the dtype np.int64 itself
# (the original called it, handing astype a scalar 0 instead of a type).
centroid = clt.cluster_centers_.astype(np.int64)

# Pairwise distances between all cluster centres.
dist = cdist(centroid, centroid)

# Show the colours found by k-means as a one-pixel-wide vertical strip.
colorpallet = centroid.reshape(centroid.shape[0], 1, centroid.shape[1])
plt.figure(figsize=(1,10))
plt.imshow(colorpallet)

In [None]:


#%%
# Map every pixel to the id of the cluster it belongs to.
pixel_vectors = raw_img.reshape(raw_img.shape[0] * raw_img.shape[1], raw_img.shape[2])
cluster_ids = clt.predict(pixel_vectors)

# Fold the flat label vector back into the image layout, keeping a trailing
# axis of length 1 so it can be stacked against the colour channels later.
xdata = cluster_ids.reshape(raw_img.shape[0], raw_img.shape[1], 1)
xdata.shape

In [None]:

# Display, for a chosen cluster, the image restricted to that cluster's pixels.

def display_cluster_image(cnum=0):
  """Plot the source image, the colour palette and a single-cluster view side by side.

  In the third panel every pixel whose cluster id differs from ``cnum`` is
  painted white (255). Reads the notebook-level ``xdata`` (per-pixel cluster
  ids), ``raw_img`` and ``colorpallet``.

  Args:
    cnum: id of the cluster to keep visible.
  """
  pixels_per_inch = 80

  # Repeat the label map three times along the last axis so it lines up with
  # the three colour channels of the image.
  labels_rgb = np.repeat(xdata, 3, axis=2)
  isolated = raw_img.copy()
  isolated[np.nonzero(labels_rgb != cnum)] = 255

  fig_width = isolated.shape[1] / pixels_per_inch * 2 + 1
  fig_height = isolated.shape[0] / pixels_per_inch
  plt.figure(figsize=(fig_width, fig_height))
  layout = gridspec.GridSpec(nrows=1, ncols=3, width_ratios=[10, 1, 10])

  # Left: original image, middle: palette strip, right: isolated cluster.
  panels = (raw_img, colorpallet, isolated)
  for slot, panel in enumerate(panels):
    plt.subplot(layout[slot])
    plt.imshow(panel)

  plt.show()


# Cluster ids run from 0 to n_clusters - 1 and the slider's upper bound is
# inclusive, so stop at n_clusters - 1 to avoid selecting a non-existent cluster.
ipywidgets.interact(display_cluster_image, cnum=(0, n_clusters - 1, 1) )

In [None]:
from sklearn.cluster import DBSCAN

def display_dbscan(cnum = 0) :
  """Split one k-means colour cluster into spatial groups with DBSCAN and plot them.

  Left panel: the image with every pixel outside cluster ``cnum`` painted white.
  Right panel: the (column, row) positions of the cluster's pixels, coloured by
  their DBSCAN label; points labelled as noise are dropped.

  Reads the notebook-level ``xdata`` (per-pixel cluster ids, shape (H, W, 1))
  and ``raw_img``.

  Args:
    cnum: id of the k-means cluster to analyse.
  """
  dpi = 80

  # Row / column indices of every pixel assigned to this cluster. Kept as
  # floats, matching the coordinate grids the original implementation built.
  in_cluster = xdata[:, :, 0] == cnum
  rows, cols = np.nonzero(in_cluster)
  coords = np.column_stack([rows, cols]).astype(float)

  if len(coords) > 0:
    # Cluster the pixel positions only; colour is already fixed by cnum.
    labels = DBSCAN(eps=5, min_samples=50).fit_predict(coords)
    # DBSCAN marks noise points with -1; keep only real clusters.
    keep = labels >= 0
    coords, labels = coords[keep], labels[keep]
  else:
    # No pixel carries this cluster id: DBSCAN cannot be fitted on zero
    # samples, so skip it and show an empty scatter panel instead.
    labels = np.empty(0, dtype=int)

  # White out everything that does not belong to the selected cluster.
  tmp = raw_img.copy()
  tmp[~in_cluster] = 255

  fig, (ax_mask, ax_scatter) = plt.subplots(
      nrows=1, ncols=2, figsize=(tmp.shape[1]/dpi * 2, tmp.shape[0]/dpi))

  ax_mask.imshow(tmp)

  if len(labels) > 0:
    # x/y are passed by keyword: current seaborn no longer accepts them
    # positionally.
    sns.scatterplot(x=coords[:, 1], y=coords[:, 0], marker='.', size=1,
                    legend=False,
                    hue=["cluster-{}".format(x) for x in labels],
                    ax=ax_scatter)
  # Image rows grow downwards, so flip the y axis to match the left panel.
  ax_scatter.invert_yaxis()
  plt.show()



# Cluster ids run from 0 to n_clusters - 1 and the slider's upper bound is
# inclusive, so stop at n_clusters - 1 to avoid selecting a non-existent cluster.
ipywidgets.interact(display_dbscan, cnum=(0, n_clusters - 1, 1) )