In [None]:
!pip install geopandas

In [None]:
# 한글폰트 사용 in colab
%matplotlib inline  

import matplotlib as mpl 
import matplotlib.pyplot as plt 
import matplotlib.font_manager as fm  

!apt-get update -qq
!apt-get install fonts-nanum* -qq

path = '/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf' 
font_name = fm.FontProperties(fname=path, size=10).get_name()
print(font_name)
plt.rc('font', family=font_name)

fm._rebuild()
mpl.rcParams['axes.unicode_minus'] = False

In [None]:

import pandas as pd
import geopandas as gpd
import numpy as np

import os, re, sys
from glob import glob
from tqdm import tqdm
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive/.
from google.colab import drive

drive.mount('/content/gdrive/')

In [None]:
# Change the working directory to the data folder on the mounted Drive so that
# the relative shapefile name used when loading the data resolves, then list
# the folder contents as a quick check.
os.chdir('/content/gdrive/My Drive/Colab Notebooks/urban-data-mining/09 Clustering/data/')
os.listdir()
# os.getcwd()

In [None]:
# Load the parcel shapefile, decoding its attribute table as CP949.
# `dtype` is not a geopandas.read_file option (it would only be forwarded to
# the I/O engine as an unrecognised driver option), so it is not passed.
g_data = gpd.read_file('AL_11710_D194_20210823.shp', encoding='cp949')
cols = ['code','고유번호','법정동코드','법정동명','대장구분코드','대장구분명','지번','토지일련번호','기준연도','기준월',
        '지목코드','지목명','토지면적','용도지역코드1','용도지역명1','용도지역코드2','용도지역명2','토지이용상황코드','토지이용상황',
        '지형높이코드','지형높이','지형형상코드','지형형상','도로접면코드','도로접면','공시지가','데이터기준일자', 'geometry']
g_data.columns = cols  # assign readable column names by position
g_data = g_data.set_crs('EPSG:5174')  # tag the coordinate reference system

# Keep only small multi-family housing: land-use status '다세대' or '연립'.
# The counts are printed first to show every land-use category in the file.
print(g_data['토지이용상황'].value_counts())
# .copy() makes the filtered frame independent of the full table, so later
# column assignments act on a real frame rather than on a slice.
g_data = g_data[g_data['토지이용상황'].isin(['다세대','연립'])].copy()


In [None]:

# Count the remaining parcels per legal dong (법정동명) to see which
# neighbourhoods are present. The restriction to two dongs of Songpa-gu is
# applied in the following cell.
print(g_data['법정동명'].value_counts())


In [None]:
# Restrict the parcels to Bangi-dong and Songpa-dong, then map them coloured
# by land-use status.
g_data = g_data.loc[
    g_data['법정동명'].isin(['서울특별시 송파구 방이동', '서울특별시 송파구 송파동'])
]
g_data.plot(column='토지이용상황', figsize=(15,15), legend=True)


In [None]:

# Replace every parcel polygon with its centroid so each parcel becomes a point.
# set_geometry returns a new GeoDataFrame, so the previously filtered frame is
# not modified in place (in-place assignment on a filtered slice can trigger
# pandas' chained-assignment warning).
g_data = g_data.set_geometry(g_data.geometry.centroid)
g_data.plot('토지이용상황', figsize=(15,15), legend=True, s=5)

In [None]:
  
#%%
# Pull the point coordinates out of the geometry column as (n, 1) column
# vectors of x and y values.
xpos = np.array(g_data.geometry.x)[:, np.newaxis]
ypos = np.array(g_data.geometry.y)[:, np.newaxis]

# Place the two columns side by side to form the (n, 2) coordinate matrix.
X = np.hstack([xpos, ypos])

In [None]:

#%%
def display_cluster(X, y, centroid=None):
    """Plot 2-D points coloured by cluster label on a 15x15 inch figure.

    Points with a non-negative label are coloured by label using the 'Accent'
    colormap. Points with a negative label are drawn as small black crosses
    (DBSCAN reports noise as -1). Cluster centres, when given, are overlaid
    as large red markers. All geometries are tagged as EPSG:5174.

    Parameters
    ----------
    X : array-like of shape (n_samples, 2)
        Point coordinates; column 0 is x and column 1 is y.
    y : array-like of shape (n_samples,)
        Cluster label of each point.
    centroid : array-like of shape (n_clusters, 2), optional
        Cluster centre coordinates; column 0 is x and column 1 is y.

    Raises
    ------
    ValueError
        If X does not have exactly two columns, or y does not contain one
        label per row of X.
    """
    X = np.asarray(X)
    y = np.asarray(y).reshape(-1)
    if X.ndim != 2 or X.shape[1] != 2:
        raise ValueError('X must have shape (n_samples, 2)')
    if y.shape[0] != X.shape[0]:
        raise ValueError('y must contain exactly one label per row of X')

    # Keep the labels in their own column instead of concatenating them with
    # the coordinates, which would silently cast them to float.
    points = gpd.GeoDataFrame(
        {'c': y},
        geometry=gpd.points_from_xy(X[:, 0], X[:, 1]),
        crs='EPSG:5174',
    )

    # Create the axes up front so that every layer draws onto the same figure
    # even when one of the subsets below is empty.
    _, ax = plt.subplots(figsize=(15, 15))

    clustered = points[points['c'] >= 0]
    noise = points[points['c'] < 0]

    # Skip empty subsets: K-Means never yields negative labels, and plotting
    # an empty GeoDataFrame only produces a warning.
    if not clustered.empty:
        clustered.plot('c', ax=ax, s=5, cmap='Accent')
    if not noise.empty:
        noise.plot(ax=ax, markersize=1, color='black', marker='x')

    if centroid is not None:
        centroid = np.asarray(centroid)
        centres = gpd.GeoSeries(
            gpd.points_from_xy(centroid[:, 0], centroid[:, 1]),
            crs='EPSG:5174',
        )
        centres.plot(ax=ax, markersize=50, color='red')
    
    

In [None]:

#%%
from sklearn.cluster import KMeans  # k-means clustering
from scipy.spatial.distance import cdist


# Run k-means on the coordinate matrix X.
# random_state fixes the centre initialisation so that re-running the notebook
# reproduces the same clusters (and therefore the same colours in the plot).
# n_init is set explicitly because its default differs between scikit-learn
# releases.
n_clusters = 5
clt = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
clt.fit(X)


KMeans(n_clusters=5)

In [None]:

# Cluster centres, truncated to whole coordinate units.
# The dtype is given as the type np.int64 rather than a scalar instance
# np.int64(), and the centres are computed once instead of twice.
centroid = clt.cluster_centers_.astype(np.int64)

# Pairwise distances between all cluster centres.
dist = cdist(centroid, centroid)

# Assign every point in X to its nearest cluster centre.
y = clt.predict(X)

# Draw the points coloured by cluster, with the centres overlaid.
display_cluster(X, y, centroid)


In [None]:

#%%
from sklearn.cluster import DBSCAN


# Density-based clustering of the same coordinate matrix: fit the estimator,
# then read the per-point labels from it. eps is expressed in the coordinate
# units of X.
model = DBSCAN(eps=50, min_samples=10).fit(X)
y = model.labels_


# No centres are passed; display_cluster draws negative labels as black crosses.
display_cluster(X, y)
#%%