## 요청파일 - 군집분석

In [None]:
### 군집분석을 해보자 ### 판다스 책 p331

### 사전 import
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn import preprocessing

### Prevent broken Korean glyphs in matplotlib
from matplotlib import font_manager, rc
font_path = "data/malgun.ttf"
# Register the bundled font file with matplotlib's font manager so that it can
# be selected by family name even when it is not installed system-wide.
# The private font_manager._rebuild() helper is missing from newer matplotlib
# releases, so it is only used as a fallback when addfont() is unavailable.
if hasattr(font_manager.fontManager, 'addfont'):
    font_manager.fontManager.addfont(font_path)
else:
    font_manager._rebuild()
# Resolve the family name stored inside the font file and make it the default.
font_name = font_manager.FontProperties(fname=font_path).get_name()
plt.rc('font', family=font_name)
# Draw negative numbers with a plain hyphen instead of the Unicode minus sign,
# which the configured font may not contain.
matplotlib.rcParams['axes.unicode_minus'] = False
######################################################################################################################


#### Data preparation
# Columns kept for the clustering (only the meaningful features).
feature_columns = ['경사도', '자전거도로길이', '총거치대수', '총대여수', '인구', '면적', '접근성', '교통편']

# Load the sheet, replace missing cells with 0, discard the first column,
# key the rows by the '동' column and keep only the feature columns.
cluster_data = (
    pd.read_excel('./data/pro_data/last.xlsx')
    .fillna(0)
    .pipe(lambda frame: frame.drop(frame.columns[[0]], axis=1))
    .set_index(['동'])
    .loc[:, feature_columns]
)
display(cluster_data)

### 정규화
def z_score(x):
    """Standardize *x* to zero mean and unit (sample) standard deviation.

    Works on a Series (one column) or on a DataFrame (column-wise).
    A constant column has a standard deviation of 0; dividing by it would
    produce NaN for every entry, so a zero spread is treated as 1 and the
    column simply becomes all zeros.

    Args:
        x: pandas Series or DataFrame of numeric values.

    Returns:
        Object of the same shape as *x* holding the standardized values.
    """
    centered = x - x.mean()
    spread = x.std()
    if isinstance(spread, pd.Series):
        # DataFrame input: one standard deviation per column.
        spread = spread.replace(0, 1)
    elif spread == 0:
        spread = 1
    return centered / spread
# Replace every column of cluster_data by its z-scores and preview the result.
cluster_data = cluster_data.apply(z_score, axis=0)
display(cluster_data.head())


# Model input
# The already z-scored table is additionally passed through StandardScaler;
# x is the scaler's transformed output for all rows and columns.
scaler = preprocessing.StandardScaler()
x = scaler.fit_transform(cluster_data.iloc[:, :])
# print(x[:5])


####  Model training and validation
from sklearn import cluster

# Build the model: 5 clusters, k-means++ seeding, 10 restarts.
# random_state is fixed so that the cluster numbering (and therefore the
# colours in the plots) is the same on every run.
kmeans = cluster.KMeans(init='k-means++', n_clusters=5, n_init=10, random_state=42)

# Fit the model on the scaled feature matrix.
kmeans.fit(x)

# Cluster index assigned to each row of x.
cluster_label = kmeans.labels_

# Store the assignment next to the features as the '군집' column.
cluster_data['군집'] = cluster_label
# display(cluster_data.head(5))

# Visualization: one scatter plot per feature against '총대여수',
# with the points coloured by their '군집' value.
for feature in ('경사도', '자전거도로길이', '총거치대수', '인구', '면적', '접근성', '교통편'):
    cluster_data.plot(
        kind='scatter',
        x=feature,
        y='총대여수',
        c='군집',
        cmap='Set1',
        colorbar=False,
        figsize=(5, 5),
    )