In [12]:
import pandas as pd
import psycopg2
import numpy as np
from tqdm import tqdm
from sklearn.cluster import KMeans
import json

In [2]:
import matplotlib.pyplot as plt

In [13]:
def load_db():
  with open("/content/drive/MyDrive/yeoreodigm/data_files/db_info.json") as json_file:
    DB_INFO = json.load(json_file)
  
  endpoint = DB_INFO["ENDPOINT"]
  dbname = DB_INFO["DB_NAME"]
  user = DB_INFO["USER_ID"]
  password = DB_INFO["PASSWORD"]
  db = psycopg2.connect(host=endpoint,dbname=dbname,user=user,password=password)
  return db

In [14]:
db = load_db()

In [17]:
db.rollback()

##db에서 여행지 목록 불러오기

In [26]:
cursor = db.cursor()
sql = "SELECT place_id, latitude,longitude FROM places"
cursor.execute(sql)
result = cursor.fetchall()

### 여행지 목록 df로 만들기

In [27]:
places = pd.DataFrame(result)
places.columns = ['id','latitude','longitude']
places = places.iloc[:11]

In [29]:
places

Unnamed: 0,id,latitude,longitude
0,300,33.263477,126.3323652
1,65,33.2077573,126.2908794
2,66,33.2906924,126.322169
3,67,33.3226049,126.2632391
4,68,33.294194,126.1630099
5,69,33.4912863,126.5317364
6,929,33.5075,126.9549
7,70,33.2727117,126.6751305
8,71,33.3253181,126.2548493
9,72,33.5216453,126.8624603


##클러스터링 진행 및 결과 확인

In [5]:
sample = np.array([[1, 2], [1, 4], [1, 0],[10, 2], [10, 4], [10, 0]])

In [8]:
kmeans =  KMeans(n_clusters=3,random_state=0).fit(sample)

In [9]:
kmeans.labels_

array([1, 1, 1, 0, 0, 2], dtype=int32)

In [30]:
kmeans =  KMeans(n_clusters=3,random_state=0)
kmeans = kmeans.fit(places[['latitude','longitude']])
places['cluster'] = kmeans.labels_

In [31]:
places

Unnamed: 0,id,latitude,longitude,cluster
0,300,33.263477,126.3323652,1
1,65,33.2077573,126.2908794,1
2,66,33.2906924,126.322169,1
3,67,33.3226049,126.2632391,1
4,68,33.294194,126.1630099,1
5,69,33.4912863,126.5317364,0
6,929,33.5075,126.9549,2
7,70,33.2727117,126.6751305,0
8,71,33.3253181,126.2548493,1
9,72,33.5216453,126.8624603,2


In [33]:
a = [1,2,3,4]
str(tuple(a))


'(1, 2, 3, 4)'

##모듈화

In [41]:
def load_places_location(place_list):
  db = load_db()
  cursor = db.cursor()
  place_list = str(tuple(place_list))
  sql = f"select place_id, latitude, longitude from places where place_id in {place_list}"
  cursor.execute(sql)
  result = cursor.fetchall()
  result = pd.DataFrame(result)
  result.columns = ['id','latitude','longitude']
  return result

In [53]:
def re_group(place_df,day):
  result = [ [] for _ in range(day) ]
  kmeans = KMeans(n_clusters=day,random_state=0)
  kmeans = kmeans.fit(place_df[['latitude','longitude']])
  place_df['cluster'] = kmeans.labels_
  for i in place_df.values:
    id,latitude,longitude,group = i
    result[group].append(id)
  return result

In [43]:
sample_places = load_places_location([1,2,3,4,5,6,7,8,9,10])

In [48]:
sample_places

Unnamed: 0,id,latitude,longitude
0,1,33.4077504,126.6424787
1,2,33.5194929,126.9510302
2,3,33.4621574,126.9363164
3,4,33.4917178,126.8112799
4,5,33.2447169,126.5595512
5,6,33.5283774,126.7716157
6,7,33.3673209,126.357007
7,8,33.393748,126.2394319
8,9,33.4302205,126.9280527
9,10,33.2901402,126.3683652


In [54]:
re_group(sample_places,3)

[[2, 3, 4, 6, 9], [7, 8, 10], [1, 5]]

In [None]:
[2, 0, 0, 0, 2, 0, 1, 1, 0, 1

#pandas 안쓰고 만들기

In [70]:
def load_places_location(place_list):
  db = load_db()
  cursor = db.cursor()
  place_list = str(tuple(place_list))
  sql = f"select place_id, latitude, longitude from places where place_id in {place_list}"
  cursor.execute(sql)
  result = cursor.fetchall()
  result = list(map(list,result))
  return result

In [71]:
import copy

In [None]:
def optimize_course(place_info,day):
  save_id = []  
  result = [[] for _ in range(day)]
  for i in place_info:
    save_id.append(i[0])
  #id는 클러스터링에 무관하게 만들기
  for i in place_info:
    i[0] = 0
    
  kmeans = KMeans(n_clusters=day,random_state=0)
  kmeans = kmeans.fit(place_info)

  for idx, label in enumerate(kmeans.labels_):
    now = save_id[idx]
    result[label].append(now)
  
  return result

In [85]:
see = load_places_location([1,2,3,4,5,6,7,8,9,10])
see

[[1, Decimal('33.4077504'), Decimal('126.6424787')],
 [2, Decimal('33.5194929'), Decimal('126.9510302')],
 [3, Decimal('33.4621574'), Decimal('126.9363164')],
 [4, Decimal('33.4917178'), Decimal('126.8112799')],
 [5, Decimal('33.2447169'), Decimal('126.5595512')],
 [6, Decimal('33.5283774'), Decimal('126.7716157')],
 [7, Decimal('33.3673209'), Decimal('126.3570070')],
 [8, Decimal('33.3937480'), Decimal('126.2394319')],
 [9, Decimal('33.4302205'), Decimal('126.9280527')],
 [10, Decimal('33.2901402'), Decimal('126.3683652')]]

In [65]:
see = list(map(list,see))
see

[[1, Decimal('33.4077504'), Decimal('126.6424787')],
 [2, Decimal('33.5194929'), Decimal('126.9510302')],
 [3, Decimal('33.4621574'), Decimal('126.9363164')],
 [4, Decimal('33.4917178'), Decimal('126.8112799')],
 [5, Decimal('33.2447169'), Decimal('126.5595512')],
 [6, Decimal('33.5283774'), Decimal('126.7716157')],
 [7, Decimal('33.3673209'), Decimal('126.3570070')],
 [8, Decimal('33.3937480'), Decimal('126.2394319')],
 [9, Decimal('33.4302205'), Decimal('126.9280527')],
 [10, Decimal('33.2901402'), Decimal('126.3683652')]]

In [66]:
for i in see:
  i[0] = i[0]/1000000

In [67]:
see

[[1e-06, Decimal('33.4077504'), Decimal('126.6424787')],
 [2e-06, Decimal('33.5194929'), Decimal('126.9510302')],
 [3e-06, Decimal('33.4621574'), Decimal('126.9363164')],
 [4e-06, Decimal('33.4917178'), Decimal('126.8112799')],
 [5e-06, Decimal('33.2447169'), Decimal('126.5595512')],
 [6e-06, Decimal('33.5283774'), Decimal('126.7716157')],
 [7e-06, Decimal('33.3673209'), Decimal('126.3570070')],
 [8e-06, Decimal('33.3937480'), Decimal('126.2394319')],
 [9e-06, Decimal('33.4302205'), Decimal('126.9280527')],
 [1e-05, Decimal('33.2901402'), Decimal('126.3683652')]]

In [None]:
see

In [68]:
kmeans = KMeans(n_clusters=3,random_state=0)
kmeans = kmeans.fit(see)

In [69]:
kmeans.labels_

array([2, 0, 0, 0, 2, 0, 1, 1, 0, 1], dtype=int32)

In [84]:
see

[[0, Decimal('33.4077504'), Decimal('126.6424787')],
 [0, Decimal('33.5194929'), Decimal('126.9510302')],
 [0, Decimal('33.4621574'), Decimal('126.9363164')],
 [0, Decimal('33.4917178'), Decimal('126.8112799')],
 [0, Decimal('33.2447169'), Decimal('126.5595512')],
 [0, Decimal('33.5283774'), Decimal('126.7716157')],
 [0, Decimal('33.3673209'), Decimal('126.3570070')],
 [0, Decimal('33.3937480'), Decimal('126.2394319')],
 [0, Decimal('33.4302205'), Decimal('126.9280527')],
 [0, Decimal('33.2901402'), Decimal('126.3683652')]]

In [86]:
save_id = []

place_info = see.copy()
day = 3
result = [[] for _ in range(day)]
for i in place_info:
  save_id.append(i[0])
#id는 클러스터링에 무관하게 만들기
for i in place_info:
  i[0] = 0
  
result = [ [] for _ in range(day) ]
kmeans = KMeans(n_clusters=day,random_state=0)
kmeans = kmeans.fit(place_info)
#kmeans.labels_ = [2, 0, 0, 0, 2, 0, 1, 1, 0, 1]
for idx, label in enumerate(kmeans.labels_):
  now = save_id[idx]
  result[label].append(now)

In [88]:
result

[[2, 3, 4, 6, 9], [7, 8, 10], [1, 5]]

In [None]:
[[2, 3, 4, 6, 9], [7, 8, 10], [1, 5]]

In [89]:
def load_places_location(db, place_list):
  cursor = db.cursor()
  place_list = str(tuple(place_list))
  sql = f"select place_id, latitude, longitude from places where place_id in {place_list}"
  cursor.execute(sql)
  result = cursor.fetchall()
  result = list(map(list,result))
  return result

In [90]:
d = load_places_location(db,[1,2,3,4,5,6,7,8,9,10])
d

[[1, Decimal('33.4077504'), Decimal('126.6424787')],
 [2, Decimal('33.5194929'), Decimal('126.9510302')],
 [3, Decimal('33.4621574'), Decimal('126.9363164')],
 [4, Decimal('33.4917178'), Decimal('126.8112799')],
 [5, Decimal('33.2447169'), Decimal('126.5595512')],
 [6, Decimal('33.5283774'), Decimal('126.7716157')],
 [7, Decimal('33.3673209'), Decimal('126.3570070')],
 [8, Decimal('33.3937480'), Decimal('126.2394319')],
 [9, Decimal('33.4302205'), Decimal('126.9280527')],
 [10, Decimal('33.2901402'), Decimal('126.3683652')]]