## 필요한 라이브러리 및 함수 로드

In [2]:
import json
import pandas as pd
import psycopg2
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

In [3]:
def load_db(config_path="/content/drive/MyDrive/yeoreodigm/data_files/db_info.json"):
  """Open a PostgreSQL connection from credentials stored in a JSON file.

  Args:
    config_path: Path to a JSON file containing the keys ENDPOINT, DB_NAME,
      USER_ID and PASSWORD. Defaults to the location used by this notebook.

  Returns:
    The connection object returned by psycopg2.connect. The caller is
    responsible for closing it.

  Raises:
    OSError: If the file cannot be opened.
    KeyError: If one of the required keys is missing from the file.
  """
  # JSON is UTF-8 by specification; do not depend on the locale default.
  with open(config_path, encoding="utf-8") as json_file:
    db_info = json.load(json_file)

  return psycopg2.connect(
    host=db_info["ENDPOINT"],
    dbname=db_info["DB_NAME"],
    user=db_info["USER_ID"],
    password=db_info["PASSWORD"],
  )

In [13]:
def load_entire_place(db):
  """Load the id and region of every row in the places_analysis table.

  Args:
    db: An open DB-API connection (for example the one returned by load_db).

  Returns:
    A DataFrame with the columns 'id' (taken from place_id) and 'region2',
    one row per place. When the table has no rows the frame is empty but
    still carries both columns.
  """
  load_place_sql = "SELECT place_id, region2 FROM places_analysis"

  cursor = db.cursor()
  try:
    cursor.execute(load_place_sql)
    result = cursor.fetchall()
  finally:
    # Release the cursor even when the query fails.
    cursor.close()

  # Naming the columns in the constructor also works for an empty result,
  # where assigning df.columns afterwards raises a length-mismatch error.
  return pd.DataFrame(result, columns=['id', 'region2'])

In [10]:
def load_entire_course(db):
  """Load the id and place list of every row in the course_ table.

  Args:
    db: An open DB-API connection (for example the one returned by load_db).

  Returns:
    A DataFrame with the columns 'id' and 'places', one row per course.
    'places' holds whatever the driver returns for that column. When the
    table has no rows the frame is empty but still carries both columns.
  """
  load_course_sql = "SELECT id, places FROM course_ "

  cursor = db.cursor()
  try:
    cursor.execute(load_course_sql)
    result = cursor.fetchall()
  finally:
    # Release the cursor even when the query fails.
    cursor.close()

  # Naming the columns in the constructor also works for an empty result,
  # where assigning df.columns afterwards raises a length-mismatch error.
  return pd.DataFrame(result, columns=['id', 'places'])

## location 정의 및 적용

In [5]:
# Region names grouped by compass direction; a place's region2 value is
# looked up in these lists to decide which direction it counts towards.
east = [
  "구좌",
  "성산",
  "표선",
  "우도",
]
west = [
  "안덕",
  "대정",
  "한경",
  "한림",
  "추자도",
  "가파도",
  "마라도",
  "비양도",
  "차귀도",
]
north = [
  "제주시내",
  "애월",
  "조천",
]
south = [
  "서귀포시내",
  "남원",
  "중문",
]

In [6]:
# Open the database connection and load every course (id plus its place ids).
# NOTE(review): the connection is never closed in the visible cells.
db = load_db()
course = load_entire_course(db)

In [7]:
course.head(3)

Unnamed: 0,id,places
0,1,"[602, 10, 58, 29]"
1,2,"[19, 37, 10, 602, 60, 58, 29]"
2,3,"[19, 602, 10, 58, 29]"


In [14]:
# Load the id -> region2 table for every place, reusing the open connection.
place = load_entire_place(db)

In [15]:
place.head(4)

Unnamed: 0,id,region2
0,204,추자도
1,542,추자도
2,591,추자도
3,592,추자도


In [16]:
# Place ids of a single course, kept for ad-hoc experiments; not referenced
# by the other visible cells.
sample_course = [19, 37, 10, 602, 60, 58, 29]

In [20]:
# The order fixes the meaning of each index in the per-course count vectors:
# 0=east, 1=west, 2=south, 3=north. The column names given to direction_df
# and the index-to-label mapping for main_location rely on this same order.
location_template = [east,west,south,north]

In [22]:
location_template

[['구좌', '성산', '표선', '우도'],
 ['안덕', '대정', '한경', '한림', '추자도', '가파도', '마라도', '비양도', '차귀도'],
 ['서귀포시내', '남원', '중문'],
 ['제주시내', '애월', '조천']]

In [39]:
# Peek at the first course's place list only; the break stops after one row.
for crs in course['places']:
  print(crs)
  break

[602, 10, 58, 29]


## 여행지 위치정보 로드

In [40]:
# Count, for every course, how many of its places fall in each direction.
# The result is one length-4 vector per course, indexed like location_template.

# Build both lookups once so each place costs a dict access instead of a
# boolean-mask scan over the whole `place` frame. drop_duplicates keeps the
# first row per id, matching the previous `.values[0]` choice.
region_by_place = place.drop_duplicates('id').set_index('id')['region2'].to_dict()

# Assumes the direction lists are disjoint, i.e. a region belongs to at most
# one direction.
direction_by_region = {}
for direction_idx, regions in enumerate(location_template):
  for region_name in regions:
    direction_by_region[region_name] = direction_idx

entire_direction_point = []
for crs in tqdm(course['places']):
  direction_point = np.zeros(len(location_template))
  for location in crs:
    # A place id that is absent from `place` raises KeyError rather than
    # being silently skipped.
    region = region_by_place[location]
    direction_idx = direction_by_region.get(region)
    # Regions that belong to no direction list are not counted.
    if direction_idx is not None:
      direction_point[direction_idx] += 1
  entire_direction_point.append(direction_point)

100%|██████████| 3811/3811 [00:12<00:00, 299.93it/s]


In [62]:
# One row per course, one count column per direction. The column order follows
# location_template. Passing the names to the constructor keeps this working
# when no courses were loaded, where assigning .columns afterwards would fail.
direction_df = pd.DataFrame(entire_direction_point, columns=['east','west','south','north'])

## 코스에 위치정보 반영

In [73]:
# Scratch check of the "dominant share" computation on a hand-made count vector.
tmp_np = np.array([1,4,2,0])
max_idx = tmp_np.argmax()
# Share of the largest count among all counts: 4 / 7 here.
percentage = tmp_np[max_idx]/tmp_np.sum()
# The space in the format spec reserves a sign column, which is why the
# printed value has a leading blank (' 0.57'). The result is a str.
tmptest = f'{percentage : 0.2f}'
print(tmptest)
print(type(tmptest))

 0.57
<class 'str'>


In [87]:
# Label for each index of a direction count vector (east, west, south, north).
direction_labels = ("East", "West", "South", "North")

# Derive one main-location label per course from its direction count vector.
save_max_location = []
for crs in entire_direction_point:
  total = crs.sum()
  max_idx = crs.argmax()
  # A course is assigned to a direction only when strictly more than half of
  # its counted places lie there. Everything else is "Mixed", including a
  # course with no counted places: checking total first avoids the 0/0
  # division that produced NaN and a RuntimeWarning.
  if total > 0 and crs[max_idx] / total > 0.5:
    save_max_location.append(direction_labels[max_idx])
  else:
    save_max_location.append("Mixed")

In [90]:
save_max_location[:20]

['Mixed',
 'Mixed',
 'Mixed',
 'North',
 'Mixed',
 'Mixed',
 'Mixed',
 'Mixed',
 'West',
 'East',
 'West',
 'East',
 'Mixed',
 'Mixed',
 'Mixed',
 'Mixed',
 'Mixed',
 'North',
 'Mixed',
 'Mixed']

In [92]:
# Positional assignment: assumes save_max_location is in the same row order as
# course, which holds when it was derived by iterating course['places'].
course['main_location'] = save_max_location

In [93]:
course

Unnamed: 0,id,places,diriection_point,main_location
0,1,"[602, 10, 58, 29]","[2.0, 1.0, 0.0, 1.0]",Mixed
1,2,"[19, 37, 10, 602, 60, 58, 29]","[2.0, 2.0, 0.0, 3.0]",Mixed
2,3,"[19, 602, 10, 58, 29]","[2.0, 1.0, 0.0, 2.0]",Mixed
3,4,"[37, 202, 58]","[1.0, 0.0, 0.0, 2.0]",North
4,5,"[37, 60, 202, 58, 29]","[2.0, 1.0, 0.0, 2.0]",Mixed
...,...,...,...,...
3806,3807,"[59, 8, 5, 3, 439, 6, 22, 10, 24, 15, 9]","[4.0, 4.0, 2.0, 1.0]",Mixed
3807,3808,"[37, 144, 8, 78, 456, 66, 62, 49, 162, 5, 88, ...","[5.0, 3.0, 6.0, 4.0]",Mixed
3808,3809,"[18, 15, 87, 29, 9, 5, 179, 112, 61]","[2.0, 1.0, 5.0, 1.0]",South
3809,3810,"[22, 62, 5, 75, 284, 142, 172]","[0.0, 3.0, 2.0, 2.0]",Mixed


In [94]:
direction_df

Unnamed: 0,east,west,south,north
0,2.0,1.0,0.0,1.0
1,2.0,2.0,0.0,3.0
2,2.0,1.0,0.0,2.0
3,1.0,0.0,0.0,2.0
4,2.0,1.0,0.0,2.0
...,...,...,...,...
3806,4.0,4.0,2.0,1.0
3807,5.0,3.0,6.0,4.0
3808,2.0,1.0,5.0,1.0
3809,0.0,3.0,2.0,2.0


In [95]:
# Attach the per-direction counts as columns. join aligns on the row index;
# both frames were built without an explicit index, so rows pair up by position.
joined_course = course.join(direction_df)
joined_course

Unnamed: 0,id,places,diriection_point,main_location,east,west,south,north
0,1,"[602, 10, 58, 29]","[2.0, 1.0, 0.0, 1.0]",Mixed,2.0,1.0,0.0,1.0
1,2,"[19, 37, 10, 602, 60, 58, 29]","[2.0, 2.0, 0.0, 3.0]",Mixed,2.0,2.0,0.0,3.0
2,3,"[19, 602, 10, 58, 29]","[2.0, 1.0, 0.0, 2.0]",Mixed,2.0,1.0,0.0,2.0
3,4,"[37, 202, 58]","[1.0, 0.0, 0.0, 2.0]",North,1.0,0.0,0.0,2.0
4,5,"[37, 60, 202, 58, 29]","[2.0, 1.0, 0.0, 2.0]",Mixed,2.0,1.0,0.0,2.0
...,...,...,...,...,...,...,...,...
3806,3807,"[59, 8, 5, 3, 439, 6, 22, 10, 24, 15, 9]","[4.0, 4.0, 2.0, 1.0]",Mixed,4.0,4.0,2.0,1.0
3807,3808,"[37, 144, 8, 78, 456, 66, 62, 49, 162, 5, 88, ...","[5.0, 3.0, 6.0, 4.0]",Mixed,5.0,3.0,6.0,4.0
3808,3809,"[18, 15, 87, 29, 9, 5, 179, 112, 61]","[2.0, 1.0, 5.0, 1.0]",South,2.0,1.0,5.0,1.0
3809,3810,"[22, 62, 5, 75, 284, 142, 172]","[0.0, 3.0, 2.0, 2.0]",Mixed,0.0,3.0,2.0,2.0


### 최종 결과본

In [97]:
joined_course[['id','places','east','west','south','north','main_location']]

Unnamed: 0,id,places,east,west,south,north,main_location
0,1,"[602, 10, 58, 29]",2.0,1.0,0.0,1.0,Mixed
1,2,"[19, 37, 10, 602, 60, 58, 29]",2.0,2.0,0.0,3.0,Mixed
2,3,"[19, 602, 10, 58, 29]",2.0,1.0,0.0,2.0,Mixed
3,4,"[37, 202, 58]",1.0,0.0,0.0,2.0,North
4,5,"[37, 60, 202, 58, 29]",2.0,1.0,0.0,2.0,Mixed
...,...,...,...,...,...,...,...
3806,3807,"[59, 8, 5, 3, 439, 6, 22, 10, 24, 15, 9]",4.0,4.0,2.0,1.0,Mixed
3807,3808,"[37, 144, 8, 78, 456, 66, 62, 49, 162, 5, 88, ...",5.0,3.0,6.0,4.0,Mixed
3808,3809,"[18, 15, 87, 29, 9, 5, 179, 112, 61]",2.0,1.0,5.0,1.0,South
3809,3810,"[22, 62, 5, 75, 284, 142, 172]",0.0,3.0,2.0,2.0,Mixed


## sample 뽑아서 확인 - id : 115번, 봄을 찾아 떠나는 제주도 동쪽여행 3박 4일 - 테스트

In [52]:
# Hand-count the direction vector for one course's place ids as a sanity check.
tttt = np.zeros(4)
for i in [8,13,4,26,145,3,3,52]:
  # Look the region up once and reuse it for both the print and the count.
  region = place.loc[place['id']==i,'region2'].values[0]
  print(region)
  for j, regions in enumerate(location_template):
    if region in regions:
      tttt[j] += 1

한림
구좌
구좌
구좌
성산
성산
성산
성산


In [55]:
tttt

array([7., 1., 0., 0.])

## 추천 메소드에 인자로 받기

In [98]:
str1 = "North,East"
str1.split(",")

['North', 'East']

In [None]:
# Accepted direction labels (the values stored in main_location) mapped to the
# per-direction count column. The lowercase names are the direction_df column
# names; the target table is assumed to use the same names (TODO confirm).
DIRECTION_COLUMNS = {'East': 'east', 'West': 'west', 'South': 'south', 'North': 'north'}


def build_location_sql(direction_list):
  """Build the SQL fragment that restricts courses to the chosen directions.

  The fragment starts with "and" so it can be appended to an existing WHERE
  clause.

  Args:
    direction_list: Iterable of direction labels, each one of 'East', 'West',
      'South' or 'North'. Duplicates are ignored.

  Returns:
    'and 1=1' when no direction or every direction is selected; a
    main_location equality test when exactly one is selected; otherwise a
    test for 'Mixed' courses that have at least one place in every selected
    direction.

  Raises:
    ValueError: If a label is not one of the accepted directions. Because the
      values are interpolated into SQL text, anything outside the whitelist
      is rejected instead of being passed through.
  """
  # dict.fromkeys removes duplicates while keeping the caller's order.
  selected = list(dict.fromkeys(direction_list))
  unknown = [d for d in selected if d not in DIRECTION_COLUMNS]
  if unknown:
    raise ValueError(f"unknown direction(s): {unknown}")

  # Case 1: nothing or everything selected -> no restriction.
  if len(selected) in (0, len(DIRECTION_COLUMNS)):
    return 'and 1=1'
  # Case 2: a single direction -> courses whose main location is that one.
  # The label is a string value, so it must be quoted in the SQL.
  if len(selected) == 1:
    return f"and main_location = '{selected[0]}'"
  # Case 3: several directions -> mixed courses that visit all of them.
  # 'Mixed' is the label written to main_location for such courses.
  presence = ' and '.join(f"{DIRECTION_COLUMNS[d]} > 0" for d in selected)
  return f"and main_location = 'Mixed' and {presence}"


direction_list = ['North','East']
num_of_direction = len(direction_list)
# To be added to the WHERE clause of the existing recommendation query.
location_sql = build_location_sql(direction_list)
