In [1]:
import numpy as np
import pandas as pd

import json
import heapq
import requests

from time import time
from tqdm import tqdm
from urllib.parse import urlparse

import warnings
warnings.filterwarnings('ignore')

# kakao API key
api_key = '43dabecbc47029b5ca73d4c599de3185'

# Load data

In [2]:
# 호선, 역이름, 역주소 로드 (https://www.data.go.kr/data/15013205/standard.do)
subway_meta = pd.read_csv('../data/subway_info.csv')

subway_li = subway_meta.apply(lambda row : str(row['subway_line']) + ',' + row['subway_name'], axis=1)
subway_meta['subway_info'] = subway_li

# 역간 소요시간 및 환승시간 반영된 인접행렬 로드
subway_matrix = pd.read_csv('../data/adjacency_matrix.csv')
subway_graph = subway_matrix.drop('subway_name', axis=1).values

# Utils

In [3]:
def address_to_coordinates(address):
    '''카카오 API를 활용하여 주소에 해당하는 위도 경도 추출'''
    url = f"https://dapi.kakao.com/v2/local/search/address.json?query={address}"
    headers = {"Authorization": "KakaoAK " + api_key}
    result = json.loads(str(requests.get(url, headers=headers).text))
    try:
        coordinates = result['documents'][0]['address']
        return float(coordinates['x']), float(coordinates['y'])
    except:
        return None, None


def add_coordinates_to_meta(subway_meta):
    ''''''
    # 카카오 API 활용 위도 경도 정보 추가
    subway_meta['coordinates'] = subway_meta['address'].apply(address_to_coordinates)
    subway_meta['coordinate_x'] = subway_meta['coordinates'].apply(lambda x : x[0])
    subway_meta['coordinate_y'] = subway_meta['coordinates'].apply(lambda x : x[1])
    subway_meta = subway_meta.drop('coordinates', axis=1)    
    
    # API 누락 주소 위도 경도 정보
    missing_li = {
        '덕계' : (37.818761, 127.056676),
        '도봉산' : (37.689603, 127.046347),
        '석계' : (37.615206,	127.065594),
        '성환' : (36.915781,	127.127007),
        '탕정' : (36.788272,	127.080446),
        '화정' : (37.637837,	126.832503),
        '원당' : (37.653103,	126.842891),
        '삼송' : (37.653096,	126.895559),
        '대공원' : (37.435724,	127.006474),
    }

    # API 누락 역주소 위도 경도 추가
    for key, value in missing_li.items():
        subway_meta.loc[subway_meta['subway_name'] == key, 'coordinate_x'] = value[1]
        subway_meta.loc[subway_meta['subway_name'] == key, 'coordinate_y'] = value[0]
        
    return subway_meta

# 1. 지하철역 위도 경도 추출

In [4]:
subway_meta = add_coordinates_to_meta(subway_meta)
subway_meta

Unnamed: 0,subway_line,subway_name,address,subway_info,coordinate_x,coordinate_y
0,1,소요산,경기도 동두천시 평화로 2925(상봉암동 126),"1,소요산",127.061213,37.947954
1,1,동두천,경기도 동두천시 평화로 2687(동두천동 245-210),"1,동두천",127.054940,37.927837
2,1,보산,경기도 동두천시 평화로 2539(보산동),"1,보산",127.057237,37.914319
3,1,동두천중앙,경기도 동두천시 동두천로 228(생연동 682),"1,동두천중앙",127.056239,37.901806
4,1,지행,경기도 동두천시 평화로 2285(지행동),"1,지행",127.055664,37.891877
...,...,...,...,...,...,...
296,5,방이,경기도 하남시 미사강변동로 지하90(망월동),"5,방이",127.192697,37.563103
297,5,오금,경기도 하남시 덕풍서로 지하50(덕풍동),"5,오금",127.203871,37.552058
298,5,개롱,경기도 하남시 하남대로 지하820(덕풍동),"5,개롱",127.206464,37.541902
299,5,거여,경기도 하남시 대청로 지하100(창우동),"5,거여",127.223444,37.539759


## Heuristic 1) Euclidean distance

In [5]:
# 유클리드 거리 계산 함수
def euclidean_distance(x1, y1, x2, y2):
    return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) * 100    # weight

# 휴리스틱 행렬 초기화
heu_matrix_euclidean = pd.DataFrame(index=subway_li, columns=subway_li)

# 각 이름간 유클리드 거리 계산하여 인접행렬에 저장
for subway1 in tqdm(subway_li):    
    for subway2 in subway_li:
        
        if subway1 == subway2:
            heu_matrix_euclidean.loc[subway1, subway2] = 0.0
        else:
            x1 = subway_meta.loc[subway_meta['subway_info'] == subway1, 'coordinate_x'].values[0]
            y1 = subway_meta.loc[subway_meta['subway_info'] == subway1, 'coordinate_y'].values[0]
            x2 = subway_meta.loc[subway_meta['subway_info'] == subway2, 'coordinate_x'].values[0]
            y2 = subway_meta.loc[subway_meta['subway_info'] == subway2, 'coordinate_y'].values[0]
            heu_matrix_euclidean.loc[subway1, subway2] = euclidean_distance(x1, y1, x2, y2)

100%|████████████████████████████████████████████████████████████████████████████████| 301/301 [01:11<00:00,  4.23it/s]


In [9]:
#heu_matrix_euclidean.to_csv('../heuristics/euclidean.csv')
heu_matrix_euclidean.head()

Unnamed: 0,"1,소요산","1,동두천","1,보산","1,동두천중앙","1,지행","1,덕정","1,덕계","1,양주","1,녹양","1,가능",...,"5,하남풍산","5,하남시청","5,하남검단산","5,둔촌동","5,올림픽공원","5,방이","5,오금","5,개롱","5,거여","5,마천"
"1,소요산",0.0,2.107228,3.386968,4.641553,5.635042,10.367277,12.927264,17.507112,18.950553,20.030745,...,44.412694,45.06464,45.61725,46.222673,40.693408,40.669176,42.081476,43.124995,43.925169,46.234878
"1,동두천",2.107228,0.0,1.371229,2.606365,3.596688,8.384825,10.908986,15.451593,16.891538,17.978831,...,42.523534,43.177638,43.744057,44.3667,38.957708,38.988173,40.42158,41.461524,42.308173,44.397759
"1,보산",3.386968,1.371229,0.0,1.255248,2.249611,7.019424,9.555915,14.121325,15.56422,16.64596,...,41.152305,41.80641,42.373012,42.996283,37.601911,37.643276,39.081236,40.120217,40.977995,43.028505
"1,동두천중앙",4.641553,2.606365,1.255248,0.0,0.994494,5.780657,8.304593,12.866129,14.309163,15.390713,...,39.936374,40.59118,41.162744,41.79239,36.449422,36.515792,37.962992,38.999842,39.879253,41.831894
"1,지행",5.635042,3.596688,2.249611,0.994494,0.0,4.800936,7.312346,11.872162,13.31552,14.396439,...,38.969438,39.624755,40.200106,40.834643,35.532562,35.618894,37.073258,38.108248,39.004813,40.879793


## Heuristic 2) Manhatten distance

In [10]:
# 맨하탄 거리 계산 함수
def manhattan_distance(x1, y1, x2, y2):
    return np.abs(x1 - x2) + np.abs(y1 - y2) * 100    # weight

# 휴리스틱 행렬 초기화
heu_matrix_manhatten = pd.DataFrame(index=subway_li, columns=subway_li)

# 각 이름간 맨해튼 거리 계산하여 인접행렬에 저장
for subway1 in tqdm(subway_li):    
    for subway2 in subway_li:
        
        if subway1 == subway2:
            heu_matrix_manhatten.loc[subway1, subway2] = 0.0
        else:
            x1 = subway_meta.loc[subway_meta['subway_info'] == subway1, 'coordinate_x'].values[0]
            y1 = subway_meta.loc[subway_meta['subway_info'] == subway1, 'coordinate_y'].values[0]
            x2 = subway_meta.loc[subway_meta['subway_info'] == subway2, 'coordinate_x'].values[0]
            y2 = subway_meta.loc[subway_meta['subway_info'] == subway2, 'coordinate_y'].values[0]
            heu_matrix_manhatten.loc[subway1, subway2] = manhattan_distance(x1, y1, x2, y2)

100%|████████████████████████████████████████████████████████████████████████████████| 301/301 [01:12<00:00,  4.15it/s]


In [12]:
#heu_matrix_manhatten.to_csv('../heuristics/manhatten.csv')
heu_matrix_manhatten.head()

Unnamed: 0,"1,소요산","1,동두천","1,보산","1,동두천중앙","1,지행","1,덕정","1,덕계","1,양주","1,녹양","1,가능",...,"5,하남풍산","5,하남시청","5,하남검단산","5,둔촌동","5,올림픽공원","5,방이","5,오금","5,개롱","5,거여","5,마천"
"1,소요산",0.0,2.017967,3.367525,4.619796,5.613203,10.367758,12.923837,17.446341,18.874615,19.975533,...,44.004397,44.635468,45.087879,45.557486,39.157465,38.616565,39.73229,40.750497,40.981717,45.419445
"1,동두천",2.017967,0.0,1.354152,2.604426,3.596684,8.362336,10.909341,15.428374,16.856648,17.957565,...,41.998975,42.630047,43.082458,43.552065,37.152043,36.611144,37.726869,38.745076,38.976296,43.414024
"1,보산",3.367525,1.354152,0.0,1.252271,2.245678,7.008184,9.556311,14.078816,15.50709,16.608007,...,40.644823,41.275895,41.728306,42.197913,35.797891,35.256992,36.372717,37.390924,37.622144,42.059872
"1,동두천중앙",4.619796,2.604426,1.252271,0.0,0.993407,5.75791,8.304915,12.826546,14.254819,15.355737,...,39.394549,40.02562,40.478032,40.947639,34.547617,34.006718,35.122442,36.14065,36.37187,40.809598
"1,지행",5.613203,3.596684,2.245678,0.993407,0.0,4.765653,7.312658,11.833138,13.261412,14.36233,...,38.402292,39.033363,39.485774,39.955381,33.55536,33.01446,34.130185,35.148392,35.379612,39.81734
