In [None]:
%pip install bs4 selenium webdriver_manager python-dotenv haversine pandas

In [1]:
import pandas as pd

df_bus = pd.read_csv('bus.csv', encoding='euc-kr')
df_subway = pd.read_csv('subway.csv', encoding='euc-kr')
df_bike = pd.read_csv('bike.csv', encoding='cp949')

In [2]:
# df_bus

In [3]:
# df_subway[['code', 'name', 'lat(y)', 'lng']]

In [4]:
# df_bike[['대여소\n번호', "보관소(대여소)명", "위도(Y)", "경도(X)"]]

In [57]:
import time
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.common.exceptions import ElementNotInteractableException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

class KakaoRouteFinder:
    def __init__(self):
        self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

    def fetch_bike(self, verbose=True, init=True, time_delta=0.2):
        if init:
            self.driver.find_element(by=By.CSS_SELECTOR, value=f"#biketab").click()
            time.sleep(2)
            
        if verbose:
            print(" > bike")
            for li in self.driver.find_elements(by=By.CSS_SELECTOR, value=f"div.BikeRouteResultView > ul > li"):
                li.click()        
                time.sleep(time_delta)
                
        element = self.driver.find_element(by=By.CSS_SELECTOR, value=f"div.BikeRouteResultView")
        return element.get_attribute('innerHTML')
    
    def fetch_walk(self, verbose=True, init=True, time_delta=0.2):
        if init:
            self.driver.find_element(by=By.CSS_SELECTOR, value=f"#walktab").click()
            time.sleep(time_delta)

        if verbose:
            print(" > walk")

        element = self.driver.find_element(by=By.CSS_SELECTOR, value=f"div.WalkRouteResultView")
        return element.get_attribute('innerHTML')
        
    def fetch_transit(self, verbose=True, init=True, time_delta=0.2, init_time=2.0):    
        if init:
            self.driver.find_element(by=By.CSS_SELECTOR, value=f"#transittab").click()
            time.sleep(init_time)

        if verbose:
            print(" > transit")
            for route in self.driver.find_elements(by=By.CSS_SELECTOR, value=f"li.TransitRouteItem"):
                route.click()
                for more_route in route.find_elements(by=By.CSS_SELECTOR, value=f"span.moreBtn"):
                    more_route.click()
                    time.sleep(time_delta)
                time.sleep(time_delta)

        element = self.driver.find_element(by=By.CSS_SELECTOR, value=f"ul.TransitTotalPanel")
        return element.get_attribute('innerHTML')

    def find_route_by_url(self, type, base_url, verbose=True, init=True, time_delta=0.2, init_time=2.0):
        self.driver.get(base_url)
        time.sleep(time_delta)

        # self.driver.find_element(by=By.CSS_SELECTOR, value="body").click()
        # time.sleep(time_delta)
        
        try:
            self.driver.find_element(by=By.CSS_SELECTOR, value="#dimmedLayer").click()
        except ElementNotInteractableException:
            pass

        try:
            if (type == 'bike'):
                return self.fetch_bike(verbose, init)
            elif (type == 'transit'):
                return self.fetch_transit(verbose, init, init_time=init_time)
            elif (type == 'walk'):
                return self.fetch_walk(verbose, init)
            else:
                return None
        except Exception as e:
            # print('find_route_by_url', e)
            pass


    def find_route_by_congnamul(self, type, origin, dest, rt1='ORIGIN', rt2='DESTINATION', verbose=False, init=False, time_delta=0.2, init_time=2.0):
        (org_x, org_y) = origin
        (des_x, des_y) = dest
        base_url = f"https://map.kakao.com/?map_type=TYPE_MAP&target={type}&rt={int(org_x)},{int(org_y)},{int(des_x)},{int(des_y)}&rt1={rt1}&rt2={rt2}"
        return self.find_route_by_url(type, base_url, verbose, init, time_delta, init_time)
        

    def find_route_by_keyword(self, origin, dest, time_delta=0.2, init_time=2.0):
        try:
            route = {}
            for type in ['transit']:#, 'walk', 'bike']:
                base_url = f"https://map.kakao.com/?map_type=TYPE_MAP&target={type}&sName={origin}&eName={dest}"
                route[type] = self.find_route_by_url(type, base_url, verbose=True, init=(type=='transit'), time_delta=time_delta, init_time=init_time)
            return route
            
        except Exception as e:
            # print("find_route_by_keyword", e)
            pass

    def __del__(self):
        self.driver.close()

In [24]:
org, des = ('서울시청', '서울고속터미널')
# org, des = ('대한건축학회', '서울대학교 자하연')
routes = KakaoRouteFinder().find_route_by_keyword(org, des, time_delta=0.5, init_time=2.0)



Current google-chrome version is 102.0.5005
Get LATEST chromedriver version for 102.0.5005 google-chrome
Driver [C:\Users\lucet\.wdm\drivers\chromedriver\win32\102.0.5005.27\chromedriver.exe] found in cache


 > transit


In [20]:
def extract_route(type, route):
    res = []
    soup = BeautifulSoup(route, 'html.parser')
    if (type == 'bike'):
        for route in (soup.find_all('li', {"class": "BikeRouteItem"})):
            res.append({
                "mode": route.find('span', {"class": "mode"}).text,
                "time": route.find('span', {"class": "time"}).text.strip(),
                "distance": route.find('span', {"class": "distance"}).text,
                "altitude": route.find('span', {"class": "altitude"}).text,
                "calories": route.find('span', {"class": "calories"}).text,
            })
    elif (type == 'transit'):
        for route in (soup.find_all('li', {"class": "TransitRouteItem"})):
            res.append({
                "time": route.find('span', {"class": "time"}).text.strip(),
                "info": route.find('span', {"class": "walkTime"}).get('title'),
            })
    elif (type == 'walk'):
        for route in (soup.find_all('li', {"class": "WalkRouteItem"})):
            res.append({
                "mode": route.find('span', {"class": "mode"}).text,
                "time": route.find('span', {"class": "time"}).text.strip(),
                "info": route.find('div', {"class": "info"}).text.strip(),
            })
    return res

for type in ['transit']:#, 'bike', 'walk']:
    print(type, extract_route(type, routes[type]))

transit [{'time': '40분', 'info': '도보10분 | 환승없음 | 요금 1,200원 | 10.7km'}, {'time': '38분', 'info': '도보16분 | 환승없음 | 요금 1,200원 | 8.3km'}, {'time': '40분', 'info': '도보10분 | 환승없음 | 요금 2,800원 | 10.7km'}, {'time': '38분', 'info': '도보19분 | 환승1회 | 요금 1,350원 | 11.5km'}, {'time': '41분', 'info': '도보16분 | 환승1회 | 요금 1,350원 | 13.8km'}, {'time': '43분', 'info': '도보10분 | 환승없음 | 요금 2,150원 | 9.3km'}, {'time': '47분', 'info': '도보20분 | 환승없음 | 요금 1,200원 | 10.7km'}, {'time': '44분', 'info': '도보17분 | 환승1회 | 요금 1,350원 | 14.7km'}, {'time': '43분', 'info': '도보24분 | 환승1회 | 요금 1,350원 ~ 2,250원 | 11.5km'}, {'time': '46분', 'info': '도보14분 | 환승1회 | 요금 1,350원 | 11.8km'}]


### Coordinate Transform

In [25]:
import requests
import dotenv
from os import environ
from haversine import haversine

dotenv.load_dotenv('.env', override=True)

def transform(coord, input_coord='WCONGNAMUL', output_coord='WGS84'):
    URL = f'https://dapi.kakao.com/v2/local/geo/transcoord.json?x={coord[0]}&y={coord[1]}&input_coord={input_coord}&output_coord={output_coord}'
    headers = {'Authorization': f'KakaoAK {environ.get("KAKAO_REST_API_KEY")}'}
    new_coord = requests.get(URL, headers=headers).json()['documents'][0]
    return (new_coord['x'], new_coord['y'])

def get_distance(org, des):
    return haversine(org, des, unit='km')

def get_nearest_bike(lat, lng):
    delta = 5e-3
    df = df_bike[(df_bike["위도(Y)"] > lat-delta) & (df_bike["위도(Y)"] < lat+delta) \
               & (df_bike["경도(X)"] > lng-delta) & (df_bike["경도(X)"] < lng+delta) ]

    min_bike, min_dist = "N/A", float("inf")
    for i, bike in df.iterrows():
        dist = get_distance((lat, lng), bike[["위도(Y)", "경도(X)"]])
        if min_dist > dist:
            min_dist = dist
            min_bike = bike
    return min_dist, min_bike

def get_coordinate(keyword):
    URL = f'https://dapi.kakao.com/v2/local/search/keyword.json?query={keyword}'
    headers = {'Authorization': f'KakaoAK {environ.get("KAKAO_REST_API_KEY")}'}
    coord = requests.get(URL, headers=headers).json()['documents'][0]
    return (float(coord['y']), float(coord['x']))

org_coord = get_coordinate(org)
des_coord = get_coordinate(des)

# get_nearest_bike(37.550007, 126.914825)
# transform((495285,1129803), 'WCONGNAMUL', 'WGS84')

(org_x, org_y) = transform(org_coord[::-1], 'WGS84', 'WCONGNAMUL')
(des_x, des_y) = transform(des_coord[::-1], 'WGS84', 'WCONGNAMUL')
print(f'https://map.kakao.com/?map_type=TYPE_MAP&target=car&rt={int(org_x)},{int(org_y)},{int(des_x)},{int(des_y)}&rt1={org}&rt2={des}')

https://map.kakao.com/?map_type=TYPE_MAP&target=car&rt=495285,1129803,501545,1112946&rt1=서울시청&rt2=서울고속터미널


In [26]:
soup = BeautifulSoup(routes['transit'], 'html.parser')
S = set()
for route in (soup.find_all('li', {"class": "TransitRouteItem"})):
    nodes = [name.text.strip().replace(" 승차", "").replace(" 하차", "").replace(" 정류장", "") for name in route.find_all('a', {"data-id": "name"})]
    # print(nodes)
    S.update(nodes)
    nodes = [node.text.strip() for node in route.find_all('li', {"class": "nodeName"})]
    # print(nodes)
    S.update(nodes)
print(S)

{'을지로2가.파인에비뉴', '노들역', '종로3가역', '남산3호터널', '신한은행본점', '신사역', '롯데백화점', '흑석역', '충무로역', '서울역버스환승센터', '용산한신아파트', '해방촌', '반포대교남단.한강시민공원입구', '동대입구역', '남영역', '신반포역', '한강중학교앞', '남대문시장', '삼성본관앞', '시청앞.덕수궁', '잠원역', '서울시청', '노량진역 환승', '서울고속터미널', '숙대입구역', '신세계앞', '시청역', '을지로입구역', '을지로3가역', '서울역버스환승센터(5번승강장)(중)', '남대문시장앞.이회영활동터', '압구정역', '신반포역.세화여중고', '을지로3가', '숭례문', '옥수역', '안국역', '용산역', '약수역', '전쟁기념관', '녹사평역', '경복궁역', '고속터미널역', 'KT광화문지사', '을지로입구.로얄호텔', '고속터미널', '구반포역', '갈월동', '금호역', '동작역', '프레스센터', '서울역', '시청앞', '을지로3가역 환승', '용산구청.크라운호텔'}


In [27]:
from tqdm import tqdm

seoul_bike = {}
for s in tqdm(S, desc="Retrieve Seoul Bike Stations: "):
    bus = df_bus[df_bus['정류소명'] == s]
    subway = df_subway[df_subway['name'] == s.replace('역', '')]
    try:
        if len(subway):
            lat, lng = subway[['lat(y)', 'lng']].iloc[0]
        elif len(bus):
            lat, lng = bus[['Y좌표', 'X좌표']].iloc[0]
        else:
            # print(s, 'N/A')
            continue
        _, bike = get_nearest_bike(float(lat), float(lng))
        bike_id, bike_name, bike_lat, bike_lng = bike[['대여소\n번호', '보관소(대여소)명', '위도(Y)', '경도(X)']]
        # print(s, lat, lng, bike_name)
        congnamul = transform((bike_lng, bike_lat), 'WGS84', 'WCONGNAMUL')
        seoul_bike[bike_id] = ( \
            bike_name, \
            bike_lat, \
            bike_lng, \
            get_distance(org_coord, (bike_lat, bike_lng)), \
            get_distance(des_coord, (bike_lat, bike_lng)), \
            congnamul,
        )
    except:
        # print('N/A')
        pass

for id in seoul_bike.keys():
    name, lat, lng, org_dist, des_dist, congnamul = seoul_bike[id]
    coord = (lat, lng)
    print(id, name, coord, org_dist-des_dist, congnamul)

Retrieve Seoul Bike Stations: 100%|██████████| 55/55 [00:04<00:00, 12.03it/s]

332 을지로2가 사거리 남측 (37.56599045, 126.987793) -6.062984832409214 (497304.0, 1129571.0)
2002 노들역 1번출구 (37.51284409, 126.9524689) 1.5633946184736347 (489494.0, 1114827.0)
3410 종로3가역 8번출구 (37.5724678, 126.99189) -6.1768625833057715 (498209.0, 1131368.0)
471 회현사거리 남측 (37.56035995, 126.9826508) -5.6058351603706456 (496168.0, 1128009.0)
445 대한상공회의소 (37.56015396, 126.9741058) -5.832866559549046 (494280.0, 1127952.0)
2504 신사역 4번출구 뒤 (37.51588058, 127.0193024) 5.164351302832588 (504266.0, 1115667.0)
4774 을지로입구역 8번출구 (37.56590271, 126.981781) -6.7198653685202885 (495976.0, 1129547.0)
2026 흑석역 4번출구 (37.50858688, 126.9634934) 2.7653310721416937 (491931.0, 1113644.0)
390 충무로역 1번출구 (37.56138992, 126.9953537) -4.644448223670654 (498974.0, 1128294.0)
379 서울역9번출구 (37.55599976, 126.9733582) -5.002946664103813 (494115.0, 1126799.0)
810 이태원지하보도 (37.53841019, 126.9866486) -0.7809873712158684 (497050.0, 1121918.0)
4330 반포한강공원입구,씨티21옆 (37.50749588, 127.0001831) 6.24391957399239 (500040.0, 1113340.0)
371 동대입구역 6




In [28]:
finder = KakaoRouteFinder()
bikes = sorted(seoul_bike.items(), key=lambda item: item[1][3]-item[1][4])
bike_routes = {}
for i, srt in tqdm(enumerate(bikes), desc="Finding Bike Routes: "):
    (_, (srt_name, srt_lat, srt_lng, _, _, srt_coord)) = srt
    for j in tqdm(range(i+1, len(bikes))):
        end = bikes[j]
        (_, (end_name, end_lat, end_lng, _, _, end_coord)) = end
        route = finder.find_route_by_congnamul('bike', srt_coord, end_coord, rt1=str(i), rt2=str(j), verbose=False, init=False, time_delta=0.2)
        route_id = f'{i}-{j}'
        bike_routes[route_id] = extract_route('bike', route)
        # print(f'({route_id})', srt_name, end_name, bike_routes[route_id][0]['time'])
        # break
    # break
del finder



Current google-chrome version is 102.0.5005
Get LATEST chromedriver version for 102.0.5005 google-chrome
Driver [C:\Users\lucet\.wdm\drivers\chromedriver\win32\102.0.5005.27\chromedriver.exe] found in cache
100%|██████████| 41/41 [00:35<00:00,  1.15it/s]
100%|██████████| 40/40 [00:35<00:00,  1.12it/s]
100%|██████████| 39/39 [00:34<00:00,  1.12it/s]
100%|██████████| 38/38 [00:33<00:00,  1.14it/s]
100%|██████████| 37/37 [00:32<00:00,  1.14it/s]
100%|██████████| 36/36 [00:33<00:00,  1.09it/s]
100%|██████████| 35/35 [00:30<00:00,  1.14it/s]
100%|██████████| 34/34 [00:31<00:00,  1.10it/s]
100%|██████████| 33/33 [00:29<00:00,  1.10it/s]
100%|██████████| 32/32 [00:29<00:00,  1.09it/s]
100%|██████████| 31/31 [00:29<00:00,  1.07it/s]
100%|██████████| 30/30 [00:27<00:00,  1.08it/s]
100%|██████████| 29/29 [00:27<00:00,  1.07it/s]
100%|██████████| 28/28 [00:26<00:00,  1.06it/s]
100%|██████████| 27/27 [00:25<00:00,  1.07it/s]
100%|██████████| 26/26 [00:24<00:00,  1.06it/s]
100%|██████████| 25/25 

In [51]:
len(bike_routes.keys()), len(bikes)*(len(bikes)-1)//2

(861, 861)

In [58]:
finder = KakaoRouteFinder()

org_routes = []
des_routes = []

for i, bike in tqdm(enumerate(bikes), desc="Finding Transit Routes: "):
    (_, (name, _, _, _, _, coord)) = bike
    try:
        route = finder.find_route_by_congnamul('transit', (org_x, org_y), coord, rt1=org+str(i), rt2=name, verbose=False, init=True, time_delta=0.5, init_time=2)
        org_routes.append(extract_route('transit', route))
    except:
        route = finder.find_route_by_congnamul('walk', (org_x, org_y), coord, rt1=org+str(i), rt2=name, verbose=False, init=False, time_delta=0.5, init_time=2)
        org_routes.append(extract_route('walk', route))
        
    try:
        route = finder.find_route_by_congnamul('transit', coord, (des_x, des_y), rt1=name, rt2=des+str(i), verbose=False, init=True, time_delta=0.5, init_time=2)
        des_routes.append(extract_route('transit', route))
    except:
        route = finder.find_route_by_congnamul('walk', coord, (des_x, des_y), rt1=name, rt2=des+str(i), verbose=False, init=False, time_delta=0.5, init_time=2)
        des_routes.append(extract_route('walk', route))


del finder



Current google-chrome version is 102.0.5005
Get LATEST chromedriver version for 102.0.5005 google-chrome
Driver [C:\Users\lucet\.wdm\drivers\chromedriver\win32\102.0.5005.27\chromedriver.exe] found in cache
Finding Transit Routes: : 42it [04:32,  6.49s/it]


In [76]:
import datetime

def calc_timedelta(str_time):
    delta = datetime.timedelta()
    for t in str_time.strip().split():
        if t.find('시간') != -1:
            delta += datetime.timedelta(hours=int(t.replace("시간", "")))
        elif t.find('분') != -1:
            delta += datetime.timedelta(minutes=int(t.replace("분", "")))
        elif t.find('초') != -1:
            delta += datetime.timedelta(seconds=int(t.replace("초", "")))
    return delta

In [78]:
len(org_routes), len(des_routes)

(42, 42)

In [80]:
for i, srt in enumerate(bikes):
    (_, (srt_name, srt_lat, srt_lng, _, _, srt_coord)) = srt
    for j in range(i+1, len(bikes)):
        try:
            (_, (end_name, end_lat, end_lng, _, _, end_coord)) = bikes[j]
            route_id = f'{i}-{j}'
            org_time = calc_timedelta(org_routes[j][0]['time'])
            bike_time = calc_timedelta(bike_routes[route_id][0]['time'])
            des_time = calc_timedelta(des_routes[j][0]['time'])
            total_time = org_time + bike_time + des_time
            print(f'({route_id} - {total_time})', org, str(org_time), srt_name, str(bike_time), end_name, str(des_time), des)
        except:
            pass

(0-1 - 0:58:00) 서울시청 0:09:00 경복궁역 4번출구 뒤 0:05:00 종로구청 옆 0:44:00 서울고속터미널
(0-2 - 0:48:00) 서울시청 0:05:00 경복궁역 4번출구 뒤 0:06:00 서울도시건축전시관 0:37:00 서울고속터미널
(0-3 - 0:46:00) 서울시청 0:04:00 경복궁역 4번출구 뒤 0:09:00 롯데호텔 0:33:00 서울고속터미널
(0-4 - 0:59:00) 서울시청 0:18:00 경복궁역 4번출구 뒤 0:08:00 안국역 5번출구 앞 0:33:00 서울고속터미널
(0-5 - 0:48:00) 서울시청 0:07:00 경복궁역 4번출구 뒤 0:09:00 을지로입구역 2번출구 0:32:00 서울고속터미널
(0-6 - 2:42:00) 서울시청 0:07:00 경복궁역 4번출구 뒤 0:09:00 을지로입구역 8번출구 2:26:00 서울고속터미널
(0-7 - 0:49:00) 서울시청 0:07:00 경복궁역 4번출구 뒤 0:10:00 웨스틴 조선호텔 0:32:00 서울고속터미널
(0-8 - 0:54:00) 서울시청 0:10:00 경복궁역 4번출구 뒤 0:10:00 을지로지하쇼핑센터 0:34:00 서울고속터미널
(0-9 - 0:56:00) 서울시청 0:09:00 경복궁역 4번출구 뒤 0:10:00 시청역(2호선) 9번출구 뒤 0:37:00 서울고속터미널
(0-10 - 1:00:00) 서울시청 0:17:00 경복궁역 4번출구 뒤 0:12:00 종로3가역 8번출구 0:31:00 서울고속터미널
(0-11 - 0:58:00) 서울시청 0:13:00 경복궁역 4번출구 뒤 0:11:00 을지로2가 사거리 남측 0:34:00 서울고속터미널
(0-12 - 0:54:00) 서울시청 0:15:00 경복궁역 4번출구 뒤 0:12:00 신세계백화점 본점 앞 0:27:00 서울고속터미널
(0-13 - 0:56:00) 서울시청 0:12:00 경복궁역 4번출구 뒤 0:11:00 대한상공회의소 0:33:00 서울고속터미널
(0-14 - 0:55:00