In [1]:
from selenium import webdriver
import googlemaps
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tn

# Show up to 500 rows when a DataFrame is displayed. The option is spelled out
# in full: abbreviated names are resolved by substring matching and can become
# ambiguous if pandas adds a similarly named option.
pd.set_option('display.max_rows', 500)

In [2]:
# Load the two input tables: `coord` (station coordinates) and `crawled`
# (the crawled station list), in that order.
coord, crawled = map(
    pd.read_csv,
    ('./DATA/station_coord.csv', './DATA/station_side.csv'),
)

In [3]:
# Drop the `st_id` column. Rebinding (instead of inplace=True) together with
# errors='ignore' keeps this cell re-runnable: executing it a second time no
# longer raises KeyError because the column is already gone.
coord = coord.drop(columns=['st_id'], errors='ignore')

In [4]:
# Distinct line codes present in the coordinate table.
pd.unique(coord['st_line'])

array(['8', '3', '5', '1', '4', '2', 'A', 'K', 'B', 'UI', 'G', '7', '6',
       'I', 'SU', 'U', 'S', 'KK', '9', 'I2', 'E'], dtype=object)

In [5]:
# Distinct line labels present in the crawled table.
pd.unique(crawled['st_line'])

array(['1호선', '2호선', '3호선', '4호선', '5호선', '6호선', '7호선', '8호선', '9호선',
       '인천1호선', '인천2호선', '분당', '신분당', '경의중앙', '공항철도', '경춘선', '수인선',
       '의정부경전철', '용인경전철', '경강선', '우이신설경전철', '서해선', '김포'], dtype=object)

In [6]:
# Translation table from the short line codes used in `coord['st_line']`
# to the line labels used in `crawled['st_line']`.
# Codes '1'..'9' all follow the same "<n>호선" pattern.
line_dict = {str(number): f'{number}호선' for number in range(1, 10)}
# The remaining lines have individual codes.
line_dict.update(
    I='인천1호선',
    I2='인천2호선',
    B='분당',
    S='신분당',
    K='경의중앙',
    A='공항철도',
    G='경춘선',
    SU='수인선',
    U='의정부경전철',
    E='용인경전철',
    KK='경강선',
    UI='우이신설경전철',
)

In [7]:
# Translate line codes into the labels used by `crawled`.
# `.replace` is used rather than `.map` so the cell is idempotent: `.map` sends
# every value that is not a key of `line_dict` to NaN, so running the cell a
# second time (on already translated labels) would blank the whole column.
# NOTE(review): codes without an entry in `line_dict` now stay as they are
# instead of becoming NaN; every code shown by the inspection cell has an entry.
coord['st_line'] = coord['st_line'].replace(line_dict)

In [8]:
# Station-name spellings in `coord` that differ from the spelling wanted
# downstream, keyed by the spelling found in `coord`.
station_name_fixes = {
    '서울': '서울역',
    '서울(경의중앙)': '서울역',
    '서울(경의중앙선)': '서울역',
    '419민주묘지': '4·19민주묘지',
    '신촌(경의중앙선)': '신촌(경의중앙)',
    '양평(경의중앙선)': '양평',
    '인천국제공항': '인천공항1터미널',
}
# Stations removed from the table altogether.
dropped_stations = ['풍기', '탕정']

# The renames are applied to `st_name` only. The previous whole-frame
# `coord.replace('서울', ...)` would also have rewritten any other column that
# happened to contain the same text.
# Filtering ends with `.copy()` so later column assignments on `coord` do not
# operate on a slice of the original frame (SettingWithCopyWarning).
coord = (
    coord
    .assign(st_name=coord['st_name'].replace(station_name_fixes))
    .loc[lambda frame: ~frame['st_name'].isin(dropped_stations)]
    .copy()
)

In [9]:
def _stations_missing_from_coord(line_name):
    """Return the rows of `crawled` on `line_name` whose station name does not
    appear in `coord` under the same line."""
    known_names = coord.loc[coord['st_line'] == line_name, 'st_name'].tolist()
    on_line = crawled['st_line'] == line_name
    not_in_coord = ~crawled['st_name'].isin(known_names)
    return crawled.loc[on_line & not_in_coord]


miss_9 = _stations_missing_from_coord('9호선')
miss_ap = _stations_missing_from_coord('공항철도')
miss_sb = _stations_missing_from_coord('신분당')
miss_b = _stations_missing_from_coord('분당')

In [10]:
# Collect every crawled station that still needs coordinates:
#  - all stations of '서해선' and '김포' (neither label is a value of line_dict),
#  - plus the per-line gaps computed above (miss_9, miss_ap, miss_sb, miss_b).
missed = crawled[crawled['st_line'].isin(['서해선', '김포'])]
missed = pd.concat([missed, miss_9, miss_ap, miss_sb, miss_b], axis=0).reset_index(drop=True)

# Keep only the identifying columns and add empty coordinate columns.
# `np.nan` replaces `np.NaN`, an alias that was removed in NumPy 2.0, and
# `.assign` builds a new frame instead of writing into a column subset
# (which raised SettingWithCopyWarning).
missed = missed[['st_name', 'st_line']].assign(st_x=np.nan, st_y=np.nan)

In [11]:
# Everything that needs a coordinate lookup: rows of `coord` whose st_x is
# null, followed by the stations collected in `missed`.
coord_without_x = coord[coord['st_x'].isnull()]
target = pd.concat([coord_without_x, missed], axis=0)
target = target.reset_index(drop=True)

In [12]:
# Work on a copy so `target` keeps its own labels; `df` is only used to build
# the search keywords.
df = target.copy()

# Rename line labels
line_label_renames = {
    '경의중앙': '경의중앙선',
    '우이신설경전철': '우이신설선',
    '김포': '김포골드라인',
    '신분당': '신분당선',
    '분당': '분당선',
}
for old_label, new_label in line_label_renames.items():
    df.loc[df['st_line'] == old_label, 'st_line'] = new_label

# One station name is shortened as well.
is_sau = df['st_name'] == '사우(김포시청)'
df.loc[is_sau, 'st_name'] = '사우'

In [13]:
# Look up one address string per row of `df` by searching Kakao Map for
# "<st_name>역 <st_line>" and reading the first entry of the result list.
# The collected strings end up in `address`, in the same order as `df`.
search_box_xpath = '//*[@id="search.keyword.query"]'
search_button_xpath = '//*[@id="search.keyword.submit"]'
first_result_xpath = '//*[@id="info.search.place.list"]/li[1]/div[5]/div[2]/p[1]'

# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium releases expect a Service object - confirm against
# the installed version.
driver = webdriver.Chrome('../webdriver/chromedriver.exe')
address = []
try:
    # implicitly_wait() configures the element-lookup timeout for the whole
    # session; it is not a pause, so it is set once instead of between steps.
    driver.implicitly_wait(1)

    driver.get('https://map.kakao.com/')
    # find_element('xpath', ...) is the generic locator form; the
    # find_element_by_xpath shortcut is not available in Selenium >= 4.3.
    driver.find_element('xpath', '//*[@id="dimmedLayer"]').click()

    stations = list(zip(df['st_name'], df['st_line']))
    for st_name, st_line in tn(stations):
        driver.get('https://map.kakao.com/')
        keyword = f"{st_name}역 {st_line}"
        driver.find_element('xpath', search_box_xpath).clear()
        driver.find_element('xpath', search_box_xpath).send_keys(keyword)
        driver.find_element('xpath', search_button_xpath).click()
        unit = driver.find_element('xpath', first_result_xpath).text
        address.append(unit)
finally:
    # Always shut the browser down, even when a lookup raises part-way through;
    # quit() ends the whole session rather than only closing the window.
    driver.quit()

HBox(children=(IntProgress(value=0, max=46), HTML(value='')))




In [14]:
# Attach the scraped addresses; `address` was collected row by row from `df`,
# the copy of `target`.
target = target.assign(address=address)

In [15]:
# Hand-entered addresses that overwrite the scraped value for these stations
# (matched on station name only).
manual_addresses = {
    '양촌': '경기도 김포시 양촌읍 유현리 290',
    '사우(김포시청)': '경기도 김포시 사우동 854',
    '운양': '경기도 김포시 운양동 1403',
    '구래': '경기도 김포시 구래동 6907',
    '풍무': '경기도 김포시 사우동 542-3',
    '장기': '경기도 김포시 장기동 1791',
    '고촌': '경기도 김포시 고촌읍 신곡리 532-24',
    '걸포북변': '경기도 김포시 북변동 135-10',
    '마산': '경기도 김포시 마산동 691',
}
for station_name, station_address in manual_addresses.items():
    target.loc[target['st_name'] == station_name, 'address'] = station_address

In [16]:
import os

# Geocode every address in `target` with the Google Maps Geocoding API.
# Results are collected in `x` (latitude) and `y` (longitude), in row order.
#
# The API key is read from the environment instead of being written into the
# notebook. The key that used to be hard-coded here has been exposed and should
# be revoked and replaced.
g_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not g_key:
    raise RuntimeError(
        'Set the GOOGLE_MAPS_API_KEY environment variable before running this cell.'
    )
gmaps = googlemaps.Client(key=g_key)

x, y = [], []
for station_address in target['address']:
    matches = gmaps.geocode(station_address)
    if not matches:
        # Same exception type as indexing the empty result list would give,
        # but the message names the address that could not be resolved.
        raise IndexError(f'Geocoding returned no result for {station_address!r}')
    location = matches[0]['geometry']['location']
    x.append(float(location['lat']))
    y.append(float(location['lng']))

In [17]:
# Store the geocoded values: `x` goes to st_x and `y` to st_y.
target = target.assign(st_x=x, st_y=y)

In [18]:
# The address was only needed for geocoding; remove the helper column.
target = target.drop(labels=['address'], axis='columns')

In [19]:
# Combine the rows of `coord` that already had an st_x value with the newly
# geocoded `target` rows, then sort by line, drop exact duplicate rows and
# renumber the index.
coord_with_x = coord[coord['st_x'].notnull()]
output = pd.concat([coord_with_x, target], axis=0)
output = output.sort_values('st_line')
output = output.drop_duplicates()
output = output.reset_index(drop=True)

In [20]:
# Overwrite the coordinates of 김포공항 on the 김포 line with fixed values.
is_gimpo_airport = (output['st_line'] == '김포') & (output['st_name'] == '김포공항')
output.loc[is_gimpo_airport, ['st_x', 'st_y']] = [37.562434, 126.801058]

In [21]:
# Write the completed coordinate table to disk, without the index column.
output.to_csv(
    path_or_buf='./DATA/station_coord_1216.csv',
    index=False,
)