In [1]:
import os
import time
import pandas as pd
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options

In [2]:
# Query window typed into the site's start/end date inputs, formatted YYYYMMDD.
start_date = '20160101'
end_date = '20191231'

# Maturity labels exactly as they appear in the site's term drop-downs
# ('월' = months, '년' = years); they are later mapped to year fractions.
maturities = [
    '3월', '6월', '9월',
    '1년', '1년6월',
    '2년', '2년6월',
    '3년', '4년', '5년', '7년',
    '10년', '15년', '20년', '30년',
]

In [3]:
# Timestamp identifying this collection run; it names the download folder and
# is reused later as the `collection_datetime` column and in the result file name.
now = datetime.now().strftime('%Y%m%d%H%M%S')

# Absolute path of this run's download folder: <cwd>/data/<now>.
# os.path.join picks the platform's separator; the previous hard-coded
# '\\data\\...' suffix only produced a valid path on Windows.
downloads = os.path.join(os.path.abspath(os.curdir), 'data', now)

# Creates 'data' if it is missing and then the run folder in one call.
# Still raises FileExistsError if the run folder itself already exists.
os.makedirs(downloads)

# Chrome preferences passed through the "prefs" experimental option.
# The download directory is pointed at this run's folder; the remaining
# switches presumably suppress save prompts and allow several automatic
# downloads in a row -- NOTE(review): confirm against Chrome's preference docs.
chrome_options = Options()
chrome_options.add_experimental_option("prefs", {
    "download.default_directory": downloads,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
    'profile.default_content_setting_values.automatic_downloads': 1,
})
driver = webdriver.Chrome('chromedriver', options=chrome_options)

In [None]:
# --- Tunables for this cell -------------------------------------------------
SLOTS_PER_QUERY = 4       # the form exposes selectBndTyp1..4 / selectTrm1..4
POLL_INTERVAL = 5         # seconds between progress-bar checks
SEARCH_TIMEOUT = 600      # give up on a search after this many seconds
DOWNLOAD_TIMEOUT = 600    # give up on a download after this many seconds
BOND_TYPE = '국채/국고채권/양곡,외평,재정'
UNSELECTED = '--선택--'


def _retype_date(element_id, value):
    """Click the date input `element_id`, erase its content and type `value`.

    The field is cleared with four BACKSPACE + DELETE pairs (eight characters
    in total). The element is looked up again before every keystroke rather
    than cached.
    """
    driver.find_element_by_id(element_id).click()
    for _ in range(4):
        driver.find_element_by_id(element_id).send_keys(Keys.BACK_SPACE)
        driver.find_element_by_id(element_id).send_keys(Keys.DELETE)
    driver.find_element_by_id(element_id).send_keys(value)


def _count_xls(directory):
    """Return how many entries of `directory` end in 'xls'."""
    return sum(1 for name in os.listdir(directory) if name[-3:] == 'xls')


def _wait_for_search():
    """Block until the progress bar reports tabindex == '-1'.

    Raises:
        TimeoutError: if that does not happen within SEARCH_TIMEOUT seconds
            (previously this loop could spin forever).
    """
    deadline = time.time() + SEARCH_TIMEOUT
    while True:
        # Sleep first so the search has time to start before the first check.
        time.sleep(POLL_INTERVAL)
        progress_bar = driver.find_element_by_id('___commonProcessbar2')
        if progress_bar.get_attribute('tabindex') == '-1':
            return
        if time.time() > deadline:
            raise TimeoutError(
                'searchData() did not finish within {} seconds'.format(SEARCH_TIMEOUT))


def _wait_for_download(xls_before):
    """Block until a new 'xls' file is fully written to `downloads`.

    A download counts as finished when the number of 'xls' files exceeds
    `xls_before` and no Chrome partial file ('.crdownload') is left.

    Raises:
        TimeoutError: if no finished file shows up within DOWNLOAD_TIMEOUT seconds.
    """
    deadline = time.time() + DOWNLOAD_TIMEOUT
    while True:
        names = os.listdir(downloads)
        finished = sum(1 for name in names if name[-3:] == 'xls')
        still_writing = any(name.endswith('.crdownload') for name in names)
        if finished > xls_before and not still_writing:
            return
        if time.time() > deadline:
            raise TimeoutError(
                'excelDownLoad() produced no finished file within {} seconds'.format(DOWNLOAD_TIMEOUT))
        time.sleep(1)


# implicitly_wait() configures how long every later find_element call keeps
# retrying; it is not a pause, so it is set once up front instead of after
# individual clicks.
driver.implicitly_wait(10)

# Navigate through the nested frames to the search form.
url = 'http://www.kofiabond.or.kr'
driver.get(url)
driver.switch_to.frame(driver.find_element_by_xpath('//frame[@name="fraAMAKMain"]'))
driver.find_element_by_id('group26').click()
driver.switch_to.frame(driver.find_element_by_id('maincontent'))
driver.find_element_by_id('tabContents1_tab_tabs2').click()
driver.switch_to.frame(driver.find_element_by_id('tabContents1_contents_tabs2_body'))

# Enter the query window.
_retype_date('schSstandardDt_input', start_date)
_retype_date('schEstandardDt_input', end_date)

# The form takes at most SLOTS_PER_QUERY maturities per search, so the
# maturities are queried and downloaded in batches of that size.
for batch_start in range(0, len(maturities), SLOTS_PER_QUERY):
    batch = maturities[batch_start:batch_start + SLOTS_PER_QUERY]
    for slot, term in enumerate(batch, start=1):
        Select(driver.find_element_by_id('selectBndTyp{}_input_0'.format(slot))).select_by_visible_text(BOND_TYPE)
        Select(driver.find_element_by_id('selectTrm{}_input_0'.format(slot))).select_by_visible_text(term)

    driver.execute_script('searchData()')
    _wait_for_search()

    # Wait for the file to land before touching the form again; otherwise the
    # next search (or the parsing cell) can race against an unfinished download.
    xls_before = _count_xls(downloads)
    driver.execute_script('excelDownLoad()')
    _wait_for_download(xls_before)

    # Reset every slot so a shorter final batch does not inherit old choices.
    for slot in range(1, SLOTS_PER_QUERY + 1):
        Select(driver.find_element_by_id('selectBndTyp{}_input_0'.format(slot))).select_by_visible_text(UNSELECTED)
        Select(driver.find_element_by_id('selectTrm{}_input_0'.format(slot))).select_by_visible_text(UNSELECTED)

In [None]:
# Number of agency columns kept per maturity after the '금투협' column is dropped
# (presumably the association's own column -- NOTE(review): confirm).
AGENCIES_PER_MATURITY = 5

# Maturity label -> maturity expressed in years.
MATURITY_IN_YEARS = {
    '3월': 0.25, '6월': 0.5, '9월': 0.75,
    '1년': 1., '1년6월': 1.5,
    '2년': 2., '2년6월': 2.5,
    '3년': 3., '4년': 4., '5년': 5., '7년': 7.,
    '10년': 10., '15년': 15., '20년': 20., '30년': 30.,
}


def _stack_sheet_by_maturity(sheet):
    """Turn one downloaded sheet into rows of (base_date, maturity, agencies...).

    Layout relied on, as read by ``pd.read_excel`` with default options:
    row 0 holds the maturity label above the first column of each maturity
    group, row 1 holds the per-column agency names, rows 2+ hold the data with
    the date in column 0.

    Args:
        sheet: raw DataFrame of one downloaded file.

    Returns:
        DataFrame with one block of rows per maturity, concatenated vertically.

    Raises:
        ValueError: from ``pd.concat`` if the sheet contains no maturity group.
    """
    # .copy() so the column rename below works on an independent frame.
    kept = sheet.loc[:, sheet.iloc[1] != '금투협'].copy()
    kept.columns = kept.iloc[1]
    dates = kept.iloc[2:, 0]
    num_maturity = (kept.shape[1] - 1) // AGENCIES_PER_MATURITY
    body = kept.iloc[2:, 1:]

    blocks = []
    for group in range(num_maturity):
        first = AGENCIES_PER_MATURITY * group
        # .copy() before insert(): never mutate a slice of `body` in place.
        block = body.iloc[:, first:first + AGENCIES_PER_MATURITY].copy()
        block.insert(0, 'maturity', kept.iloc[0, 1 + first])
        block.insert(0, 'base_date', dates)
        blocks.append(block)
    return pd.concat(blocks)


# Sorted so the row order of the result does not depend on directory order.
files = sorted(f for f in os.listdir(downloads) if f[-3:] == 'xls')
if not files:
    # pd.concat([]) would raise the same exception type with an opaque message.
    raise ValueError('No xls files found in {}'.format(downloads))

data_by_file_stack = [
    _stack_sheet_by_maturity(pd.read_excel(os.path.join(downloads, file)))
    for file in files
]

# Wide -> long: one row per (base_date, maturity, agency); '-' marks a missing quote.
yields_long = (
    pd.concat(data_by_file_stack)
    .melt(id_vars=['base_date', 'maturity'], var_name='agency', value_name='yield')
    .loc[lambda frame: frame['yield'] != '-']
    .reset_index(drop=True)
)
yields_long['base_date'] = yields_long['base_date'].str.replace('/', '', regex=False)
yields_long['maturity'] = yields_long['maturity'].map(MATURITY_IN_YEARS)
yields_long['yield'] = yields_long['yield'].astype(float)
# Appended as the fifth column (position 4), after base_date/maturity/agency/yield.
yields_long['collection_datetime'] = now

data_all_file = yields_long

In [None]:
# Write the collected yields to ./result/채권시가평가기준수익률_<now>.xlsx.
# exist_ok=True replaces the listdir-based existence check, which was racy and
# also skipped the mkdir when 'result' existed only as a regular file.
os.makedirs('result', exist_ok=True)
result_path = './result/채권시가평가기준수익률_{}.xlsx'.format(now)
with pd.ExcelWriter(result_path, engine='xlsxwriter') as writer:
    data_all_file.to_excel(writer, index=False)