In [162]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc

rc("font", family="AppleGothic")
plt.rcParams["axes.unicode_minus"] = False

class bok_api():
    
    key = "RK4O8KBR4N0O4E2W6V9E"
    url_base = "http://ecos.bok.or.kr/api/"
    table = None
    stat_code = None
    data_format = {
        "DD": "%Y%m%d",
        "MM": "%Y%m",
        "YY": "%Y"
    }
    
    def __init__(self):
        self.url = self.url_base + "StatisticTableList/{}/json/kr/0/{}".format(self.key, str(1000))
        result = requests.get(self.url)
        result = pd.DataFrame(result.json()["StatisticTableList"]["row"])
        self.table = result
    
    def get_stat_list_detail(self, rows=1000, stat_code=""):
        if stat_code == "":
            raise ValueError("Stat code not provided! Please insert stat code!")
        self.url = self.url_base + "StatisticItemList/{}/json/kr/0/{}/{}".format(self.key, str(rows), stat_code)
        self.stat_code = stat_code
        result = requests.get(self.url)
        result = pd.DataFrame(result.json()["StatisticItemList"]["row"])
        self.table_detail = result
    
    def get_stat_data(self, rows=10000, start="19000101", end="30000101", item_code="", stat_code=""):
        if item_code == "":
            raise ValueError("Item code not provided! Please insert item code!")
        if stat_code != "":
            self.stat_code = stat_code
        self.item_code = item_code
        self.period = self.table_detail[self.table_detail["ITEM_CODE"] == self.item_code]["CYCLE"].values[0]
        data_format = self.data_format[self.period]
        self.url = self.url_base + "StatisticSearch/{}/json/kr/0/{}/{}/{}/{}/{}/{}".format(self.key, rows, self.stat_code, self.period, start, end, self.item_code)
        result = requests.get(self.url)
        result = pd.DataFrame(result.json()["StatisticSearch"]["row"])
        self.data = result
        self.data = self.data[["TIME", "DATA_VALUE"]]
        self.data.index = pd.to_datetime(self.data["TIME"], format=data_format)
        self.data = self.data["DATA_VALUE"].replace("", np.nan)
        return self.data
    

In [101]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


def to_float(element):
    if (type(element) == str) and (len(element) != 0):
        res = element.replace('\"', "")
        return float(res)
    
class krx_indus_index():
    
    base_url = "http://marketdata.krx.co.kr/contents/COM/GenerateOTP.jspx?name=fileDown&filetype=csv&url=MKD/13/1301/13010102/mkd13010102&type=2&ind_type={}&period_strt_dd={}&period_end_dd={}&pagePath=%2Fcontents%2FMKD%2F13%2F1301%2F13010102%2FMKD13010102.jsp"
    df_columns = ["date", "close", "chg", "%chg", "open", "high", "low", "vol", "vol_won", "mkt_cap"]
    
    def __init__(self):
        pass
    
    def get_df_by_code(self, start="19000101", end="30000101", index_no="5300"):
        self.url = self.base_url.format(index_no, start, end)
        response = requests.get(self.url)
        download_url = "http://file.krx.co.kr/download.jspx"
        json_data = {"code": response.content}
        headers_json = {"Referer": "http://marketdata.krx.co.kr/contents/MKD/99/MKD9900001.jspx"}
        data = requests.post(download_url, data=json_data, headers=headers_json)
        parsing = BeautifulSoup(data.text)
        parsing = parsing.text.split("\n")
#         parsing = [x.split('\",\"') for x in parsing]
#         parsing = pd.DataFrame(parsing[1:], columns=self.df_columns)
        self.data = parsing
        

In [102]:
test_inst = krx_indus_index()
test_inst.get_df_by_code()



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


In [105]:
import csv

In [114]:
test_inst.data.text.split('\n')

['년/월/일,종가,대비,등락률(%),시가,고가,저가,거래량(천주),거래대금(원),상장시가총액(원)',
 '"2019/04/26","1,300.65","-7.29","-0.56","1,299.27","1,305.80","1,293.50","117,163","4,366,250,763,652","1,373,934,852,061,790"',
 '"2019/04/25","1,307.94","-4.65","-0.35","1,308.89","1,314.36","1,304.35","127,365","4,653,960,343,790","1,381,329,959,388,485"',
 '"2019/04/24","1,312.59","-12.27","-0.93","1,331.03","1,331.04","1,304.89","146,536","5,336,484,190,312","1,388,482,728,633,700"',
 '"2019/04/23","1,324.86","2.62","0.20","1,322.39","1,326.67","1,319.62","113,867","3,842,947,529,572","1,400,125,822,341,960"',
 '"2019/04/22","1,322.24","-0.01","-0.00","1,323.43","1,330.27","1,316.13","113,711","3,600,461,176,697","1,397,372,567,157,015"',
 '"2019/04/19","1,322.25","2.07","0.16","1,326.58","1,328.26","1,321.39","100,054","3,711,978,209,485","1,397,277,654,075,270"',
 '"2019/04/18","1,320.18","-21.60","-1.61","1,343.87","1,344.98","1,317.97","132,070","5,247,188,190,697","1,394,864,520,970,750"',
 '"2019/04/17","1,341.78","

In [110]:
list(x)

[['년'],
 ['/'],
 ['월'],
 ['/'],
 ['일'],
 ['', ''],
 ['종'],
 ['가'],
 ['', ''],
 ['대'],
 ['비'],
 ['', ''],
 ['등'],
 ['락'],
 ['률'],
 ['('],
 ['%'],
 [')'],
 ['', ''],
 ['시'],
 ['가'],
 ['', ''],
 ['고'],
 ['가'],
 ['', ''],
 ['저'],
 ['가'],
 ['', ''],
 ['거'],
 ['래'],
 ['량'],
 ['('],
 ['천'],
 ['주'],
 [')'],
 ['', ''],
 ['거'],
 ['래'],
 ['대'],
 ['금'],
 ['('],
 ['원'],
 [')'],
 ['', ''],
 ['상'],
 ['장'],
 ['시'],
 ['가'],
 ['총'],
 ['액'],
 ['('],
 ['원'],
 [')'],
 [],
 ['2019/04/26'],
 ['', ''],
 ['1,300.65'],
 ['', ''],
 ['-7.29'],
 ['', ''],
 ['-0.56'],
 ['', ''],
 ['1,299.27'],
 ['', ''],
 ['1,305.80'],
 ['', ''],
 ['1,293.50'],
 ['', ''],
 ['117,163'],
 ['', ''],
 ['4,366,250,763,652'],
 ['', ''],
 ['1,373,934,852,061,790'],
 [],
 ['2019/04/25'],
 ['', ''],
 ['1,307.94'],
 ['', ''],
 ['-4.65'],
 ['', ''],
 ['-0.35'],
 ['', ''],
 ['1,308.89'],
 ['', ''],
 ['1,314.36'],
 ['', ''],
 ['1,304.35'],
 ['', ''],
 ['127,365'],
 ['', ''],
 ['4,653,960,343,790'],
 ['', ''],
 ['1,381,329,959,388,485'],
 [],
 [

In [96]:
test_df = pd.DataFrame(test_inst.data)
test_df.fillna(np.nan)
for x in test_df.columns[1:]:
    try:
        test_df[x] = test_df[x].str.replace(',', '').astype("float64")
    except:
        pass
    try:
        test_df[x] = test_df[x].str.replace('"', '').astype("float64")
    except:
        pass


In [97]:
test_df.iloc[500:, :]

Unnamed: 0,date,close,chg,%chg,open,high,low,vol,vol_won,mkt_cap
500,"""2017/04/06",1254.73,-4.73,"-0.38"",,,,,,",,,,,,
501,"""2017/04/05",1259.46,-0.23,"-0.02"",,,,,,",,,,,,
502,"""2017/04/04",1259.69,-2.88,"-0.23"",,,,,,",,,,,,
503,"""2017/04/03",1262.57,5.37,"0.43"",,,,,,",,,,,,
504,"""2017/03/31",1257.20,-3.42,"-0.27"",,,,,,",,,,,,
505,"""2017/03/30",1260.62,-0.60,"-0.05"",,,,,,",,,,,,
506,"""2017/03/29",1261.22,3.03,"0.24"",,,,,,",,,,,,
507,"""2017/03/28",1258.19,4.18,"0.33"",,,,,,",,,,,,
508,"""2017/03/27",1254.01,-7.90,"-0.63"",,,,,,",,,,,,
509,"""2017/03/24",1261.91,-1.82,"-0.14"",,,,,,",,,,,,


In [None]:
url_2 = "http://marketdata.krx.co.kr/contents/COM/GenerateOTP.jspx?name=fileDown&filetype=csv&url=MKD/13/1301/13010102/mkd13010102&type=2&ind_type=5600&period_strt_dd=20190419&period_end_dd=20190426&pagePath=%2Fcontents%2FMKD%2F13%2F1301%2F13010102%2FMKD13010102.jsp"

In [190]:
url.format("a", "c")

'a/b/c/d'

In [1]:
a = "http://marketdata.krx.co.kr/contents/COM/GenerateOTP.jspx?name=fileDown&filetype=csv&url=MKD/13/1301/13010102/mkd13010102&type=2&ind_type=5300&period_strt_dd=20190419&period_end_dd=20190426&pagePath=%2Fcontents%2FMKD%2F13%2F1301%2F13010102%2FMKD13010102.jsp"
a.index("5300")

138

In [3]:
a[138:142]

'5300'