In [18]:
import pandas as pd

# 1. Source data: download the Excel sheet from
#    http://marketdata.krx.co.kr/contents/MKD/04/0406/04060100/MKD04060100.jsp
#    and convert it to CSV.
#    The ticker column is read as text so that it never depends on pandas
#    inferring an integer dtype (the previous '{:06d}' formatting raised
#    ValueError whenever the column was read as strings).
code_df = pd.read_csv('../data/data.csv', dtype={'종목코드': str})

# Alternative source kept for reference (not executed):
# code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]

# Ticker codes are 6 characters wide; restore any leading zeros that were
# dropped when the sheet was exported.
code_df['종목코드'] = code_df['종목코드'].str.strip().str.zfill(6)

# Only the company name and the ticker code are needed; give them English names.
code_df = code_df[['기업명', '종목코드']].rename(columns={'기업명': 'name', '종목코드': 'code'})

code_df.head()


Unnamed: 0,name,code
0,3S,60310
1,AJ네트웍스,95570
2,AJ렌터카,68400
3,AK홀딩스,6840
4,APS홀딩스,54620


In [7]:
import requests, time
from bs4 import BeautifulSoup
import progressbar

# Look up the ticker code for a company name and build the matching
# Naver Finance (http://finance.naver.com) daily-price URL.
def get_url(item_name, code_df):
    """Return the daily-price URL for ``item_name`` and its last page number.

    Args:
        item_name: Company name to look up in the ``name`` column of ``code_df``.
        code_df: DataFrame with ``name`` and ``code`` columns.

    Returns:
        tuple: ``(url, max_page)`` where ``url`` is the first-page URL of the
        daily price table and ``max_page`` is the highest page number found in
        the page navigation (1 when no pagination links are present).

    Raises:
        ValueError: If ``item_name`` is not present in ``code_df``.
        requests.HTTPError: If the server answers with an error status.
    """
    # Select the scalar directly instead of Series.to_string(): to_string() can
    # pad the value with whitespace and renders an empty selection as a
    # placeholder string, both of which end up inside the URL.
    matches = code_df.loc[code_df['name'] == item_name, 'code']
    if matches.empty:
        raise ValueError("unknown item name: {!r}".format(item_name))
    code = str(matches.iloc[0]).strip()

    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)
    print("요청 URL = {}".format(url))

    r = requests.get(url, timeout=10)
    r.raise_for_status()
    source = BeautifulSoup(r.content, 'html.parser')

    # The page navigation is the first centre-aligned table on the page.
    nav_tables = source.find_all('table', align='center')
    nav = nav_tables[0] if nav_tables else None

    # Preferred source: the "last page" cell (class pgRR).
    last_href = None
    if nav is not None:
        last_cell = nav.find('td', class_='pgRR')
        if last_cell is not None and last_cell.a is not None:
            last_href = last_cell.a.get('href')
    if last_href:
        return url, int(last_href[last_href.rfind('page=') + 5:])

    # Fallback for navigations without a "last page" cell: take the largest
    # page number among the links that are present, defaulting to one page.
    max_page = 1
    if nav is not None:
        for link in nav.find_all('a'):
            href = link.get('href') or ''
            pos = href.rfind('page=')
            if pos == -1:
                continue
            tail = href[pos + 5:]
            if tail.isdigit():
                max_page = max(max_page, int(tail))
    return url, max_page

# Fetch the full daily price history of one stock from Naver Finance.
item_name = '3S'
url, max_page = get_url(item_name, code_df)

# Throttling: pause briefly after every N pages to avoid hammering the server.
PAUSE_EVERY_N_PAGES = 50
PAUSE_SECONDS = 1.50

# Collect one DataFrame per page and concatenate once at the end; growing a
# DataFrame inside the loop is quadratic and DataFrame.append no longer exists
# in pandas 2.x.
page_frames = []
for page in progressbar.progressbar(range(1, max_page + 1)):
    pg_url = '{url}&page={page}'.format(url=url, page=page)
    try:
        page_frames.append(pd.read_html(pg_url, header=0)[0])
    except Exception as exc:
        # Best effort: keep the pages fetched so far, but report why we stopped.
        # (A bare ``except`` would also swallow KeyboardInterrupt.)
        print(item_name, page, repr(exc))
        break

    if page % PAUSE_EVERY_N_PAGES == 0:
        time.sleep(PAUSE_SECONDS)

if not page_frames:
    raise RuntimeError("no price pages could be downloaded for {!r}".format(item_name))

numeric_cols = ['close', 'diff', 'open', 'high', 'low', 'volume']
df = (
    pd.concat(page_frames, ignore_index=True)
    # Drop rows that contain missing values.
    .dropna()
    # Rename the Korean column headers to English.
    .rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff', '시가': 'open',
                     '고가': 'high', '저가': 'low', '거래량': 'volume'})
    .astype({col: int for col in numeric_cols})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(by=['date'], ascending=True)
)


                                                                               N/A% (0 of 416) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--

요청 URL = http://finance.naver.com/item/sise_day.nhn?code=060310


100% (416 of 416) |######################| Elapsed Time: 0:01:25 Time:  0:01:25


In [8]:
# Show the `df` DataFrame as this cell's rich output.
df

Unnamed: 0,date,close,diff,open,high,low,volume
6234,2002-04-24,8510,910,8370,8510,7750,140723
6230,2002-04-25,8750,240,9530,9530,7490,768702
6229,2002-04-26,7700,1050,8550,8750,7700,176111
6228,2002-04-29,6780,920,6800,7200,6780,139388
6227,2002-04-30,5970,810,6310,6500,5970,146777
6226,2002-05-02,6160,190,6300,6370,5950,192706
6223,2002-05-03,6140,20,6050,6490,6050,158840
6222,2002-05-06,5700,440,6160,6190,5590,129961
6221,2002-05-07,6380,680,5340,6380,5340,239475
6220,2002-05-08,7140,760,6940,7140,6500,132093


In [65]:
# 필요한 모듈 import 하기 
import plotly.offline as offline 
import plotly.graph_objs as go 

# Render plotly figures inline in the notebook.
offline.init_notebook_mode(connected=True)

# One line trace: closing price over time for the selected stock.
trace = go.Scatter(x=df.date, y=df.close, name=item_name)
data = [trace]

# Date axis with quick look-back buttons (1/3/6/12/36 months, then "all")
# and a range slider underneath the chart.
layout = {
    'title': '{}의 종가(close) Time Series'.format(item_name),
    'xaxis': {
        'rangeselector': {
            'buttons': [
                {'count': months, 'label': label, 'step': 'month', 'stepmode': 'backward'}
                for months, label in ((1, '1m'), (3, '3m'), (6, '6m'), (12, '12m'), (36, '36m'))
            ] + [{'step': 'all'}],
        },
        'rangeslider': {},
        'type': 'date',
    },
}

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

In [3]:
# Company-info page URL with three %s placeholders, filled in order:
# cmp_cd (ticker code), fin_typ and freq_typ.
url_template = (
    'https://companyinfo.stock.naver.com/v1/company/c1010001.aspx'
    '?cmp_cd=%s&fin_typ=%s&freq_typ=%s'
)
# Example: fill the template for ticker 005930 and echo the result.
url = url_template % ('005930', '4', 'Y')
url
    

'https://companyinfo.stock.naver.com/v1/company/c1010001.aspx?cmp_cd=005930&fin_typ=4&freq_typ=Y'

In [31]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
 
from io import StringIO

# Settings: the company whose financial summary table is fetched.
item_name = '삼성전자'
# Select the scalar code directly instead of Series.to_string(): to_string()
# can pad the value with whitespace and renders an empty selection as a
# placeholder string, both of which end up inside the request URL.
code_matches = code_df.loc[code_df['name'] == item_name, 'code']
if code_matches.empty:
    raise ValueError("unknown item name: {!r}".format(item_name))
code = str(code_matches.iloc[0]).strip()
ticker = code

print(code)

# The company page embeds two values (encparam and id) that the AJAX endpoint
# expects as query parameters. Non-greedy capture so that a second quoted value
# on the same line cannot be swallowed into encparam.
pat_enc = re.compile("encparam: '(.*?)'", re.IGNORECASE)
pat_id = re.compile("id: '([a-zA-Z0-9]*)' ?", re.IGNORECASE)

url = "https://companyinfo.stock.naver.com/v1/company/c1010001.aspx?cmp_cd={}".format(ticker)
html = requests.get(url, timeout=10).text
enc_match = pat_enc.search(html)
id_match = pat_id.search(html)
if enc_match is None or id_match is None:
    # Fail with a clear message instead of AttributeError on None.group().
    raise RuntimeError("encparam/id not found in the company page for {!r}".format(ticker))
encparam = enc_match.group(1)
encid = id_match.group(1)

# Example of the final request URL:
# https://companyinfo.stock.naver.com/v1/company/ajax/cF1001.aspx?cmp_cd=005930&fin_typ=4&freq_typ=Y&encparam=d05nMWFRMVpUR0x0SE1WdjFwekJyUT09&id=RVArcVR1a2
url = 'https://companyinfo.stock.naver.com/v1/company/ajax/cF1001.aspx?'
params = 'cmp_cd={}&fin_typ=4&freq_typ=Y&encparam={}&id={}'.format(ticker, encparam, encid)

# NOTE(review): the request is sent with a placeholder Referer header;
# presumably the endpoint only checks that one is present - confirm.
headers = {
  "Referer": "HACK"
}

print(url)
html = requests.get(url+params, headers=headers, timeout=10).text

# Parse the HTML: the second header row holds the period labels.
soup = BeautifulSoup(html, "html5lib")
result = soup.select("table > thead > tr:nth-of-type(2) > th")

# Convert to a DataFrame. The markup is wrapped in StringIO because passing a
# literal HTML string to read_html is deprecated in recent pandas releases.
df = pd.read_html(StringIO(html))[1]
df.columns = ["구분"] + [x.text.split()[0] for x in result]
df = df.set_index('구분')
# Element-wise formatting via Series.map per column; DataFrame.applymap is
# deprecated in recent pandas releases.
df = df.apply(lambda column: column.map("{:.2f}".format))
print(df)

005930
https://companyinfo.stock.naver.com/v1/company/ajax/cF1001.aspx?
                  2013/12        2014/12        2015/12        2016/12  \
구분                                                                       
매출액            2286927.00     2062060.00     2006535.00     2018667.00   
영업이익            367850.00      250251.00      264134.00      292407.00   
영업이익(발표기준)      367850.00      250251.00      264134.00      292407.00   
세전계속사업이익        383643.00      278750.00      259610.00      307137.00   
당기순이익           304748.00      233944.00      190601.00      227261.00   
당기순이익(지배)       298212.00      230825.00      186946.00      224157.00   
당기순이익(비지배)        6535.00        3119.00        3655.00        3104.00   
자산총계           2140750.00     2304230.00     2421795.00     2621743.00   
부채총계            640590.00      623348.00      631197.00      692113.00   
자본총계           1500160.00     1680882.00     1790598.00     1929630.00   
자본총계(지배)       1444426.00     1621817.00