# 01 Get stock data and plot it

* **Original Text**: https://excelsior-cjh.tistory.com/109?category=975542 [EXCELSIOR]

* **Final Edit**: 2021-08-03


In [8]:
!pip3 install --user pandas
!pip3 install --user html5lib
!pip3 install --user BeautifulSoup4
!pip3 install --user plotly
!pip3 install --user nbformat



In [9]:
# 1: Get each listed company's stock code from the KRX KIND listed-company download
import pandas as pd

code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
# Left-pad the codes to six characters with zeros. Going through str gives the
# same result as '{:06d}' for integer codes, and also accepts codes that were
# parsed as text, which the integer format would reject.
code_df['종목코드'] = code_df['종목코드'].astype(str).str.zfill(6)
# Keep only the company name and code, under English column names.
code_df = code_df[['회사명', '종목코드']]
code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})

In [10]:
import requests
from bs4 import BeautifulSoup as bs

# Request headers passed to requests.get() in get_html_table_symbol; the
# User-Agent string is that of a desktop Chrome browser.
# NOTE(review): presumably required because the site does not serve the tables
# to the default client User-Agent ("no tables found") -- confirm.
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'}

def get_url(item_name, code_df):
  """Build the Naver Finance daily-price URL for a company name.

  Args:
    item_name: Company name to look up in the ``name`` column of ``code_df``.
    code_df: DataFrame with ``name`` and ``code`` columns.

  Returns:
    The ``sise_day.nhn`` URL for the first row whose name equals ``item_name``.

  Raises:
    ValueError: If no row of ``code_df`` has the given name.
  """
  # Compare with a boolean mask rather than formatting the name into a query
  # string, so names containing quote characters cannot break the expression.
  matches = code_df.loc[code_df['name'] == item_name, 'code']
  if matches.empty:
    raise ValueError('no stock code found for item name: {!r}'.format(item_name))
  # Take the scalar directly; rendering the Series with to_string() can add
  # padding whitespace that would end up inside the URL.
  code = str(matches.iloc[0]).strip()
  url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)
  print("요청 URL = {}".format(url))
  return url

def get_html_table_symbol(url):
  """Fetch a page and return its ``<table>`` elements as a single HTML string.

  Args:
    url: Address of the page to download.

  Returns:
    ``str()`` of the list of ``<table>`` elements selected from the page.

  Raises:
    requests.RequestException: If the request times out, fails, or the server
      answers with an HTTP error status.
  """
  # https://www.inflearn.com/questions/152894 [no tables found]
  # The timeout keeps a stalled connection from hanging the notebook forever.
  response = requests.get(url, headers=headers, timeout=10)
  # Fail here on an HTTP error instead of handing an error page to the caller.
  response.raise_for_status()
  html = bs(response.text, 'lxml')
  html_table = html.select('table')
  return str(html_table)

In [11]:
# 2. Get stock data from naver finance
from io import StringIO

item_name = '삼성전자'
url = get_url(item_name, code_df)

# Number of listing pages to download (pages 1..LAST_PAGE).
LAST_PAGE = 20

# Collect one frame per page and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside the
# loop copies all previously fetched rows on every iteration.
page_frames = []
for page in range(1, LAST_PAGE + 1):
  pg_url = '{url}&page={page}'.format(url=url, page=page)
  table = get_html_table_symbol(pg_url)
  # Wrap the markup in StringIO; passing literal HTML to read_html is deprecated.
  page_frames.append(pd.read_html(StringIO(table), header=0)[0])

# Rows containing missing values are dropped.
df = pd.concat(page_frames, ignore_index=True).dropna()
#df.head()

요청 URL = http://finance.naver.com/item/sise_day.nhn?code=005930


In [12]:
# 3. Rename columns
# One chain: English column names, integer price/volume columns,
# parsed dates, then oldest-first ordering.
df = (
  df
  .rename(columns={
    '날짜': 'date',
    '종가': 'close',
    '전일비': 'diff',
    '시가': 'open',
    '고가': 'high',
    '저가': 'low',
    '거래량': 'volume',
  })
  .astype({
    'close': int,
    'diff': int,
    'open': int,
    'high': int,
    'low': int,
    'volume': int,
  })
  .assign(date=lambda frame: pd.to_datetime(frame['date']))
  .sort_values(by=['date'], ascending=True)
)
#df.head()

In [13]:
# 4. Plot
import plotly.offline as offline
import plotly.graph_objs as go

offline.init_notebook_mode(connected=True)

# Single line trace: closing price against date, labelled with the company name.
trace = go.Scatter(x=df.date, y=df.close, name=item_name)
data = [trace]

# Range-selector buttons: 1/3/6-month look-back windows plus a show-all button.
month_buttons = [
  {'count': months, 'label': '{}m'.format(months), 'step': 'month', 'stepmode': 'backward'}
  for months in (1, 3, 6)
]

layout = {
  'title': '{}의 종가(close) Time Series'.format(item_name),
  'xaxis': {
    'rangeselector': {'buttons': month_buttons + [{'step': 'all'}]},
    'rangeslider': {},
    'type': 'date',
  },
}

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

