In [4]:
import requests, time, datetime, os
import pandas as pd
from pandas.core.frame  import DataFrame
from pandas.core.series import Series
from pandas_datareader  import data as pdr
from bs4 import BeautifulSoup as bs
import yfinance as yf

def append_missing_trading_date(df:pd.core.frame.DataFrame):
  ''' Return
    input: 
              Date	Close	Diff	Open	High	Low	Volume
      0	2022-02-07	10610	30	10605	10615	10585	72843
    output:
              Date	Close	Diff	Open	High	Low	Volume	Trade
      0	2022-02-07	10610	30	10605	10615	10585	72843	True
  '''
  idx = pd.date_range(df.loc[:,'Date'].min(),df.loc[:,'Date'].max())
  s = df.set_index('Date')
  s = s.reindex(idx, fill_value=0)

  s['Trade'] = s.apply(lambda x: x['Close'] != 0, axis=1)
  s = s.reset_index().rename(columns={"index": "Date"})

  for i in s.index:
    if s.iloc[i,-1]==False:
      s.iloc[i,:-1] = s.iloc[i-1,:-1] 
  return s

def get_from_naver(ticker:str, page:int):
  url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=ticker)
  def get_html_table(url:str):
    headers =  {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'}
    response = requests.get(url, headers=headers)
    html = bs(response.text, 'lxml')
    html_table = html.select('table') 
    return str(html_table)

  df = pd.DataFrame()
  for _page in range(1, page):
    pg_url = '{url}&page={page}'.format(url=url, page=_page).replace(' ','')
    html_table = get_html_table(pg_url)
    df = pd.concat([df,pd.read_html(html_table, header=0)[0]],axis=0,ignore_index=True)
  assert df.empty == False, "the requested dataframe is empty."
  df = df.dropna(axis=0)
  
  df = df.rename(columns= {'날짜': 'Date', '종가': 'Close', '전일비': 'Diff', '시가': 'Open', '고가': 'High', '저가': 'Low', '거래량': 'Volume'}) 
  df[['Close', 'Diff', 'Open', 'High', 'Low', 'Volume']] = df[['Close', 'Diff', 'Open', 'High', 'Low', 'Volume']].astype(int) 
  df['Date'] = pd.to_datetime(df['Date']) 

  df = append_missing_trading_date(df)
  ret = df.sort_values(by=['Date'], ascending=True)
  return ret

def get_from_yahoo(ticker:str, page=None):
  ''' Return
    Input : Date,[ Open, High, Low, Close, Adj Close, Volume]
    Output: [Date,Open, High, Low, Close, Adj Close, Volume, Trade]
  '''
  yf.pdr_override()
  df_price = pdr.get_data_yahoo(ticker)
  df_price = df_price.reset_index()
  df_price = append_missing_trading_date(df_price)
  return df_price


In [7]:
get_from_yahoo(ticker='SPY', page=2)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Trade
0,1993-01-29,43.968750,43.968750,43.750000,43.937500,25.627335,1003200,True
1,1993-01-29,43.968750,43.968750,43.750000,43.937500,25.627335,1003200,False
2,1993-01-29,43.968750,43.968750,43.750000,43.937500,25.627335,1003200,False
3,1993-02-01,43.968750,44.250000,43.968750,44.250000,25.809635,480500,True
4,1993-02-02,44.218750,44.375000,44.125000,44.343750,25.864292,201300,True
...,...,...,...,...,...,...,...,...
10607,2022-02-11,449.410004,451.609985,438.940002,440.459991,440.459991,153064100,False
10608,2022-02-14,439.920013,441.600006,435.339996,439.019989,439.019989,122062800,True
10609,2022-02-15,443.730011,446.279999,443.179993,446.100006,446.100006,88659500,True
10610,2022-02-16,443.929993,448.059998,441.940002,446.600006,446.600006,84863600,True


In [8]:
get_from_naver(ticker='261240', page=2)

Unnamed: 0,Date,Close,Diff,Open,High,Low,Volume,Trade
0,2022-02-07,10610,30,10605,10615,10585,72843,True
1,2022-02-08,10595,15,10585,10600,10575,185531,True
2,2022-02-09,10575,20,10590,10590,10565,188785,True
3,2022-02-10,10580,5,10560,10585,10555,102221,True
4,2022-02-11,10605,25,10610,10625,10595,177963,True
5,2022-02-11,10605,25,10610,10625,10595,177963,False
6,2022-02-11,10605,25,10610,10625,10595,177963,False
7,2022-02-14,10570,35,10605,10610,10570,83128,True
8,2022-02-15,10605,35,10580,10610,10575,48750,True
9,2022-02-16,10580,25,10575,10590,10570,42041,True
