## 주가 데이터 수집을 위한 pykrx 모듈

- 설치

In [1]:
!pip install pykrx

Collecting pykrx
  Downloading pykrx-1.0.45-py3-none-any.whl (2.2 MB)
Collecting datetime
  Downloading DateTime-5.2-py3-none-any.whl (52 kB)
Collecting deprecated
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: deprecated, datetime, pykrx
Successfully installed datetime-5.2 deprecated-1.2.14 pykrx-1.0.45


In [9]:
!pip install pytimekr # 공휴일 데이터 처리를 위한 패키지

ERROR: Invalid requirement: '#'


## Import Module

In [51]:
from pykrx import stock

import pandas as pd
import numpy as np

from pytimekr import pytimekr
import datetime
from datetime import timedelta

import time
import pickle

## Some Useful User-Defined Functions

In [17]:
# 날짜 index 생성을 위한 함수 정의
def get_str_date_range(start, end, freq = 'BM'): # BM : 주말이 아닌 평일 중 각 달의 가장 마지막 날
    dr = list(pd.date_range(start, end, freq = freq))
    for i in range(len(dr)):
        date = dr[i].date()
        # 거래소가 개장하지 않는 경우에는 그 전날, 혹은 그 전전날의 주가 데이터를 가져옴.
        # ex. 공휴일, 주말, 경우에 따라 12월 30일이나 31일
        while (date in pytimekr.holidays(year=date.year)) or (date.weekday() >= 5) or (date.month == 12 and date.day >= 30) : 
            date = date - timedelta(days = 1)
        dr[i] = date.strftime("%Y%m%d")
        
    return dr

        
        
                

                
    

- 특정 기간의 종목코드 및 종목명 데이터프레임 반환

In [3]:
def get_all_ticker_m(start, end):
    
    """ 특정 기간 동안 KOSPI 시장에 상장했던 모든 종목의 ticker 검색 """
    
    date_range = get_str_date_range(start, end)
    results = pd.DataFrame(columns = ['티커'])

    for date in date_range : 
        ticker = pd.DataFrame(stock.get_market_ticker_list(date), columns = ['티커'])
        results = pd.merge(results, ticker, how = 'outer', on = '티커')
        
    return results
    
def get_stock_info_m(start, end):
    
    """ 특정 기간 동안 KOSPI 시장에 상장했던 모든 종목 정보 검색 """
    
    tickers = get_all_ticker_m(start, end)
    names = pd.DataFrame([stock.get_market_ticker_name(ticker) for ticker in tickers['티커']], columns = ['종목명'])
    results = pd.concat([names, tickers], axis = 1)
    return results

## Stock Price Return Data Preparing & Preprocessing

In [4]:
# 시작일과 종료일을 인수로 받아 해당 기간의 장내 모든 종목의 주가 데이터프레임을 반환하는 함수
def get_price_m(start, end):
    
    """ 특정 기간 동안 KOSPI 시장 내의 모든 종목들의 주가 데이터(종가만) 검색 """
    
    results = pd.DataFrame()
    date_idx = get_str_date_range(start, end, freq='BM')
    print('\tStart to Load the Stock Price Data ...')
    for date in date_idx : 
        ks = stock.get_market_ohlcv(date).reset_index()   #장내 모든 종목의 OHLC 데이터
        ks['날짜'] = pd.to_datetime(date) 
        results = pd.concat([results, ks], axis = 0)
        print(f'\t\t{pd.to_datetime(date).date()} completed.')
        time.sleep(0.5) # KRX 정보 시스템에서의 차단 방지를 위해 time.sleep()으로 지연.
    
    return results[['티커', '종가', '날짜' ]].reset_index(drop = True)

def get_all_stock_m(start, end):
    
    """ 특정 기간 동안 KOSPI 시장 내의 모든 종목들에 대한 정보와 주가 데이터 함께 검색 """
    """ KRX 차단을 막기 위해 어쩔 수 없이 지연시키는 과정에서 실행에 오랜 시간이 걸릴 수 있음 """
    
    stock_info = get_stock_info_m(start, end)
    stock_price = get_price_m(start, end)
    
    return pd.merge(stock_info, stock_price, how = 'inner', on = '티커')
    

In [6]:
start = '20030101'
end = '20221231'

In [5]:
market = get_all_stock_m(start, end)
market

NameError: name 'start' is not defined

## BE, ME Factor Data Preparing & Preprocessing

In [7]:
#시작일과 종료일을 인자로 받아 해당 기간 동안의 PBR 데이터를 수집. 이를 통해 BE/ME 팩터 수익률 계산하여 데이터프레임으로 반환.
def get_BPR_m(start, end):
    results = pd.DataFrame()
    date_range = get_str_date_range(start, end)
    print('\tStart to Load the Book Equity Data ...')
    for date in date_range : 
        bpr = stock.get_market_fundamental(date).reset_index()[['티커', 'PBR']]   #장내 모든 종목의 PBR 데이터
        bpr = bpr.loc[bpr['PBR'] > 0]    # PBR을 역수를 취해줄 때 inf가 되는 것을 방지하기 위해 PBR이 0인 종목은 제외.
        bpr['날짜'] = pd.to_datetime(date) 
        bpr['BE/ME'] = 1/bpr['PBR']
        bpr = bpr[['티커', 'BE/ME', '날짜']]

        results = pd.concat([results, bpr], axis = 0)
        print(f'\t\t{pd.to_datetime(date).date()} completed.')
        time.sleep(0.5)
    
    return results

#시작일과 종료일을 인자로 받아 해당 기간 동안의 시가총액(ME) 데이터를 수집. 이를 통해 Size 팩터 수익률 계산하여 데이터프레임으로 반환.
def get_ME_m(start, end):
    results = pd.DataFrame()
    date_range = get_str_date_range(start, end)
    print('\tStart to Load the Market Equity Data ...')
    for date in date_range : 
        me = stock.get_market_cap(date).reset_index()[['티커', '시가총액']] # 장내 모든 종목의 시가총액 데이터
        me.columns = ['티커', 'ME']
        me['날짜'] = pd.to_datetime(date)
        results = pd.concat([results, me], axis = 0)
        print(f'\t\t{pd.to_datetime(date).date()} completed.')
        time.sleep(0.5)
        
    return results

#위의 두 함수를 통해 만들어진 데이터프레임을 inner join을 활용하여 하나의 데이터프레임으로 반환.
def get_fundamental_m(start, end):
    bpr = get_BPR_m(start, end)
    me = get_ME_m(start, end)
    
    fundamental = pd.merge(bpr, me, how = 'inner', on = ['티커', '날짜'])
    fundamental['BE'] = fundamental['BE/ME'] * fundamental['ME']
    
    fundamental = fundamental[['티커', 'BE', 'ME', 'BE/ME', '날짜']]
    
    return fundamental

In [95]:
#시작일과 종료일을 인자로 받아 해당 기간 동안의 PBR 데이터를 수집. 이를 통해 BE/ME 팩터 수익률 계산하여 데이터프레임으로 반환.
def get_value_factor(start, end):
    results = []
    date_range = get_str_date_range(start, end)
    print('\tStart to Load the Book Equity Data ...')
    for date in date_range : 
        start = time.time()
        values = stock.get_market_fundamental(date).reset_index()
        while values['PER'].sum() <= 0 : 
            date = str(int(date) - 1)
            values = stock.get_market_fundamental(date).reset_index()
        values['날짜'] = pd.to_datetime(date) 
        values = values[['티커', '날짜', 'BPS', 'PER', 'PBR']]
        results.append(values)
        time.sleep(0.5)
        print(f'\t\t{pd.to_datetime(date).date()} completed.', end = '')
        end = time.time()
        print(f'({end - start : .3f} sec)')
    
    
    return pd.concat(results)

def get_cap(start, end):
    results = []
    date_range = get_str_date_range(start, end)
    print('\tStart to Load the Market Equity Data ...')
    for date in date_range : 
        start = time.time()
        cap = stock.get_market_cap(date).reset_index()[['티커', '시가총액']] # 장내 모든 종목의 시가총액 데이터
        while cap['시가총액'].sum() <= 0 : 
            date = str(int(date) - 1)
            cap = stock.get_market_cap(date).reset_index()[['티커', '시가총액']] 
        cap['날짜'] = pd.to_datetime(date)
        results.append(cap)
        
        time.sleep(0.5)
        end = time.time()
        print(f'\t\t{pd.to_datetime(date).date()} completed.')
        print(f'({end - start : .3f} sec)')
        
    return pd.concat(results)

def get_fundamental(start, end) : 
    results = []
    value = get_value_factor(start, end)
    cap = get_cap(start, end)
    
    return pd.merge(value, cap, on = ['티커', '날짜'])




In [96]:
fundamental = get_fundamental(start, end)
fundamental

	Start to Load the Book Equity Data ...
		2003-01-30 completed.( 3.626 sec)
		2003-02-28 completed.( 2.349 sec)
		2003-03-31 completed.( 2.348 sec)
		2003-04-30 completed.( 2.285 sec)
		2003-05-30 completed.( 2.122 sec)
		2003-06-30 completed.( 1.920 sec)
		2003-07-31 completed.( 1.689 sec)
		2003-08-29 completed.( 1.470 sec)
		2003-09-30 completed.( 1.410 sec)
		2003-10-31 completed.( 1.673 sec)
		2003-11-28 completed.( 1.216 sec)
		2003-12-29 completed.( 1.296 sec)
		2004-01-30 completed.( 3.505 sec)
		2004-02-27 completed.( 2.220 sec)
		2004-03-31 completed.( 2.359 sec)
		2004-04-30 completed.( 2.163 sec)
		2004-05-31 completed.( 1.960 sec)
		2004-06-30 completed.( 1.407 sec)
		2004-07-30 completed.( 1.769 sec)
		2004-08-31 completed.( 1.441 sec)
		2004-09-30 completed.( 1.442 sec)
		2004-10-29 completed.( 1.534 sec)
		2004-11-30 completed.( 1.765 sec)
		2004-12-29 completed.( 1.257 sec)
		2005-01-31 completed.( 3.027 sec)
		2005-02-28 completed.( 2.605 sec)
		2005-03-31 completed.(

		2021-12-29 completed.( 1.507 sec)
		2022-01-28 completed.( 1.390 sec)
		2022-02-28 completed.( 1.279 sec)
		2022-03-31 completed.( 1.608 sec)
		2022-04-29 completed.( 1.523 sec)
		2022-05-31 completed.( 1.401 sec)
		2022-06-30 completed.( 1.232 sec)
		2022-07-29 completed.( 1.619 sec)
		2022-08-31 completed.( 1.594 sec)
		2022-09-30 completed.( 1.632 sec)
		2022-10-31 completed.( 1.529 sec)
		2022-11-30 completed.( 1.291 sec)
		2022-12-29 completed.( 1.316 sec)
	Start to Load the Market Equity Data ...
		2003-01-30 completed.
( 2.542 sec)
		2003-02-28 completed.
( 1.675 sec)
		2003-03-31 completed.
( 1.351 sec)
		2003-04-30 completed.
( 1.442 sec)
		2003-05-30 completed.
( 1.273 sec)
		2003-06-30 completed.
( 1.674 sec)
		2003-07-31 completed.
( 1.473 sec)
		2003-08-29 completed.
( 1.348 sec)
		2003-09-30 completed.
( 1.596 sec)
		2003-10-31 completed.
( 1.347 sec)
		2003-11-28 completed.
( 1.554 sec)
		2003-12-29 completed.
( 1.301 sec)
		2004-01-30 completed.
( 2.188 sec)
		2004-02

		2020-05-29 completed.
( 1.622 sec)
		2020-06-30 completed.
( 1.492 sec)
		2020-07-31 completed.
( 1.443 sec)
		2020-08-31 completed.
( 1.481 sec)
		2020-09-29 completed.
( 1.297 sec)
		2020-10-30 completed.
( 1.716 sec)
		2020-11-30 completed.
( 1.439 sec)
		2020-12-29 completed.
( 1.835 sec)
		2021-01-29 completed.
( 1.378 sec)
		2021-02-26 completed.
( 1.632 sec)
		2021-03-31 completed.
( 1.319 sec)
		2021-04-30 completed.
( 1.318 sec)
		2021-05-31 completed.
( 1.273 sec)
		2021-06-30 completed.
( 1.557 sec)
		2021-07-30 completed.
( 1.605 sec)
		2021-08-31 completed.
( 1.534 sec)
		2021-09-30 completed.
( 1.806 sec)
		2021-10-29 completed.
( 1.438 sec)
		2021-11-30 completed.
( 1.660 sec)
		2021-12-29 completed.
( 1.497 sec)
		2022-01-28 completed.
( 1.625 sec)
		2022-02-28 completed.
( 1.345 sec)
		2022-03-31 completed.
( 1.642 sec)
		2022-04-29 completed.
( 1.505 sec)
		2022-05-31 completed.
( 1.602 sec)
		2022-06-30 completed.
( 1.610 sec)
		2022-07-29 completed.
( 1.600 sec)
	

Unnamed: 0,티커,날짜,BPS,PER,PBR,시가총액
0,005760,2003-01-30,51.0,0.00,7.84,19269736800
1,015670,2003-01-30,312.0,0.00,1.86,18224951400
2,015675,2003-01-30,0.0,0.00,0.00,1668044900
3,004560,2003-01-30,4278.0,60.89,0.64,21320452380
4,004565,2003-01-30,0.0,0.00,0.00,417437600
...,...,...,...,...,...,...
499592,215050,2022-12-29,,,,1934959392
499593,288490,2022-12-29,,,,1831056000
499594,267060,2022-12-29,,,,1654666524
499595,267810,2022-12-29,,,,1572560000


In [100]:
fundamental.dropna(inplace = True)
fundamental

Unnamed: 0,티커,날짜,BPS,PER,PBR,시가총액
0,005760,2003-01-30,51.0,0.00,7.84,19269736800
1,015670,2003-01-30,312.0,0.00,1.86,18224951400
2,015675,2003-01-30,0.0,0.00,0.00,1668044900
3,004560,2003-01-30,4278.0,60.89,0.64,21320452380
4,004565,2003-01-30,0.0,0.00,0.00,417437600
...,...,...,...,...,...,...
207232,005010,2022-12-29,14549.0,5.65,0.35,284873540250
207233,000540,2022-12-29,10599.0,3.86,0.32,216497713650
207234,000547,2022-12-29,0.0,0.00,0.00,2488320000
207235,000545,2022-12-29,0.0,0.00,0.00,4723200000


In [103]:
stock_data_big.dropna(subset=['티커'], inplace = True)

In [190]:
df = pd.merge(stock_data_big, fundamental, how = 'left', on = ['티커', '날짜'])
df

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
0,060310,3S,2003-01-02,1037.0,1107.0,1007.0,1081.0,164270.0,7.135778,0.000000,,,,
1,060310,3S,2003-01-03,1095.0,1173.0,1095.0,1111.0,238965.0,2.775208,-2.180285,,,,
2,060310,3S,2003-01-06,1114.0,1132.0,1075.0,1119.0,144530.0,0.720072,-2.823614,,,,
3,060310,3S,2003-01-07,1138.0,1187.0,1095.0,1111.0,168800.0,-0.714924,-3.193958,,,,
4,060310,3S,2003-01-08,1101.0,1126.0,1074.0,1086.0,132570.0,-2.250225,-3.783407,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8477966,238490,힘스,2022-12-23,6050.0,6050.0,5950.0,6020.0,9421.0,-0.495868,-0.335925,,,,
8477967,238490,힘스,2022-12-26,5940.0,6070.0,5860.0,5860.0,24368.0,-2.657807,-2.491430,,,,
8477968,238490,힘스,2022-12-27,5940.0,5990.0,5860.0,5950.0,7276.0,1.535836,1.698196,,,,
8477969,238490,힘스,2022-12-28,5880.0,5970.0,5870.0,5940.0,6909.0,-0.168067,0.008601,,,,


In [191]:
drop_list = [ticker for ticker in df.티커.unique() if ticker not in fundamental.티커.unique()]
drop_list

['060310',
 '054620',
 '265520',
 '211270',
 '126600',
 '013720',
 '083790',
 '035760',
 '311690',
 '051500',
 '058820',
 '023460',
 '056730',
 '065770',
 '083660',
 '367340',
 '367360',
 '060900',
 '025440',
 '068790',
 '241520',
 '065150',
 '180400',
 '245620',
 '037370',
 '365550',
 '050120',
 '214270',
 '130500',
 '900290',
 '083450',
 '028150',
 '045890',
 '297890',
 '078150',
 '195940',
 '028300',
 '278650',
 '067630',
 '024850',
 '047920',
 '115450',
 '046210',
 '036640',
 '351340',
 '095340',
 '099520',
 '950170',
 '067290',
 '035900',
 '318000',
 '024840',
 '024120',
 '021320',
 '036670',
 '044180',
 '151860',
 '046440',
 '035600',
 '226360',
 '111870',
 '060720',
 '058400',
 '122450',
 '052900',
 '376190',
 '061970',
 '060370',
 '086960',
 '038340',
 '160550',
 '053290',
 '060250',
 '104200',
 '400760',
 '338100',
 '030190',
 '024940',
 '218410',
 '327260',
 '091340',
 '019550',
 '950110',
 '046140',
 '151910',
 '036120',
 '099220',
 '036540',
 '255220',
 '040610',
 '049470',

In [192]:
len(drop_list)

1595

In [193]:
df = df.loc[~df.티커.isin(drop_list)].reset_index(drop = True)
df

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
0,095570,AJ네트웍스,2015-08-21,6180.0,7300.0,5600.0,7100.0,1994022.0,,,,,,
1,095570,AJ네트웍스,2015-08-24,6700.0,6940.0,6240.0,6480.0,392831.0,-8.732394,0.000000,,,,
2,095570,AJ네트웍스,2015-08-25,6610.0,6730.0,6189.0,6220.0,242730.0,-4.012346,2.360024,,,,
3,095570,AJ네트웍스,2015-08-26,6260.0,7760.0,6260.0,7090.0,703721.0,13.987138,13.573006,,,,
4,095570,AJ네트웍스,2015-08-27,7239.0,8060.0,6960.0,7570.0,499913.0,6.770099,4.766974,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3785310,003280,흥아해운,2022-12-23,1505.0,1510.0,1450.0,1455.0,252745.0,-3.960396,-3.803694,,,,
3785311,003280,흥아해운,2022-12-26,1440.0,1445.0,1380.0,1410.0,226158.0,-3.092784,-2.906875,,,,
3785312,003280,흥아해운,2022-12-27,1410.0,1430.0,1375.0,1390.0,211506.0,-1.418440,-1.238906,,,,
3785313,003280,흥아해운,2022-12-28,1390.0,1410.0,1360.0,1385.0,146629.0,-0.359712,-0.171470,,,,


In [123]:
temp = df.loc[(df.티커 == '005930') & (df.날짜 <= '2003-12-31')]
temp

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
1781176,005930,삼성전자,2003-01-02,6380.0,6460.0,6300.0,6430.0,511660.0,2.388535,0.000000,,,,
1781177,005930,삼성전자,2003-01-03,6700.0,7000.0,6640.0,6880.0,1110675.0,6.998445,2.304955,,,,
1781178,005930,삼성전자,2003-01-06,6940.0,7030.0,6879.0,6990.0,499437.0,1.598837,-2.063102,,,,
1781179,005930,삼성전자,2003-01-07,7180.0,7180.0,6840.0,6850.0,577944.0,-2.002861,-4.248600,,,,
1781180,005930,삼성전자,2003-01-08,6920.0,6930.0,6770.0,6800.0,504232.0,-0.729927,-2.380533,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1781418,005930,삼성전자,2003-12-23,8880.0,8930.0,8840.0,8840.0,452067.0,0.000000,-0.167840,,,,
1781419,005930,삼성전자,2003-12-24,8900.0,8930.0,8710.0,8780.0,586749.0,-0.678733,-0.843104,,,,
1781420,005930,삼성전자,2003-12-26,8780.0,8840.0,8780.0,8830.0,439136.0,0.569476,0.403452,,,,
1781421,005930,삼성전자,2003-12-29,8800.0,8850.0,8760.0,8800.0,368876.0,-0.339751,-0.503719,158691.0,10.47,2.77,6.637541e+13


In [124]:
temp.loc[~temp.시가총액.isna()]

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
1781196,5930,삼성전자,2003-01-30,5860.0,5910.0,5790.0,5830.0,701387.0,0.0,0.312268,112952.0,16.69,2.58,44808970000000.0
1781216,5930,삼성전자,2003-02-28,5670.0,5740.0,5580.0,5590.0,409838.0,-1.410935,-1.165886,112952.0,16.01,2.47,42978130000000.0
1781237,5930,삼성전자,2003-03-31,5960.0,6000.0,5680.0,5680.0,928851.0,-7.491857,-7.371025,112952.0,16.26,2.51,43670090000000.0
1781259,5930,삼성전자,2003-04-30,6040.0,6130.0,5990.0,6100.0,640825.0,1.497504,1.491486,158691.0,7.26,1.92,46899210000000.0
1781278,5930,삼성전자,2003-05-30,6490.0,6510.0,6400.0,6480.0,463912.0,-0.154083,-0.221502,158691.0,7.71,2.04,48851790000000.0
1781298,5930,삼성전자,2003-06-30,7160.0,7210.0,7070.0,7100.0,312166.0,-1.66205,-1.797061,158691.0,8.45,2.24,53525880000000.0
1781320,5930,삼성전자,2003-07-31,8400.0,8490.0,8270.0,8320.0,412397.0,-1.304864,-1.531518,158691.0,9.9,2.62,62723290000000.0
1781340,5930,삼성전자,2003-08-29,8820.0,8840.0,8580.0,8700.0,531871.0,-0.684932,-0.913064,158691.0,10.36,2.74,65588050000000.0
1781359,5930,삼성전자,2003-09-30,7820.0,8000.0,7820.0,7840.0,841868.0,1.423027,1.271762,158691.0,9.33,2.47,59110570000000.0
1781381,5930,삼성전자,2003-10-31,9500.0,9510.0,9270.0,9400.0,491659.0,-0.212314,-0.437872,158691.0,11.19,2.96,70887670000000.0


In [194]:
for ticker in df.티커.unique() : 
    print(f'[{ticker}]')
    temp = df.loc[df.티커 == ticker]
    df.loc[(df.티커 == ticker), ['BPS', 'PER', 'PBR', '시가총액']] = temp.loc[:, ['BPS', 'PER', 'PBR', '시가총액']].ffill()
df
    

[095570]
[006840]
[027410]
[282330]
[138930]
[001460]
[001465]
[001040]
[079160]
[00104K]
[000120]
[011150]
[011155]
[001045]
[097950]
[097955]
[000480]
[000590]
[012030]
[016610]
[005830]
[000990]
[000995]
[139130]
[001530]
[000210]
[001880]
[000215]
[375500]
[37550K]
[007340]
[004840]
[155660]
[069730]
[017940]
[383220]
[007700]
[114090]
[078930]
[006360]
[001250]
[007070]
[078935]
[012630]
[039570]
[089470]
[294870]
[009540]
[267250]
[267270]
[322000]
[042670]
[267260]
[329180]
[097230]
[014790]
[003580]
[204320]
[060980]
[011200]
[082740]
[035000]
[003560]
[175330]
[234080]
[001060]
[001067]
[001065]
[096760]
[105560]
[002380]
[344820]
[009070]
[009440]
[119650]
[092220]
[003620]
[016380]
[016385]
[001390]
[033180]
[015590]
[001940]
[025000]
[092230]
[000040]
[044450]
[030200]
[033780]
[058850]
[058860]
[093050]
[003550]
[034220]
[051900]
[051905]
[003555]
[032640]
[011070]
[066570]
[066575]
[037560]
[051910]
[051915]
[079550]
[006260]
[010120]
[000680]
[229640]
[108320]
[001120]
[

[081660]
[005870]
[079980]
[005010]
[000540]
[000547]
[000545]
[003280]


Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
0,095570,AJ네트웍스,2015-08-21,6180.0,7300.0,5600.0,7100.0,1994022.0,,,,,,
1,095570,AJ네트웍스,2015-08-24,6700.0,6940.0,6240.0,6480.0,392831.0,-8.732394,0.000000,,,,
2,095570,AJ네트웍스,2015-08-25,6610.0,6730.0,6189.0,6220.0,242730.0,-4.012346,2.360024,,,,
3,095570,AJ네트웍스,2015-08-26,6260.0,7760.0,6260.0,7090.0,703721.0,13.987138,13.573006,,,,
4,095570,AJ네트웍스,2015-08-27,7239.0,8060.0,6960.0,7570.0,499913.0,6.770099,4.766974,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3785310,003280,흥아해운,2022-12-23,1505.0,1510.0,1450.0,1455.0,252745.0,-3.960396,-3.803694,428.0,13.77,3.80,3.906905e+11
3785311,003280,흥아해운,2022-12-26,1440.0,1445.0,1380.0,1410.0,226158.0,-3.092784,-2.906875,428.0,13.77,3.80,3.906905e+11
3785312,003280,흥아해운,2022-12-27,1410.0,1430.0,1375.0,1390.0,211506.0,-1.418440,-1.238906,428.0,13.77,3.80,3.906905e+11
3785313,003280,흥아해운,2022-12-28,1390.0,1410.0,1360.0,1385.0,146629.0,-0.359712,-0.171470,428.0,13.77,3.80,3.906905e+11


In [195]:
contain_list = df.loc[~df.시가총액.isna()]['티커'].unique()
contain_list

array(['095570', '006840', '027410', '282330', '138930', '001460',
       '001465', '001040', '079160', '00104K', '000120', '011150',
       '011155', '001045', '097950', '097955', '000480', '000590',
       '012030', '016610', '005830', '000990', '000995', '139130',
       '001530', '000210', '001880', '000215', '375500', '37550K',
       '007340', '004840', '155660', '069730', '017940', '383220',
       '007700', '114090', '078930', '006360', '001250', '007070',
       '078935', '012630', '039570', '089470', '294870', '009540',
       '267250', '267270', '322000', '042670', '267260', '329180',
       '097230', '014790', '003580', '204320', '060980', '011200',
       '082740', '035000', '003560', '175330', '234080', '001060',
       '001067', '001065', '096760', '105560', '002380', '344820',
       '009070', '009440', '119650', '092220', '003620', '016380',
       '016385', '001390', '033180', '015590', '001940', '025000',
       '092230', '000040', '044450', '030200', '033780', '0588

In [196]:
new_df = df.loc[df.티커.isin(contain_list)]
new_df

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
0,095570,AJ네트웍스,2015-08-21,6180.0,7300.0,5600.0,7100.0,1994022.0,,,,,,
1,095570,AJ네트웍스,2015-08-24,6700.0,6940.0,6240.0,6480.0,392831.0,-8.732394,0.000000,,,,
2,095570,AJ네트웍스,2015-08-25,6610.0,6730.0,6189.0,6220.0,242730.0,-4.012346,2.360024,,,,
3,095570,AJ네트웍스,2015-08-26,6260.0,7760.0,6260.0,7090.0,703721.0,13.987138,13.573006,,,,
4,095570,AJ네트웍스,2015-08-27,7239.0,8060.0,6960.0,7570.0,499913.0,6.770099,4.766974,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3785310,003280,흥아해운,2022-12-23,1505.0,1510.0,1450.0,1455.0,252745.0,-3.960396,-3.803694,428.0,13.77,3.80,3.906905e+11
3785311,003280,흥아해운,2022-12-26,1440.0,1445.0,1380.0,1410.0,226158.0,-3.092784,-2.906875,428.0,13.77,3.80,3.906905e+11
3785312,003280,흥아해운,2022-12-27,1410.0,1430.0,1375.0,1390.0,211506.0,-1.418440,-1.238906,428.0,13.77,3.80,3.906905e+11
3785313,003280,흥아해운,2022-12-28,1390.0,1410.0,1360.0,1385.0,146629.0,-0.359712,-0.171470,428.0,13.77,3.80,3.906905e+11


In [197]:
new_df.to_pickle('C:/Users/kim56/anaconda3/Find-A/quant/Data/fundamental_data.p')
data = pd.read_pickle('C:/Users/kim56/anaconda3/Find-A/quant/Data/fundamental_data.p')
data

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
0,095570,AJ네트웍스,2015-08-21,6180.0,7300.0,5600.0,7100.0,1994022.0,,,,,,
1,095570,AJ네트웍스,2015-08-24,6700.0,6940.0,6240.0,6480.0,392831.0,-8.732394,0.000000,,,,
2,095570,AJ네트웍스,2015-08-25,6610.0,6730.0,6189.0,6220.0,242730.0,-4.012346,2.360024,,,,
3,095570,AJ네트웍스,2015-08-26,6260.0,7760.0,6260.0,7090.0,703721.0,13.987138,13.573006,,,,
4,095570,AJ네트웍스,2015-08-27,7239.0,8060.0,6960.0,7570.0,499913.0,6.770099,4.766974,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3785310,003280,흥아해운,2022-12-23,1505.0,1510.0,1450.0,1455.0,252745.0,-3.960396,-3.803694,428.0,13.77,3.80,3.906905e+11
3785311,003280,흥아해운,2022-12-26,1440.0,1445.0,1380.0,1410.0,226158.0,-3.092784,-2.906875,428.0,13.77,3.80,3.906905e+11
3785312,003280,흥아해운,2022-12-27,1410.0,1430.0,1375.0,1390.0,211506.0,-1.418440,-1.238906,428.0,13.77,3.80,3.906905e+11
3785313,003280,흥아해운,2022-12-28,1390.0,1410.0,1360.0,1385.0,146629.0,-0.359712,-0.171470,428.0,13.77,3.80,3.906905e+11


In [180]:
df.loc[(df.티커 == '003280') & ~(df.시가총액.isna())]

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
3780388,003280,흥아해운,2003-01-30,2903.0,2981.0,2903.0,3024.0,730.0,2.997275,2.549912,730.0,1.17,9.18,5.628000e+09
3780389,003280,흥아해운,2003-02-03,3204.0,3204.0,2957.0,3067.0,3343.0,1.421958,0.930294,730.0,1.17,9.18,5.628000e+09
3780390,003280,흥아해운,2003-02-04,2911.0,3109.0,2911.0,3111.0,3211.0,1.434627,0.901965,730.0,1.17,9.18,5.628000e+09
3780391,003280,흥아해운,2003-02-05,3181.0,3567.0,2958.0,3573.0,55996.0,14.850530,13.721291,730.0,1.17,9.18,5.628000e+09
3780392,003280,흥아해운,2003-02-06,3613.0,3613.0,3072.0,3078.0,44151.0,-13.853904,-14.383818,730.0,1.17,9.18,5.628000e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3780610,003280,흥아해운,2003-12-23,5735.0,5917.0,5675.0,5695.0,2923.0,-2.997786,-3.374053,17229.0,0.70,0.67,9.702000e+09
3780611,003280,흥아해운,2003-12-24,5618.0,5618.0,5346.0,5420.0,5288.0,-4.828797,-5.183732,17229.0,0.70,0.67,9.702000e+09
3780612,003280,흥아해운,2003-12-26,5218.0,5664.0,5218.0,5552.0,1174.0,2.435424,2.071998,17229.0,0.70,0.67,9.702000e+09
3780613,003280,흥아해운,2003-12-29,5394.0,5562.0,5394.0,5552.0,5713.0,0.000000,-0.361949,17229.0,0.75,0.71,1.033200e+10


In [58]:
fundamental.to_pickle('C:/Users/kim56/anaconda3/Find-A/quant/Data/fundamental.p')

In [60]:
data = pd.read_pickle('C:/Users/kim56/anaconda3/Find-A/quant/Data/fundamental.p')
data

Unnamed: 0,티커,날짜,BPS,PER,PBR,시가총액
0,005760,2003-01-30,51,0.00,7.84,19269736800
1,015670,2003-01-30,312,0.00,1.86,18224951400
2,015675,2003-01-30,0,0.00,0.00,1668044900
3,004560,2003-01-30,4278,60.89,0.64,21320452380
4,004565,2003-01-30,0,0.00,0.00,417437600
...,...,...,...,...,...,...
207232,005010,2022-12-29,14549,5.65,0.35,284873540250
207233,000540,2022-12-29,10599,3.86,0.32,216497713650
207234,000547,2022-12-29,0,0.00,0.00,2488320000
207235,000545,2022-12-29,0,0.00,0.00,4723200000


In [61]:
stock_data = pd.read_csv('C:/Users/kim56/anaconda3/Find-A/quant/Data/kospi200_data.csv', index_col = 0, parse_dates=['날짜'], dtype = {'티커' : 'str'})
stock_data

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM
0,282330,BGF리테일,2017-12-08,149500.0,194000.0,149500.0,194000.0,68343.0,,
1,282330,BGF리테일,2017-12-11,222000.0,249000.0,214000.0,248500.0,684631.0,28.092784,0.000000
2,282330,BGF리테일,2017-12-12,242500.0,246500.0,222500.0,235000.0,286513.0,-5.432596,-16.762690
3,282330,BGF리테일,2017-12-13,235000.0,242500.0,228000.0,235000.0,94609.0,0.000000,-7.553396
4,282330,BGF리테일,2017-12-14,236000.0,236500.0,221500.0,234000.0,118532.0,-0.425532,-5.984196
...,...,...,...,...,...,...,...,...,...,...
744498,145020,휴젤,2022-12-23,138800.0,140000.0,136600.0,138300.0,41991.0,-1.425517,-1.427121
744499,145020,휴젤,2022-12-26,138200.0,142400.0,138000.0,140500.0,45049.0,1.590745,1.589598
744500,145020,휴젤,2022-12-27,141500.0,142500.0,139900.0,141900.0,42388.0,0.996441,0.990572
744501,145020,휴젤,2022-12-28,141200.0,141800.0,138300.0,138500.0,47687.0,-2.396054,-2.401136


In [86]:
df= pd.merge(stock_data, fundamental, how = 'left', on = ['티커', '날짜']).drop_duplicates()
df

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
0,282330,BGF리테일,2017-12-08,149500.0,194000.0,149500.0,194000.0,68343.0,,,,,,
1,282330,BGF리테일,2017-12-11,222000.0,249000.0,214000.0,248500.0,684631.0,28.092784,0.000000,,,,
2,282330,BGF리테일,2017-12-12,242500.0,246500.0,222500.0,235000.0,286513.0,-5.432596,-16.762690,,,,
3,282330,BGF리테일,2017-12-13,235000.0,242500.0,228000.0,235000.0,94609.0,0.000000,-7.553396,,,,
4,282330,BGF리테일,2017-12-14,236000.0,236500.0,221500.0,234000.0,118532.0,-0.425532,-5.984196,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
744498,145020,휴젤,2022-12-23,138800.0,140000.0,136600.0,138300.0,41991.0,-1.425517,-1.427121,,,,
744499,145020,휴젤,2022-12-26,138200.0,142400.0,138000.0,140500.0,45049.0,1.590745,1.589598,,,,
744500,145020,휴젤,2022-12-27,141500.0,142500.0,139900.0,141900.0,42388.0,0.996441,0.990572,,,,
744501,145020,휴젤,2022-12-28,141200.0,141800.0,138300.0,138500.0,47687.0,-2.396054,-2.401136,,,,


In [88]:
drop_list = []
for ticker in df.티커.unique() : 
    if ticker not in fundamental.티커.unique() : 
        drop_list.append(ticker)

In [89]:
df.loc[~df.티커.isin(drop_list)]

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM,BPS,PER,PBR,시가총액
0,282330,BGF리테일,2017-12-08,149500.0,194000.0,149500.0,194000.0,68343.0,,,,,,
1,282330,BGF리테일,2017-12-11,222000.0,249000.0,214000.0,248500.0,684631.0,28.092784,0.000000,,,,
2,282330,BGF리테일,2017-12-12,242500.0,246500.0,222500.0,235000.0,286513.0,-5.432596,-16.762690,,,,
3,282330,BGF리테일,2017-12-13,235000.0,242500.0,228000.0,235000.0,94609.0,0.000000,-7.553396,,,,
4,282330,BGF리테일,2017-12-14,236000.0,236500.0,221500.0,234000.0,118532.0,-0.425532,-5.984196,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
742766,081660,휠라홀딩스,2022-12-23,34100.0,35000.0,33900.0,34450.0,360537.0,1.472754,1.464576,,,,
742767,081660,휠라홀딩스,2022-12-26,34450.0,34600.0,34000.0,34500.0,138323.0,0.145138,0.132558,,,,
742768,081660,휠라홀딩스,2022-12-27,34500.0,35000.0,34300.0,34950.0,316271.0,1.304348,1.273898,,,,
742769,081660,휠라홀딩스,2022-12-28,34050.0,34250.0,33500.0,33550.0,180426.0,-4.005722,-4.021987,,,,


In [90]:
stock.get_market_cap('20221228', '20221229', '145020')

Unnamed: 0_level_0,시가총액,거래량,거래대금,상장주식수
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-12-28,1715385517500,47687,6659683700,12385455
2022-12-29,1670797879500,49630,6670294300,12385455


In [91]:
fundamental.loc[fundamental.티커 == '145020']

Unnamed: 0,티커,날짜,BPS,PER,PBR,시가총액


In [65]:
stock_data_big = pd.read_csv('C:/Users/kim56/anaconda3/Find-A/quant/Data/mom_data.csv', index_col = 0, parse_dates=['날짜'], dtype = {'티커' : 'str'})
stock_data_big

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM
0,060310,3S,2003-01-02,1037.0,1107.0,1007.0,1081.0,164270.0,7.135778,0.000000
1,060310,3S,2003-01-03,1095.0,1173.0,1095.0,1111.0,238965.0,2.775208,-2.180285
2,060310,3S,2003-01-06,1114.0,1132.0,1075.0,1119.0,144530.0,0.720072,-2.823614
3,060310,3S,2003-01-07,1138.0,1187.0,1095.0,1111.0,168800.0,-0.714924,-3.193958
4,060310,3S,2003-01-08,1101.0,1126.0,1074.0,1086.0,132570.0,-2.250225,-3.783407
...,...,...,...,...,...,...,...,...,...,...
4942,238490,힘스,2022-12-23,6050.0,6050.0,5950.0,6020.0,9421.0,-0.495868,-0.335925
4943,238490,힘스,2022-12-26,5940.0,6070.0,5860.0,5860.0,24368.0,-2.657807,-2.491430
4944,238490,힘스,2022-12-27,5940.0,5990.0,5860.0,5950.0,7276.0,1.535836,1.698196
4945,238490,힘스,2022-12-28,5880.0,5970.0,5870.0,5940.0,6909.0,-0.168067,0.008601


In [67]:
stock_data_big.to_pickle('C:/Users/kim56/anaconda3/Find-A/quant/Data/mom_data.p')
data = pd.read_pickle('C:/Users/kim56/anaconda3/Find-A/quant/Data/mom_data.p')
data

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM
0,060310,3S,2003-01-02,1037.0,1107.0,1007.0,1081.0,164270.0,7.135778,0.000000
1,060310,3S,2003-01-03,1095.0,1173.0,1095.0,1111.0,238965.0,2.775208,-2.180285
2,060310,3S,2003-01-06,1114.0,1132.0,1075.0,1119.0,144530.0,0.720072,-2.823614
3,060310,3S,2003-01-07,1138.0,1187.0,1095.0,1111.0,168800.0,-0.714924,-3.193958
4,060310,3S,2003-01-08,1101.0,1126.0,1074.0,1086.0,132570.0,-2.250225,-3.783407
...,...,...,...,...,...,...,...,...,...,...
4942,238490,힘스,2022-12-23,6050.0,6050.0,5950.0,6020.0,9421.0,-0.495868,-0.335925
4943,238490,힘스,2022-12-26,5940.0,6070.0,5860.0,5860.0,24368.0,-2.657807,-2.491430
4944,238490,힘스,2022-12-27,5940.0,5990.0,5860.0,5950.0,7276.0,1.535836,1.698196
4945,238490,힘스,2022-12-28,5880.0,5970.0,5870.0,5940.0,6909.0,-0.168067,0.008601


In [182]:
stock.get_market_fundamental('20210101', '20210301', '00380')

In [70]:
stock_data.to_pickle('C:/Users/kim56/anaconda3/Find-A/quant/Data/kospi200_data.p')
data = pd.read_pickle('C:/Users/kim56/anaconda3/Find-A/quant/Data/kospi200_data.p')
data

Unnamed: 0,티커,종목명,날짜,시가,고가,저가,종가,거래량,등락률,ABS_MOM
0,282330,BGF리테일,2017-12-08,149500.0,194000.0,149500.0,194000.0,68343.0,,
1,282330,BGF리테일,2017-12-11,222000.0,249000.0,214000.0,248500.0,684631.0,28.092784,0.000000
2,282330,BGF리테일,2017-12-12,242500.0,246500.0,222500.0,235000.0,286513.0,-5.432596,-16.762690
3,282330,BGF리테일,2017-12-13,235000.0,242500.0,228000.0,235000.0,94609.0,0.000000,-7.553396
4,282330,BGF리테일,2017-12-14,236000.0,236500.0,221500.0,234000.0,118532.0,-0.425532,-5.984196
...,...,...,...,...,...,...,...,...,...,...
744498,145020,휴젤,2022-12-23,138800.0,140000.0,136600.0,138300.0,41991.0,-1.425517,-1.427121
744499,145020,휴젤,2022-12-26,138200.0,142400.0,138000.0,140500.0,45049.0,1.590745,1.589598
744500,145020,휴젤,2022-12-27,141500.0,142500.0,139900.0,141900.0,42388.0,0.996441,0.990572
744501,145020,휴젤,2022-12-28,141200.0,141800.0,138300.0,138500.0,47687.0,-2.396054,-2.401136


In [37]:
cap = get_cap('20061201', '20070201')
cap

	Start to Load the Market Equity Data ...
		2006-12-28 completed.
		2007-01-31 completed.


Unnamed: 0,티커,시가총액,날짜
0,005930,90294493581000,2006-12-28
1,015760,27202470988800,2006-12-28
2,005490,26940732015000,2006-12-28
3,060000,25194795788400,2006-12-28
4,055550,18124461665000,2006-12-28
...,...,...,...
1865,007595,49625000,2007-01-31
1866,002365,43664400,2007-01-31
1867,012805,34240000,2007-01-31
1868,001385,24651000,2007-01-31


Unnamed: 0,티커,날짜,BPS,PER,PBR,시가총액
0,005760,2006-12-28,5624,7.99,1.04,35740643960
1,015670,2006-12-28,502,0.00,0.98,16627469880
2,015675,2006-12-28,0,0.00,0.00,145098195
3,004560,2006-12-28,13854,0.00,0.52,108567439200
4,004565,2006-12-28,0,0.00,0.00,618466760
...,...,...,...,...,...,...
1661,069260,2007-01-31,7484,22.19,1.15,183740743160
1662,000540,2007-01-31,5676,0.00,0.89,153340420190
1663,000545,2007-01-31,0,0.00,0.00,2810880000
1664,000547,2007-01-31,0,0.00,0.00,1121280000


In [92]:
test = get_cap('20221101', '20230101')
test

	Start to Load the Market Equity Data ...
		2022-11-30 completed.
( 1.703 sec)
		2022-12-29 completed.
( 1.532 sec)


Unnamed: 0,티커,시가총액,날짜
0,005930,371320474610000,2022-11-30
1,373220,137358000000000,2022-11-30
2,207940,63131338000000,2022-11-30
3,000660,61880201025000,2022-11-30
4,051910,52238333820000,2022-11-30
...,...,...,...
2685,215050,1934959392,2022-12-29
2686,288490,1831056000,2022-12-29
2687,267060,1654666524,2022-12-29
2688,267810,1572560000,2022-12-29


In [94]:
test.loc[test.티커 == '145020']

Unnamed: 0,티커,시가총액,날짜
191,145020,1415657506500,2022-11-30
161,145020,1670797879500,2022-12-29


In [11]:
get_fundamental_m('20210101', '20210301') 

	Start to Load the Book Equity Data ...
		2021-01-29 completed.
		2021-02-26 completed.
	Start to Load the Market Equity Data ...
		2021-01-29 completed.
		2021-02-26 completed.


Unnamed: 0,티커,BE,ME,BE/ME,날짜
0,095570,3.182329e+11,187757402950,1.694915,2021-01-29
1,006840,8.337324e+11,383516890950,2.173913,2021-01-29
2,027410,1.498838e+12,659488689990,2.272727,2021-01-29
3,282330,6.225136e+11,2938264020000,0.211864,2021-01-29
4,138930,8.163196e+12,1795903205460,4.545455,2021-01-29
...,...,...,...,...,...
1542,079980,3.926197e+11,278760000000,1.408451,2021-02-26
1543,005010,3.976717e+11,67604195160,5.882353,2021-02-26
1544,069260,6.546291e+11,936119665200,0.699301,2021-02-26
1545,000540,6.962297e+11,278491866075,2.500000,2021-02-26


## Load Stock Price & Factor Data At Once

In [8]:
def get_stock_market_data(start, end):
    
    """ 특정 기간 동안의 KOSPI 시장 내의 모든 종목별 주가 데이터와 재무 데이터를 검색 """
    
    stock = get_all_stock_m(start, end)
    fundamental = get_fundamental_m(start, end)
    results =  pd.merge(stock, fundamental, on = ['티커', '날짜'])[['티커', '종목명', '날짜', '종가', 'BE', 'ME', 'BE/ME']]
    
    return results
    

In [104]:
stock_data = get_stock_market_data(start, end)
stock_data

Unnamed: 0,티커,종목명,날짜,종가,BE,ME,BE/ME
0,095570,AJ네트웍스,2021-08-31,5910,2.852781e+11,276719763450,1.030928
1,095570,AJ네트웍스,2021-09-30,5840,2.848356e+11,273442202800,1.041667
2,095570,AJ네트웍스,2021-10-29,5590,2.844963e+11,261736629050,1.086957
3,095570,AJ네트웍스,2021-11-30,5350,2.846583e+11,250499278250,1.136364
4,095570,AJ네트웍스,2021-12-29,5460,2.840553e+11,255649730700,1.111111
...,...,...,...,...,...,...,...
18894,003280,흥아해운,2023-03-31,1379,1.029646e+11,331545935721,0.310559
18895,003280,흥아해운,2023-04-28,1306,1.029492e+11,313994918094,0.327869
18896,003280,흥아해운,2023-05-31,1352,1.305440e+11,325054463448,0.401606
18897,003280,흥아해운,2023-06-30,2100,1.308011e+11,504892287900,0.259067


In [106]:
stock_data.to_csv('./ff3_factor_data.csv') # csv 파일로 변환하여 로컬 저장소에 저장.

## 10년치 데이터 불러오기

- pykrx가 사설 라이브러리이기 때문에 과도하게 데이터를 불러올 시 krx 정보시스템에서 사용자를 차단하는 경우가 발생. 
- 이를 방지하기 위해 중간에 time.sleep(5)를 이용하여 5초 간격으로 1년치 데이터들을 불러올 예정임.
- 위에서 2년치 데이터의 경우 한번에 불러와도 차단하지 않은 것을 확인함. 

In [7]:
# 특정기간 동안의 년도별 start와 end를 구하는 함수

def get_start_end_date(start_year : int, end_year : int):
    
    length = end_year - start_year + 1
    
    results = {}
    
    for year in range(start_year, end_year):
        start_date = str(year)+'0101'
        end_date = str(year) + '1231'
        results[year] = [start_date, end_date]
    return results 

In [8]:
# 특정 기간 동안 년도별로 get_stock_market_data() 함수를 실행하여 제공하는 함수
def get_all_stock_market_data(start_year: int, end_year : int):
    
    results = pd.DataFrame() #pd.DataFrame(columns = ['티커', '종목명', '날짜', '종가', 'BE', 'ME', 'BE/ME'])
    
    date_index_dict = get_start_end_date(start_year, end_year)
    
    for year, values in date_index_dict.items() : 
        print('-'*50)
        print(f'* {year} year starts...')
        print('-'*50)
        start, end = values[0], values[1]
        stock_data = get_stock_market_data(start, end)
        
        results = pd.concat([results, stock_data], axis = 0)
        print('-'*50)
        if year == end_year - 1:
            print(f'* {year} year ended.')
        else : 
            print(f'* {year} year ended. Next is {year + 1} ...')
        time.sleep(5)
        
    return results
        

In [9]:
start_year = 2003
end_year = 2023
stock_datas = get_all_stock_market_data(start_year, end_year)
stock_datas

--------------------------------------------------
* 2003 year starts...
--------------------------------------------------
	Start to Load the Stock Price Data ...
		2003-01-30 completed.
		2003-02-28 completed.
		2003-03-31 completed.
		2003-04-30 completed.
		2003-05-30 completed.
		2003-06-30 completed.
		2003-07-31 completed.
		2003-08-29 completed.
		2003-09-30 completed.
		2003-10-31 completed.
		2003-11-28 completed.
		2003-12-29 completed.
	Start to Load the Book Equity Data ...
		2003-01-30 completed.
		2003-02-28 completed.
		2003-03-31 completed.
		2003-04-30 completed.
		2003-05-30 completed.
		2003-06-30 completed.
		2003-07-31 completed.
		2003-08-29 completed.
		2003-09-30 completed.
		2003-10-31 completed.
		2003-11-28 completed.
		2003-12-29 completed.
	Start to Load the Market Equity Data ...
		2003-01-30 completed.
		2003-02-28 completed.
		2003-03-31 completed.
		2003-04-30 completed.
		2003-05-30 completed.
		2003-06-30 completed.
		2003-07-31 completed.
		2003-08-

		2009-09-30 completed.
		2009-10-30 completed.
		2009-11-30 completed.
		2009-12-29 completed.
--------------------------------------------------
* 2009 year ended. Next is 2010 ...
--------------------------------------------------
* 2010 year starts...
--------------------------------------------------
	Start to Load the Stock Price Data ...
		2010-01-29 completed.
		2010-02-26 completed.
		2010-03-31 completed.
		2010-04-30 completed.
		2010-05-31 completed.
		2010-06-30 completed.
		2010-07-30 completed.
		2010-08-31 completed.
		2010-09-30 completed.
		2010-10-29 completed.
		2010-11-30 completed.
		2010-12-29 completed.
	Start to Load the Book Equity Data ...
		2010-01-29 completed.
		2010-02-26 completed.
		2010-03-31 completed.
		2010-04-30 completed.
		2010-05-31 completed.
		2010-06-30 completed.
		2010-07-30 completed.
		2010-08-31 completed.
		2010-09-30 completed.
		2010-10-29 completed.
		2010-11-30 completed.
		2010-12-29 completed.
	Start to Load the Market Equity Data

		2016-02-29 completed.
		2016-03-31 completed.
		2016-04-29 completed.
		2016-05-31 completed.
		2016-06-30 completed.
		2016-07-29 completed.
		2016-08-31 completed.
		2016-09-30 completed.
		2016-10-31 completed.
		2016-11-30 completed.
		2016-12-29 completed.
--------------------------------------------------
* 2016 year ended. Next is 2017 ...
--------------------------------------------------
* 2017 year starts...
--------------------------------------------------
	Start to Load the Stock Price Data ...
		2017-01-31 completed.
		2017-02-28 completed.
		2017-03-31 completed.
		2017-04-28 completed.
		2017-05-31 completed.
		2017-06-30 completed.
		2017-07-31 completed.
		2017-08-31 completed.
		2017-09-29 completed.
		2017-10-31 completed.
		2017-11-30 completed.
		2017-12-29 completed.
	Start to Load the Book Equity Data ...
		2017-01-31 completed.
		2017-02-28 completed.
		2017-03-31 completed.
		2017-04-28 completed.
		2017-05-31 completed.
		2017-06-30 completed.
		2017-07-31 

Unnamed: 0,티커,종목명,날짜,종가,BE,ME,BE/ME
0,005760,위너스인프라인,2003-01-30,400,2.457875e+09,19269736800,0.127551
1,005760,위너스인프라인,2003-02-28,430,2.327524e+10,20714967060,1.123596
2,005760,위너스인프라인,2003-03-31,400,2.349968e+10,19269736800,1.219512
3,005760,위너스인프라인,2003-04-30,405,2.322691e+10,19510608510,1.190476
4,005760,위너스인프라인,2003-05-30,415,2.324692e+10,19992351930,1.162791
...,...,...,...,...,...,...,...
9473,403550,쏘카,2022-11-30,19150,1.716976e+11,626696235800,0.273973
9474,403550,쏘카,2022-12-29,21750,1.715140e+11,711782931000,0.240964
9475,108320,LX세미콘,2022-11-30,87700,8.013366e+11,1426379110000,0.561798
9476,108320,LX세미콘,2022-12-29,71700,7.987331e+11,1166150310000,0.684932


In [10]:
stock_datas = stock_datas.sort_values(by = ['종목명', '날짜'], ignore_index= True)
stock_datas

Unnamed: 0,티커,종목명,날짜,종가,BE,ME,BE/ME
0,095570,AJ네트웍스,2015-08-31,33500,2.224889e+11,313709376500,0.709220
1,095570,AJ네트웍스,2015-09-30,43850,2.219630e+11,410631527150,0.540541
2,095570,AJ네트웍스,2015-10-30,47400,2.219377e+11,443875356600,0.500000
3,095570,AJ네트웍스,2015-11-30,42100,2.214852e+11,394243723900,0.561798
4,095570,AJ네트웍스,2015-12-29,46550,2.224059e+11,435915566450,0.510204
...,...,...,...,...,...,...,...
168901,003280,흥아해운,2022-08-31,1705,1.029961e+11,409924452795,0.251256
168902,003280,흥아해운,2022-09-30,1230,1.030392e+11,295722625770,0.348432
168903,003280,흥아해운,2022-10-31,1420,1.028323e+11,341403356580,0.301205
168904,003280,흥아해운,2022-11-30,1625,1.028133e+11,390690460875,0.263158


In [11]:
stock_datas.to_csv('./ff3_factor_datas.csv') # csv 파일로 변환하여 로컬 저장소에 저장.

In [12]:
ss_adjx = stock.get_market_ohlcv('20180101', '20190101', '005930', adjusted = False)
ss_adjo = stock.get_market_ohlcv('20180101', '20190101', '005930')

In [5]:
ss_fd = stock.get_market_fundamental('20180101', '20190101', '005930')

In [6]:
ss_fd # 여기서 pbr = 수정x주가 / 수정xBPS 이다. 따라서 모든 행에 수정주가 / 수정x주가 를 곱해주면 표준화된 pbr을 얻는다. 

Unnamed: 0_level_0,BPS,PER,PBR,EPS,DIV,DPS
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,1156530,16.15,2.21,157967,1.12,28500
2018-01-03,1156530,16.34,2.23,157967,1.10,28500
2018-01-04,1156530,16.17,2.21,157967,1.12,28500
2018-01-05,1156530,16.50,2.25,157967,1.09,28500
2018-01-08,1156530,16.47,2.25,157967,1.10,28500
...,...,...,...,...,...,...
2018-12-21,28126,6.44,1.37,5997,2.20,850
2018-12-24,28126,6.47,1.38,5997,2.19,850
2018-12-26,28126,6.39,1.36,5997,2.22,850
2018-12-27,28126,6.38,1.36,5997,2.22,850


In [35]:
#네이버금융 PBR 산정 기준 : 보통주수정주가 / BPS
std_coef = ss_adjo['종가'] / ss_adjx['종가']

adj_pbr = ss_adjo['종가'] / (ss_fd['BPS'] * std_coef* 50)
