## Preliminary Settings

In [1]:
import sys
# Record the interpreter version and build this notebook was run with.
print(sys.version)

3.11.7 | packaged by Anaconda, Inc. | (main, Dec 15 2023, 18:05:47) [MSC v.1916 64 bit (AMD64)]


In [2]:
import os
import time
from datetime import datetime

import numpy as np
import pandas as pd
import yfinance as yf
import FinanceDataReader as fdr
from pykrx import bond
from pykrx import stock
from tqdm.auto import tqdm

In [3]:
# Current local date rendered as 'YYYY-MM-DD'; used further down as the end
# of the Korean-market collection window.
today = f'{datetime.today():%Y-%m-%d}'
print(today)

2025-01-25


## US Stock Market Data

In [4]:
# Get the list of tickers.

def get_tickers(save=True, path='./data/tickers/'):
    """Download the NYSE and NASDAQ listings and merge them into one table.

    Parameters
    ----------
    save : bool, default True
        When True, write the merged table to ``ticker_list_<YYYY-MM-DD>.csv``
        inside ``path`` (the date is the current local date).
    path : str, default './data/tickers/'
        Directory that receives the CSV. It is created when missing, and a
        trailing slash is optional.

    Returns
    -------
    pandas.DataFrame
        The NYSE listing followed by the NASDAQ listing, without the
        'IndustryCode' / 'Industry' columns and with a 'Market' column holding
        'NYSE' or 'NASDAQ'. The index is reset to 0..n-1.
    """
    print('Downloading list of tickers of firms listed in NYSE and NASDAQ.')

    listings = []
    for market_name in ('NYSE', 'NASDAQ'):
        listing = (
            fdr.StockListing(market_name)
            # errors='ignore': a listing that lacks these columns is still usable.
            .drop(columns=['IndustryCode', 'Industry'], errors='ignore')
            .assign(Market=market_name)
        )
        listings.append(listing)

    tickers = pd.concat(listings, axis=0).reset_index(drop=True)

    if save:
        today = datetime.today().strftime('%Y-%m-%d')
        # Create the target directory on first use; an empty path means the
        # current working directory, which needs no creation.
        if path:
            os.makedirs(path, exist_ok=True)
        # os.path.join (rather than string concatenation) keeps the file name
        # intact whether or not `path` ends with a separator.
        tickers.to_csv(os.path.join(path, f'ticker_list_{today}.csv'), index=False)
        print('Successfully saved ticker list in the local directory.')

    return tickers

In [5]:
# Fetch the combined NYSE + NASDAQ listing; with the default save=True this
# also writes a dated CSV under ./data/tickers/.
tickers = get_tickers()

Downloading list of tickers of firms listed in NYSE and NASDAQ.


100%|█████████████████████████████████████████████████████████████████████████████| 2745/2745 [00:04<00:00, 618.30it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 3652/3652 [00:06<00:00, 562.44it/s]


Successfully saved ticker list in the local directory.


In [None]:
# Get the market data for collected tickers.

print('Downloading market data. ')
tickers_list = list(tickers.Symbol)
# auto_adjust=False is passed explicitly so the result always carries the
# separate 'Adj Close' column selected in the next cell; when yfinance
# auto-adjusts prices it omits that column.
market_data = yf.download(tickers_list, auto_adjust=False)

In [None]:
# Inspect the 'Adj Close' columns of the downloaded market data.
market_data[['Adj Close']]

In [13]:
# Number of symbols handed to the downloader.
len(tickers_list)

6397

## Korea Exchange (KRX) Market Data

In [45]:
# Collection window: every calendar day from START_DATE up to the date the
# notebook was run (`today`).
START_DATE = '1990-12-01'
END_DATE = today

# 'D' is the documented pandas offset alias for calendar-day frequency.
DATE_RANGE = pd.date_range(START_DATE, END_DATE, freq='D')
market = 'KOSPI'

In [98]:
def collect_korea_market_data(date_range, market='KOSPI'):
    """Collect per-ticker daily OHLCV tables for a market, one request per date.

    For every date in ``date_range`` the market snapshot is requested through
    ``stock.get_market_ohlcv_by_ticker``. A date whose closing prices sum to
    zero is treated as a non-trading day. Such dates, and any date whose
    response raises ``KeyError``, are appended to
    ``./data/<market in lower case>-market/error_log.txt`` and skipped.

    Parameters
    ----------
    date_range : iterable
        Dates to query, in the order they should appear as columns. The first
        collected date is formatted with ``strftime`` for the progress message.
    market : str, default 'KOSPI'
        Market identifier forwarded to pykrx; its lower-cased form also names
        the log directory.

    Returns
    -------
    tuple of six pandas.DataFrame
        ``(open, high, low, close, volume, trade_value)``. Each frame has one
        row per ticker and one column per collected date; a ticker that is
        absent on a date holds NaN there. All six frames are empty when no
        date yielded data.
    """
    # Source column of the pykrx frame behind each returned table, in return order.
    fields = ('시가', '고가', '저가', '종가', '거래량', '거래대금')
    # Per-field list of daily Series; concatenated once after the loop instead
    # of re-copying an ever-growing DataFrame on every iteration.
    collected = {field: [] for field in fields}
    log_dir = f'./data/{market.lower()}-market'

    for date in tqdm(date_range):
        try:
            ohlcv = stock.get_market_ohlcv_by_ticker(date, market=market)

            if ohlcv['종가'].sum() == 0:
                raise KeyError('Not a trading day.')

            # Extract all six columns before storing any of them, so a missing
            # column cannot leave the tables with different sets of dates.
            day = [ohlcv[field].rename(date) for field in fields]

        except KeyError as e:
            # The log directory may not exist yet on a fresh checkout.
            os.makedirs(log_dir, exist_ok=True)
            with open(f'{log_dir}/error_log.txt', 'a', encoding='utf-8') as f:
                # Record a date that prints error.
                f.write(f'Download Error: {date} with error msg: {e}\n')
            time.sleep(0.3)
            continue

        if not collected[fields[0]]:
            print(f'Collection initialized: {date.strftime("%Y-%m-%d")}.')

        for field, series in zip(fields, day):
            collected[field].append(series)

        # Pause between successful requests to avoid hammering the data source.
        time.sleep(1)

    # Column-wise outer join of the daily Series; an empty frame stands in for
    # a field when nothing was collected at all.
    return tuple(
        pd.concat(collected[field], axis=1) if collected[field] else pd.DataFrame()
        for field in fields
    )

In [99]:
# Collect KOSPI data for every date in DATE_RANGE; the six results are the
# open, high, low, close, volume and trade-value tables, in that order.
o_ks, h_ks, l_ks, c_ks, v_ks, w_ks = collect_korea_market_data(DATE_RANGE, market='KOSPI')

  0%|          | 0/12472 [00:00<?, ?it/s]

Collection initialized: 1995-05-02.


In [100]:
# Collect KOSDAQ data for every date in DATE_RANGE; the six results are the
# open, high, low, close, volume and trade-value tables, in that order.
o_kq, h_kq, l_kq, c_kq, v_kq, w_kq = collect_korea_market_data(DATE_RANGE, market='KOSDAQ')

  0%|          | 0/12472 [00:00<?, ?it/s]

Collection initialized: 1996-07-01.


In [101]:
# Reshape each wide KOSPI table to long form with .stack() and place the six
# fields side by side as named columns.
kospi = pd.concat(
    [
        wide.stack().rename(label)
        for label, wide in (
            ('open', o_ks),
            ('high', h_ks),
            ('low', l_ks),
            ('close', c_ks),
            ('volume', v_ks),
            ('trade_value', w_ks),
        )
    ],
    axis=1,
)

In [102]:
# Reshape each wide KOSDAQ table to long form with .stack() and place the six
# fields side by side as named columns.
kosdaq = pd.concat(
    [
        wide.stack().rename(label)
        for label, wide in (
            ('open', o_kq),
            ('high', h_kq),
            ('low', l_kq),
            ('close', c_kq),
            ('volume', v_kq),
            ('trade_value', w_kq),
        )
    ],
    axis=1,
)

In [103]:
# Write the combined KOSPI table to CSV.
# NOTE(review): the file name says 1995_2024, but END_DATE is the run date
# (2025-01-25 in the recorded run) — confirm the label matches the data.
kospi.to_csv('./data/kospi-market/kospi_market_1995_2024.csv')

In [104]:
# Write the combined KOSDAQ table to CSV.
# NOTE(review): the file name says 1995_2024, but the recorded run started
# collecting at 1996-07-01 and END_DATE is the run date (2025-01-25) — confirm
# the label matches the data.
kosdaq.to_csv('./data/kosdaq-market/kosdaq_market_1995_2024.csv')