<a href="https://colab.research.google.com/github/slayerzeroa/wvkospi/blob/master/VKOSPI_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install pykrx --q
%pip install pandas_datareader --q
%pip install gdown --q

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

import datetime

from sklearn import preprocessing

import gdown

from pandas_datareader import data as pdr
from pykrx import stock
import yfinance as yf
yf.pdr_override()

# Naive local timestamp taken when this cell runs.
now = datetime.datetime.today()

def get_data(symbol, start_date, end_date):
    """Download daily prices through ``pdr.get_data_yahoo`` and add a ``scaled`` column.

    Parameters
    ----------
    symbol : str
        Ticker to download. ``"SPX"`` and ``"VIX"`` (any letter case) are
        shortcuts for the ``^SPX`` and ``^vix`` tickers; any other value is
        passed to the downloader unchanged.
    start_date, end_date
        Date range forwarded to ``pdr.get_data_yahoo`` as ``start`` / ``end``.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with an extra ``scaled`` column:
        min-max scaled ``Close`` for SPX, ``Adj Close / 100`` for VIX, and
        min-max scaled ``Adj Close`` for everything else.
    """
    scaler1 = preprocessing.MinMaxScaler()  # better than StandardScaler() for stock data

    # Match the shortcuts case-insensitively; non-string symbols fall through untouched.
    key = symbol.upper() if isinstance(symbol, str) else symbol

    if key == "SPX":
        spx = pdr.get_data_yahoo("^SPX", start=start_date, end=end_date)
        # The previous version reversed the frame unconditionally (a leftover from a
        # provider that delivered newest-first data). Sorting by date instead
        # guarantees oldest-first order whatever order the download arrives in, so
        # rolling windows computed downstream never look forward in time. It also
        # returns a new frame, so the column assignment below is not a write
        # into a slice.
        spx = spx.sort_index()
        # scale data so it fits on the chart
        spx['scaled'] = scaler1.fit_transform(spx['Close'].values.reshape(-1, 1))
        return spx

    elif key == "VIX":
        vix = pdr.get_data_yahoo("^vix", start=start_date, end=end_date)
        vix['scaled'] = vix['Adj Close'] / 100  # same scale as the calculated vol
        return vix

    else:
        dataset = pdr.get_data_yahoo(symbol, start=start_date, end=end_date)
        dataset['scaled'] = scaler1.fit_transform(dataset['Adj Close'].values.reshape(-1, 1))
        return dataset

def get_realized_vol(dataset, time, price_col="Adj Close"):
    """Annualized rolling realized volatility of daily log returns.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Price frame containing ``price_col``. A ``returns`` column holding the
        daily log returns is written into it (NaN returns replaced by 0).
    time : int
        Rolling window length in rows. ~21 rows is about one month of trading
        days, which lines up with a 30-day implied-vol index such as the VIX.
    price_col : str, default "Adj Close"
        Column the log returns are computed from.

    Returns
    -------
    pandas.Series
        Population standard deviation (``ddof=0``) of the log returns over each
        window, multiplied by ``sqrt(252)``. The first ``time - 1`` entries are NaN.
    """
    prices = dataset[price_col]
    # The first row has no previous price, so its return is NaN; treat it as 0.
    # Only the returns column is filled. Zero-filling the whole frame would turn a
    # missing price into 0 and make later calls on the same frame produce +/-inf
    # returns.
    dataset['returns'] = np.log(prices / prices.shift(1)).fillna(0)
    volatility = dataset['returns'].rolling(window=time).std(ddof=0) * np.sqrt(252)
    return volatility

def get_realized_spx_vol(dataset, time):
    """Annualized rolling realized volatility computed from the ``Close`` column.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Price frame containing ``Close``. A ``returns`` column holding the daily
        log returns is written into it (NaN returns replaced by 0).
    time : int
        Rolling window length in rows.

    Returns
    -------
    pandas.Series
        Population standard deviation (``ddof=0``) of the log returns over each
        window, multiplied by ``sqrt(252)``. The first ``time - 1`` entries are NaN.
    """
    close = dataset["Close"]
    # Fill only the returns column: the first row has no previous close, so its
    # return is NaN. Zero-filling every column would replace missing prices with 0
    # and corrupt any later computation on the same frame.
    dataset['returns'] = np.log(close / close.shift(1)).fillna(0)
    spx_volatility = dataset['returns'].rolling(window=time).std(ddof=0) * np.sqrt(252)
    return spx_volatility

def show_vol(df, start, end):
    """Plot min-max scaled VIX, 21-day realized vol and KOSPI close together.

    Each of ``df.vix``, ``df.kospi_vol_21`` and ``df.kospi_close`` is scaled
    with its own ``MinMaxScaler`` fitted on the full column, and the results
    are stored in ``df`` as ``scaled_vix``, ``scaled_kospi_vol_21`` and
    ``scaled_kospi_close``. The ``start : end`` slice of each scaled column is
    then drawn on one 18x8 figure.
    """
    # Scale all three inputs before touching df, one independent scaler per column.
    vix_scaled = preprocessing.MinMaxScaler().fit_transform(df.vix.values.reshape(-1, 1))
    vol_scaled = preprocessing.MinMaxScaler().fit_transform(df.kospi_vol_21.values.reshape(-1, 1))
    close_scaled = preprocessing.MinMaxScaler().fit_transform(df.kospi_close.values.reshape(-1, 1))

    df['scaled_vix'] = vix_scaled
    df['scaled_kospi_vol_21'] = vol_scaled
    df['scaled_kospi_close'] = close_scaled

    plt.figure(figsize=(18, 8))
    window = slice(start, end)
    plt.plot(df.scaled_vix[window])
    plt.plot(df.scaled_kospi_vol_21[window])
    plt.plot(df.scaled_kospi_close[window])
    plt.legend(["VIX", "Realized Vol", "Underlier (Scaled)"])
    plt.xlabel("Date")
    plt.ylabel("Volatility")

In [None]:
# Sample window: from 2 October 2013 up to the moment this cell is executed.
start = datetime.datetime(year=2013, month=10, day=2)
end = datetime.datetime.now()

In [None]:
# Fetch the VKOSPI value for every calendar day in [start, end) and save it to CSV.
rows = []
# Walk a separate cursor so `start` keeps its value for the cells that follow;
# advancing `start` itself would leave later downloads with an empty date range.
cursor = start
while cursor < end:
  date = cursor.strftime('%Y%m%d')
  cursor += datetime.timedelta(days=1)
  try:
    ohlcv = stock.get_future_ohlcv(date, 'KRDRVFUVKI')
    # First row of the '현물가' column, taken by position.
    rows.append({'date': date, 'vkospi': ohlcv['현물가'].iloc[0]})
  except Exception:
    # Best effort: days for which no value can be read are skipped.
    # `Exception` (not a bare except) lets KeyboardInterrupt stop the loop.
    continue

df = pd.DataFrame(rows)
df.to_csv('vkospi.csv', index=False)

KeyboardInterrupt: 

In [None]:
# Download both index series over the same [start, end] window.
kospi, kosdaq = (get_data(ticker, start, end) for ticker in ('^KS11', '^KQ11'))

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [None]:
# Show the downloaded KOSPI frame as this cell's output.
kospi

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,scaled
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-10-02,2012.069946,2012.819946,1999.020020,1999.469971,1999.469971,238300,0.293266
2013-10-04,1999.920044,2004.650024,1985.630005,1996.979980,1996.979980,241100,0.291919
2013-10-07,1997.170044,2002.609985,1991.020020,1994.420044,1994.420044,249800,0.290533
2013-10-08,1987.969971,2002.780029,1981.209961,2002.760010,2002.760010,329600,0.295047
2013-10-10,2000.910034,2010.000000,1997.660034,2001.400024,2001.400024,222700,0.294311
...,...,...,...,...,...,...,...
2024-05-22,2723.459961,2733.070068,2707.870117,2723.459961,2723.459961,484700,0.685127
2024-05-23,2719.030029,2734.270020,2704.000000,2721.810059,2721.810059,427400,0.684234
2024-05-24,2693.649902,2696.989990,2681.169922,2687.600098,2687.600098,501600,0.665718
2024-05-27,2697.159912,2725.080078,2693.659912,2722.989990,2722.989990,556700,0.684873


In [None]:
# Realized volatility over 21-, 63-, 126- and 252-row windows, computed in that order.
kospi_vol_21, kospi_vol_63, kospi_vol_126, kospi_vol_252 = (
    get_realized_vol(kospi, window) for window in (21, 63, 126, 252)
)

# Adjusted close and its simple day-over-day percentage change.
kospi_close = kospi['Adj Close']
kospi_returns = kospi['Adj Close'].pct_change()

In [None]:
# Load the saved VKOSPI data and drop its last row.
# NOTE(review): presumably the last row is dropped because the most recent day may
# be incomplete -- confirm.
# `.copy()` makes the slice an independent frame, so the column assignment below
# does not raise SettingWithCopyWarning.
vkospi = pd.read_csv('vkospi.csv').iloc[:-1, :].copy()

# Parse the 'date' column (YYYYMMDD) and re-format it as 'YYYY-MM-DD' strings.
vkospi['date'] = pd.to_datetime(vkospi['date'], format='%Y%m%d').dt.strftime('%Y-%m-%d')

vkospi.columns = ['Date', 'vkospi']
vkospi = vkospi.set_index('Date')
vkospi

Unnamed: 0_level_0,vkospi
Date,Unnamed: 1_level_1
2015-01-02,12.75
2015-01-05,12.65
2015-01-06,13.80
2015-01-07,13.40
2015-01-08,13.45
...,...
2024-05-22,16.15
2024-05-23,16.20
2024-05-24,16.00
2024-05-27,15.40


In [None]:
# Convert each KOSPI series' DatetimeIndex to 'YYYY-MM-DD' strings so it lines up
# with the string dates used as the VKOSPI index when the series are concatenated.
# Only a DatetimeIndex is converted, so re-running this cell is a no-op instead of
# an AttributeError on an index that has already been turned into strings.
for series in (kospi_vol_21, kospi_vol_63, kospi_vol_126, kospi_vol_252, kospi_returns, kospi_close):
    if isinstance(series.index, pd.DatetimeIndex):
        series.index = series.index.strftime('%Y-%m-%d')

In [None]:
# Align every series on the shared date index, side by side, and keep only the
# dates for which all columns have a value.
feature_parts = [
    kospi_vol_21,
    kospi_vol_63,
    kospi_vol_126,
    kospi_vol_252,
    kospi_returns,
    kospi_close,
    vkospi,
]
df = pd.concat(feature_parts, axis=1).dropna()

In [None]:
# Label the columns in the same order the pieces were concatenated.
column_names = [
    'kospi_vol_21',
    'kospi_vol_63',
    'kospi_vol_126',
    'kospi_vol_252',
    'kospi_returns',
    'kospi_close',
    'vkospi',
]
df.columns = column_names
df

Unnamed: 0_level_0,kospi_vol_21,kospi_vol_63,kospi_vol_126,kospi_vol_252,kospi_returns,kospi_close,vkospi
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-01-02,0.115106,0.111261,0.097909,0.100575,0.005664,1926.439941,12.75
2015-01-05,0.116044,0.108351,0.097667,0.100643,-0.005549,1915.750000,12.65
2015-01-06,0.127696,0.112584,0.100609,0.101888,-0.017382,1882.449951,13.80
2015-01-07,0.122571,0.112440,0.099884,0.101860,0.000733,1883.829956,13.40
2015-01-08,0.130657,0.114667,0.101166,0.102470,0.011052,1904.650024,13.45
...,...,...,...,...,...,...,...
2024-05-22,0.166730,0.161414,0.156883,0.160292,-0.000264,2723.459961,16.15
2024-05-23,0.154396,0.159784,0.154083,0.160274,-0.000606,2721.810059,16.20
2024-05-24,0.156095,0.160918,0.155229,0.160664,-0.012569,2687.600098,16.00
2024-05-27,0.160691,0.162889,0.155795,0.161163,0.013168,2722.989990,15.40


In [None]:
# Persist the combined frame (index included) next to the notebook.
output_path = 'kospi.csv'
df.to_csv(output_path)