# SVM (Support Vector Machine) / multi-day intraday data

In [None]:
# import packages
import numpy as np  # NumPy
import pandas as pd  # pandas
import cufflinks as cf  # Cufflinks
from sklearn.svm import SVC  # sckikit-learn
import warnings; warnings.simplefilter('ignore')
from statsmodels.tsa.stattools import adfuller
import configparser as cp
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# later releases dropped the old names, so prefer the new name when it exists and
# fall back to the legacy 'seaborn' name on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
# Default figure size (as passed to rcParams) and resolution for every plot in the notebook.
plt.rcParams['figure.figsize'] = [16, 9]
plt.rcParams['figure.dpi'] = 200

In [None]:
# Load the raw CSV exports, one DataFrame per instrument.
# The chart titles used further down label these as: ktb -> "KTB 3y",
# lktb -> "KTB 10y", k200 -> "KOSPI 200".
# The preprocessing functions in this notebook read the columns DATE, TIME,
# OPEN and CLOSE from these frames.
# NOTE(review): the "_1m" suffix presumably means 1-minute bars - confirm.
ktb = pd.read_csv('./csv/KTB_1m.csv')
lktb = pd.read_csv('./csv/LKTB_1m.csv')
k200 = pd.read_csv('./csv/K200_1m.csv')

In [None]:
# Preprocessing for Infomax data: merge the DATE and TIME columns into a DatetimeIndex.
def pre_infomax(df):
    """Replace the DATE and TIME columns of ``df`` with a DatetimeIndex named 'DATE'.

    The frame is modified in place and nothing is returned.

    Calling the function again on a frame that has already been processed
    (no DATE/TIME columns left and a DatetimeIndex in place) is a no-op, so a
    notebook cell that calls it can be re-run safely.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string columns 'DATE' and 'TIME' whose concatenation
        ("<DATE> <TIME>") is parseable by ``pd.to_datetime``.

    Raises
    ------
    KeyError
        If 'DATE' or 'TIME' is missing and the frame is not already indexed
        by a DatetimeIndex.
    """
    already_indexed = (
        isinstance(df.index, pd.DatetimeIndex)
        and 'DATE' not in df.columns
        and 'TIME' not in df.columns
    )
    if already_indexed:
        # A previous call consumed DATE/TIME; doing it again would only raise.
        return

    stamps = pd.to_datetime(df['DATE'] + ' ' + df['TIME'])
    df.drop(['DATE', 'TIME'], axis=1, inplace=True)
    # Assigning the index mutates the caller's frame, matching the in-place contract.
    df.index = pd.DatetimeIndex(stamps, name='DATE')

In [None]:
# Add lagged-return columns. The data spans several days of intraday bars, so
# the lags are computed per day and never reach across a date boundary.
def pre_intraday(df, lags):
    """Build per-day return and lagged-return features from intraday bars.

    For every calendar date present in ``df.index`` the function computes

    * ``RETURN``           - log(CLOSE / previous CLOSE) within that day,
    * ``RETURN_INTRADAY``  - log(CLOSE / OPEN) of the same row,
    * ``lag1`` .. ``lag<lags>`` - ``RETURN`` shifted by 1..lags rows.

    Because the shifts are applied to one day at a time, the first rows of
    each day contain NaN and are removed by the final ``dropna()`` (which also
    drops any row that has NaN in another column).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a DatetimeIndex with 'OPEN' and 'CLOSE' columns.
        It is not modified.
    lags : int
        Number of lagged-return columns to create.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The concatenated per-day frames with NaN rows dropped, and the names
        of the lag columns. For an input without rows an empty DataFrame is
        returned together with the lag column names.
    """
    # Built up front so the names are available even when there are no rows.
    cols = ['lag{}'.format(lag) for lag in range(1, lags + 1)]

    row_dates = df.index.date
    frames = []
    for date in np.unique(row_dates):
        # Explicit copy: new columns are added to the day's rows, not to a view of df.
        day = df[row_dates == date].copy()
        day['RETURN'] = np.log(day['CLOSE'] / day['CLOSE'].shift(1))
        day['RETURN_INTRADAY'] = np.log(day['CLOSE'] / day['OPEN'])
        for lag, col in enumerate(cols, start=1):
            day[col] = day['RETURN'].shift(lag)
        frames.append(day)

    if not frames:
        return pd.DataFrame(), cols
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat(frames).dropna(), cols

In [None]:
# Train an SVM, predict, and plot the result. Takes the SVM parameter C as input.
# This version draws with the regular pandas plot().
def svm_plot(df, cols, title, svm_c):
    """Fit an SVC on the first half of ``df`` and back-test it on the second half.

    Features are the signs of the columns in ``cols``; the training target is
    the sign of 'RETURN'. The predicted sign is multiplied by
    'RETURN_INTRADAY' to obtain the per-row strategy return.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'RETURN', 'RETURN_INTRADAY' and the feature columns.
    cols : list of str
        Names of the feature columns.
    title : str
        Title shown on the chart.
    svm_c : float
        Value passed as ``C`` to ``SVC``.

    Returns
    -------
    pandas.DataFrame
        Test-period frame with columns 'Market' (raw RETURN), 'Prediction',
        'Strategy', and the cumulative curves 'BM' and 'Return'
        (exp of the cumulative sum of 'Market' and 'Strategy').
    """
    model = SVC(C=svm_c)
    split = len(df) // 2  # first half trains, second half tests

    features = np.sign(df[cols])
    train_x, test_x = features.iloc[:split], features.iloc[split:]
    train_y = np.sign(df['RETURN']).iloc[:split]
    test_y = df['RETURN'].iloc[split:]
    real_y = df['RETURN_INTRADAY'].iloc[split:]

    model.fit(train_x, train_y)
    pred = model.predict(test_x)
    # NOTE: the benchmark uses RETURN while the strategy is paid RETURN_INTRADAY.
    strat = pred * real_y

    res = pd.DataFrame({
        'Market': test_y,
        'Prediction': pred,
        'Strategy': strat,
    })
    # Log returns add up, so exp(cumsum) gives the cumulative growth factor.
    res[['BM', 'Return']] = res[['Market', 'Strategy']].cumsum().apply(np.exp)
    # The title argument used to be ignored; hand it to the plot.
    res[['BM', 'Return']].plot(title=title)
    return res

In [None]:
# Train an SVM, predict, and plot the result. Takes the SVM parameter C as input.
# This version draws with the plotly-backed iplot().
def svm_iplot(df, cols, title, svm_c):
    """Fit an SVC on the first half of ``df``, back-test on the rest, and iplot it.

    Features are the signs of the ``cols`` columns and the training target is
    the sign of 'RETURN'. Each prediction is multiplied by the row's
    'RETURN_INTRADAY' to form the strategy return.

    Returns the test-period DataFrame with columns 'Market', 'Prediction',
    'Strategy', 'BM' and 'Return'; the last two are exp(cumsum) of 'Market'
    and 'Strategy' and are the series drawn on the chart.
    """
    half = len(df) // 2

    # Sign-transform once, then slice into the train / test halves.
    signed_features = np.sign(df[cols])
    signed_target = np.sign(df['RETURN'])
    market = df['RETURN'].iloc[half:]
    intraday = df['RETURN_INTRADAY'].iloc[half:]

    classifier = SVC(C=svm_c)
    classifier.fit(signed_features.iloc[:half], signed_target.iloc[:half])
    predicted = classifier.predict(signed_features.iloc[half:])

    res = pd.DataFrame({
        'Market': market,
        'Prediction': predicted,
        'Strategy': predicted * intraday,
    })
    # Cumulative growth factors of the benchmark and of the strategy.
    res['BM'] = np.exp(res['Market'].cumsum())
    res['Return'] = np.exp(res['Strategy'].cumsum())

    chart_layout = {'title': title, 'xaxis': {'type': 'category'}}
    res[['BM', 'Return']].iplot(layout=chart_layout)
    return res

In [None]:
# Key settings: the number of lags and the list of values for the hyperparameter C.
lags = 5
# Wider grid kept for reference: c_param = [1,5,10,20,50,100]
c_param = [1,10,100]

In [None]:
# LKTB data (charted as "KTB 10y"): index by timestamp, build the lag features,
# then train and plot one model per C value.
pre_infomax(lktb)  # modifies lktb in place
lktb_out, lktb_cols = pre_intraday(lktb, lags)
# NOTE(review): `result` is re-created in every instrument cell, so only the
# most recently run cell's frames are kept.
result = {}
for c in c_param:
    result[c] = svm_iplot(lktb_out, lktb_cols, "KTB 10y with C=%d" %c, c)

In [None]:
# KOSPI200 futures: index by timestamp, build the lag features,
# then train and plot one model per C value.
pre_infomax(k200)  # modifies k200 in place
k200_out, k200_cols = pre_intraday(k200, lags)
# NOTE(review): `result` is re-created in every instrument cell, so only the
# most recently run cell's frames are kept.
result = {}
for c in c_param:
    result[c] = svm_iplot(k200_out, k200_cols, "KOSPI 200 with C=%d" %c, c)

In [None]:
# KTB 3-year futures: index by timestamp, build the lag features,
# then train and plot one model per C value.
pre_infomax(ktb)  # modifies ktb in place
ktb_out, ktb_cols = pre_intraday(ktb, lags)
# NOTE(review): `result` is re-created in every instrument cell, so only the
# most recently run cell's frames are kept.
result = {}
for c in c_param:
    result[c] = svm_iplot(ktb_out, ktb_cols, "KTB 3y with C=%d" % c, c)