In [1]:
import numpy as np
import math
import pandas as pd 
from pandas_datareader import data, wb
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score

In [4]:
# get KOSPI, NASDAQ, US/KRWON currency data
kospi = data.DataReader('KRX:KOSPI', 'google')
nasdaq = data.DataReader('NASDAQ:NDAQ', 'google')
currency = data.DataReader('DEXKOUS', 'fred')

In [5]:
# make kospi direction feature
kospi['Close_diff'] = kospi['Close'].diff()
kospi.dropna(inplace=True)  # drop first data 
kospi['Direction'] = kospi['Close_diff'].apply(lambda x: 1 if x>=0 else -1 )

In [6]:
# compute first order difference natural logarithmatic transformation
nasdaq['ln_Close'] = nasdaq['Close'].apply(lambda x: math.log(x))
currency['ln_Close'] = currency['DEXKOUS'].apply(lambda x: math.log(x))

nasdaq['diff'] = nasdaq['ln_Close'].diff()
currency['diff'] = currency['ln_Close'].diff()

nasdaq.dropna(inplace=True)
currency.dropna(inplace=True)

In [7]:
# join nasdaq data and currency data
df = nasdaq.join(currency, lsuffix='_nasdaq', rsuffix='_currency')
df.fillna(method='ffill', inplace=True)

# select input feature
df = df[['diff_nasdaq', 'diff_currency']]

kospi.index = kospi.index - pd.DateOffset(days=1)

data = df.join(kospi)
data = data[['diff_nasdaq', 'diff_currency', 'Direction']]
data.dropna(inplace=True)

In [11]:
x = data[['diff_nasdaq', 'diff_currency']]
y = data['Direction']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [12]:
# SVM
svm = SVC(kernel='rbf', gamma=1/10, probability=True)
svm = svm.fit(x_train, y_train)

print("SVM training score : {}".format(svm.score(x_train, y_train)))
print("SVM test score : {}".format(svm.score(x_test, y_test)))

# LDA
lda = LinearDiscriminantAnalysis().fit(x_train, y_train)
print("LDA training score : {}".format(lda.score(x_train, y_train)))
print("LDA test score : {}".format(lda.score(x_test, y_test)))

# QDA
qda = QuadraticDiscriminantAnalysis().fit(x_train, y_train)
print("QDA training score : {}".format(qda.score(x_train, y_train)))
print("QDA test score : {}".format(qda.score(x_test, y_test)))

SVM training score : 0.515736040609137
SVM test score : 0.541371158392435
LDA training score : 0.5766497461928934
LDA test score : 0.5862884160756501
QDA training score : 0.5736040609137056
QDA test score : 0.5957446808510638


In [13]:
# Ensemble
svm_prob = svm.predict_proba(x_test)
lda_prob = lda.predict_proba(x_test)
qda_prob = qda.predict_proba(x_test)

w1 = svm.score(x_train, y_train)
w2 = lda.score(x_train, y_train)
w3 = qda.score(x_train, y_train)

prob = (w1 * svm_prob + w2 * lda_prob + w3 * qda_prob) / (w1 + w2 + w3)

pred = np.argmax(prob, axis=1)
pred[pred==0] = -1

print("Ensemble score : {}".format(accuracy_score(y_test, pred)))

Ensemble score : 0.5886524822695035
