In [46]:
from pandas_datareader import data, wb
from datetime import datetime
import math
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
from sklearn.metrics import accuracy_score

In [47]:
kospi = data.DataReader('KRX:KOSPI', 'google')
nasdaq = data.DataReader('NASDAQ:NDAQ', 'google')
currency = data.DataReader('DEXKOUS', 'fred')

In [48]:
kospi['Close_diff'] = kospi['Close'].diff()
kospi.dropna(inplace=True)  # drop first data 
kospi['Direction'] = kospi['Close_diff'].apply(lambda x: 1 if x>=0 else -1 )

In [52]:
# compute first order difference natural logarithmatic transformation
nasdaq['ln_Close'] = nasdaq['Close'].apply(lambda x: math.log(x))
currency['ln_Close'] = currency['DEXKOUS'].apply(lambda x: math.log(x))

nasdaq['diff'] = nasdaq['ln_Close'].diff()
currency['diff'] = currency['ln_Close'].diff()

nasdaq.dropna(inplace=True)
currency.dropna(inplace=True)

In [62]:
df = nasdaq.join(currency, lsuffix='_nasdaq', rsuffix='_currency')
df.fillna(method='ffill', inplace=True)

In [70]:
x = df[['diff_nasdaq', 'diff_currency']]

In [77]:
import pandas as pd
kospi.index = kospi.index - pd.DateOffset(days=1)

In [81]:
data = x.join(kospi)

data = data[['diff_nasdaq', 'diff_currency', 'Direction']]

data.dropna(inplace=True)

In [92]:
x = data[['diff_nasdaq', 'diff_currency']]
y = data[['Direction']]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [123]:
# SVM
svm = SVC(kernel='rbf', gamma=1/10, probability=True)
svm = svm.fit(x_train, y_train)

print("SVM training score : {}".format(svm.score(x_train, y_train)))
print("SVM test score : {}".format(svm.score(x_test, y_test)))

# LDA
lda = LinearDiscriminantAnalysis().fit(x_train, y_train)
print("LDA training score : {}".format(lda.score(x_train, y_train)))
print("LDA test score : {}".format(lda.score(x_test, y_test)))

# QDA
qda = QuadraticDiscriminantAnalysis().fit(x_train, y_train)
print("QDA training score : {}".format(qda.score(x_train, y_train)))
print("QDA test score : {}".format(qda.score(x_test, y_test)))

SVM training score : 0.5329949238578681
SVM test score : 0.5011820330969267
LDA training score : 0.5928934010152285
LDA training score : 0.557919621749409
QDA training score : 0.5898477157360406
QDA training score : 0.557919621749409


  y = column_or_1d(y, warn=True)


In [124]:
# Ensemble
svm_prob = svm.predict_proba(x_test)
lda_prob = lda.predict_proba(x_test)
qda_prob = qda.predict_proba(x_test)

w1 = svm.score(x_train, y_train)
w2 = lda.score(x_train, y_train)
w3 = qda.score(x_train, y_train)

prob = (w1 * svm_prob + w2 * lda_prob + w3 * qda_prob) / (w1 + w2 + w3)

pred = np.argmax(prob, axis=1)
pred[pred==0] = -1

print("Ensemble score : {}".format(accuracy_score(y_test, pred)))