In [155]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import ta
import matplotlib.pyplot as plt

In [156]:
# Data Load
df = pd.read_csv('D:/Denoising/Autoencoder/LSTM_SAE/Samsung.txt', sep = ',')

In [157]:
# Make Indicator : SMA, EMA, Stochastic K%, Stochastic D%, RSI, MACD, Disparity

# Bollinger Band
bol_h = ta.volatility.bollinger_hband(df["CLOSE"])
bol_l = ta.volatility.bollinger_lband(df["CLOSE"])

# SMA
sma = ta.trend.sma_indicator(df["CLOSE"],12)

# EMA
ema = ta.trend.ema_indicator(df["CLOSE"],12)

# Stochastic
sto_k = ta.momentum.stochrsi_k(df["CLOSE"])
sto_d = ta.momentum.stochrsi_d(df["CLOSE"])

# RSI
rsi = ta.momentum.rsi(df["CLOSE"], 14)

# MACD
macd = ta.trend.macd(df["CLOSE"], 13, 26)

In [158]:
# Label Close Price as Up(1) and Down(0)
updown_list = []
for i in range(len(df)):
    if i == 0:
        updown_list.append(0)
    else:
        updown = df["CLOSE"][i] - df["CLOSE"][i-1]
        if updown >= 0:
            updown_list.append(1)
        else:
            updown_list.append(0)

    

In [159]:
# Concat Indicators and Price
x = np.zeros((len(df)-200,9))
idx = 0
for i in range(200,len(df)):
    x[idx][0] = bol_h[i]
    x[idx][1] = bol_l[i]
    x[idx][2] = sma[i]
    x[idx][3] = ema[i]
    x[idx][4] = sto_k[i]
    x[idx][5] = sto_d[i]
    x[idx][6] = rsi[i]
    x[idx][7] = macd[i]
    x[idx][8] = df["CLOSE"][i]
    idx += 1

y = np.zeros((len(df)-200,1))
idx2 = 0
for j in range(200,len(df)):
    y[idx2][0] = updown_list[j]
    idx2 += 1

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=123)

In [160]:
# Model Build
model = XGBClassifier(max_depth = 5,  n_estimators=300, learning_rate=0.05)
model.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.05, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=5, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=300,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [161]:
# Predict
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

In [162]:
# Check Accuracy
acc = accuracy_score(y_test, predictions)
print("Accuracy : %.2f%%" % (acc*100.0))

Accuracy : 72.55%


In [163]:
# Check Precision
pre_1_list = [] # If predict value is 1 and real value is 1 then append 1
pre_0_list = [] # If Predict value is 0 and real value is 0 then append 1
for i in range(len(predictions)):
    if predictions[i] == 1:
        if y_test[i] == 1:
            pre_1_list.append(1)
        else:
            pre_1_list.append(0)
    elif predictions[i] == 0:
        if y_test[i] == 0:
            pre_0_list.append(1)
        else:
            pre_0_list.append(0)
pre_1 = round(sum(pre_1_list)/len(pre_1_list)*100,2)
pre_0 = round(sum(pre_0_list)/len(pre_0_list)*100,2)
print(str(pre_1)+"%")
print(str(pre_0)+"%")

72.99%
72.03%


In [164]:
# Check Recall
up_count = 0
for i in range(len(updown_list)):
    if updown_list[i] == 1:
        up_count +=1
recall = round(up_count / len(updown_list)*100,2)
print(str(recall)+"%")

55.25%


In [165]:
# And Compare to Just Close Price
x2 = np.zeros((len(df)-200,1))
idx = 0
for i in range(200,len(df)):
    x[idx][0] = df["CLOSE"][i]
    idx += 1

X_train, X_test, y_train, y_test = train_test_split(x2, y, test_size=0.2, random_state=123)

In [166]:
# Model Build2
model = XGBClassifier(max_depth = 5,  n_estimators=300, learning_rate=0.05)
model.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.05, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=5, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=300,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [167]:
# Predict2
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

In [168]:
# Check Accuracy2
acc = accuracy_score(y_test, predictions)
print("Accuracy : %.2f%%" % (acc*100.0))

Accuracy : 52.16%
