In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from pickle import dump
import warnings
import os
from keras.optimizers import Adam
from keras import models
from keras.layers import Dropout, Dense, GRU, LSTM, Flatten
warnings.filterwarnings('ignore')

In [2]:
# Backtest configuration: the calendar year held out for the simulation and
# the number of years (simulation year included) in the learning window.
SimulationYear = 2021
Period = 10
# First calendar year of the window, e.g. 2021 with Period 10 gives 2012.
LearningYearStart = SimulationYear - (Period - 1)

In [3]:
# Instrument symbol; every input/output path below is derived from it.
code = "EURUSD"
# Prepared dataset with indicators (input CSV).
PreparedDatasetPath = "Datasets_with_Indicators/" + code + ".csv"
# CSV written for the backtesting simulation of SimulationYear.
SimulationDatasetPath = "SimulationDatasets/Classification/" + code + "_" + str(SimulationYear) + ".csv"
# Directory that receives the learning artefacts (scaler, arrays, models).
LearningDataPath = "ClassificationData/" + code + "_" + str(SimulationYear)
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and the cell stays safe to re-run.
os.makedirs(LearningDataPath, exist_ok=True)

In [4]:
# Windowing setup:
#   n_steps_in      - number of consecutive rows in each input window
#   n_steps_out     - output step count (only used in predict_index's index arithmetic)
#   output_features - expected length of each one-hot target row
n_steps_in, n_steps_out, output_features = 10, 1, 2

In [5]:
# Training hyper-parameters handed to model.fit().
# LR = 0.0005  (disabled: the models are compiled with optimizer='adam')
BATCH_SIZE, N_EPOCH = 100, 30

In [6]:
def get_simulation_data():
    """Build the dataset used by the backtesting simulation and save it as CSV.

    Reads the prepared CSV at ``PreparedDatasetPath``, removes the ``ZigZag``
    label and the indicator columns, keeps the rows dated from January 2nd to
    December 31st of ``SimulationYear`` and writes them to
    ``SimulationDatasetPath``.

    Returns:
        tuple: ``(sim_dataset, n_rows)`` - the filtered frame with a fresh
        integer index (the ``Date`` column is kept) and its number of rows.
    """
    org_dataset = pd.read_csv(PreparedDatasetPath, parse_dates=True)
    org_dataset["Date"] = pd.to_datetime(org_dataset["Date"], dayfirst=True)
    # drop=False keeps Date as a regular column so it survives reset_index below.
    org_dataset = org_dataset.set_index("Date", drop=False)
    # One drop call instead of six chained ones; a missing column still raises KeyError.
    org_dataset = org_dataset.drop(
        columns=["ZigZag", "Return_rate", "RSI_7", "RSI_14", "MA_7", "MA_14"]
    )

    # Label-based, inclusive slice on the datetime index.
    sim_dataset = org_dataset.loc[f"{SimulationYear}-01-02":f"{SimulationYear}-12-31"]
    sim_dataset = sim_dataset.reset_index(drop=True)

    # to_csv does not create missing directories, so make sure the target exists.
    out_dir = os.path.dirname(SimulationDatasetPath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    sim_dataset.to_csv(SimulationDatasetPath, index=False)

    return sim_dataset, len(sim_dataset)

## 1. Przygotowanie danych

In [7]:
# Load the prepared CSV and index it by the parsed Date column.
dataset = pd.read_csv(PreparedDatasetPath, parse_dates=True)
dataset["Date"] = pd.to_datetime(dataset["Date"], dayfirst=True)
# Passing the column label moves Date out of the columns and into the index.
dataset = dataset.set_index("Date")
dataset

Unnamed: 0_level_0,Open,High,Low,Close,ZigZag,Return_rate,RSI_7,RSI_14,MA_7,MA_14
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-18,1.01110,1.01550,1.00720,1.01400,1,1.002769,36.106221,41.847381,1.019714,1.024307
2000-01-19,1.01410,1.01650,1.00850,1.01170,1,0.997732,32.411299,39.100201,1.016786,1.023271
2000-01-20,1.01160,1.01880,1.00730,1.01690,1,1.005140,46.777470,47.492823,1.014700,1.022393
2000-01-21,1.01660,1.01930,1.00500,1.00870,1,0.991936,33.627629,38.485863,1.012514,1.020707
2000-01-23,1.00280,1.00450,1.00260,1.00430,1,0.995638,28.595350,34.684738,1.011386,1.018836
...,...,...,...,...,...,...,...,...,...,...
2022-12-07,1.04670,1.05493,1.04427,1.05114,0,1.004223,64.103095,64.267401,1.050111,1.043287
2022-12-08,1.05115,1.05643,1.04892,1.05551,0,1.004157,69.392195,66.614241,1.052003,1.045037
2022-12-09,1.05549,1.05878,1.05032,1.05325,0,0.997859,63.726914,64.263552,1.052074,1.045859
2022-12-11,1.05286,1.05371,1.05176,1.05204,0,0.998851,60.634794,62.982086,1.051797,1.046666


In [8]:
# Keep only the learning window plus the simulation year
# (January 2nd of LearningYearStart through December 31st of SimulationYear).
window_start = f"{LearningYearStart}-01-02"
window_end = f"{SimulationYear}-12-31"
dataset = dataset.loc[window_start:window_end]
dataset

Unnamed: 0_level_0,Open,High,Low,Close,ZigZag,Return_rate,RSI_7,RSI_14,MA_7,MA_14
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2012-01-02,1.29418,1.29681,1.29168,1.29517,1,1.000827,36.436361,35.847328,1.297531,1.301094
2012-01-03,1.29519,1.30762,1.29512,1.30476,1,1.007404,54.870125,44.511377,1.297374,1.301135
2012-01-04,1.30478,1.30719,1.28974,1.29326,1,0.991186,39.033340,37.901071,1.295419,1.300498
2012-01-05,1.29327,1.29434,1.27702,1.27911,1,0.989059,27.598652,31.669051,1.293856,1.298976
2012-01-06,1.27910,1.28121,1.26972,1.27188,1,0.994348,23.495626,29.041435,1.290561,1.296164
...,...,...,...,...,...,...,...,...,...,...
2021-12-27,1.13228,1.13336,1.13025,1.13292,0,1.000565,60.157393,52.099889,1.131183,1.129533
2021-12-28,1.13292,1.13329,1.12891,1.13125,0,0.998526,52.577906,49.445781,1.131696,1.129549
2021-12-29,1.13129,1.13681,1.12731,1.13593,0,1.004137,66.413362,56.182433,1.132731,1.130094
2021-12-30,1.13593,1.13594,1.12979,1.13219,0,0.996708,52.211341,50.402185,1.132673,1.130557


In [9]:
# Export the simulation-year rows to CSV and keep their count: TestLength is
# the number of trailing samples that split_train_test reserves for testing.
sim_dataset, TestLength= get_simulation_data()
sim_dataset

Unnamed: 0,Date,Open,High,Low,Close
0,2021-01-03,1.22382,1.22523,1.22269,1.22495
1,2021-01-04,1.22495,1.23091,1.22382,1.22522
2,2021-01-05,1.22521,1.23051,1.22468,1.22982
3,2021-01-06,1.22982,1.23488,1.22652,1.23377
4,2021-01-07,1.23378,1.23438,1.22445,1.22686
...,...,...,...,...,...
307,2021-12-27,1.13228,1.13336,1.13025,1.13292
308,2021-12-28,1.13292,1.13329,1.12891,1.13125
309,2021-12-29,1.13129,1.13681,1.12731,1.13593
310,2021-12-30,1.13593,1.13594,1.12979,1.13219


In [10]:
# Features: every column except the ZigZag label.
X_value = dataset.drop(columns="ZigZag")
# Target: the ZigZag label as a single-column frame.
y_value = dataset["ZigZag"].to_frame()

In [11]:
# Number of input features = number of columns of the feature frame.
n_features = len(X_value.columns)
print(n_features)

9


In [12]:
# Preview the feature frame (all columns except ZigZag).
X_value

Unnamed: 0_level_0,Open,High,Low,Close,Return_rate,RSI_7,RSI_14,MA_7,MA_14
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2012-01-02,1.29418,1.29681,1.29168,1.29517,1.000827,36.436361,35.847328,1.297531,1.301094
2012-01-03,1.29519,1.30762,1.29512,1.30476,1.007404,54.870125,44.511377,1.297374,1.301135
2012-01-04,1.30478,1.30719,1.28974,1.29326,0.991186,39.033340,37.901071,1.295419,1.300498
2012-01-05,1.29327,1.29434,1.27702,1.27911,0.989059,27.598652,31.669051,1.293856,1.298976
2012-01-06,1.27910,1.28121,1.26972,1.27188,0.994348,23.495626,29.041435,1.290561,1.296164
...,...,...,...,...,...,...,...,...,...
2021-12-27,1.13228,1.13336,1.13025,1.13292,1.000565,60.157393,52.099889,1.131183,1.129533
2021-12-28,1.13292,1.13329,1.12891,1.13125,0.998526,52.577906,49.445781,1.131696,1.129549
2021-12-29,1.13129,1.13681,1.12731,1.13593,1.004137,66.413362,56.182433,1.132731,1.130094
2021-12-30,1.13593,1.13594,1.12979,1.13219,0.996708,52.211341,50.402185,1.132673,1.130557


In [13]:
# One-hot encode the ZigZag label. Encoding straight from `dataset` instead of
# re-assigning y_value from itself keeps the cell idempotent: running it twice
# no longer one-hot encodes an already encoded array. num_classes pins the
# width to output_features even if the selected window holds a single class.
y_value = tf.keras.utils.to_categorical(dataset["ZigZag"].to_numpy(), num_classes=output_features)

In [14]:
# Inspect the one-hot encoded labels.
y_value

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [15]:
# Normalized the data
X_scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaler.fit(X_value)
X_scale_dataset = X_scaler.fit_transform(X_value)
dump(X_scaler, open(LearningDataPath+"/X_scaler.pkl", "wb"))

In [20]:
def get_X_y(X_data, y_data, steps_in=None, n_classes=None):
    """Cut a feature matrix into sliding windows paired with the next target.

    Window ``i`` holds rows ``i .. i + steps_in - 1`` of ``X_data``; its target
    is row ``i + steps_in`` of ``y_data`` (the step right after the window).

    Args:
        X_data: 2-D array of features, one row per time step.
        y_data: Array of per-step target rows, indexed like ``X_data``.
        steps_in: Window length. Defaults to the notebook-level ``n_steps_in``.
        n_classes: Expected length of each target row; samples whose target
            has another length are skipped. Defaults to the notebook-level
            ``output_features``.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays with one entry per window. Both are
        empty when ``X_data`` has no more than ``steps_in`` rows.
    """
    # Fall back to the notebook globals only when the caller gave no override,
    # so existing two-argument calls behave exactly as before.
    if steps_in is None:
        steps_in = n_steps_in
    if n_classes is None:
        n_classes = output_features

    windows = []
    targets = []
    for start in range(len(X_data) - steps_in):
        window = X_data[start:start + steps_in]
        target = y_data[start + steps_in]
        if len(window) == steps_in and len(target) == n_classes:
            windows.append(window)
            targets.append(target)

    return np.array(windows), np.array(targets)

In [21]:
def predict_index(dataset, X_train, n_steps_in, n_steps_out):
    """Return the index labels of the rows targeted by train and test samples.

    Args:
        dataset: DataFrame whose index labels the time steps.
        X_train: Training samples; only ``X_train.shape[0]`` is read.
        n_steps_in: Input window length, i.e. rows skipped at the start.
        n_steps_out: Output step count; widens the training range by
            ``n_steps_out - 1`` rows.

    Returns:
        tuple: ``(train_predict_index, test_predict_index)``.
    """
    n_train = X_train.shape[0]
    first_test_row = n_train + n_steps_in

    train_rows = slice(n_steps_in, first_test_row + n_steps_out - 1)
    test_rows = slice(first_test_row, None)

    return dataset.iloc[train_rows, :].index, dataset.iloc[test_rows, :].index

In [22]:
def split_train_test(data, test_length=None):
    """Split a sequence into a leading training part and a trailing test part.

    Args:
        data: Sliceable sequence supporting ``len()`` (array, list, ...).
        test_length: Number of trailing items that form the test part.
            Defaults to the notebook-level ``TestLength``.

    Returns:
        tuple: ``(data_train, data_test)``.

    Raises:
        ValueError: If ``test_length`` is negative or exceeds ``len(data)``.
    """
    if test_length is None:
        test_length = TestLength
    # Size the split from the argument itself rather than from the global X,
    # so the function also works for data whose length differs from X.
    train_size = len(data) - test_length
    if test_length < 0 or train_size < 0:
        raise ValueError(
            "test_length must be between 0 and len(data); got "
            + str(test_length) + " for " + str(len(data)) + " items"
        )
    return data[:train_size], data[train_size:]

In [23]:
# Window the scaled features, split chronologically and look up the dates
# that belong to the train and test targets.
X, y = get_X_y(X_scale_dataset, y_value)
X_train, X_test = split_train_test(X)
y_train, y_test = split_train_test(y)
train_predict_index, test_predict_index = predict_index(dataset, X_train, n_steps_in, n_steps_out)

# Shape report for every array/index produced above.
for label, shaped in (
    ('X shape: ', X),
    ('y shape: ', y),
    ('X_train shape: ', X_train),
    ('y_train shape: ', y_train),
    ('X_test shape: ', X_test),
    ('y_test shape: ', y_test),
    ('index_train shape:', train_predict_index),
    ('index_test shape:', test_predict_index),
):
    print(label, shaped.shape)

X shape:  (3102, 10, 9)
y shape:  (3102, 2)
X_train shape:  (2790, 10, 9)
y_train shape:  (2790, 2)
X_test shape:  (312, 10, 9)
y_test shape:  (312, 2)
index_train shape: (2790,)
index_test shape: (312,)


In [24]:
# Inspect the dates associated with the training targets.
train_predict_index

DatetimeIndex(['2012-01-13', '2012-01-15', '2012-01-16', '2012-01-17',
               '2012-01-18', '2012-01-19', '2012-01-20', '2012-01-22',
               '2012-01-23', '2012-01-24',
               ...
               '2020-12-20', '2020-12-21', '2020-12-22', '2020-12-23',
               '2020-12-24', '2020-12-27', '2020-12-28', '2020-12-29',
               '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', name='Date', length=2790, freq=None)

In [183]:
# Inspect the dates associated with the test targets.
test_predict_index

DatetimeIndex(['2021-01-03', '2021-01-04', '2021-01-05', '2021-01-06',
               '2021-01-07', '2021-01-08', '2021-01-10', '2021-01-11',
               '2021-01-12', '2021-01-13',
               ...
               '2021-12-21', '2021-12-22', '2021-12-23', '2021-12-24',
               '2021-12-26', '2021-12-27', '2021-12-28', '2021-12-29',
               '2021-12-30', '2021-12-31'],
              dtype='datetime64[ns]', name='Date', length=312, freq=None)

In [187]:
# Persist the train/test arrays and their date indexes under LearningDataPath.
arrays_to_save = (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
    ("train_predict_index", train_predict_index),
    ("test_predict_index", test_predict_index),
)
for array_name, array in arrays_to_save:
    np.save(LearningDataPath + "/" + array_name + ".npy", array)

## 2. Wytrenowanie klasyfikatora

In [188]:
# Reload the four train/test arrays from LearningDataPath so the training
# section works from the files on disk.
X_train, y_train, X_test, y_test = (
    np.load(LearningDataPath + "/" + array_name + ".npy", allow_pickle=True)
    for array_name in ("X_train", "y_train", "X_test", "y_test")
)

In [189]:
# Network dimensions read from the data: axis 1 and axis 2 of X_train give the
# window length and the features per step; axis 1 of y_train is the target width.
input_dim, input_feature_size = X_train.shape[1:3]
output_dim = y_train.shape[1]

In [190]:
# Show the target width, i.e. the number of units of the final Dense layer.
print(output_dim)

2


In [191]:
# Inspect the one-hot training targets loaded from disk.
y_train

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [192]:
def basic_GRU(input_dim, output_dim, input_feature_size):
    """Build, compile and train a two-layer GRU classifier.

    Architecture: GRU(63, relu, returns sequences) -> Dropout(0.2) -> GRU(250)
    -> Dense(output_dim, softmax). Training reads the notebook-level X_train,
    y_train, N_EPOCH and BATCH_SIZE and does not shuffle the samples.

    Args:
        input_dim: Number of time steps per input window.
        output_dim: Number of units of the softmax output layer.
        input_feature_size: Number of features per time step.

    Returns:
        tuple: ``(model, history)`` - the fitted model and the object returned
        by ``model.fit``.
    """
    first_gru = GRU(units=63, return_sequences=True, activation='relu',
                    input_shape=(input_dim, input_feature_size))
    layer_stack = [
        first_gru,
        Dropout(0.2),
        GRU(units=250, return_sequences=False),
        Dense(output_dim, activation='softmax'),
    ]
    model = models.Sequential(layer_stack)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    fit_options = dict(epochs=N_EPOCH, batch_size=BATCH_SIZE, verbose=2, shuffle=False)
    history = model.fit(X_train, y_train, **fit_options)

    return model, history

In [193]:
def basic_LSTM(input_dim, output_dim, input_feature_size):
    """Build, compile and train a two-layer LSTM classifier.

    Architecture: LSTM(64, returns sequences) -> LSTM(32) -> Dropout(0.2)
    -> Dense(output_dim, softmax). Training reads the notebook-level X_train,
    y_train, N_EPOCH and BATCH_SIZE and does not shuffle the samples.

    Args:
        input_dim: Number of time steps per input window.
        output_dim: Number of units of the softmax output layer.
        input_feature_size: Number of features per time step.

    Returns:
        tuple: ``(model, history)`` - the fitted model and the object returned
        by ``model.fit``.
    """
    layer_stack = [
        LSTM(units=64, return_sequences=True, input_shape=(input_dim, input_feature_size)),
        LSTM(units=32),
        Dropout(0.2),
        Dense(output_dim, activation='softmax'),
    ]
    model = models.Sequential(layer_stack)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    fit_options = dict(epochs=N_EPOCH, batch_size=BATCH_SIZE, verbose=2, shuffle=False)
    history = model.fit(X_train, y_train, **fit_options)

    return model, history

In [194]:
## 61.92% ###

# def basic_LSTM(input_dim, output_dim, input_feature_size):
#     model = models.Sequential()
#     model.add(LSTM(units=63, return_sequences=True, activation='relu', input_shape=(input_dim, input_feature_size)))
#     model.add(Dropout(0.2))
#     model.add(LSTM(250, return_sequences=False))
#     model.add(Dropout(0.2))
#     model.add(Dense(output_dim, activation='softmax'))
#
#     model.compile(optimizer='adam',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
#     history = model.fit(X_train, y_train, epochs=N_EPOCH, batch_size=BATCH_SIZE, verbose=2, shuffle=False)
#
#     return model, history

In [195]:
# def basic_LSTM(input_dim, output_dim, input_feature_size):
#     model = models.Sequential()
#     model.add(LSTM(units=50, return_sequences=True, input_shape=(input_dim, input_feature_size)))
#     # model.add(Dropout(0.20))
#     model.add(LSTM(40, return_sequences=False))
#     model.add(Dropout(0.20))
#     model.add(Dense(output_dim, activation='softmax'))
#
#     model.compile(optimizer='adam',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
#     history = model.fit(X_train, y_train, epochs=N_EPOCH, batch_size=BATCH_SIZE, verbose=2, shuffle=False)
#
#     return model, history

In [196]:
# Train the LSTM classifier; uncomment the next line (and comment out the
# basic_LSTM call) to train the GRU variant instead.
# model, history = basic_GRU(input_dim, output_dim, input_feature_size)
model, history = basic_LSTM(input_dim, output_dim, input_feature_size)
# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print(), which would append a stray "None" line to the output.
model.summary()

Epoch 1/30
28/28 - 4s - loss: 0.6898 - accuracy: 0.5441 - 4s/epoch - 155ms/step
Epoch 2/30
28/28 - 0s - loss: 0.6822 - accuracy: 0.5516 - 278ms/epoch - 10ms/step
Epoch 3/30
28/28 - 0s - loss: 0.6770 - accuracy: 0.5659 - 303ms/epoch - 11ms/step
Epoch 4/30
28/28 - 0s - loss: 0.6709 - accuracy: 0.5774 - 308ms/epoch - 11ms/step
Epoch 5/30
28/28 - 0s - loss: 0.6638 - accuracy: 0.6004 - 324ms/epoch - 12ms/step
Epoch 6/30
28/28 - 0s - loss: 0.6559 - accuracy: 0.6158 - 302ms/epoch - 11ms/step
Epoch 7/30
28/28 - 0s - loss: 0.6485 - accuracy: 0.6233 - 325ms/epoch - 12ms/step
Epoch 8/30
28/28 - 0s - loss: 0.6433 - accuracy: 0.6258 - 278ms/epoch - 10ms/step
Epoch 9/30
28/28 - 0s - loss: 0.6359 - accuracy: 0.6419 - 274ms/epoch - 10ms/step
Epoch 10/30
28/28 - 0s - loss: 0.6316 - accuracy: 0.6466 - 273ms/epoch - 10ms/step
Epoch 11/30
28/28 - 0s - loss: 0.6284 - accuracy: 0.6509 - 285ms/epoch - 10ms/step
Epoch 12/30
28/28 - 0s - loss: 0.6226 - accuracy: 0.6631 - 289ms/epoch - 10ms/step
Epoch 13/30
28/

In [197]:
# Evaluate on the test split; the model is compiled with metrics=['accuracy'],
# so the first two entries of the result are the loss and the accuracy.
score = model.evaluate(X_test, y_test)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.5857523679733276
Test accuracy: 0.7115384340286255


In [202]:
# Save the trained model twice under LearningDataPath: once under a fixed name
# and once under a name tagged with the rounded test accuracy in percent.
model_base_path = LearningDataPath + "/Classification10Days"
accuracy_percent = round(score[1] * 100)
model.save(model_base_path + ".h5")
model.save(model_base_path + str(accuracy_percent) + "p.h5")