In [1]:
import os
import pandas as pd
import logging
import datetime
import joblib
import mlflow.keras
from tensorflow.keras.models import load_model
from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
from ml_investing_wne.xtb.xAPIConnector import APIClient, APIStreamClient, loginCommand
from ml_investing_wne.data_engineering.prepare_dataset import prepare_processed_dataset
import ml_investing_wne.config as config
from ml_investing_wne.train_test_val_split import train_test_val_split
from ml_investing_wne.helper import confusion_matrix_plot, compute_profitability_classes, check_hours
import importlib

build_model = getattr(importlib.import_module('ml_investing_wne.cnn.{}'.format(config.model)), 'build_model')

logger = logging.getLogger()
logger.setLevel(logging.INFO)

2022-09-25 08:44:09.969565: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-25 08:44:09.969611: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
start = datetime.datetime(2019, 9, 17, 1, 0, 0, 0)
symbol = 'EURCHF'
sc_x = joblib.load(os.path.join(config.package_directory, 'models',
                                   'sc_x_{}_{}.save'.format(config.currency, config.freq)))

In [3]:
client = APIClient()

# connect to RR socket, login
loginResponse = client.execute(loginCommand(userId=config.userId, password=config.password))
logger.info(str(loginResponse))

[2022-09-25 08:44:21,949][connect:76] Socket connected
[2022-09-25 08:44:21,951][_waitingSend:90] Sent: b'{"command": "login", "arguments": {"userId": 1590700, "password": "Symelgitara7", "appName": ""}}'
[2022-09-25 08:44:22,846][_read:110] Received: {'status': True, 'streamSessionId': 'ca005bfffe1f3af5-00004e4e-00025a67-3684e3f69a11a443-8dbf7ae2'}
[2022-09-25 08:44:22,848][<module>:5] {'status': True, 'streamSessionId': 'ca005bfffe1f3af5-00004e4e-00025a67-3684e3f69a11a443-8dbf7ae2'}


In [4]:
# check if user logged in correctly
if (loginResponse['status'] == False):
    print('Login failed. Error code: {0}'.format(loginResponse['errorCode']))

# get ssId from login response
ssid = loginResponse['streamSessionId']

In [5]:
resp = client.commandExecute('getChartLastRequest', {'info': {"period": 60, "start": int(start.timestamp() * 1000),
                                                              "symbol": symbol}})

[2022-09-25 08:44:27,904][_waitingSend:90] Sent: b'{"command": "getChartLastRequest", "arguments": {"info": {"period": 60, "start": 1568682000000, "symbol": "EURCHF"}}}'
[2022-09-25 08:44:34,699][_read:110] Received: {'status': True, 'returnData': {'rateInfos': [{'ctm': 1599688800000, 'ctmString': 'Sep 10, 2020, 12:00:00 AM', 'open': 107682.0, 'close': 7.0, 'high': 23.0, 'low': -35.0, 'vol': 7566.0}, {'ctm': 1599692400000, 'ctmString': 'Sep 10, 2020, 1:00:00 AM', 'open': 107687.0, 'close': -29.0, 'high': 4.0, 'low': -32.0, 'vol': 5109.0}, {'ctm': 1599696000000, 'ctmString': 'Sep 10, 2020, 2:00:00 AM', 'open': 107662.0, 'close': 24.0, 'high': 36.0, 'low': -31.0, 'vol': 13289.0}, {'ctm': 1599699600000, 'ctmString': 'Sep 10, 2020, 3:00:00 AM', 'open': 107687.0, 'close': 57.0, 'high': 59.0, 'low': -21.0, 'vol': 13551.0}, {'ctm': 1599703200000, 'ctmString': 'Sep 10, 2020, 4:00:00 AM', 'open': 107745.0, 'close': 7.0, 'high': 16.0, 'low': -38.0, 'vol': 8974.0}, {'ctm': 1599706800000, 'ctmStri

In [21]:
df = pd.DataFrame(resp['returnData']['rateInfos'])
df['datetime'] = pd.to_datetime(df['ctm'], unit='ms')
df['close'] = (df['open'] + df['close'])/100000
df['high'] = (df['open'] + df['high'])/100000
df['low'] = (df['open'] + df['low'])/100000
df['open'] = df['open']/100000

In [22]:
df.sort_values(by='datetime', inplace=True)

In [23]:
df.head(24)

Unnamed: 0,ctm,ctmString,open,close,high,low,vol,datetime
0,1599688800000,"Sep 10, 2020, 12:00:00 AM",1.07682,1.07689,1.07705,1.07647,7566.0,2020-09-09 22:00:00
1,1599692400000,"Sep 10, 2020, 1:00:00 AM",1.07687,1.07658,1.07691,1.07655,5109.0,2020-09-09 23:00:00
2,1599696000000,"Sep 10, 2020, 2:00:00 AM",1.07662,1.07686,1.07698,1.07631,13289.0,2020-09-10 00:00:00
3,1599699600000,"Sep 10, 2020, 3:00:00 AM",1.07687,1.07744,1.07746,1.07666,13551.0,2020-09-10 01:00:00
4,1599703200000,"Sep 10, 2020, 4:00:00 AM",1.07745,1.07752,1.07761,1.07707,8974.0,2020-09-10 02:00:00
5,1599706800000,"Sep 10, 2020, 5:00:00 AM",1.07752,1.07714,1.07752,1.07704,5970.0,2020-09-10 03:00:00
6,1599710400000,"Sep 10, 2020, 6:00:00 AM",1.07715,1.07733,1.07756,1.07693,5854.0,2020-09-10 04:00:00
7,1599714000000,"Sep 10, 2020, 7:00:00 AM",1.07734,1.0771,1.07748,1.07702,12140.0,2020-09-10 05:00:00
8,1599717600000,"Sep 10, 2020, 8:00:00 AM",1.0771,1.07711,1.07759,1.07643,24600.0,2020-09-10 06:00:00
9,1599721200000,"Sep 10, 2020, 9:00:00 AM",1.07709,1.07507,1.07713,1.075,36233.0,2020-09-10 07:00:00


In [24]:
df['datetime'] = df['datetime'].dt.tz_localize('GMT').dt.tz_convert('US/Eastern').dt.tz_localize(None)
df = df.loc[~((df['datetime']>=datetime.datetime(2021,12,25,12,0,0)) & (df['datetime']<datetime.datetime(2022,1,1,0,0,0)))].copy()

In [25]:
df = df.set_index('datetime')
df.drop(columns=['ctm', 'ctmString', 'vol'], inplace=True)
df = df[['open', 'high', 'low', 'close']]

df = df.resample(config.freq).agg({'open': 'first',
                                               'high': 'max',
                                               'low': 'min',
                                               'close': 'last'
                                               })

df.dropna(inplace=True)
df['y_pred'] = df['close'].shift(-config.steps_ahead) / df['close']
df['y_pred'] = [1 if y > 1 else 0 for y in df['y_pred']]
df['datetime'] = df.index



In [26]:
# df['hour'] = df.datetime.dt.hour
# df.groupby('hour')['y_pred'].mean()

In [27]:
df = prepare_processed_dataset(df=df)
#X, y, X_val, y_val, X_test, y_test, y_cat, y_val_cat, y_test_cat, train = train_test_val_split(df, config.seq_len, sc_x)
X, y, X_val, y_val, X_test, y_test, y_cat, y_val_cat, y_test_cat, train = train_test_val_split(df, sc_x=sc_x)

[2022-09-25 08:50:44,473][prepare_processed_dataset:89] exported to /home/jupyter/ml_investing_wne/src/ml_investing_wne/data/processed/EURCHF/EURCHF_processed_720min.csv
[2022-09-25 08:50:44,489][split_sequences:24] first sequence begins: 2020-09-30 12:00:00
[2022-09-25 08:50:44,490][split_sequences:25] first sequence ends: 2020-11-30 12:00:00
[2022-09-25 08:50:44,496][split_sequences:30] last sequence begins: 2021-07-30 12:00:00
[2022-09-25 08:50:44,497][split_sequences:31] last sequence ends: 2021-09-29 12:00:00
[2022-09-25 08:50:44,505][split_sequences:24] first sequence begins: 2021-08-13 12:00:00
[2022-09-25 08:50:44,506][split_sequences:25] first sequence ends: 2021-10-13 12:00:00
[2022-09-25 08:50:44,508][split_sequences:30] last sequence begins: 2021-10-26 00:00:00
[2022-09-25 08:50:44,508][split_sequences:31] last sequence ends: 2021-12-24 12:00:00
[2022-09-25 08:50:44,510][split_sequences:24] first sequence begins: 2021-11-14 12:00:00
[2022-09-25 08:50:44,511][split_sequences

In [28]:
df.loc[(df['datetime']>=datetime.datetime(2021,11,14,12,0,0)) & (df['datetime']<datetime.datetime(2022,9,1,0,0,0))].shape

(447, 42)

In [39]:
#df.head(10)
df.loc[df['datetime']>=datetime.datetime(2021,11,14,12,0,0)].head(96)
# 1.07307
# 1.07361
# 1.07459

Unnamed: 0,datetime,open,high,low,close,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,...,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos
639,2021-11-14 12:00:00,1.05440,1.05444,1.05283,1.05325,0,1.053650,1.053681,1.369000e-07,1.054164,...,1.055513,0.255857,0.161124,0.999554,12,6,-2.449294e-16,1.0,-2.449294e-16,1.0
640,2021-11-15 00:00:00,1.05324,1.05428,1.05212,1.05270,0,1.053223,1.053191,2.606333e-07,1.053736,...,1.055296,0.296173,0.168043,0.999478,0,0,0.000000e+00,1.0,0.000000e+00,1.0
641,2021-11-15 12:00:00,1.05271,1.05283,1.05008,1.05187,1,1.052607,1.052530,4.826333e-07,1.053104,...,1.054615,0.286972,0.091677,0.999212,12,0,-2.449294e-16,1.0,0.000000e+00,1.0
642,2021-11-16 00:00:00,1.05187,1.05498,1.05153,1.05381,0,1.052793,1.053170,9.474333e-07,1.053070,...,1.054506,0.272821,0.757571,1.001844,0,1,0.000000e+00,1.0,8.660254e-01,0.5
643,2021-11-16 12:00:00,1.05382,1.05428,1.05062,1.05262,0,1.052767,1.052895,9.570333e-07,1.052850,...,1.054152,0.247235,0.411641,0.998871,12,1,-2.449294e-16,1.0,8.660254e-01,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
730,2022-01-11 00:00:00,1.05049,1.05071,1.04894,1.04959,1,1.049350,1.048747,1.580800e-06,1.046820,...,1.053220,1.222808,0.716396,0.999162,0,1,0.000000e+00,1.0,8.660254e-01,0.5
731,2022-01-11 12:00:00,1.04962,1.05027,1.04912,1.05002,0,1.050027,1.049384,1.936333e-07,1.048192,...,1.053752,1.060810,0.664399,1.000410,12,1,-2.449294e-16,1.0,8.660254e-01,0.5
732,2022-01-12 00:00:00,1.05003,1.05050,1.04369,1.04396,1,1.047857,1.046672,1.143423e-05,1.048406,...,1.053156,0.906198,0.032031,0.994229,0,2,0.000000e+00,1.0,8.660254e-01,-0.5
733,2022-01-12 12:00:00,1.04397,1.04657,1.04396,1.04635,0,1.046777,1.046511,9.317433e-06,1.048078,...,1.053116,0.961322,0.328493,1.002289,12,2,-2.449294e-16,1.0,8.660254e-01,-0.5


In [14]:
mlflow.set_experiment(experiment_name=symbol + '_xtb_retrain_' + config.model + '_' + str(config.nb_classes))
early_stop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
model_path_final = os.path.join(config.package_directory, 'models',
                               '{}_{}_xtb_retrain_{}.h5'.format(config.model, symbol, config.freq))
model_checkpoint = ModelCheckpoint(filepath=model_path_final, monitor='val_accuracy', verbose=1, save_best_only=True)
csv_logger = CSVLogger(os.path.join(config.package_directory, 'logs', 'keras_log.csv'), append=True, separator=';')
callbacks = [early_stop, model_checkpoint, csv_logger]

In [None]:
model = load_model(os.path.join(config.package_directory, 'models', 'production',
                        '{}_{}_{}_{}_{}_{}'.format(config.model, 'hist_data',
                                                   config.currency, config.freq,
                                                   str(config.steps_ahead),
                                                   config.seq_len)))

In [30]:
if len(config.currency) > 1:
    model = load_model(os.path.join(config.package_directory, 'models', 'production',
                        '{}_{}_{}_{}_{}_{}'.format(config.model, 'hist_data_retrain',
                                                   config.currency, config.freq,
                                                   str(config.steps_ahead),
                                                   config.seq_len)))
else:
    model = build_model(input_shape=(X.shape[1], X.shape[2]), nb_classes=config.nb_classes)


2022-09-25 08:51:04.113397: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-09-25 08:51:04.113439: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-09-25 08:51:04.113461: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (python-20220811): /proc/driver/nvidia/version does not exist
2022-09-25 08:51:04.114007: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [31]:
model.evaluate(X, y_cat)

2022-09-25 08:51:06.685737: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)




[0.6979491710662842, 0.5443037748336792]

In [32]:
model.evaluate(X_val, y_val_cat)



[0.6687977910041809, 0.643478274345398]

In [33]:
model.evaluate(X_test, y_test_cat)



[0.6973540782928467, 0.48571428656578064]

In [20]:
history = model.fit(X, y_cat, batch_size=64, epochs=config.epochs, verbose=2,
                    validation_data=(X_val, y_val_cat), callbacks=callbacks)

Epoch 1/100
10/10 - 7s - loss: 0.6926 - accuracy: 0.5262 - val_loss: 0.6709 - val_accuracy: 0.5621

Epoch 00001: val_accuracy improved from -inf to 0.56213, saving model to /home/jupyter/ml_investing_wne/src/ml_investing_wne/models/transformer_learnable_encoding_USDCHF_xtb_retrain_720min.h5
Epoch 2/100
10/10 - 3s - loss: 0.6910 - accuracy: 0.5459 - val_loss: 0.6722 - val_accuracy: 0.5680

Epoch 00002: val_accuracy improved from 0.56213 to 0.56805, saving model to /home/jupyter/ml_investing_wne/src/ml_investing_wne/models/transformer_learnable_encoding_USDCHF_xtb_retrain_720min.h5
Epoch 3/100
10/10 - 3s - loss: 0.6838 - accuracy: 0.5443 - val_loss: 0.6699 - val_accuracy: 0.5503

Epoch 00003: val_accuracy did not improve from 0.56805
Epoch 4/100
10/10 - 3s - loss: 0.6784 - accuracy: 0.5672 - val_loss: 0.6653 - val_accuracy: 0.5325

Epoch 00004: val_accuracy did not improve from 0.56805
Epoch 5/100
10/10 - 3s - loss: 0.6786 - accuracy: 0.5689 - val_loss: 0.6720 - val_accuracy: 0.5503

Epo

In [34]:
y_pred = model.predict(X_test)
y_pred_class = y_pred.argmax(axis=-1)

if 'JPY' in config.currency:
    df['cost'] = (config.pips / 100) / df['close']
else:
    df['cost'] = (config.pips / 10000) / df['close']

start_date = joblib.load(os.path.join(config.package_directory, 'models',
                                      'first_sequence_ends_{}_{}_{}.save'.format('test',
                                                                                 config.currency,
                                                                                 config.freq)))
end_date = joblib.load(os.path.join(config.package_directory, 'models',
                                    'last_sequence_ends_{}_{}_{}.save'.format('test',
                                                                              config.currency,
                                                                              config.freq)))
lower_bounds = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
upper_bounds = [1 - lower for lower in lower_bounds]

for lower_bound, upper_bound in zip(lower_bounds, upper_bounds):
    portfolio_result, hit_ratio, time_active = compute_profitability_classes(df, y_pred, start_date,
                                                                             end_date, lower_bound,
                                                                             upper_bound)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  'Europe/Warsaw').dt.tz_localize(None)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction['hour_waw'] = prediction['datetime_waw'].dt.time
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction['prediction'] = y_pred[:, 1]
A value is trying to be set on a copy of a slice from a DataFrame.


In [15]:
y_pred

array([[0.536691  , 0.46330896],
       [0.53811723, 0.4618828 ],
       [0.61306053, 0.3869394 ],
       [0.39297333, 0.60702676],
       [0.52660555, 0.47339442],
       [0.48486406, 0.51513594],
       [0.5214261 , 0.47857395],
       [0.47500965, 0.5249904 ],
       [0.5269088 , 0.4730911 ],
       [0.46403438, 0.5359656 ],
       [0.5329735 , 0.4670264 ],
       [0.5270202 , 0.4729798 ],
       [0.5394048 , 0.4605952 ],
       [0.6032142 , 0.39678583],
       [0.39381197, 0.606188  ],
       [0.5145443 , 0.4854557 ],
       [0.48097858, 0.51902133],
       [0.507765  , 0.49223498],
       [0.4651756 , 0.53482443],
       [0.5183756 , 0.48162434],
       [0.45674977, 0.54325026],
       [0.5246932 , 0.47530678],
       [0.49126068, 0.5087393 ],
       [0.5054294 , 0.4945706 ],
       [0.5859575 , 0.41404253],
       [0.3744184 , 0.62558156],
       [0.5099476 , 0.4900524 ],
       [0.49622414, 0.5037759 ],
       [0.540905  , 0.45909503],
       [0.48020884, 0.51979107],
       [0.

In [27]:
import numpy as np
check = np.concatenate([y_pred, y_test_cat], axis=1)
import pandas as pd
check = pd.DataFrame(check)

In [28]:
check.columns = ['p_down', 'p_up','down','up']
check['correct'] = 0
check.loc[((check['p_up']>0.5) & (check['up']==1.0)) | ((check['p_down']>0.5) & (check['down']==1.0)), 'correct']=1

In [29]:
check[0:20]

Unnamed: 0,p_down,p_up,down,up,correct
0,0.536691,0.463309,0.0,1.0,0
1,0.538117,0.461883,1.0,0.0,1
2,0.613061,0.386939,0.0,1.0,0
3,0.392973,0.607027,0.0,1.0,1
4,0.526606,0.473394,1.0,0.0,1
5,0.484864,0.515136,0.0,1.0,1
6,0.521426,0.478574,1.0,0.0,1
7,0.47501,0.52499,1.0,0.0,0
8,0.526909,0.473091,0.0,1.0,0
9,0.464034,0.535966,0.0,1.0,1


In [14]:
import pandas as pd
pd.set_option('display.max_rows', 500)

In [15]:
df.loc[(df['datetime']>=datetime.datetime(2021,11,14,12,0,0)) & (df['datetime']<datetime.datetime(2022,9,1,0,0,0))]['datetime'].reset_index()

Unnamed: 0,index,datetime
0,639,2021-11-14 12:00:00
1,640,2021-11-15 00:00:00
2,641,2021-11-15 12:00:00
3,642,2021-11-16 00:00:00
4,643,2021-11-16 12:00:00
5,644,2021-11-17 00:00:00
6,645,2021-11-17 12:00:00
7,646,2021-11-18 00:00:00
8,647,2021-11-18 12:00:00
9,648,2021-11-19 00:00:00
