In [8]:
import random
import numpy as np
import tensorflow as tf
import mlflow.keras

from ml_investing_wne import config
from ml_investing_wne.data_engineering.load_data import get_hist_data
from ml_investing_wne.data_engineering.prepare_dataset import prepare_processed_dataset
from ml_investing_wne.train_test_val_split import train_test_val_split
from ml_investing_wne.helper import get_ml_flow_experiment_name, get_callbacks, \
    get_final_model_path, evaluate_model
from ml_investing_wne.models import model_factory
from ml_investing_wne.utils import get_logger
from ml_investing_wne.data_engineering.crypto_factory import CryptoFactory
from ml_investing_wne.data_engineering.prepare_dataset import prepare_processed_dataset
random.seed(config.seed)
np.random.seed(config.seed)
tf.random.set_seed(config.seed)

logger = get_logger()

In [2]:
import pandas as pd
pd.set_option('display.max_columns', None)

In [3]:

if config.RUN_TYPE == 'forex':
    if config.provider == 'hist_data':
        df = get_hist_data(currency=config.currency)
    else:
        logger.error('not implemented')
elif config.RUN_TYPE == 'crypto':
    crypto = CryptoFactory(config.provider, config.currency)
    crypto.generate_volumebars(frequency=1000)
    # crypto.time_aggregation(freq=config.freq)
    # df = crypto.df_time_aggregated
    df = crypto.df_volume_bars
    # crypto.run_3_barriers()
     #df = crypto.df_3_barriers
    logger.info(f'df head: {df.head()}')
df = prepare_processed_dataset(df=df, add_target=True)


2022-12-18 22:27:52,740 - ml_investing_wne.data_engineering.crypto_factory - head of raw dataset:                 q       p       s              t           d  side  \
9035029  3.300000  170.38  ethusd  1554296895913  2019-04-03     0   
9035030  5.652455  170.38  ethusd  1554296902715  2019-04-03     0   
9035031  6.214196  170.38  ethusd  1554296903048  2019-04-03     0   
9035032  6.078305  170.38  ethusd  1554296903556  2019-04-03     0   
9035033  1.626045  170.38  ethusd  1554296922824  2019-04-03     0   

             buy_id     sell_id                datetime  
9035029  3068696456  3068689528 2019-04-03 13:08:15.913  
9035030  3068696910  3068689528 2019-04-03 13:08:22.715  
9035031  3068696932  3068689528 2019-04-03 13:08:23.048  
9035032  3068696970  3068689528 2019-04-03 13:08:23.556  
9035033  3068698401  3068689528 2019-04-03 13:08:42.824  
2022-12-18 22:28:22,735 - root - df head:                            open    high     low   close
datetime                           

In [4]:
df.head()

Unnamed: 0_level_0,open,high,low,close,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,EMA_5,VAR_5,SMA_10,EMA_10,VAR_10,SMA_13,EMA_13,VAR_13,SMA_20,EMA_20,VAR_20,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,RSI_10,RSI_6,STOCHk_14_3_3,STOCHd_14_3_3,WILLR_14,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
2019-04-03 17:48:32.688,173.5,174.3,173.25,174.06,1.003907,173.886667,173.801257,0.112533,173.312,173.772793,0.74027,174.307,173.77558,2.129468,174.179231,173.662608,1.797224,173.529,173.136424,3.909441,1.229937,-0.519207,1.749144,54.228419,53.687279,53.00561,52.706935,48.41238,-44.966443,171.772889,173.312,174.851111,1.776116,0.742997,1.003228,17,2,-0.997669,-0.068242,0.866025,-0.5
2019-04-03 17:52:05.209,174.06,175.03,174.06,174.74,0.990042,174.1,174.270628,0.3856,173.842,174.095195,0.52562,174.218,173.950929,1.947018,174.335385,173.816522,1.617127,173.7595,173.289146,3.322637,1.220882,-0.42261,1.643492,56.367026,56.646441,58.374297,55.57047,52.260833,-35.838926,172.545087,173.842,175.138913,1.49206,0.846207,1.003907,17,2,-0.997669,-0.068242,0.866025,-0.5
2019-04-03 17:58:04.437,174.83,175.0,172.18,173.0,0.998497,173.933333,173.635314,0.768933,173.88,173.73013,0.4348,173.912,173.778033,1.630818,174.223846,173.699876,1.751159,173.905,173.261608,2.622016,1.061071,-0.465937,1.527008,49.937397,47.937901,43.215023,53.333333,53.870246,-59.194631,172.700441,173.88,175.059559,1.356751,0.126979,0.990042,17,2,-0.997669,-0.068242,0.866025,-0.5
2019-04-03 18:05:39.522,173.0,173.58,172.42,172.74,1.007294,173.493333,173.187657,1.182533,173.608,173.400087,0.65512,173.506,173.5893,0.67356,174.119231,173.562751,1.921508,174.071,173.211931,1.605746,0.90303,-0.499182,1.402212,49.037287,46.744772,41.292239,47.427293,52.110365,-62.684564,172.160111,173.608,175.055889,1.667999,0.200253,0.998497,18,2,-0.979084,0.203456,0.866025,-0.5
2019-04-03 18:37:28.022,172.73,174.33,172.62,174.0,1.002874,173.246667,173.593829,0.442533,173.708,173.600058,0.67812,173.519,173.663972,0.685766,173.993846,173.625215,1.715459,173.9665,173.286985,1.379971,0.869431,-0.426225,1.295656,53.419155,53.038462,53.360134,44.116331,48.292319,-45.771812,172.234913,173.708,175.181087,1.696049,0.599112,1.007294,18,2,-0.979084,0.203456,0.866025,-0.5


In [8]:
df = prepare_processed_dataset(df=df, add_target=True)

2022-12-18 20:32:05,354 - ml_investing_wne.data_engineering.prepare_dataset - exported to /Users/i0495036/Documents/sandbox/ml_investing_wne/ml_investing_wne/src/ml_investing_wne/data/processed/ETHUSD/ETHUSD_processed_15min.csv


In [10]:
df.head()

Unnamed: 0_level_0,open,high,low,close,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,EMA_5,VAR_5,SMA_10,EMA_10,VAR_10,SMA_13,EMA_13,VAR_13,SMA_20,EMA_20,VAR_20,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,RSI_10,RSI_6,STOCHk_14_3_3,STOCHd_14_3_3,WILLR_14,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
2019-04-03 17:48:32.688,173.5,174.3,173.25,174.06,1.003907,173.886667,173.801257,0.112533,173.312,173.772793,0.74027,174.307,173.77558,2.129468,174.179231,173.662608,1.797224,173.529,173.136424,3.909441,1.229937,-0.519207,1.749144,54.228419,53.687279,53.00561,52.706935,48.41238,-44.966443,171.772889,173.312,174.851111,1.776116,0.742997,1.003228,17,2,-0.997669,-0.068242,0.866025,-0.5
2019-04-03 17:52:05.209,174.06,175.03,174.06,174.74,0.990042,174.1,174.270628,0.3856,173.842,174.095195,0.52562,174.218,173.950929,1.947018,174.335385,173.816522,1.617127,173.7595,173.289146,3.322637,1.220882,-0.42261,1.643492,56.367026,56.646441,58.374297,55.57047,52.260833,-35.838926,172.545087,173.842,175.138913,1.49206,0.846207,1.003907,17,2,-0.997669,-0.068242,0.866025,-0.5
2019-04-03 17:58:04.437,174.83,175.0,172.18,173.0,0.998497,173.933333,173.635314,0.768933,173.88,173.73013,0.4348,173.912,173.778033,1.630818,174.223846,173.699876,1.751159,173.905,173.261608,2.622016,1.061071,-0.465937,1.527008,49.937397,47.937901,43.215023,53.333333,53.870246,-59.194631,172.700441,173.88,175.059559,1.356751,0.126979,0.990042,17,2,-0.997669,-0.068242,0.866025,-0.5
2019-04-03 18:05:39.522,173.0,173.58,172.42,172.74,1.007294,173.493333,173.187657,1.182533,173.608,173.400087,0.65512,173.506,173.5893,0.67356,174.119231,173.562751,1.921508,174.071,173.211931,1.605746,0.90303,-0.499182,1.402212,49.037287,46.744772,41.292239,47.427293,52.110365,-62.684564,172.160111,173.608,175.055889,1.667999,0.200253,0.998497,18,2,-0.979084,0.203456,0.866025,-0.5
2019-04-03 18:37:28.022,172.73,174.33,172.62,174.0,1.002874,173.246667,173.593829,0.442533,173.708,173.600058,0.67812,173.519,173.663972,0.685766,173.993846,173.625215,1.715459,173.9665,173.286985,1.379971,0.869431,-0.426225,1.295656,53.419155,53.038462,53.360134,44.116331,48.292319,-45.771812,172.234913,173.708,175.181087,1.696049,0.599112,1.007294,18,2,-0.979084,0.203456,0.866025,-0.5


In [4]:
logger.info(f' df shape before merge wiith 3 barriers additional info is {df.shape}')
df = df.merge(crypto.df_3_barriers_additional_info[['datetime', 'time_step']], on='datetime', how='inner')
logger.info(f' df shape after merge wiith 3 barriers additional info is {df.shape}')

2022-12-18 15:25:37,636 - root -  df shape before merge wiith 3 barriers additional info is (22505, 41)
2022-12-18 15:25:37,664 - root -  df shape after merge wiith 3 barriers additional info is (22505, 43)


In [5]:
df.head()

Unnamed: 0,datetime,open,close,high,low,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,EMA_5,VAR_5,SMA_10,EMA_10,VAR_10,SMA_13,EMA_13,VAR_13,SMA_20,EMA_20,VAR_20,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,RSI_10,RSI_6,STOCHk_14_3_3,STOCHd_14_3_3,WILLR_14,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos,time_step
0,2019-04-05 00:00:00,157.7,158.67,159.41,157.7,1,157.88,158.07671,0.5233,157.688,157.764167,1.44082,156.956,157.958818,4.887471,158.046923,158.448886,8.03214,159.504,159.924495,9.659404,-2.992995,0.659079,-3.652074,44.781036,47.408148,54.221027,49.456235,46.228536,-41.751527,155.540764,157.688,159.835236,2.723399,0.728666,1.006023,0,4,0.0,1.0,-0.866025,-0.5,4
1,2019-04-05 01:00:00,158.5,157.85,159.17,157.84,1,158.08,157.963355,0.2653,158.076,157.792778,0.46888,156.621,157.939033,2.850299,157.707692,158.363331,6.431536,159.249,159.726924,9.109946,-2.804287,0.678229,-3.482517,42.934468,44.663934,48.645731,52.240326,49.961709,-50.101833,156.851085,158.076,159.300915,1.549779,0.407749,0.994832,1,4,0.269797,0.962917,-0.866025,-0.5,3
2,2019-04-05 02:00:00,157.95,158.16,158.4,156.99,1,158.226667,158.061677,0.171433,157.93,157.915186,0.27835,156.868,157.979208,2.949373,157.427692,158.334284,4.968069,158.9995,159.577693,8.3059,-2.599753,0.706211,-3.305964,43.876677,45.97748,50.934518,53.733876,51.810146,-46.94501,156.98622,157.93,158.87378,1.195187,0.62185,1.001964,2,4,0.519584,0.854419,-0.866025,-0.5,2
3,2019-04-05 03:00:00,158.21,157.12,158.33,156.2,1,157.71,157.590839,0.2851,157.904,157.650124,0.32593,157.18,157.822989,1.934333,157.13,158.160815,3.82245,158.7235,159.343627,7.714098,-2.492841,0.650498,-3.143339,41.406661,42.239912,43.185908,48.472505,51.482236,-57.535642,156.882738,157.904,158.925262,1.293523,0.116161,0.993424,3,4,0.730836,0.682553,-0.866025,-0.5,1
4,2019-04-05 04:00:00,157.34,163.84,164.84,157.34,0,159.706667,160.715419,13.083733,159.128,159.713416,7.25377,158.007,158.916991,5.814801,157.333077,158.972127,6.149373,158.7705,159.771853,8.171531,-1.844602,1.03899,-2.883592,57.89891,63.526769,73.933513,62.36964,54.858674,-8.410429,154.31011,159.128,163.94589,6.055364,0.989011,1.04277,4,4,0.887885,0.460065,-0.866025,-0.5,11


In [5]:
X, y, X_val, y_val, X_test, y_test, y_cat, y_val_cat, y_test_cat, _ = train_test_val_split(df, 
                         nb_classes=config.nb_classes, freq=config.freq,
                         seq_len=config.seq_len, steps_ahead=config.steps_ahead,
                         train_end=config.train_end, val_end=config.val_end,
                         test_end=config.test_end, binarize_target=True, time_step=False)

2022-12-18 22:29:04,140 - ml_investing_wne.train_test_val_split - didnt find time step in the dataset
2022-12-18 22:29:04,480 - ml_investing_wne.train_test_val_split - first sequence begins: 2019-04-03 17:48:32.688000
2022-12-18 22:29:04,481 - ml_investing_wne.train_test_val_split - first sequence ends: 2019-04-03 21:19:23.757000
2022-12-18 22:29:04,671 - ml_investing_wne.train_test_val_split - last sequence begins: 2021-03-30 09:43:17.824000
2022-12-18 22:29:04,672 - ml_investing_wne.train_test_val_split - last sequence ends: 2021-03-30 22:23:38.227000
2022-12-18 22:29:04,845 - ml_investing_wne.train_test_val_split - first sequence begins: 2021-03-30 22:23:38.227000
2022-12-18 22:29:04,845 - ml_investing_wne.train_test_val_split - first sequence ends: 2021-03-31 15:16:31.640000
2022-12-18 22:29:04,878 - ml_investing_wne.train_test_val_split - last sequence begins: 2021-06-30 04:54:38.700000
2022-12-18 22:29:04,878 - ml_investing_wne.train_test_val_split - last sequence ends: 2021-06-3

In [6]:
X.shape

(37199, 24, 40)

In [7]:
mlflow.tensorflow.autolog()
mlflow.set_experiment(experiment_name=get_ml_flow_experiment_name())
callbacks = get_callbacks()
model = model_factory(X)
history = model.fit(X, y_cat, batch_size=config.batch, epochs=15, verbose=2,
                    validation_data=(X_val, y_val_cat), callbacks=callbacks)
model.save(get_final_model_path())

2022-12-18 22:29:14.820689: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022/12/18 22:29:15 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '1c9212b47bf54811a8177843a8911f9a', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow
2022-12-18 22:29:15.266903: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-12-18 22:29:15.266924: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2022-12-18 22:29:15.267114: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-12-18 22:29:15.468351: I tensorflow/compiler/mlir/ml

Epoch 1/15


2022-12-18 22:29:18.682675: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-12-18 22:29:18.682689: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2022-12-18 22:29:18.755939: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2022-12-18 22:29:18.761599: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-12-18 22:29:18.767470: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: /var/folders/fy/kbq5nlv945s7pr1wvh78h5840000gn/T/tmpoh8f3tl0/train/plugins/profile/2022_12_18_22_29_18

2022-12-18 22:29:18.771022: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to /var/folders/fy/kbq5nlv945s7pr1wvh78h5840000gn/T/tmpoh8f3tl0/train/plugins/profile/2022_12_18_22_29_18/macC02X74LNJG5J.trace.json.gz
2022-12-18 22:29:18.784540: I tensorflow/core/profiler/rpc/client/sav

582/582 - 50s - loss: 0.6998 - accuracy: 0.5004 - val_loss: 0.7037 - val_accuracy: 0.5056

Epoch 00001: val_accuracy improved from -inf to 0.50563, saving model to /Users/i0495036/Documents/sandbox/ml_investing_wne/ml_investing_wne/src/ml_investing_wne/models/transformer_learnable_encoding_Bitstamp_ETHUSD_5min_1.h5
Epoch 2/15
582/582 - 63s - loss: 0.6947 - accuracy: 0.5039 - val_loss: 0.7275 - val_accuracy: 0.5117

Epoch 00002: val_accuracy improved from 0.50563 to 0.51171, saving model to /Users/i0495036/Documents/sandbox/ml_investing_wne/ml_investing_wne/src/ml_investing_wne/models/transformer_learnable_encoding_Bitstamp_ETHUSD_5min_1.h5
Epoch 3/15
582/582 - 64s - loss: 0.6935 - accuracy: 0.5110 - val_loss: 0.6984 - val_accuracy: 0.4917

Epoch 00003: val_accuracy did not improve from 0.51171
Epoch 4/15
582/582 - 69s - loss: 0.6935 - accuracy: 0.5058 - val_loss: 0.6992 - val_accuracy: 0.5153

Epoch 00004: val_accuracy improved from 0.51171 to 0.51528, saving model to /Users/i0495036/D

2022-12-18 22:45:21.093705: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2022-12-18 22:45:24,580 - absl - Found untraced functions such as embedding_layer_call_fn, embedding_layer_call_and_return_conditional_losses, query_layer_call_fn, query_layer_call_and_return_conditional_losses, key_layer_call_fn while saving (showing 5 of 125). These functions will not be directly callable after loading.


INFO:tensorflow:Assets written to: /var/folders/fy/kbq5nlv945s7pr1wvh78h5840000gn/T/tmpgjiti4xh/model/data/model/assets


2022-12-18 22:45:26,116 - tensorflow - Assets written to: /var/folders/fy/kbq5nlv945s7pr1wvh78h5840000gn/T/tmpgjiti4xh/model/data/model/assets
2022-12-18 22:45:40,170 - absl - Found untraced functions such as embedding_layer_call_fn, embedding_layer_call_and_return_conditional_losses, query_layer_call_fn, query_layer_call_and_return_conditional_losses, key_layer_call_fn while saving (showing 5 of 125). These functions will not be directly callable after loading.


INFO:tensorflow:Assets written to: /Users/i0495036/Documents/sandbox/ml_investing_wne/ml_investing_wne/src/ml_investing_wne/models/production/transformer_learnable_encoding_Bitstamp_ETHUSD_5min_1_24/assets


2022-12-18 22:45:41,355 - tensorflow - Assets written to: /Users/i0495036/Documents/sandbox/ml_investing_wne/ml_investing_wne/src/ml_investing_wne/models/production/transformer_learnable_encoding_Bitstamp_ETHUSD_5min_1_24/assets


In [14]:
df['cost'] = (config.pips / 10000) / df['close']

In [15]:
import joblib
def load_test_dates():

    name = f'test_{config.currency}_{config.freq}.save'

    start_date = joblib.load(os.path.join(config.package_directory, 'models',
                                        f'first_sequence_ends_{name}'))
    end_date = joblib.load(os.path.join(config.package_directory, 'models',
                                        f'last_sequence_ends_{name}'))

    return start_date, end_date

In [16]:
test_loss, test_acc = model.evaluate(X_test, y_test_cat)
logger.info('Test accuracy : %.4f', test_acc)
logger.info('Test loss : %.4f', test_loss)
mlflow.log_metric("test_acc", test_acc)
mlflow.log_metric("test_loss", test_loss)
mlflow.log_metric("test_loss", test_loss)
mlflow.set_tag('currency', config.currency)
mlflow.set_tag('frequency', config.freq)
mlflow.set_tag('steps_ahead', config.steps_ahead)
mlflow.log_metric('y_distribution', y.mean())
mlflow.log_metric('y_val_distribution', y_val.mean())
mlflow.log_metric('y_test_distribution', y_test.mean())
mlflow.log_metric('cost', config.pips)
mlflow.log_metric('seq_len', config.seq_len)
y_pred = model.predict(X_test)
y_pred_class = y_pred.argmax(axis=-1)
start_date, end_date = load_test_dates()
lower_bounds = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
upper_bounds = [1 - lower for lower in lower_bounds]



2022-12-18 21:07:15,932 - root - Test accuracy : 0.5163
2022-12-18 21:07:15,934 - root - Test loss : 0.6951


In [17]:
df.head()

Unnamed: 0,datetime,open,high,low,close,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,EMA_5,VAR_5,SMA_10,EMA_10,VAR_10,SMA_13,EMA_13,VAR_13,SMA_20,EMA_20,VAR_20,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,RSI_10,RSI_6,STOCHk_14_3_3,STOCHd_14_3_3,WILLR_14,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos,cost
0,2019-04-03 17:48:32.688,173.5,174.3,173.25,174.06,1.003907,173.886667,173.801257,0.112533,173.312,173.772793,0.74027,174.307,173.77558,2.129468,174.179231,173.662608,1.797224,173.529,173.136424,3.909441,1.229937,-0.519207,1.749144,54.228419,53.687279,53.00561,52.706935,48.41238,-44.966443,171.772889,173.312,174.851111,1.776116,0.742997,1.003228,17,2,-0.997669,-0.068242,0.866025,-0.5,0.0
1,2019-04-03 17:52:05.209,174.06,175.03,174.06,174.74,0.990042,174.1,174.270628,0.3856,173.842,174.095195,0.52562,174.218,173.950929,1.947018,174.335385,173.816522,1.617127,173.7595,173.289146,3.322637,1.220882,-0.42261,1.643492,56.367026,56.646441,58.374297,55.57047,52.260833,-35.838926,172.545087,173.842,175.138913,1.49206,0.846207,1.003907,17,2,-0.997669,-0.068242,0.866025,-0.5,0.0
2,2019-04-03 17:58:04.437,174.83,175.0,172.18,173.0,0.998497,173.933333,173.635314,0.768933,173.88,173.73013,0.4348,173.912,173.778033,1.630818,174.223846,173.699876,1.751159,173.905,173.261608,2.622016,1.061071,-0.465937,1.527008,49.937397,47.937901,43.215023,53.333333,53.870246,-59.194631,172.700441,173.88,175.059559,1.356751,0.126979,0.990042,17,2,-0.997669,-0.068242,0.866025,-0.5,0.0
3,2019-04-03 18:05:39.522,173.0,173.58,172.42,172.74,1.007294,173.493333,173.187657,1.182533,173.608,173.400087,0.65512,173.506,173.5893,0.67356,174.119231,173.562751,1.921508,174.071,173.211931,1.605746,0.90303,-0.499182,1.402212,49.037287,46.744772,41.292239,47.427293,52.110365,-62.684564,172.160111,173.608,175.055889,1.667999,0.200253,0.998497,18,2,-0.979084,0.203456,0.866025,-0.5,0.0
4,2019-04-03 18:37:28.022,172.73,174.33,172.62,174.0,1.002874,173.246667,173.593829,0.442533,173.708,173.600058,0.67812,173.519,173.663972,0.685766,173.993846,173.625215,1.715459,173.9665,173.286985,1.379971,0.869431,-0.426225,1.295656,53.419155,53.038462,53.360134,44.116331,48.292319,-45.771812,172.234913,173.708,175.181087,1.696049,0.599112,1.007294,18,2,-0.979084,0.203456,0.866025,-0.5,0.0


In [9]:
prediction = df.loc[(df.datetime >= start_date) & (df.datetime <= end_date)]
prediction

Unnamed: 0,datetime,open,close,high,low,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,...,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos,cost
18735,2021-07-01 00:00:00,35059.71,34722.86,35071.34,34722.86,0,34876.606667,34820.805209,24742.590621,34805.520,...,1.691118,0.359566,0.991028,0,3,0.000000e+00,1.000000,1.224647e-16,-1.0,0.0
18736,2021-07-01 01:00:00,34742.43,34957.77,35039.0,34742.43,0,34905.953333,34889.287605,26720.849421,34877.732,...,1.274744,0.680022,1.006765,1,3,2.697968e-01,0.962917,1.224647e-16,-1.0,0.0
18737,2021-07-01 02:00:00,34933.7,34314.85,34940.87,34224.27,0,34665.160000,34602.068802,105833.499087,34780.488,...,2.934141,0.04372,0.981609,2,3,5.195840e-01,0.854419,1.224647e-16,-1.0,0.0
18738,2021-07-01 03:00:00,34323.34,34130.0,34419.21,34130.0,0,34467.540000,34366.034401,188786.470287,34632.542,...,4.103620,0.146393,0.994613,3,3,7.308360e-01,0.682553,1.224647e-16,-1.0,0.0
18739,2021-07-01 04:00:00,34116.87,34342.53,34343.58,34053.76,0,34262.460000,34354.282201,13350.784287,34493.602,...,3.498151,0.374799,1.006227,4,3,8.878852e-01,0.460065,1.224647e-16,-1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22233,2021-11-25 19:00:00,59170.62,58915.91,59309.25,58723.51,0,59097.673333,59020.969800,25738.502241,59095.446,...,0.706772,0.070148,0.995915,19,3,-8.878852e-01,0.460065,1.224647e-16,-1.0,0.0
22234,2021-11-25 20:00:00,58910.1,58917.7,59149.98,58864.54,0,58997.060000,58969.334900,19323.396108,59052.818,...,0.833842,0.225597,1.00003,20,3,-7.308360e-01,0.682553,1.224647e-16,-1.0,0.0
22235,2021-11-25 21:00:00,58895.83,58863.09,59109.68,58757.34,0,58898.900000,58916.212450,962.568108,59014.762,...,0.979998,0.237747,0.999073,21,3,-5.195840e-01,0.854419,1.224647e-16,-1.0,0.0
22236,2021-11-25 22:00:00,58866.49,58937.88,58952.38,58586.64,0,58906.223333,58927.046225,1497.171441,58958.430,...,0.696096,0.449928,1.001271,22,3,-2.697968e-01,0.962917,1.224647e-16,-1.0,0.0


In [10]:
crypto.df_3_barriers_additional_info.reset_index(inplace=True)

In [11]:
prediction = prediction.merge(crypto.df_3_barriers_additional_info, on='datetime', how='left')
prediction['y_pred'] = prediction['prc_change']

In [12]:
prediction['y_pred'] = prediction['prc_change']

In [13]:
upper_bound = 0.6
lower_bound = 0.4


# recreate target as continous variable
# df['y_pred'] = df['close'].shift(-config.steps_ahead) / df['close'] - 1
# new_start = config.val_end + config.seq_len * datetime.timedelta(minutes=int(''.join(filter(str.isdigit, config.freq))))

if config.provider == 'hist_data':
    prediction['datetime_local'] = prediction['datetime'].dt.tz_localize('US/Eastern').dt.tz_convert(
        'Europe/London').dt.tz_localize(None)
else:
    prediction['datetime_local'] = prediction['datetime']
prediction['hour_local'] = prediction['datetime_local'].dt.time
prediction['prediction'] = y_pred[:, 1]
conditions = [
    (prediction['prediction'] <= lower_bound),
    (prediction['prediction'] > lower_bound) & (prediction['prediction'] <= upper_bound),
    (prediction['prediction'] > upper_bound)
]
values = [0, 0.5, 1]
prediction['trade'] = np.select(conditions, values)

prediction.reset_index(inplace=True)
# drop last row for which we don't have a label - this works only for one step ahead prediction
#prediction.drop(prediction.tail(1).index, inplace=True)
 

In [16]:
prediction.loc[1046]

index                                  1046
datetime                2021-08-13 20:00:00
open                                46467.5
close                              47604.28
high                               47929.18
low                                46434.79
y_pred                             0.001233
SMA_3                          46866.363333
EMA_3                          47048.488187
VAR_3                         409279.093228
SMA_5                             46737.638
EMA_5                          46856.989847
VAR_5                         236007.349875
SMA_10                             46582.25
EMA_10                         46588.083012
VAR_10                        142578.692605
SMA_13                         46506.082308
EMA_13                         46450.170706
VAR_13                        128800.101933
SMA_20                           46126.8435
EMA_20                         46186.182523
VAR_20                        422734.722031
MACD_12_26_9                    

In [5]:
import datetime

In [6]:
crypto.df_3_barriers[crypto.df_3_barriers.index>datetime.datetime(2021,8,14,4,0,0)]

Unnamed: 0_level_0,open,close,high,low,y_pred
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-08-14 05:00:00,47492.5,47570.06,47607.07,47456.61,1
2021-08-14 06:00:00,47555.3,47550.0,47703.49,47532.9,1
2021-08-14 07:00:00,47550.0,47663.03,47763.88,47526.0,1
2021-08-14 08:00:00,47661.99,47638.67,47739.43,47560.47,1
2021-08-14 09:00:00,47660.18,47000.0,48190.0,46317.05,0
...,...,...,...,...,...
2021-11-26 07:00:00,57585.33,56997.67,57672.26,56661.09,
2021-11-26 08:00:00,56960.23,55251.91,56960.23,54377.58,
2021-11-26 09:00:00,55218.27,54917.08,55514.82,54619.36,
2021-11-26 10:00:00,54800.0,54412.14,54950.0,54356.43,


In [36]:
crypto.df_3_barriers.y_pred.value_counts()

0    11907
1    11042
Name: y_pred, dtype: int64

In [9]:
df.loc[df['datetime']>datetime.datetime(2021,8,14,1,0,0)]

Unnamed: 0,datetime,open,close,high,low,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,...,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos
20013,2021-08-14 02:00:00,47609.86,47563.72,47661.84,47428.17,0,47605.340000,47595.933253,1982.990795,47655.730,...,47868.198356,0.891680,0.283474,0.999238,2,5,5.195840e-01,0.854419,-8.660254e-01,0.5
20014,2021-08-14 03:00:00,47546.99,47585.89,47625.0,47444.32,1,47583.203333,47590.911626,334.473229,47652.498,...,47868.594935,0.906970,0.345884,1.000466,3,5,7.308360e-01,0.682553,-8.660254e-01,0.5
20015,2021-08-14 04:00:00,47556.17,47487.9,47702.14,47471.02,1,47545.836667,47539.405813,2640.370229,47577.962,...,47685.251005,0.451003,0.080283,0.997941,4,5,8.878852e-01,0.460065,-8.660254e-01,0.5
20016,2021-08-14 05:00:00,47492.5,47570.06,47607.07,47456.61,1,47547.950000,47554.732907,2767.149095,47561.514,...,47639.346040,0.327290,0.5549,1.00173,5,5,9.790841e-01,0.203456,-8.660254e-01,0.5
20017,2021-08-14 06:00:00,47555.3,47550.0,47703.49,47532.9,1,47535.986667,47552.366453,1834.846529,47551.514,...,47619.181913,0.284609,0.488813,0.999578,6,5,9.976688e-01,-0.068242,-8.660254e-01,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22503,2021-11-25 20:00:00,58910.1,58917.7,59149.98,58864.54,0,58997.060000,58969.334900,19323.396108,59052.818,...,59299.021470,0.833842,0.225597,1.00003,20,3,-7.308360e-01,0.682553,1.224647e-16,-1.0
22504,2021-11-25 21:00:00,58895.83,58863.09,59109.68,58757.34,0,58898.900000,58916.212450,962.568108,59014.762,...,59303.933644,0.979998,0.237747,0.999073,21,3,-5.195840e-01,0.854419,1.224647e-16,-1.0
22505,2021-11-25 22:00:00,58866.49,58937.88,58952.38,58586.64,0,58906.223333,58927.046225,1497.171441,58958.430,...,59163.633589,0.696096,0.449928,1.001271,22,3,-2.697968e-01,0.962917,1.224647e-16,-1.0
22506,2021-11-25 23:00:00,58945.17,58990.14,59364.85,58842.0,0,58930.370000,58958.593112,4077.725708,58924.944,...,59006.811525,0.277871,0.89818,1.000887,23,3,-2.449294e-16,1.000000,1.224647e-16,-1.0


In [15]:
# INITIALIZE PORTFOLIO
budget = 100
transaction = None
triple_barrier = True
i = 0# ITERATE OVER PREDICTIONS
# cost is added once as it represents spread
while i < prediction.shape[0]:
    print(i)
    if prediction.loc[i, 'trade'] == 1:
        # add transaction cost if position changes
        if transaction != 'buy':
            budget = budget * (1 - prediction.loc[i, 'cost'])
        transaction = 'buy'
        budget = budget + budget * prediction.loc[i, 'y_pred']
        prediction.loc[i, 'budget'] = budget
        prediction.loc[i, 'transaction'] = transaction
        i = i + config.steps_ahead
    elif prediction.loc[i, 'trade'] == 0:
        # add transaction cost if position changes
        if transaction != 'sell':
            budget = budget * (1 - prediction.loc[i, 'cost'])
        transaction = 'sell'
        budget = budget + budget * (-prediction.loc[i, 'y_pred'])
        prediction.loc[i, 'budget'] = budget
        prediction.loc[i, 'transaction'] = transaction
        i = i + config.steps_ahead
    elif prediction.loc[i, 'trade'] == 0.5:
        if transaction in ['buy', 'sell']:
            # budget = budget * (1 - prediction.loc[i, 'cost']) # spread is included once in transaction costs
            transaction = None
        prediction.loc[i, 'budget'] = budget
        prediction.loc[i, 'transaction'] = transaction
        if triple_barrier:
            next_date = prediction.loc[i ,'barrier_touched_date']
            i = prediction.loc[prediction['datetime']==next_date].index[0]
        else:    
            i = i + 1 # SUMMARIZE RESULTS

hits = prediction.loc[((prediction['transaction'] == 'buy') & (prediction['y_pred'] > 0)) |
                      ((prediction['transaction'] == 'sell') & (prediction['y_pred'] < 0))].shape[0]
transactions = prediction.loc[prediction['transaction'].isin(['buy', 'sell'])].shape[0]
try:
    hits_ratio = hits / transactions
except ZeroDivisionError:
    hits_ratio = 0
share_of_time_active = round(prediction.loc[prediction['transaction'].isin(['buy', 'sell'])].shape[0] * \
                             config.steps_ahead / prediction.shape[0], 2)
logger.info('''share_of_time_active for bounds %.2f-%.2f is %.2f and hit ratio is %.4f''',
            lower_bound, upper_bound, share_of_time_active, hits_ratio)
logger.info('Portfolio result:  %.2f', budget)

0
3
6
17
19
24
25
29
31
34
37
48
55
56
57
68
70
77
78
79
80
81
92
93
94
96
98
107
114
124
130
141
143
148
159
165
167
168
175
186
192
194
205
213
216
218
229
232
233
244
249
254
260
264
267
273
275
279
281
284
291
293
298
301
305
309
311
313
319
324
325
326
327
338
343
344
347
351
353
358
360
364
368
370
374
380
382
386
390
391
392
393
394
398
405
406
412
419
422
427
428
430
438
439
441
452
455
456
467
478
481
486
492
493
494
500
511
513
514
525
531
540
544
546
547
558
564
567
572
576
580
585
586
589
591
594
595
596
597
598
599
600
601
612
614
615
616
622
630
631
632
633
634
635
636
641
642
646
655
656
667
678
686
691
700
711
712
713
715
721
732
736
737
739
743
744
750
756
757
760
761
763
768
770
781
782
783
784
786
788
790
798
801
804
815
818
820
822
825
828
839
840
841
842
843
844
845
846
847
848
849
850
852
853
854
855
856
857
858
859
860
861
864
873
875
886
894
898
903
904
905
907
912
914
918
920
930
938
939
941
951
954
965
967
970
973
984
986
992
999
1000
1007
1014
1016
1026
1028


IndexError: index 0 is out of bounds for axis 0 with size 0

In [15]:
prediction

Unnamed: 0,index,datetime,open,close,high,low,y_pred,SMA_3,EMA_3,VAR_3,...,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos,cost,datetime_local,hour_local,prediction,trade
0,18678,2021-07-01 00:00:00,35059.71,34722.86,35071.34,34722.86,0,34876.606667,34820.805209,24742.590621,...,3,0.000000e+00,1.000000,1.224647e-16,-1.0,0.0,2021-07-01 00:00:00,00:00:00,0.437496,0.5
1,18679,2021-07-01 01:00:00,34742.43,34957.77,35039.0,34742.43,0,34905.953333,34889.287605,26720.849421,...,3,2.697968e-01,0.962917,1.224647e-16,-1.0,0.0,2021-07-01 01:00:00,01:00:00,0.372683,0.0
2,18680,2021-07-01 02:00:00,34933.7,34314.85,34940.87,34224.27,1,34665.160000,34602.068802,105833.499087,...,3,5.195840e-01,0.854419,1.224647e-16,-1.0,0.0,2021-07-01 02:00:00,02:00:00,0.546845,0.5
3,18681,2021-07-01 03:00:00,34323.34,34130.0,34419.21,34130.0,0,34467.540000,34366.034401,188786.470287,...,3,7.308360e-01,0.682553,1.224647e-16,-1.0,0.0,2021-07-01 03:00:00,03:00:00,0.589272,0.5
4,18682,2021-07-01 04:00:00,34116.87,34342.53,34343.58,34053.76,0,34262.460000,34354.282201,13350.784287,...,3,8.878852e-01,0.460065,1.224647e-16,-1.0,0.0,2021-07-01 04:00:00,04:00:00,0.539866,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3494,22172,2021-11-25 19:00:00,59170.62,58915.91,59309.25,58723.51,0,59097.673333,59020.969800,25738.502241,...,3,-8.878852e-01,0.460065,1.224647e-16,-1.0,0.0,2021-11-25 19:00:00,19:00:00,0.469894,0.5
3495,22173,2021-11-25 20:00:00,58910.1,58917.7,59149.98,58864.54,0,58997.060000,58969.334900,19323.396108,...,3,-7.308360e-01,0.682553,1.224647e-16,-1.0,0.0,2021-11-25 20:00:00,20:00:00,0.467702,0.5
3496,22174,2021-11-25 21:00:00,58895.83,58863.09,59109.68,58757.34,0,58898.900000,58916.212450,962.568108,...,3,-5.195840e-01,0.854419,1.224647e-16,-1.0,0.0,2021-11-25 21:00:00,21:00:00,0.455801,0.5
3497,22175,2021-11-25 22:00:00,58866.49,58937.88,58952.38,58586.64,0,58906.223333,58927.046225,1497.171441,...,3,-2.697968e-01,0.962917,1.224647e-16,-1.0,0.0,2021-11-25 22:00:00,22:00:00,0.449707,0.5


In [29]:
prediction['y_pred_2'] = [1 if y > 0 else 0 for y in prediction['y_pred']]

In [16]:
prediction.groupby(['y_pred','trade'])['close'].count()

y_pred  trade
0       0.0        82
        0.5      1416
        1.0       317
1       0.0        54
        0.5      1200
        1.0       430
Name: close, dtype: int64

In [24]:
(1316+525)/prediction.shape[0]

0.526150328665333

In [17]:
(82+430)/(82+317+54+430)

0.579841449603624

In [18]:
prediction.y_pred.mean()

0.4812803658188054

In [9]:
import pandas as pd

In [15]:
s = pd.Series([1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

In [16]:
s.update(pd.Series(np.repeat(10, len(s))))

In [17]:
s

0    10
1    10
2    10
dtype: int64

In [18]:
a = pd.Series(data=np.repeat(10, len(s)), index=s.index, name='close')
a

0    10
1    10
2    10
Name: close, dtype: int64