In [1]:
import datetime
import os
import random
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
import tensorflow as tf
import mlflow.keras
import importlib
import joblib
from sklearn.metrics import roc_auc_score, f1_score

import ml_investing_wne.config as config
from ml_investing_wne.data_engineering.load_data import get_hist_data
from ml_investing_wne.data_engineering.prepare_dataset import prepare_processed_dataset
from ml_investing_wne.train_test_val_split import train_test_val_split
from ml_investing_wne.helper import confusion_matrix_plot, compute_profitability_classes
from ml_investing_wne.utils import get_logger

# One shared seed for the three random number generators this notebook touches:
# Python's stdlib `random`, NumPy's global generator and TensorFlow.
seed = 12345
for _apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
    _apply_seed(seed)

# Logger instance provided by the project's utils module.
logger = get_logger()

2022-10-22 11:15:46.311946: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-10-22 11:15:46.311987: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Fetch the historical data for the configured currency and pass it straight
# through the project's preprocessing step; `df` is the processed frame.
df = prepare_processed_dataset(df=get_hist_data(currency=config.currency))

# Unpack the ten objects returned by the splitter. The *_cat arrays are the
# targets handed to model.evaluate further down in this notebook.
(
    X, y,
    X_val, y_val,
    X_test, y_test,
    y_cat, y_val_cat, y_test_cat,
    train,
) = train_test_val_split(df)


2022-10-22 11:16:18,861 - ml_investing_wne.data_engineering.prepare_dataset - exported to /home/jupyter/ml_investing_wne/src/ml_investing_wne/data/processed/EURCHF/EURCHF_processed_720min.csv
2022-10-22 11:16:18,919 - ml_investing_wne.train_test_val_split - first sequence begins: 2010-01-24 12:00:00
2022-10-22 11:16:18,920 - ml_investing_wne.train_test_val_split - first sequence ends: 2010-03-25 00:00:00
2022-10-22 11:16:18,945 - ml_investing_wne.train_test_val_split - last sequence begins: 2019-10-30 12:00:00
2022-10-22 11:16:18,947 - ml_investing_wne.train_test_val_split - last sequence ends: 2019-12-30 12:00:00
2022-10-22 11:16:19,001 - ml_investing_wne.train_test_val_split - first sequence begins: 2019-11-13 12:00:00
2022-10-22 11:16:19,002 - ml_investing_wne.train_test_val_split - first sequence ends: 2020-01-14 00:00:00
2022-10-22 11:16:19,006 - ml_investing_wne.train_test_val_split - last sequence begins: 2020-10-29 12:00:00
2022-10-22 11:16:19,006 - ml_investing_wne.train_test_

In [3]:
# The saved model sits under <package_directory>/models/production/<name>, where
# <name> is built from the config values below plus the fixed 'hist_data' tag.
production_model_name = '{}_{}_{}_{}_{}_{}'.format(
    config.model,
    'hist_data',
    config.currency,
    config.freq,
    str(config.steps_ahead),
    config.seq_len,
)
production_model_path = os.path.join(
    config.package_directory, 'models', 'production', production_model_name
)
model = load_model(production_model_path)

2022-10-22 11:16:23.484972: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-10-22 11:16:23.485027: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-10-22 11:16:23.485066: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (python-20220811): /proc/driver/nvidia/version does not exist
2022-10-22 11:16:23.485416: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Score the loaded model on the validation split. The displayed result is a
# two-element list; presumably [loss, accuracy] - confirm against the model's compile() metrics.
model.evaluate(x=X_val, y=y_val_cat)

2022-10-22 11:16:49.532274: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)




[0.6796161532402039, 0.5960144996643066]

In [5]:
# Score the loaded model on the held-out test split. The displayed result is a
# two-element list; presumably [loss, accuracy] - confirm against the model's compile() metrics.
model.evaluate(x=X_test, y=y_test_cat)



[0.6759060621261597, 0.6000000238418579]

In [6]:
# Peek at the first two rows of the processed frame to see which columns exist.
df.head(n=2)

Unnamed: 0,datetime,open,high,low,close,y_pred,SMA_3,EMA_3,VAR_3,SMA_5,...,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,roc_1,hour,weekday,hour_sin,hour_cos,weekday_sin,weekday_cos
0,2010-01-24 12:00:00,1.4724,1.474,1.4714,1.4715,0,1.472133,1.471879,7.233333e-07,1.47142,...,1.473481,0.280094,0.519411,0.998914,12,6,-2.449294e-16,1.0,-2.449294e-16,1.0
1,2010-01-25 00:00:00,1.4716,1.4733,1.4712,1.4715,0,1.472033,1.471689,8.533333e-07,1.47162,...,1.473468,0.251126,0.467529,1.0,0,0,0.0,1.0,0.0,1.0


In [7]:
# Backtesting: the cells below collect the model's predictions for every row in the test window and export them to CSV.

In [8]:
# The first/last "sequence ends" timestamps of the test split are stored as
# joblib artifacts directly under <package_directory>/models. Both file names
# share the same 'test_<currency>_<freq>.save' suffix.
# NOTE(review): these files are presumably written by train_test_val_split - confirm.
models_dir = os.path.join(config.package_directory, 'models')
test_suffix = '{}_{}_{}.save'.format('test', config.currency, config.freq)

start_date = joblib.load(os.path.join(models_dir, 'first_sequence_ends_' + test_suffix))
end_date = joblib.load(os.path.join(models_dir, 'last_sequence_ends_' + test_suffix))
print(start_date, end_date)

2021-01-15 12:00:00 2021-12-24 12:00:00


In [9]:
# Forward return over the prediction horizon: the close `steps_ahead` rows later
# divided by the current close, minus 1. For a positive `steps_ahead` the final
# rows have no future close to shift in, so their `change` is NaN.
df['change'] = df['close'].shift(-config.steps_ahead) / df['close'] - 1

# Rows whose timestamp lies inside the test window (both bounds inclusive),
# restricted to the columns needed for the backtest export.
backtest_columns = ['datetime', 'open', 'high', 'low', 'close', 'y_pred', 'change']
in_test_window = (df.datetime >= start_date) & (df.datetime <= end_date)

# .copy() makes `prediction` an explicit, independent frame. A later cell adds a
# column to it, and that write must never be ambiguous with respect to `df`.
prediction = df.loc[in_test_window, backtest_columns].copy()
prediction.shape

(540, 7)

In [10]:
# Raw model outputs for the test split, followed by the index of the largest
# output along the last axis, used as the predicted class label.
y_pred = model.predict(x=X_test)
y_pred_class = np.argmax(y_pred, axis=-1)

In [11]:
# Append the model output at index 1 of each prediction row as `prediction_1`.
# NOTE(review): presumably the probability of class 1; this relies on
# `prediction` and `X_test` covering the same rows in the same order - confirm.
prediction = prediction.assign(prediction_1=y_pred[:, 1])

In [24]:
# Write the backtest frame to <processed_data_path>/<currency>/Backtest_<freq>_<model>.csv
# without the index column.
backtest_filename = 'Backtest_{}_{}.csv'.format(config.freq, config.model)
backtest_path = os.path.join(config.processed_data_path, config.currency, backtest_filename)
prediction.to_csv(backtest_path, index=False)