In [1]:
import datetime
import os
import random
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
import tensorflow as tf
import mlflow.keras
import importlib
import joblib
from sklearn.metrics import roc_auc_score, f1_score

import ml_investing_wne.config as config
from ml_investing_wne.data_engineering.load_data import get_hist_data
from ml_investing_wne.data_engineering.prepare_dataset import prepare_processed_dataset
from ml_investing_wne.train_test_val_split import train_test_val_split
from ml_investing_wne.helper import confusion_matrix_plot, compute_profitability_classes
from ml_investing_wne.utils import get_logger

2022-08-22 14:40:58.889612: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-22 14:40:58.889666: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Seed every random number generator used in this notebook (Python's `random`,
# NumPy and TensorFlow) with the same value so that runs are repeatable.
seed = 12345
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(seed)

In [3]:
# Look up the model factory dynamically: the module inside `ml_investing_wne.cnn`
# is selected by `config.model` and must expose a `build_model` callable.
build_model = importlib.import_module(f'ml_investing_wne.cnn.{config.model}').build_model

# NOTE: autolog ends its run as soon as keras `fit` returns, so anything logged
# afterwards lands in a separate run. This is inconvenient, but the alternative
# is to create the run manually and replicate what autolog does.
mlflow.tensorflow.autolog()

# Load the historical data for the configured currency, process it (with
# `features=False`) and split it into train / validation / test arrays.
df = get_hist_data(currency=config.currency)
df = prepare_processed_dataset(df=df, features=False)
(X, y, X_val, y_val, X_test, y_test,
 y_cat, y_val_cat, y_test_cat, train) = train_test_val_split(df)




In [4]:
# Preview the first five rows of the processed frame.
df.head(n=5)

Unnamed: 0,datetime,open,high,low,close,y_pred
0,2010-01-03 17:00:00,1.4832,1.4832,1.4822,1.4824,1
1,2010-01-03 18:00:00,1.4824,1.4836,1.4823,1.4834,0
2,2010-01-03 19:00:00,1.4833,1.4847,1.4807,1.4825,1
3,2010-01-03 20:00:00,1.4825,1.4831,1.4814,1.4829,0
4,2010-01-03 21:00:00,1.483,1.4832,1.4811,1.4824,0


In [5]:
# Dimensions of the training input array.
np.shape(X)

(62058, 96, 4)

In [6]:
# Stop training once validation accuracy has not improved for `config.patience`
# epochs, and roll the model back to the best weights seen so far.
early_stop = EarlyStopping(monitor='val_accuracy', patience=config.patience, restore_best_weights=True)

# CSVLogger opens its output file when training starts and does not create
# missing parent directories, so make sure the logs folder exists up front
# instead of failing only after the data has been loaded and the model built.
log_dir = os.path.join(config.package_directory, 'logs')
os.makedirs(log_dir, exist_ok=True)

# Append per-epoch metrics to a semicolon-separated log shared across runs.
csv_logger = CSVLogger(os.path.join(log_dir, 'keras_log.csv'), append=True,
                       separator=';')
callbacks = [early_stop, csv_logger]

In [7]:
# Build the network selected via `config.model`. The number of input features
# is taken from the last axis of the training array rather than hard-coded, so
# the input layer stays consistent with the data if the feature set changes.
model = build_model(input_shape=(config.seq_len, X.shape[-1]), head_size=64, num_heads=4, ff_dim=64,
                    num_transformer_blocks=4, mlp_units=[128], mlp_dropout=0.25, dropout=0.25)

2022-08-22 14:42:27.362564: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-08-22 14:42:27.362611: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-08-22 14:42:27.362634: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (python-20220811): /proc/driver/nvidia/version does not exist
2022-08-22 14:42:27.362880: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# Train on the training split and evaluate on the validation split after each
# epoch. The returned History object is kept in `history`.
history = model.fit(
    X,
    y_cat,
    validation_data=(X_val, y_val_cat),
    callbacks=callbacks,  # early stopping + CSV logging
    epochs=config.epochs,
    batch_size=64,
    verbose=2,  # one summary line per epoch
)

2022/08/22 14:42:28 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'bbc6d10eac044278993961a66f9424c7', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow
2022-08-22 14:42:28.870302: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-08-22 14:42:28.870393: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2022-08-22 14:42:28.871102: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-08-22 14:42:29.353351: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/100


2022-08-22 14:42:34.600074: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-08-22 14:42:34.600135: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2022-08-22 14:42:35.063088: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2022-08-22 14:42:35.069495: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-08-22 14:42:35.081602: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: /tmp/tmp9ex32rh5/train/plugins/profile/2022_08_22_14_42_35

2022-08-22 14:42:35.087961: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to /tmp/tmp9ex32rh5/train/plugins/profile/2022_08_22_14_42_35/python-20220811.trace.json.gz
2022-08-22 14:42:35.101539: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: /tmp/tmp9ex32rh5/train/plugins/profile/2022_08_22_

970/970 - 420s - loss: 0.7089 - accuracy: 0.4960 - val_loss: 0.6932 - val_accuracy: 0.4947
Epoch 2/100
970/970 - 403s - loss: 0.6934 - accuracy: 0.5035 - val_loss: 0.6932 - val_accuracy: 0.5053
Epoch 3/100
970/970 - 403s - loss: 0.6931 - accuracy: 0.5065 - val_loss: 0.6932 - val_accuracy: 0.5053
Epoch 4/100
970/970 - 403s - loss: 0.6931 - accuracy: 0.5058 - val_loss: 0.6931 - val_accuracy: 0.5053
Epoch 5/100
970/970 - 405s - loss: 0.6933 - accuracy: 0.5049 - val_loss: 0.6931 - val_accuracy: 0.5053
Epoch 6/100
970/970 - 407s - loss: 0.6931 - accuracy: 0.5040 - val_loss: 0.6931 - val_accuracy: 0.5053
Epoch 7/100
970/970 - 402s - loss: 0.6931 - accuracy: 0.5057 - val_loss: 0.6931 - val_accuracy: 0.5053
Epoch 8/100
970/970 - 403s - loss: 0.6931 - accuracy: 0.5054 - val_loss: 0.6931 - val_accuracy: 0.5053
Epoch 9/100
970/970 - 404s - loss: 0.6933 - accuracy: 0.5031 - val_loss: 0.6931 - val_accuracy: 0.5053
Epoch 10/100
970/970 - 404s - loss: 0.6931 - accuracy: 0.5045 - val_loss: 0.6931 - va

2022-08-22 16:37:20.682430: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /tmp/tmp0_3wxao2/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmp0_3wxao2/model/data/model/assets
