In [1]:
import mlflow
import mlflow.keras
from numpy import ndarray
import pandas as pd
import sys
import tensorflow as tf

%load_ext lab_black

sys.path.append("/Users/renero/Documents/SideProjects/trader/src")
sys.path.append("/Users/renero/Documents/SideProjects/trader/src/predictor")
sys.path.append("/Users/renero/Documents/SideProjects/trader/src/utils")

from cs_dictionary import CSDictionary
from cs_nn import CS_NN
from sequences import sequences
from ticks import Ticks
from metrics import metrics

  class IteratorBase(collections.Iterator, trackable.Trackable,
  class DatasetV2(collections.Iterable, tracking_base.Trackable,


In [2]:
argv = [
    "",
    "-c",
    "../src/predictor/params.dax.yaml",
    "-f",
    "../data/^GDAXI/dax_2018_2019.csv",
    "--window",
    "8",
    "--epochs",
    "400",
    "train",
]
params = CSDictionary(args=argv)

  and should_run_async(code)


2020-09-02 14:42:17 - INFO  - CSDictionary:__init__           - Using configuration parameters from: ../src/predictor/params.dax.yaml


# Load the data

In [3]:
ticks = Ticks(params, params.input_file, scale=True)

X_train, y_train, X_test, y_test = sequences.prepare(
    ticks.data, timesteps=params.window_size, test_size=params.test_size
)
print(f"X_train {X_train.shape}, y_train{y_train.shape}")
print(f"X_test {X_test.shape}, y_test{y_test.shape}")

X_train (424, 8, 4), y_train(424, 1)
X_test (48, 8, 4), y_test(48, 1)


# Create the LSTM

In [4]:
def build_model(
    params: CSDictionary, num_features: int, num_target_labels: int):
    tf.random.set_seed(params.seed)
    nn = CS_NN(params, None, "move")
    nn.build_model(
        params.window_size, num_features, num_target_labels, params.summarize)
    return nn


def do_train(
    experiment_id: int,
    params: CSDictionary,
    nn: CS_NN,
    X_train: ndarray,
    y_train: ndarray,
    X_test: ndarray,
    y_test: ndarray,
):
    with mlflow.start_run(experiment_id=experiment_id):
        mlflow.keras.autolog()
        nn.train(X_train, y_train)
        mlflow.log_params(params)

        yhat = nn.predict(X_test)
        params.log.info(f"Predictions (yhat): {yhat.shape}")

        n_predictions = int(X_test.shape[0])
        params.log.info(f"n_predictions: {n_predictions}")

        Y = y_test.reshape(n_predictions,)
        Yhat = yhat.reshape(n_predictions,)
        results = pd.DataFrame({"y": Y, "yhat": Yhat,}).round(2)
        me = metrics.mean_error(results)
        tp = metrics.trend_performance(results)
        # mlflow.log_param("tf_seed", params.seed)
        mlflow.log_metric("trend_perf", tp)
        params.log.info(f"Mean Err.: {me:5.3f}, Trend perf.: {tp:4.2f}")

    return results

  and should_run_async(code)


# Call this for single step training

In [5]:
# rnn = build_model(
#     params, num_features=X_train.shape[2], num_target_labels=y_train.shape[1]
# )
# result = do_train(params, rnn, X_train, y_train, X_test, y_test)

# Hyper parameter search (grid)

In [10]:
experiment_id = mlflow.create_experiment('grid_search')
for i in range(4):
    ts = 2**i
    for j in range(4):
        bs = 2**j
        params.batch_size=bs
        params.window_size=ts

        X_train, y_train, X_test, y_test = sequences.prepare(ticks.data, timesteps=params.window_size, test_size=params.test_size)
        params.log.info(f"X_train {X_train.shape}, y_train{y_train.shape}")
        params.log.info(f"X_test {X_test.shape}, y_test{y_test.shape}")
        params.log.info(
            f'w_size: {params.window_size}; batch_size: {params.batch_size}')

        rnn = build_model(
            params, 
            num_features=X_train.shape[2], 
            num_target_labels=y_train.shape[1]
        )
        result = do_train(experiment_id, params, rnn, X_train, y_train, X_test, y_test)

2020-09-02 14:43:16 - INFO  - NA:<module>                     - X_train (431, 1, 4), y_train(431, 1)
2020-09-02 14:43:16 - INFO  - NA:<module>                     - X_test (48, 1, 4), y_test(48, 1)
2020-09-02 14:43:16 - INFO  - NA:<module>                     - w_size: 1; batch_size: 1
2020-09-02 14:46:35 - INFO  - NA:do_train                     - Predictions (yhat): (48, 1)
2020-09-02 14:46:35 - INFO  - NA:do_train                     - n_predictions: 48
2020-09-02 14:46:35 - INFO  - NA:do_train                     - Mean Err.: 0.091, Trend perf.: 0.48
2020-09-02 14:46:35 - INFO  - NA:<module>                     - X_train (431, 1, 4), y_train(431, 1)
2020-09-02 14:46:35 - INFO  - NA:<module>                     - X_test (48, 1, 4), y_test(48, 1)
2020-09-02 14:46:35 - INFO  - NA:<module>                     - w_size: 1; batch_size: 2
2020-09-02 14:48:58 - INFO  - NA:do_train                     - Predictions (yhat): (48, 1)
2020-09-02 14:48:58 - INFO  - NA:do_train                   

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 19)

# Plot results

In [None]:
# metrics.plot_and_compare(result)