In [None]:
using Random

# Seed the default random number generator so that later random operations
# (e.g. the shuffle of the parameter grid) are repeatable across runs.
Random.seed!(1234);

In [None]:
# Relative path of the HDF5 asset store.
# NOTE(review): not referenced anywhere else in the visible notebook — confirm it is still needed.
DATA_STORE = "../data/assets.h5";

## Create a stock return series to predict asset price moves

To develop our trading strategy, we use the daily stock returns for some 995 US stocks for the eight-year period from 2010 to 2017, and the features developed in Chapter 12, which include volatility and momentum factors as well as lagged returns with cross-sectional and sectoral rankings.

In [None]:
# Directory (relative to the working directory) that receives the outputs.
results_path = "results";

# Create it on first use; an already existing directory is left untouched.
isdir(results_path) || mkdir(results_path);

# Show the working directory so the relative paths above can be checked.
pwd()

In [None]:
using DataFrames, CSV;

# CSV file that is loaded into the `data` frame used by the rest of the notebook.
input = "data.csv";
data = CSV.File(input) |> DataFrame;
# For a quick dry run, subsample first: data = data[1:10000, :];

In [None]:
# Names of every column whose name matches "fwd".
outcomes = names(data, r"fwd");
outcomes

In [None]:
# Column used as the prediction target.
outcome = "r01_fwd";

In [None]:
# Feature frame: everything except the `outcomes` columns (keeps "symbol" and "date").
X_cv = select(data, Not(outcomes));
# Target frame: the two index columns plus the chosen outcome.
y_cv = select(data, ["symbol", "date", outcome]);

In [None]:
# Per-column summary of the loaded frame.
describe(data)

In [None]:
# unique(X_cv[:, "symbol"])
# Number of distinct symbols in the feature frame.
length(unique(X_cv[:, "symbol"]))

## Automate model generation

The following `make_model` function illustrates how to flexibly define various architectural elements for the search process. The `dense_layers` argument defines both the depth and width of the network as a list of integers (one layer size per hidden layer). We also use dropout for regularization, expressed as a float in the range [0, 1] that defines the probability that a given unit will be excluded from a training iteration.

In [None]:
using Flux


"""
    make_model(dense_layers, input_dim, activation, dropout)

Create a multi-layer perceptron as a `Flux.Chain`.

The chain consists of one `Dense` layer per entry of `dense_layers` (all using
`activation`), followed by a `Dropout(dropout)` layer and a single-unit `Dense`
output layer with a `sigmoid` activation.

# Arguments
- `dense_layers`: collection of hidden layer sizes, one number per layer
  (e.g. `(32, 16)`); must not be empty.
- `input_dim`: number of input features (the width of the feature matrix).
- `activation`: activation function applied by every hidden layer.
- `dropout`: dropout probability passed to `Flux.Dropout`.

# Throws
- `ArgumentError` if `dense_layers` is empty.
"""
function make_model(dense_layers, input_dim, activation, dropout)
    isempty(dense_layers) &&
        throw(ArgumentError("dense_layers must contain at least one layer size"))

    # sizes[i] => sizes[i + 1] are the input/output widths of hidden layer i.
    sizes = [input_dim; collect(dense_layers)]
    hidden = [
        Flux.Dense(sizes[i], sizes[i + 1], activation) for i in 1:(length(sizes) - 1)
    ]

    # NOTE(review): the output is squashed by `sigmoid`; confirm this is intended
    # when the model is trained against unbounded targets with `Flux.mse`.
    output = Flux.Dense(last(sizes), 1, sigmoid)

    return Flux.Chain(hidden..., Flux.Dropout(dropout), output)
end;

### MLflow Experiment Tracking

MLflow is a platform to streamline machine learning development, including tracking experiments, packaging code into reproducible runs, and sharing and deploying models. MLflow offers a set of lightweight APIs that can be used with any existing machine learning application or library (TensorFlow, PyTorch, XGBoost, etc.), wherever you currently run ML code (e.g., in notebooks, standalone applications, or the cloud).

In [None]:
using PyCall

mlflow = pyimport("mlflow")

MLF_EXPERIMENT_NAME = "Optimizing a NN Architecture For Trading"

# Look the experiment up by name and create it only when it does not exist yet.
# `get_experiment_by_name` returns Python `None` for an unknown name, which PyCall
# converts to `nothing`. Checking for that explicitly (instead of catching every
# exception) lets genuine failures, such as an unreachable tracking server, surface.
existing_experiment = mlflow.get_experiment_by_name(MLF_EXPERIMENT_NAME)

MLF_EXPERIMENT_ID = if isnothing(existing_experiment)
    mlflow.create_experiment(MLF_EXPERIMENT_NAME)
else
    existing_experiment.experiment_id
end

mlflow.set_experiment(experiment_id=MLF_EXPERIMENT_ID)

### Define CV Parameters

Now we just need to define our Flux model using the `make_model` function, set up cross-validation (see Chapter 6 on The Machine Learning Process and the following chapters for the time-series split, implemented below as `MultipleTimeSeriesCV`), and choose the parameters that we would like to explore.

We pick several two-layer configurations, the tanh activation function (relu can be added to `activation_opts`), and different dropout rates and batch sizes. We could also try out different optimizers (but did not run this experiment to limit what is already a computationally intensive effort):

In [None]:
# Search space: every combination of the four lists below is one candidate.
dense_layer_opts = [(16, 8), (32, 16), (32, 32), (64, 32)];  # hidden layer sizes
activation_opts = [Flux.tanh];                                # hidden activations
dropout_opts = [0.0, 0.1, 0.2];                               # dropout probabilities
batch_size = [64, 256];                                       # mini-batch sizes

In [None]:
using IterTools, Random

# Cartesian product of all option lists. `Base.Iterators.product` is used so that
# the cell does not depend on `IterTools` providing a `product` function, and
# `vec` flattens the 4-dimensional result into a plain list of
# (dense_layers, activation, dropout, batch_size) tuples.
param_grid = vec(collect(Iterators.product(
    dense_layer_opts,
    activation_opts,
    dropout_opts,
    batch_size,
)));

# Visit the combinations in random order.
parameters_grid = Random.shuffle(param_grid);

In [None]:
# Map every column of the model output to a zero-based class label: the row index
# of the column-wise maximum, minus one.
# NOTE(review): `make_model` ends in a single-unit layer, so the maximum is always
# in row 1 and this returns 0 for every column — confirm the intended output shape.
function predict(model, x)
    winners = argmax(model(x), dims=1)
    return map(idx -> idx[1] - 1, winners)
end;
optimizer = Flux.RMSProp();
epochs = 10;

### Training Process

In [None]:
using Flux, StatsBase, MLUtils, MLBase

"""
    performance_evaluation_dict(ŷ, y, phase)

Compute classification metrics for predictions `ŷ` against ground truth `y` and
return them in a `Dict` whose keys are prefixed with `phase` (for example
`"train accuracy"`).

# Arguments
- `ŷ`: predictions; flattened with `vec`.
- `y`: ground-truth values; flattened with `vec`.
- `phase`: label interpolated into every key of the result.

# Returns
A `Dict` with the keys `"\$phase error_rate"`, `"\$phase accuracy"`,
`"\$phase loss"`, `"\$phase fpr"`, `"\$phase fnr"` and `"\$phase f1_score"`.
"""
function performance_evaluation_dict(
    ŷ,
    y,
    phase,
)
    ŷ, y = vec(ŷ), vec(y);

    # MLBase expects the ground truth first and the prediction second. With the
    # arguments swapped, `roc` exchanges false positives and false negatives, so
    # the reported fpr/fnr would be wrong.
    error_rate = MLBase.errorrate(y, ŷ);
    accuracy = MLBase.correctrate(y, ŷ);

    # Flux takes the prediction first and the target second.
    loss = Flux.binarycrossentropy(ŷ, y);

    roc_nums::ROCNums = MLBase.roc(y, ŷ);

    fpr = MLBase.false_positive_rate(roc_nums);
    fnr = MLBase.false_negative_rate(roc_nums);
    f1_score = MLBase.f1score(roc_nums);

    return Dict(
        "$phase error_rate" => error_rate,
        "$phase accuracy" => accuracy,
        "$phase loss" => loss,
        "$phase fpr" => fpr,
        "$phase fnr" => fnr,
        "$phase f1_score" => f1_score,
    );
end;

In [None]:
"""
    train_loop(train_data, model, optimiser, batch_size)

Run one pass over `train_data` in mini-batches of `batch_size`, updating `model`
with `optimiser` on the binary cross-entropy loss, then predict on the full
training set.

# Arguments
- `train_data`: tuple `(X, Y)` of inputs and targets, iterated with `eachobs`.
- `model`: Flux model; its parameters are updated in place.
- `optimiser`: Flux optimiser passed to `Flux.update!`.
- `batch_size`: mini-batch size.

# Returns
`(Y, Ŷ)`: the targets reduced to zero-based labels (row index of the column-wise
maximum, minus one) and the output of `predict(model, X)`.

The model is left in test mode on return.
"""
function train_loop(train_data, model, optimiser, batch_size)
    Flux.trainmode!(model)

    # Collect the trainable parameters once instead of twice per mini-batch.
    ps = Flux.params(model)

    for (x, y) ∈ eachobs(train_data, batchsize=batch_size)
        # Only the loss is computed inside the differentiated block.
        grads = gradient(ps) do
            Flux.binarycrossentropy(model(x), y)
        end

        Flux.update!(optimiser, ps, grads)
    end

    # Switch dropout off before predicting; `trainmode!` above forces it on, which
    # would otherwise make the returned predictions random.
    Flux.testmode!(model)

    X, Y = train_data

    Y = map(idx -> idx[1] - 1, argmax(Y, dims=1))
    Ŷ = predict(model, X)

    return Y, Ŷ
end;

"""
    test_loop(test_data, model, optimiser, batch_size)

Evaluate `model` on `test_data` without updating its parameters.

# Arguments
- `test_data`: tuple `(X, Y)` of inputs and targets.
- `model`: Flux model; it is switched to test mode.
- `optimiser`: unused; kept so the signature mirrors `train_loop`.
- `batch_size`: unused; kept so the signature mirrors `train_loop`.

# Returns
`(Y, Ŷ)`: the targets reduced to zero-based labels (row index of the column-wise
maximum, minus one) and the output of `predict(model, X)`.
"""
function test_loop(test_data, model, optimiser, batch_size)
    Flux.testmode!(model)

    X, Y = test_data

    Y = map(idx -> idx[1] - 1, argmax(Y, dims=1))

    # The predictions were never computed before, so returning `Ŷ` raised an
    # `UndefVarError`; compute them the same way `train_loop` does.
    Ŷ = predict(model, X)

    return Y, Ŷ
end;

### Train-Test Split

We split the data into a training set for cross-validation, and keep the last 12 months with data as holdout test:

In [None]:
# Cross-validation layout, expressed in rows of the unique-date index.
n_splits = 12;                          # number of folds
train_period_length = 4 * 12 * 21;      # 4 * 12 * 21 = 1008 dates per training window
test_period_length = 3 * 21;            # 3 * 21 = 63 dates per test window

In [None]:
using ResumableFunctions

"""
    MultipleTimeSeriesCV(n_splits, train_pl, test_pl;
                         lookahead=0, date_idx="date", shuffle=true)

Settings for a cross-validation scheme over a frame that has a date column.

# Fields
- `n_splits`: number of train/test splits to generate.
- `train_pl`: length of each training window, counted in unique dates.
- `test_pl`: length of each test window, counted in unique dates.
- `lookahead`: offset, in unique dates, applied between the test and training windows.
- `date_idx`: name of the date column.
- `shuffle`: whether the training indices are shuffled.

The three-argument positional form keeps the previous defaults
(`lookahead = 0`, `date_idx = "date"`, `shuffle = true`).
"""
struct MultipleTimeSeriesCV

    n_splits::Int
    train_pl::Int
    test_pl::Int
    lookahead::Int
    date_idx::String
    shuffle::Bool

    function MultipleTimeSeriesCV(
        n_splits::Int,
        train_pl::Int,
        test_pl::Int;
        lookahead::Int=0,
        date_idx::AbstractString="date",
        shuffle::Bool=true,
    )
        return new(n_splits, train_pl, test_pl, lookahead, date_idx, shuffle)
    end

end

# Row number of every row of `data`, as reported by `rownumber` on each element
# of `eachrow(data)`.
function get_rows(data)
    return rownumber.(eachrow(data))
end;

# split(cv, X, y=nothing)
#
# Lazily yield one `(train_idx, test_idx)` pair of row-index vectors into `X` for
# each of the `cv.n_splits` folds.
#
# Arguments
#   cv : `MultipleTimeSeriesCV` settings.
#   X  : frame that contains the date column named by `cv.date_idx`.
#   y  : unused; accepted for call compatibility.
#
# The unique dates are sorted newest first and the window boundaries are
# positions in that list, so fold 1 is the most recent one.
#
# Fixes relative to the previous version:
#   * indices are positions in `X` (`findall(mask)`); before, they were the row
#     numbers of the filtered copy, i.e. always `1:k`;
#   * the training indices are really shuffled (`shuffle!`); before, the shuffled
#     copy was discarded;
#   * the date column is always looked up through `cv.date_idx`.
#
# NOTE(review): the window arithmetic and the strict comparisons are unchanged.
# As written, the `test_pl` most recent dates never fall into any fold and the
# boundary dates themselves are excluded — confirm that this is intended.
@resumable function split(
    cv::MultipleTimeSeriesCV,
    X,
    y=nothing)

    lookahead = cv.lookahead
    test_length = cv.test_pl
    train_length = cv.train_pl

    date_col = X[:, cv.date_idx]
    days = sort(unique(date_col), rev=true)

    for i ∈ 1:cv.n_splits
        test_end_idx = i * test_length
        test_start_idx = test_end_idx + test_length
        train_end_idx = test_start_idx + lookahead - 1
        train_start_idx = train_end_idx + train_length + lookahead - 1

        train_mask = (date_col .> days[train_start_idx]) .& (date_col .< days[train_end_idx])
        train_idx = findall(train_mask)

        test_mask = (date_col .> days[test_start_idx]) .& (date_col .< days[test_end_idx])
        test_idx = findall(test_mask)

        if cv.shuffle
            Random.shuffle!(train_idx)
        end

        @yield (train_idx, test_idx)
    end
end;

# Number of splits configured on `cv` (reads its `n_splits` property).
get_n_splits(cv) = cv.n_splits;

# Cross-validation settings built from the constants defined above.
cv = MultipleTimeSeriesCV(n_splits, train_period_length, test_period_length);

### BSON Model Saving

In [None]:
using BSON

In [None]:
"""
    fit_and_score_fold(X, y, train_idx, val_idx, dense_layers, activation, dropout,
                       batch_size; target=outcome, n_epochs=epochs, opt=optimizer,
                       index_columns=["symbol", "date"])

Train a fresh model on the rows `train_idx` and score it on the rows `val_idx`.

After every epoch the validation predictions are compared with the actual values
per date using the Spearman rank correlation.

# Returns
A `DataFrame` with one row per validation date: the `date`, the number of rows
for that date (`date_length`) and one column per epoch (named `"1"`, `"2"`, ...)
holding that epoch's correlation.
"""
function fit_and_score_fold(
    X, y, train_idx, val_idx, dense_layers, activation, dropout, batch_size;
    target=outcome, n_epochs=epochs, opt=optimizer, index_columns=["symbol", "date"],
)
    x_train = Matrix(X[train_idx, Not(index_columns)])
    x_val = Matrix(X[val_idx, Not(index_columns)])
    y_train = Matrix(y[train_idx, Not(index_columns)])
    val_frame = y[val_idx, :]

    preds = rename(val_frame, target => "actual")
    fold_ic = DataFrames.combine(DataFrames.groupby(val_frame, "date"), "date" => length)

    model = make_model(dense_layers, size(x_train, 2), activation, dropout)

    # Flux expects observations in columns, hence the transposes.
    train_loader = DataLoader((x_train', y_train'), batchsize=batch_size)

    loss(x, y) = Flux.mse(model(x), y)

    # Save a checkpoint at most once every 15 seconds.
    checkpoint = Flux.throttle(15) do
        BSON.@save "model-checkpoint.bson" model
    end

    # The parameters of *this* model; an empty `Flux.params()` would train nothing.
    ps = Flux.params(model)

    for epoch in 1:n_epochs
        print("Epoch: $epoch, ");

        Flux.train!(loss, ps, train_loader, opt, cb=checkpoint)

        column = "$epoch"
        preds[:, column] = vec(model(x_val'))

        daily = DataFrames.combine(
            DataFrames.groupby(preds, "date"),
            ["actual", column] => ((a, e) -> StatsBase.corspearman(a, e)) => column)

        fold_ic[:, column] = daily[:, column]
    end

    return fold_ic
end;

ic = DataFrame();
# Close any run that an interrupted earlier execution may have left open.
mlflow.end_run()

# The loop variables are local to the loop, so the global option lists
# (for example `batch_size`) are not overwritten.
for (layer_sizes, act, drop_rate, n_batch) ∈ parameters_grid
    start_time = time()

    for (fold, (train_data_idx, val_data_idx)) ∈ enumerate(split(cv, X_cv))
        mlflow.start_run(run_name="Fold $(fold)");

        try
            fold_ic = fit_and_score_fold(
                X_cv, y_cv, train_data_idx, val_data_idx,
                layer_sizes, act, drop_rate, n_batch,
            )

            fold_ic[:, "dense_layers"] .= string(layer_sizes);
            fold_ic[:, "activation"] .= string(act);
            fold_ic[:, "dropout"] .= drop_rate;
            fold_ic[:, "batch_size"] .= n_batch;
            fold_ic[:, "fold"] .= fold;

            append!(ic, fold_ic);
        finally
            # Always close the run, even when the fold fails.
            mlflow.end_run();
        end
    end

    process_time = time() - start_time;
    print("$process_time \n");
end

output = "scores.csv";
CSV.write(output, ic);