In [3]:
from __future__ import print_function
import pandas as pd
import numpy as np
import os
import time
import h5py
import helper as hl
from sklearn.externals import joblib
from tensorflow.python.keras.layers import Input, GRU, Dense, Concatenate, TimeDistributed
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import backend as T
import configparser
import helper as hl
import tensorflow as tf
from tensorflow import set_random_seed
import warnings
np.random.seed(27)
set_random_seed(27)
pd.set_option('display.max_colwidth', 1500)
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.width', 900)
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [4]:
tf.VERSION

'1.13.1'

In [5]:
class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).
    There are three sets of weights introduced W_a, U_a, and V_a
     """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.

        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',
                                   trainable=True)
        
        # Be sure to call this at the end
        super(AttentionLayer, self).build(input_shape)

    def call(self, inputs, verbose=False):
        
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            assert_msg = "States must be a list. However states {} is of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors"""
            batch_size = encoder_out_seq.shape[0]
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch_size*en_seq_len, latent_dim
            reshaped_enc_outputs = T.reshape(encoder_out_seq, (batch_size * en_seq_len, en_hidden))
            # <= batch_size*en_seq_len, latent_dim
            W_a_dot_s = T.reshape(T.dot(reshaped_enc_outputs, self.W_a), (batch_size, en_seq_len, en_hidden))
            if verbose:
                print('wa.s>',W_a_dot_s.shape)

            """ Computing hj.Ua """
            U_a_dot_h = T.expand_dims(T.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>',U_a_dot_h.shape)

            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            reshaped_Ws_plus_Uh = T.tanh(T.reshape(W_a_dot_s + U_a_dot_h, (batch_size * en_seq_len, en_hidden)))
            if verbose:
                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = T.reshape(T.dot(reshaped_Ws_plus_Uh, self.V_a), (batch_size, en_seq_len))
            # <= batch_size, en_seq_len
            e_i = T.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """
            # <= batch_size, hidden_size
            c_i = T.sum(encoder_out_seq * T.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        # We are not using initial states, but need to pass something to T.rnn funciton
        fake_state_c = T.zeros(shape=(encoder_out_seq.shape[0], encoder_out_seq.shape[-1]))
        fake_state_e = T.zeros(shape=(encoder_out_seq.shape[0], encoder_out_seq.shape[1]))

        """ 
        Computing energy outputs
        """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = T.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        """
        Computing context vectors
        """
        last_out, c_outputs, _ = T.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer """
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]

In [6]:
class Fit():

    def __init__(self):

        # Path to options file.
        self.path_options = 'options.ini'

        # Load config file.
        config = configparser.ConfigParser()                                             # define config object.
        config.optionxform = str                                                         # hold keys register
        config.read(self.path_options)                                                   # read config file.

        # Path variables.
        self.path_data = config['PATH'].get('DataObj')                                   # address: data object
        self.path_alphas = config['PATH'].get('Alphas')                                  # address: alphas
        self.path_events = config['PATH'].get('Events')                                  # address: events
        self.path_tensors = config['PATH'].get('Tensors')                                # address: tensors
        self.path_positions = config['PATH'].get('PositionsObj')                         # address: predictions

        # Load data object
        self.data = joblib.load(self.path_data)                                          # data object

        # Internal variables.
        self.alpha_storage = None                                                        # alpha model storage
        self.beta_storage = None                                                         # beta model storage
        self.selected_alphas = None                                                      # selected alphas
        self.start_transfer_learning = False                                             # start tf mark
        self.freeze_control = 0                                                          # layer freeze control

    # Fn: (1)
    # Model is trained on each refit day and
    # then predictions are made.
    def run_model(self):

        # Load config file.
        config = configparser.ConfigParser()                                             # define config object.
        config.optionxform = str                                                         # hold keys register
        config.read(self.path_options)                                                   # read config file.

        # Create positions folder is it doesn`t exist.
        if not os.path.exists(self.path_positions.replace('positions.pickle', '')):
            os.makedirs(self.path_positions.replace('positions.pickle', ''))

        print('')
        print('Fit model:')
        """        
        ----------------------------------------------------------------------------------------------------------                                                     
                                                         DATA
                                        Load all necessary data variables and 
                                                    model parameters.                                                                                                          
        ----------------------------------------------------------------------------------------------------------
        """
        # ---------------------------
        #            DATA
        # ----------------------------
        # Dates. arr.
        dates = self.data['dates']
        # Industry data. arr.
        industry = self.data['industry.sector']
        # Selected alphas. arr.
        alphas_list = os.listdir(self.path_alphas)
        events_list = os.listdir(self.path_events)
        # Data holders sizes. val.
        numstocks = self.data['numstocks']
        numdates = self.data['numdates']
        print('Days available: {}'.format(len(dates)))

        # -------------------------------
        #          PARAMETERS
        # -------------------------------

        # General
        lookback = 200                                # days for training
        fit_startdate = config['DEFAULT'].getint('FitStartDate')                        # {20100104: 20180205}
        delay = config['DEFAULT'].getint('Delay')                                       # delay days
        refit_freq = config['DEFAULT'].getint('RefitFreq')                              # refit frequency

        # Target value
        ndays = config['FIT'].getint('TargetNDays')                                     # return mean value
        ret_cap = config['FIT'].getfloat('TargetRetCap')                                # return cap.
        min_target = config['FIT'].getfloat('TargetMinValOr')                           # target values min cap
        min_target_z = config['FIT'].getfloat('TargetMinValZc')                         # target values max cap
        industry_balancing = config['FIT'].getboolean('TargetIndBalancing')             # perform industry balancing

        # Alpha adjustment
        min_alpha = config['FIT'].getfloat('AlphaMin')                                  # alpha values min cap
        max_alpha = config['FIT'].getfloat('AlphaMax')                                  # alpha values max cap

        # Model
        loss_adj = config['FIT'].getfloat('ModelLossAdj')                               # stock loss fn. adj
        reg_ffn = config['FIT'].getfloat('ModelRegFFN')                                 # reg. ffn
        reg_conv = config['FIT'].getfloat('ModelRegConvNet')                            # reg. con
        reg_gru = config['FIT'].getfloat('ModelRegGRU')                                 # reg. rnn
        layer_capacity = config['FIT'].getint('ModelLayerCapacity')                     # neurons n. resnet
        n_layer_1 = config['FIT'].getint('ModelNeuronsLayer1')                          # neurons n. 1-st layer
        n_layer_2 = config['FIT'].getint('ModelNeuronsLayer2')                          # neurons n. 2-nd layer
        deep_features_n = config['FIT'].getint('ModelDeepFeaturesNumber')               # deep features number
        ch_layer_1 = config['FIT'].getint('ModelChannelLayer1')                         # channels n. 1-st layer
        ch_layer_2 = config['FIT'].getint('ModelChannelLayer2')                         # channels n. 2-nd layer
        ch_layer_3 = config['FIT'].getint('ModelChannelLayer3')                         # channels n. 3-rd layer
        ch_reduction = config['FIT'].getint('ModelChannelSpatialReduction')             # ch. n. spatial reduction
        kernel_conv1d = config['FIT'].getint('ModelKernelConv1D')                       # conv1d kernel size
        kernel_conv2d = tuple([np.int(i) for i in                                       # conv2d kernel size
                               config['FIT'].get('ModelKernelConv2D').split(',')])      # .......
        dropout_rate = config['FIT'].getfloat('ModelDropout')                           # dropout rate
        leaky_slope = config['FIT'].getfloat('ModelLeakySlope')                         # activation fn. slope
        learning_rate = config['FIT'].getfloat('ModelLearningRate')                     # learning rate
        epochs_n_general = config['FIT'].getint('ModelEpochsNumberGeneral')             # epochs n. general fit
        epochs_n_fn = config['FIT'].getint('ModelEpochsNumberFineTuning')               # epochs n. fine tuning
        batches_n = config['FIT'].getint('ModelBatchesNumber')                          # batches n.
        rnn_cells = [np.int(i) for i in                                                 # rnn cells
                     config['FIT'].get('ModelRNNLayers').split(',')]                    # .......
        mc_sampling_n = config['FIT'].getint('ModelMCDropoutSamplingNumber')            # sampling n.
        ret_sizing = config['FIT'].getfloat('ModelPredictionReturnSizing')              # total return check
        ret_adj = config['FIT'].getfloat('ModelPredictionReturnAdj')                    # adjust t+1 day return
        s2s_weight = config['FIT'].getfloat('ModelSeq2SeqWeight')                       # seq2seq model weight
        am_weight = config['FIT'].getfloat('ModelAlphaWeight')                          # alpha model weight

        # D-SAE
        dae_learning_rate = config['FIT'].getfloat('DAELearningRate')                   # autoencoder lr.
        dae_epochs = config['FIT'].getint('DAEEpochsNumber')                            # autoencoder epochs n.
        dae_reg = config['FIT'].getfloat('DAEReg')                                      # autoencoder reg. kernel
        dae_batch_n = config['FIT'].getint('DAEBatchesNumber')                          # autoencoder batches n.
        dae_layer_1 = config['FIT'].getint('DAENeuronsLayer1')                          # autoencoder neurons n. 1st
        dae_features = config['FIT'].getint('DAEFeaturesNumber')                        # autoencoder features

        # Tensor
        tensor_or_periods = config['TENSOR'].getint('TensorOrTimesteps')                # tensor or. timesteps
        tensor_sc_periods = config['TENSOR'].getint('TensorScTimesteps')                # tensor sc. timesteps
        tensor_sc_comps = config['TENSOR'].getint('TensorScComponents')                 # tensor sc. components

        # Other
        adv_period = config['FIT'].getint('ADVPeriod')                                  # adv mean for n. days
        scaler_original = config['FIT'].getint('ScalerOriginal')                        # scaler original data
        bayesian_ridge_iterations = config['FIT'].getint('BayesianRidgeIter')           # bayesian ridge iter.
        gradient_boosting_estimators = config['FIT'].getint('GBoostingEstimators')      # g. boosting estimators
        gradient_boosting_lr = config['FIT'].getfloat('GBoostingLr')                    # g. boosting lr.
        gradient_boosting_min_imp = config['FIT'].getint('GBoostingMinImportance')      # g. boosting min. imp.

        # Transfer learning
        activate_tf = config['FIT'].getboolean('ModelActivateTransferLearning')         # transfer learning activation
        epochs_n_tf = config['FIT'].getint('ModelEpochsNumberTransferLearning')         # epochs n. transfer lr.
        freeze_period = config['FIT'].getint('ModelLayersFreezePeriod')                 # layers freeze period
        alpha_freeze = ['dense_8', 'dense_11', 'conv1d_1']                              # freeze layers (conv1d_2?)
        beta_freeze = ['dense_17', 'dense_20', 'conv2d_1']                              # freeze layers (conv2d_2?)

        # Refit mask. arr.
        refit_dates_mask = hl.fn_refit_mask(dates, fit_startdate, refit_freq)
        # ---------------------------------
        fit_start_idx = np.where(self.data['dates'] > fit_startdate)[0]
        assert (fit_start_idx[0] - lookback - 10 > 0), 'Fit start date is not correct, please select later date.'
        assert (len(fit_start_idx) > 0), 'Fit start date is not correct, please select earlier date.'

        # -------------------------------
        #           VARIABLES
        # -------------------------------

        # TARGET: original returns matrix
        ret_mat = hl.handle_returns(self.data['close'], ndays, ret_cap,
                                    min_target, industry_balancing, industry)
        target_1 = hl.ts_delay(ret_mat, -ndays - delay)
        target_2 = hl.ts_delay(ret_mat, -ndays - delay - 1)
        target_3 = hl.ts_delay(ret_mat, -ndays - delay - 2)

        # TARGET: zscored returns matrix
        ret_mat_z = hl.handle_returns_z(self.data['close'], ndays, ret_cap,
                                        min_target_z, industry_balancing, industry)
        target_z = hl.ts_delay(ret_mat_z, -ndays - delay)

        # REGIME: average market return
        close = self.data['close']
        ret_average = (close - hl.ts_delay(close, ndays)) / hl.ts_delay(close, ndays)
        ret_average = np.nanmean(ret_average, axis=0)
        regime_mat = np.zeros((close.shape[0], close.shape[1]), dtype=np.float32)
        for i in range(regime_mat.shape[1]):
            regime_mat[:, i] = ret_average[i]
        regime_mat = hl.nan_to_zero(regime_mat)
        regime_mat = (regime_mat > 0) * 1.0 + (regime_mat < 0) * (-1.0)

        # LIQUIDITY: ranked average trading volume
        adv_mat = hl.ts_mean(self.data['close'] * self.data['volume'], adv_period)
        adv_mat = hl.cs_rank(hl.zero_to_nan(adv_mat))
        adv_mat = hl.nan_to_zero(adv_mat)
        adv_mat[adv_mat == 0] = 1

        # TENSOR (original): time series data holder.
        # available tensors (open, high, low, close)
        tensor_fields_original = ['open', 'high', 'low', 'close']
        tensor_data_original = np.empty((numstocks, numdates,
                                         tensor_or_periods, len(tensor_fields_original)))
        for ii, field in enumerate(tensor_fields_original):
            with h5py.File('data/pp_data/tensors/' + field + '_original.h5', 'r') as hf:
                tensor_data_original[:, :, :, ii] = hf[field + '_original'][:]

        # TENSOR (wavelet): time series data holder.
        # available tensors (open, high, low, close, adv)
        tensor_fields_wavelet = ['open', 'high', 'low', 'close']
        tensor_data_wavelet = np.empty((numstocks, numdates,
                                        tensor_sc_periods, tensor_sc_comps,
                                        len(tensor_fields_wavelet)))
        for jj, field in enumerate(tensor_fields_wavelet):
            with h5py.File('data/pp_data/tensors/' + field + '_wavelet.h5', 'r') as hf:
                tensor_data_wavelet[:, :, :, :, jj] = hf[field + '_wavelet'][:]

        # Delete config.
        del config

        # Data size
        data_size = (target_1.nbytes + target_2.nbytes + target_3.nbytes + target_z.nbytes
                     + adv_mat.nbytes + tensor_data_original.nbytes + tensor_data_wavelet.nbytes) / (1024 ** 3)
        print('Data size: {:.2f}Gb'.format(data_size))
        """

        FUNCTIONS
        Functions used to prepare data and train the model.

            1.  fe: De-noising stacked autoencoder.
            2.  fs: GBoosting based features importance filter.
            3.  fe: Stacking Bayesian regression prediction.
            4.  nn: Activation function (GeLU).
            5.  nn: Loss function.
            6.  nn: Loss function (+total return).
            7.  nn: Residual block.
            8.  nn: Squeeze-and-excitation network.
            9.  nn: Feature pyramid network block.
            10. nn: Spatial reduction block.
            11. ALPHA model.
            12. BETA model.

        """     

        # Fn: (5)
        # Neural network: loss function.
        def nn_loss_sk(y_true, y_pred):
            loss = K.switch(K.less(y_true * y_pred, 0),
                            loss_adj * y_pred ** 2 - K.sign(y_true) * y_pred + K.abs(y_true),
                            K.square(y_pred - y_true))
            return K.mean(loss, axis=-1)

        # Fn: (6)
        # Neural network: loss function (including total return).
        def nn_loss_sk_total(y_true, y_pred):
            # Day 1 loss.
            loss_1 = K.switch(K.less(y_true[:, 0] * y_pred[:, 0], 0),
                              loss_adj * y_pred[:, 0] ** 2 - K.sign(y_true[:, 0]) * y_pred[:, 0] + K.abs(y_true[:, 0]),
                              K.square(y_pred[:, 0] - y_true[:, 0]))
            # Day 2 loss.
            loss_2 = K.switch(K.less(y_true[:, 1] * y_pred[:, 1], 0),
                              loss_adj * y_pred[:, 1] ** 2 - K.sign(y_true[:, 1]) * y_pred[:, 1] + K.abs(y_true[:, 1]),
                              K.square(y_pred[:, 1] - y_true[:, 1]))
            # Day 3 loss.
            loss_3 = K.switch(K.less(y_true[:, 2] * y_pred[:, 2], 0),
                              loss_adj * y_pred[:, 2] ** 2 - K.sign(y_true[:, 2]) * y_pred[:, 2] + K.abs(y_true[:, 2]),
                              K.square(y_pred[:, 2] - y_true[:, 2]))
            # Total return loss.
            loss_4 = K.switch(K.less(y_true[:, 3] * y_pred[:, 3], 0),
                              loss_adj * y_pred[:, 3] ** 2 - K.sign(y_true[:, 3]) * y_pred[:, 3] + K.abs(y_true[:, 3]),
                              K.square(y_pred[:, 3] - y_true[:, 3]))
            # Loss value.
            loss = loss_1 * 0.4 + loss_2 * 0.2 + loss_3 * 0.2 + loss_4 * 0.3
            return K.mean(loss, axis=-1)
    
        # Fn: (11)
        # First model (alpha) used to predict stock prices.
        # 2-rRNet with DeepConv features + Seq2Seq DeepConv-GRU.
        # (Fine-tuning 3 lvl)
        def nn_model_alpha(alpha_nn, tensor_nn, regime_nn, ones_nn, decoder_nn, target_nn, adv_nn):

            # Reshape target value to match ED req.
            target_rnn = target_nn.reshape(target_nn.shape[0], target_nn.shape[1], 1)

            # Input
            input_alpha = Input(shape=(alpha_nn.shape[1],))
            input_tensor = Input(shape=(tensor_nn.shape[1], tensor_nn.shape[2]))
            input_regime = Input(shape=(regime_nn.shape[1],))
            input_ones = Input(shape=(ones_nn.shape[1],))
            input_decoder = Input(shape=(None, target_rnn.shape[2]))

            # ---------------------------------------------
            #             DEEP CONV. NET.
            #    (Features for FFN and Input for ED-GRU)
            # ---------------------------------------------
            # DeepConvNet architecture
            conv_node = Conv1D(nb_filter=ch_layer_1, filter_length=kernel_conv1d,
                               kernel_regularizer=regularizers.l2(reg_conv))(input_tensor)
            conv_node = LeakyReLU(leaky_slope)(conv_node)
            conv_node = BatchNormalization()(conv_node)
            conv_node = Conv1D(nb_filter=ch_layer_2, filter_length=kernel_conv1d,
                               kernel_regularizer=regularizers.l2(reg_conv))(conv_node)
            conv_node = LeakyReLU(leaky_slope)(conv_node)
            conv_node = BatchNormalization()(conv_node)
            conv_node = nn_senet_block(conv_node)                       # Squeeze-and-excitation

            # Deep Conv. Net. extracted features.
            deep_features = GlobalAveragePooling1D()(conv_node)
            deep_features = Dense(deep_features_n)(deep_features)
            deep_features = Activation('linear')(deep_features)

            # Input to encoder-decoder model.
            input_encoder = MaxPooling1D()(conv_node)

            # ---------------------------------------------
            #              Regime RESNET
            # ---------------------------------------------
            # Pre-layer: adding deep features to alphas and normalization input.
            input_norm = layers.concatenate([input_alpha, deep_features])
            input_norm = BatchNormalization()(input_norm)
            # 1st node.
            res_note = Dense(layer_capacity, kernel_regularizer=regularizers.l2(reg_ffn))(input_norm)
            res_note = Dropout(dropout_rate)(res_note, training=True)
            res_note = LeakyReLU(leaky_slope)(res_note)
            res_note = BatchNormalization()(res_note)
            res_note = nn_residual_block(res_note, layer_capacity)      # Residual Block
            res_note = Dense(4)(res_note)
            res_note = Activation('linear')(res_note)
            # 2nd node.
            ord_node = Dense(n_layer_1, kernel_regularizer=regularizers.l2(reg_ffn))(input_norm)
            ord_node = Dropout(dropout_rate)(ord_node, training=True)
            ord_node = LeakyReLU(leaky_slope)(ord_node)
            ord_node = BatchNormalization()(ord_node)
            ord_node = Dense(n_layer_2, kernel_regularizer=regularizers.l2(reg_ffn))(ord_node)
            ord_node = Dropout(dropout_rate)(ord_node, training=True)
            ord_node = Activation('gelu')(ord_node)
            ord_node = BatchNormalization()(ord_node)
            ord_node = Dense(4)(ord_node)
            ord_node = Activation('linear')(ord_node)
            # Market regimes.
            reg_one = Activation('hard_sigmoid')(input_regime)
            reg_one = layers.multiply([reg_one, res_note])
            reg_two = layers.subtract([input_ones, reg_one])
            reg_two = layers.multiply([reg_two, ord_node])
            # Union.
            reg_out = layers.add([reg_one, reg_two])
            reg_out = Dense(4, activation='linear')(reg_out)

            # ---------------------------------------------
            #             ENCODER-DECODER GRU
            # ---------------------------------------------
            # Encoder architecture
            encoder_cells = []
            for hidden_neurons in rnn_cells:
                encoder_cells.append(GRUCell(hidden_neurons,
                                             kernel_regularizer=regularizers.l2(reg_gru)))
            encoder = RNN(encoder_cells, return_state=True)
            encoder_outputs_and_states = encoder(input_encoder)
            encoder_states = encoder_outputs_and_states[1:]
            # Decoder architecture
            decoder_cells = []
            for hidden_neurons in rnn_cells:
                decoder_cells.append(GRUCell(hidden_neurons,
                                             kernel_regularizer=regularizers.l2(reg_gru)))
            decoder = RNN(decoder_cells, return_sequences=True, return_state=True)
            decoder_outputs_and_states = decoder(input_decoder, initial_state=encoder_states)
            # Out
            decoder_outputs = decoder_outputs_and_states[0]
            decoder_outputs = Dense(1, activation='linear')(decoder_outputs)

            # Output layers
            out_single = reg_out
            out_multi = decoder_outputs

            # Optimizer.
            adam_opt = optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

            # Model final in-out structure.
            model = Model(inputs=[input_alpha, input_tensor, input_regime, input_ones, input_decoder],
                          outputs=[out_single, out_multi])
            model.compile(optimizer=adam_opt,
                          loss=[nn_loss_sk_total, nn_loss_sk],
                          loss_weights=[1 - s2s_weight, s2s_weight])
            # model.summary()

            # Train the model.
            print('      training neural network - ALPHA')

            # GENERAL FIT <1st>
            # Fit works with all input data.
            model.fit(x=[alpha_nn, tensor_nn, regime_nn, ones_nn, decoder_nn],
                      y=[target_nn, target_rnn[:, :-1]],
                      epochs=epochs_n_general, batch_size=int(alpha_nn.shape[0] / batches_n),
                      shuffle=False, verbose=2)

            # FINE-TUNING <2nd>
            # Fit takes as input only the latest values trying to capture
            # the most relevant market information.
            latest_index = int(target_nn.shape[0] * 0.1)
            model.fit(x=[alpha_nn[latest_index:], tensor_nn[latest_index:],
                         regime_nn[latest_index:], ones_nn[latest_index:],
                         decoder_nn[latest_index:]],
                      y=[target_nn[latest_index:], target_rnn[latest_index:, :-1]],
                      epochs=epochs_n_fn, batch_size=int(alpha_nn.shape[0] / batches_n),
                      shuffle=False, verbose=2)

            # FINE-TUNING <3rd>
            # Fit takes as input only the most liquid stocks trying
            # to focus only on predicting them.
            liquid_idx = (adv_nn > 1.25)
            model.fit(x=[alpha_nn[liquid_idx], tensor_nn[liquid_idx],
                         regime_nn[liquid_idx], ones_nn[liquid_idx],
                         decoder_nn[liquid_idx]],
                      y=[target_nn[liquid_idx], target_rnn[liquid_idx, :-1]],
                      epochs=epochs_n_fn, batch_size=int(alpha_nn.shape[0] / batches_n),
                      shuffle=False, verbose=2)

            # FINE-TUNING <4th>
            # Fit takes model predictions and use only bad predicted data points.
            # --> first let`s take model predictions and rank them.
            model_prediction = model.predict([alpha_nn, tensor_nn, regime_nn, ones_nn, decoder_nn])
            # 2-RRNet predictions handling.
            pred_ffn = model_prediction[0]
            pred_ffn = pred_ffn / scaler_original
            pred_ffn = hl.nn_handle_multi_day_predictions(pred_ffn, ret_sizing, ret_adj)
            # S2S-DeepConv-GRU predictions handling.
            pred_en = model_prediction[1].reshape(model_prediction[1].shape[0], model_prediction[1].shape[1])
            pred_en = pred_en / scaler_original
            pred_en = hl.nn_handle_multi_day_predictions(pred_en, ret_sizing, ret_adj)
            # Final prediction.
            prediction_internal = hl.cs_zscore(pred_ffn) * (1 - s2s_weight) + hl.cs_zscore(pred_en) * s2s_weight
            # --> now define pnl var.
            pnl_predicted = prediction_internal * target_nn[:, 0]
            pnl_predicted = hl.nan_to_zero(hl.cs_rank(pnl_predicted))
            pnl_predicted[pnl_predicted == 0] = 1
            pnl_predicted_idx = (pnl_predicted < 1.5)
            # --> finally fit the model.
            model.fit(x=[alpha_nn[pnl_predicted_idx], tensor_nn[pnl_predicted_idx],
                         regime_nn[pnl_predicted_idx], ones_nn[pnl_predicted_idx],
                         decoder_nn[pnl_predicted_idx]],
                      y=[target_nn[pnl_predicted_idx], target_rnn[pnl_predicted_idx, :-1]],
                      epochs=epochs_n_fn, batch_size=int(alpha_nn.shape[0] / batches_n),
                      shuffle=False, verbose=2)

            return model
       

        """        
        ----------------------------------------------------------------------------------------------------------
                                                        MODEL
                                    Training loop goes through each refit date
                                        to fit the model on historical data.
        ----------------------------------------------------------------------------------------------------------
        """
     
        # Define dictionary to store trained models.
        model_dict = dict()

        # Walk through each day.
        for di, date in enumerate(dates):
            if di <= lookback or date <= fit_startdate:
                continue

            # Fit the model.
            if refit_dates_mask[di]:
                print('')
                print('  Fitting on {}'.format(date))

                # Start and end indices
                idx_start = di - delay - lookback
                idx_end = di - delay - 1

                # PREPROCESS DATA.
                # ---------------------------------------------
                # Target 1st day.
                target_reg = np.copy(target_1[:, idx_start: idx_end])
                target_reg[:, -1 - delay:] = np.nan
                target_reg = target_reg.reshape(-1, order='F')
                target_valids = np.isfinite(target_reg)
                target_reg = target_reg[target_valids]
                # Target 2nd day.
                target_2_reg = np.copy(target_2[:, idx_start: idx_end])
                target_2_reg[:, -1 - delay:] = np.nan
                target_2_reg = target_2_reg.reshape(-1, order='F')
                target_2_reg = target_2_reg[target_valids]
                # Target 3rd day.
                target_3_reg = np.copy(target_3[:, idx_start: idx_end])
                target_3_reg[:, -1 - delay:] = np.nan
                target_3_reg = target_3_reg.reshape(-1, order='F')
                target_3_reg = target_3_reg[target_valids]
                # Target (Zscore)
                target_reg_z = np.copy(target_z[:, idx_start: idx_end])
                target_reg_z[:, -1 - delay:] = np.nan
                target_reg_z = target_reg_z.reshape(-1, order='F')
                target_reg_z = target_reg_z[target_valids]

                # Tensor original
                or_reg = np.copy(tensor_data_original[:, idx_start: idx_end])
                or_reg[:, -1 - delay:] = np.nan
                or_reg = or_reg.reshape(or_reg.shape[0] * or_reg.shape[1],
                                        tensor_or_periods, len(tensor_fields_original), order='F')
                or_reg = or_reg[target_valids]

                # Tensor scalogram
                sc_reg = np.copy(tensor_data_wavelet[:, idx_start: idx_end])
                sc_reg[:, -1 - delay:] = np.nan
                sc_reg = sc_reg.reshape(sc_reg.shape[0] * sc_reg.shape[1], tensor_sc_periods,
                                        tensor_sc_comps, len(tensor_fields_wavelet), order='F')
                sc_reg = sc_reg[target_valids]

                # Regime var.
                regime_var = np.copy(regime_mat[:, idx_start: idx_end])
                regime_var[:, -1 - delay:] = np.nan
                regime_var_di = regime_var.reshape(-1, order='F')
                regime_reg = regime_var_di[target_valids]
                # Liquidity var.
                adv_var = np.copy(adv_mat[:, idx_start: idx_end])
                adv_var[:, -1 - delay:] = np.nan
                adv_var_di = adv_var.reshape(-1, order='F')
                adv_reg = adv_var_di[target_valids]
                # -----------------------------------------------

                # Load alphas.
                alpha_reg = np.empty((target_reg.shape[0], len(alphas_list)), dtype=np.float64, order='F')
                for i, alpha_name in enumerate(alphas_list):
                    temp_alpha = joblib.load(self.path_alphas + alpha_name)
                    alpha = np.copy(temp_alpha[:, idx_start: idx_end])
                    alpha = hl.zero_to_nan(alpha)
                    alpha = hl.nan_to_zero(alpha.reshape(-1, order='F')[target_valids])
                    alpha_reg[:, i] = alpha
                # Load events.
                event_reg = np.empty((target_reg.shape[0], len(events_list)), dtype=np.int32, order='F')
                for i, event_name in enumerate(events_list):
                    temp_event = joblib.load(self.path_events + event_name)
                    event = np.copy(temp_event[:, idx_start: idx_end])
                    event = hl.zero_to_nan(event)
                    event = hl.nan_to_zero(event.reshape(-1, order='F')[target_valids])
                    event_reg[:, i] = event

                # Cap alphas min/max values and unite.
                alpha_reg[alpha_reg > max_alpha] = max_alpha
                alpha_reg[alpha_reg < -max_alpha] = -max_alpha
                alpha_reg[np.abs(alpha_reg) < min_alpha] = 0
                alpha_reg = hl.nan_to_zero(alpha_reg)
                target_reg = hl.nan_to_zero(target_reg)

                # Break condition
                if alpha_reg.size == 0 or target_reg.size == 0:
                    continue
                assert alpha_reg.shape[0] == target_reg.shape[0], 'Shapes doesn`t match'
                assert or_reg.shape[0] == target_reg.shape[0], 'Shapes doesn`t match'
                assert sc_reg.shape[0] == target_reg.shape[0], 'Shapes doesn`t match'

                print('      vars. loaded')

                """                
                    Pre-model.   
                    Brief description:
                        - Prepare data to feed the model.   
                    Structure:
                        - Feature Eng: D-SAE model extracts high level deep features from given alpha matrix.
                        - Feature selection: GBoosting based feature importance filter drops irrelevant features.
                        - Stacking (alpha): add events to alpha matrix.
                        - Stacking (alpha): Bayesian Ridge model predicts target values and stack 
                        predictions to alpha matrix. 
                        - Stacking (target): add two more days to target matrix (original).
                        - Stacking (target): add total return (3 days) to target values (original).

                """
               
                # Stacking events:
                #     - EPS data.
                #     - DIV data.
                alpha_reg = np.hstack((alpha_reg, event_reg))

                # Stack multiple predictions (3 days)
                target_reg = np.hstack((target_reg.reshape(-1, 1), target_2_reg.reshape(-1, 1)))
                target_reg = np.hstack((target_reg, target_3_reg.reshape(-1, 1)))
                
                # Scale original target and tensor values.
                target_reg = target_reg * scaler_original
                or_reg = or_reg * scaler_original

                # Final check
                target_reg = hl.nan_to_zero(target_reg)
                decoder_reg = np.zeros((alpha_reg.shape[0], 3, 1))

                break
            
        return target_reg, or_reg, decoder_reg



In [7]:
target, tensor, dec = Fit().run_model()

print(target.shape)
print(tensor.shape)
print(dec.shape)



Fit model:
Days available: 2037
Data size: 1.93Gb

  Fitting on 20160104
      vars. loaded
(82651, 3)
(82651, 20, 4)
(82651, 3, 1)


In [8]:
tensor_reg = np.copy(tensor)
ret_reg = np.copy(target)
dec_reg = np.copy(dec)

In [34]:
from tensorflow.python.keras.layers import Activation

n_batches = 200
days_seq = 3
learning_rate = 0.002
adam_lr_decay = 0.005
rnn_neurons_1 = 10
reg_gru = 0.01
n_epochs = 4
numstocks = 599


 # Select relevant days to predict.
ret_reg = ret_reg[:, :1]
# ------------------

# Inputs
input_tensor = Input(shape=(tensor_reg.shape[1], tensor_reg.shape[2]))
input_encoder = input_tensor

# ---------------------------------------------
#                   LSTM
# ---------------------------------------------
node_lstm = GRU(rnn_neurons_1, return_sequences=True)(input_encoder)
node_lstm = Activation('tanh')(node_lstm)
node_lstm = GRU(rnn_neurons_1, return_sequences=False)(input_encoder)
node_lstm = Activation('tanh')(node_lstm)
node_lstm = Dense(1, activation='linear')(node_lstm)

# Output layers
out_gru = node_lstm

# Define the model
model = Model(inputs=[input_tensor],
              outputs=[out_gru])
model.compile(optimizer='adam',
              loss=['mse'])
model.summary()
print('done defining network')

# Train second model
median_data_index = int(ret_reg.shape[0] / 2)
batch_size = int(ret_reg.shape[0] / n_batches)

model.fit(x=[tensor_reg],
          y=[ret_reg],
          epochs=n_epochs - 2,
          batch_size=batch_size,
          shuffle=True, verbose=0)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_36 (InputLayer)        (None, 20, 4)             0         
_________________________________________________________________
gru_17 (GRU)                 (None, 10)                450       
_________________________________________________________________
activation_4 (Activation)    (None, 10)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 11        
Total params: 461
Trainable params: 461
Non-trainable params: 0
_________________________________________________________________
done defining network


<tensorflow.python.keras.callbacks.History at 0x1fd5f626cc0>

In [35]:
tensor = tensor_reg[-1000:]
predicted_returns = model.predict([tensor])

In [36]:
print(predicted_returns.shape)

(1000, 1)


In [12]:
# Params
days_seq = 3
n_epochs = 4
batch_size = 10000

# Define decoder dummy.
decoder_reg = np.zeros((ret_reg.shape[0], days_seq, 1))

# Reshape target value to match ED req.
target_rnn = ret_reg.reshape(ret_reg.shape[0], ret_reg.shape[1], 1)

# Define an input sequence and process it.
encoder_inputs = Input(batch_shape=(batch_size, tensor_reg.shape[1], tensor_reg.shape[2]))
decoder_inputs = Input(batch_shape=(batch_size, None, target_rnn.shape[2]))

# Encoder GRU
encoder_gru = GRU(30, return_sequences=True, return_state=True)
encoder_out, encoder_state = encoder_gru(encoder_inputs)

# Set up the decoder GRU, using `encoder_states` as initial state.
decoder_gru = GRU(30, return_sequences=True, return_state=True)
decoder_out, decoder_state = decoder_gru(decoder_inputs, initial_state=encoder_state)

# Attention layer
attn_layer = AttentionLayer()
attn_out, attn_states = attn_layer([encoder_out, decoder_out])

# Concat attention input and decoder GRU output
decoder_concat_input = Concatenate(axis=-1)([decoder_out, attn_out])

# Dense layer
dense = Dense(target_rnn.shape[2], activation='linear')
dense_time = TimeDistributed(dense)
decoder_pred = dense_time(decoder_concat_input)

# Full model
full_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
full_model.compile(optimizer='adam', loss='mse')
full_model.summary()

for ep in range(n_epochs):
    losses = []    
    for bi in range(0, target_rnn.shape[0] - batch_size, batch_size):
        tensor_seq = tensor_reg[bi:bi + batch_size]
        dec_seq = dec_reg[bi:bi + batch_size]
        target_seq = target_rnn[bi:bi + batch_size]        
        full_model.train_on_batch(x=[tensor_seq, dec_seq], 
                             y=[target_seq])
        l = full_model.evaluate(x=[tensor_seq, dec_seq], 
                           y=[target_seq],
                           batch_size=batch_size, 
                           verbose=1)
        losses.append(l)
    if (ep + 1) % 1 == 0:
        print("Loss in epoch {}: {}".format(ep + 1, np.mean(losses)))

        
""" Inference model """
batch_size = 32

""" Encoder (Inference) model """
encoder_inf_inputs = Input(batch_shape=(batch_size, tensor_reg.shape[1], tensor_reg.shape[2]))
encoder_inf_out, encoder_inf_state = encoder_gru(encoder_inf_inputs)
encoder_model = Model(inputs=encoder_inf_inputs, outputs=[encoder_inf_out, encoder_inf_state])

""" Decoder (Inference) model """
decoder_inf_inputs = Input(batch_shape=(batch_size, 3, target_rnn.shape[2]))
encoder_inf_states = Input(batch_shape=(batch_size, tensor_reg.shape[1], 30))
decoder_init_state = Input(batch_shape=(batch_size, 30))

decoder_inf_out, decoder_inf_state = decoder_gru(decoder_inf_inputs, initial_state=decoder_init_state)
attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
decoder_inf_concat = Concatenate(axis=-1)([decoder_inf_out, attn_inf_out])
decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
decoder_model = Model(inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
                      outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (10000, 20, 4)       0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            (10000, None, 1)     0                                            
__________________________________________________________________________________________________
gru_4 (GRU)                     [(10000, 20, 30), (1 3150        input_7[0][0]                    
__________________________________________________________________________________________________
gru_5 (GRU)                     [(10000, None, 30),  2880        input_8[0][0]                    
                                                                 gru_4[0][1]                      
__________

In [13]:
idx = 32
ret = ret_reg[-idx:]
tensor = tensor_reg[-idx:]
dec = dec_reg[-idx:]
enc_outs, enc_last_state = encoder_model.predict(tensor)
dec_state = enc_last_state
dec_out, attention, dec_state = decoder_model.predict([enc_outs, dec_state, dec])