In [None]:
import numpy as np
import tensorflow as tf
import copy
from sklearn import preprocessing
import datetime
import pickle

In [None]:
class NetAttributes:
    def __init__(self, n_neurons = 100, 
                 learning_rate = 0.003, 
                 num_layers = 1,
                 rnn_type = 2,
                 n_repeats = 2):
        self.n_neurons = n_neurons;
        self.learning_rate = learning_rate;
        self.num_layers = num_layers;
        self.rnn_type = rnn_type;
        self.n_repeats = n_repeats
        self.n_steps = None
        self.n_inputs = None
        self.n_outputs = 1
        
    def set_input_dimension(self, n_steps, n_inputs):
        self.n_steps = n_steps
        self.n_inputs = n_inputs


In [None]:
class NetStates:
    def __init__(self):
        self.prediction_states = None
        self.training_states = None
    

In [30]:
class StatefulLstmModel:
    def __init__(self,
                n_neurons=100,
                learning_rate=0.002,
                num_layers=2,
                rnn_type=1,
                n_repeats=30):

        self.net_attributes = NetAttributes(n_neurons,
                                   learning_rate,
                                   num_layers,
                                   rnn_type,
                                   n_repeats)
        self.net_states = NetStates()
        self.model_initialized = False
        self.sess = None
    
    def __del__(self):
        if self.sess != None:
            self.sess.close()
    
    def get_batch(self, seq_index, data_train_input, data_train_output):
        X_batch = data_train_input[seq_index:seq_index+1]
        y_batch = data_train_output[seq_index:seq_index+1]
        return X_batch, y_batch
    
    
    def initialize_layers(self):
        layers = None
        net_attributes = self.net_attributes
        if net_attributes.rnn_type == 0:
            layers = [tf.nn.rnn_cell.BasicLSTMCell(net_attributes.n_neurons) 
              for _ in range(net_attributes.num_layers)]
        elif net_attributes.rnn_type == 1:
            layers = [tf.nn.rnn_cell.LSTMCell(net_attributes.n_neurons, use_peepholes=False) 
              for _ in range(net_attributes.num_layers)]
        elif net_attributes.rnn_type == 2:
            layers = [tf.nn.rnn_cell.LSTMCell(net_attributes.n_neurons, use_peepholes=True) 
              for _ in range(net_attributes.num_layers)]
        else:
            print("WRONG")
        return layers
    
    def reset_graph(self, seed=42):
        tf.reset_default_graph()
        tf.set_random_seed(seed)
        np.random.seed(seed)
    
    def create_model(self):
        net_attributes = self.net_attributes
        self.X = tf.placeholder(tf.float32, [None, net_attributes.n_steps, net_attributes.n_inputs])
        self.y = tf.placeholder(tf.float32, [None, net_attributes.n_steps, net_attributes.n_outputs])
        layers = self.initialize_layers()
        cell = tf.nn.rnn_cell.MultiRNNCell(layers)
        self.init_state = tf.placeholder(tf.float32, [net_attributes.num_layers, 2, 1, net_attributes.n_neurons])
        
        state_per_layer_list = tf.unstack(self.init_state, axis=0)
        rnn_tuple_state = tuple(
            [tf.nn.rnn_cell.LSTMStateTuple(state_per_layer_list[idx][0], state_per_layer_list[idx][1])
             for idx in range(net_attributes.num_layers)]
        )
        
        rnn_outputs, self.new_states = tf.nn.dynamic_rnn(cell, self.X, dtype=tf.float32, 
                                                    initial_state=rnn_tuple_state)
        
        stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, net_attributes.n_neurons])
        stacked_outputs = tf.layers.dense(stacked_rnn_outputs, net_attributes.n_outputs)
        self.outputs = tf.reshape(stacked_outputs, [-1, net_attributes.n_steps, net_attributes.n_outputs])
        
        self.loss = tf.reduce_mean(tf.square(self.outputs - self.y))
        optimizer = tf.train.AdamOptimizer(learning_rate=net_attributes.learning_rate)
        self.training_op = optimizer.minimize(self.loss)

        self.init = tf.global_variables_initializer()
        self.model_initialized = True
    
    # train the model, input is the training data for one cycle
    # input is in the shape: [days, steps, features], the features are 
    # 1. diff, 2. volume. 3. timesteps.
    def fit(self, data_train_input, data_train_output, prediction_period):
        net_attributes = self.net_attributes
        net_states = self.net_states
        n_inputs = data_train_input.shape[2]
        n_steps = data_train_input.shape[1]

        net_attributes.set_input_dimension(n_steps, n_inputs)
        batch_size = 1
        days = data_train_input.shape[0]
        
        self.reset_graph()
        self.create_model()
        my_loss_train_list = []
        sess = tf.Session()
        # TODO: load from file.

        self.init.run(session=sess)
        # if this is the first time of fit?
        if self.net_states.training_states == None:
            init_states = np.zeros((net_attributes.num_layers, 2, 1, net_attributes.n_neurons))
        else:
            init_states = self.net_states.training_states
            
        for repeat in range(net_attributes.n_repeats):
            rnn_states = copy.deepcopy(init_states)
            for seq in range(days):
                X_batch, y_batch = self.get_batch(seq, data_train_input, data_train_output)
                feed_dict = {
                        self.X: X_batch,
                        self.y: y_batch,
                        self.init_state: rnn_states}
                my_op, rnn_states, my_loss_train, my_outputs = sess.run([self.training_op, 
                          self.new_states, 
                          self.loss, 
                          self.outputs], feed_dict=feed_dict)

                my_loss_train_list.append(my_loss_train)
                # last repeat , remember the sates
                if seq+1 == prediction_period and repeat == net_attributes.n_repeats-1:
                    # next training loop starts from here
                    training_states = copy.deepcopy(rnn_states)
                my_loss_train_avg = sum(my_loss_train_list) / len(my_loss_train_list)

            print("{} repeat={} training finished, training MSE={}".format(
                datetime.datetime.now().time(),
                repeat, my_loss_train_avg))
        
        self.net_states.training_states = training_states
        self.net_states.prediction_states = rnn_states
        self.sess = sess
        return
    
    def predict_base(self, data_test_input, data_test_output=None):
        net_attributes = self.net_attributes
        net_states = self.net_states
        days = data_test_input.shape[0]
        
        rnn_states = copy.deepcopy(net_states.prediction_states)
        #X, y, init_state, init, training_op, new_states, loss, outputs = self.create_model()
        sess = self.sess
        
        my_loss_test_list = []
        input_shape = data_test_input.shape
        outputs_all_days = np.zeros((input_shape[0], input_shape[1], 1))
        for seq in range(days):
            if data_test_output is None:
                feed_dict = {
                    self.X: data_test_input[seq:seq+1],
                    self.init_state: rnn_states,
                }

                rnn_states, my_outputs = sess.run([self.new_states, self.outputs], feed_dict=feed_dict)
            else:
                feed_dict = {
                    self.X: data_test_input[seq:seq+1],
                    self.y: data_test_output[seq:seq+1],
                    self.init_state: rnn_states,
                }

                rnn_states, my_outputs, my_loss_test = sess.run([self.new_states, 
                                                                 self.outputs, self.loss], feed_dict=feed_dict)
                print("Predicting seq:{} testing MSE: {}".format(seq, my_loss_test))
            outputs_all_days[seq] = my_outputs
            
        
        return outputs_all_days
    
    def predict(self, data_test_input):
        return self.predict_base(data_test_input)
        
    def predict_and_verify(self, data_test_input, data_test_output):
        return self.predict_base(data_test_input, data_test_output)
      
    def get_attributes_filename(self, path):
        if path[-1] != '/':
            path += '/'
        return path + 'net_attributes.pkl'
    
    def get_path(self, path, date):
        if path[-1] != '/':
            path += '/'
        return path + date + '/'
    
    def get_states_filename(self, path, date):
        return self.get_path(path, date) + 'net_states.pkl'
    
    def get_model_filename(self, path, date):
        return self.get_path(path, date) + '/tf_session.ckpt'
    
    def save(self, path, date):
        saver = tf.train.Saver()
        save_path = saver.save(self.sess, self.get_model_filename(path, date))
        with open(self.get_attributes_filename(path), 'wb') as f:
            # Pickle the 'data' dictionary using the highest protocol available.
            pickle.dump(self.net_attributes, f, pickle.HIGHEST_PROTOCOL)
        with open(self.get_states_filename(path, date), 'wb') as f:
            pickle.dump(self.net_states, f, pickle.HIGHEST_PROTOCOL)
        print("Model saved in path: %s" % path)
        
            
    def load(self, path, date):
        # TODO: if date is none, load the latest.
        
        # restore hyper-params
        with open(self.get_attributes_filename(path), 'rb') as f:
            self.net_attributes = pickle.load(f)

        # restore states
        with open(self.get_states_filename(path), 'rb') as f:
            self.net_states = pickle.load(f)
        
        # 2. restore graph
        if self.model_initialized == False:
            self.reset_graph()
            self.create_model()
        
        # 3. restore session
        saver = tf.train.Saver()
        self.sess = tf.Session()
        saver.restore(self.sess, self.get_model_filename(path))
        print("Model restored.")

In [31]:
class TimeFormat:
    NONE = 0
    DAY = 1
    WEEK = 2

class DataManipulator:
    def __init__(self, beta, ema, time_format, volume_input, use_centralized_bid, 
                split_daily_data, n_training_days):
        self.beta = beta
        self.ema = ema
        self.time_format = time_format
        self.volume_input = volume_input
        self.use_centralized_bid = use_centralized_bid
        self.split_daily_data = split_daily_data
        self.n_training_days = n_training_days
        self.scaler_input = None
        self.scaler_output = None
        
    def volume_transform(self, volume_series):
        # all the volumes must bigger than 0
        assert(np.all(volume_series>=0))
        return  np.log(volume_series.astype('float')+1)

    def inverse_transform_output(self, scaled_outputs):
        ori_shape = scaled_outputs.shape
        outputs_reshaped = scaled_outputs.reshape((ori_shape[0]*ori_shape[1], 
                                                   ori_shape[2]))
        #outputs = np.exp(self.scaler_output.inverse_transform(outputs_reshaped)) - 1
        outputs = self.scaler_output.inverse_transform(outputs_reshaped)
        return outputs.reshape(ori_shape)
    
    
    def transform(self, data_all, n_inputs, n_outputs):
        orig_shape = data_all.shape
        data_train_reshape = data_all.astype('float').reshape((orig_shape[0] * orig_shape[1], orig_shape[2]))
        
        self.scaler_input = preprocessing.MinMaxScaler().fit(data_train_reshape[:,:n_inputs])
        data_train_input_scaled = self.scaler_input.transform(data_train_reshape[:,:n_inputs])
        
        # the invalid step, we change it to zero!
        data_train_input_scaled[~np.any(data_train_reshape, axis=1)] = 0
        data_train_input = data_train_input_scaled.reshape(orig_shape[0], orig_shape[1], n_inputs)
        
        self.scaler_output = preprocessing.MinMaxScaler().fit(data_train_reshape[:,-n_outputs:])
        data_train_output_scaled = self.scaler_output.transform(data_train_reshape[:,-n_outputs:])
        # the invalid step, we change it to zero!
        data_train_output_scaled[~np.any(data_train_reshape, axis=1)] = 0
        data_train_output = data_train_output_scaled.reshape(orig_shape[0], orig_shape[1], n_outputs)
        
        return data_train_input, data_train_output

    def prep_test_data(self, input_path):
        return
    
    def prep_training_data(self, input_path, stock_index):
        # load numpy file
        npy_file_name = input_path + "/ema{}_beta{}_{}.npy".format(self.ema, self.beta, stock_index)
        input_np_data = np.load(npy_file_name, allow_pickle=True)
        
        # date list
        date_list = []
        for i in range(self.n_training_days):    
            date = input_np_data[i][0][5].date().strftime("%y%m%d")
            date_list.append(date_list)
        
        
        # check if we have days more than training period
        assert(input_np_data.shape[0] >= self.n_training_days)
        # the diff is the mandatory
        input_columns = [2]
        
        time_format = self.time_format
        
        if time_format == TimeFormat.DAY:
            input_columns += [0]
        elif time_format == TimeFormat.WEEK:
            input_columns += [1]
        
        if self.volume_input == 1:
            input_columns += [3]
        
        output_columns = [4]
        timestamp_column = [5]
        price_column = [6]
        input_np_data = input_np_data[:,:,input_columns + output_columns + timestamp_column + price_column]
        
        # we must tranform the volume for it is too big.
        if self.volume_input == 1:
            input_np_data[:,:,-4] = self.volume_transform(input_np_data[:,:,-4])
        
        if self.use_centralized_bid == 0:
            # remove all the rows for centralized bid. it should be from 9.01 to 17.24, which is 516-12=504 steps
            input_np_data = input_np_data[:,7:-5,:]
            
        shape = input_np_data.shape
        n_training_sequences = self.n_training_days
        if self.split_daily_data == 1:
            assert(shape[1] % 2 == 0)
            input_np_data = input_np_data.reshape((shape[0]*2, 
                                                  int(shape[1]/2), 
                                                  shape[2]))
            # get the first date and last date
            n_training_sequences *= 2
            
        # to scale the data, but not the timestamp and price
        data_train_input, data_train_output = self.transform(input_np_data[:n_training_sequences,:,:-2], len(input_columns), 1)
        return data_train_input, data_train_output, input_np_data[:n_training_sequences,:,-2], input_np_data[:n_training_sequences,:,-1]

In [32]:
import numpy as np
from pathlib import Path
import pandas as pd
import GPy
import GPyOpt

class ValueModel:
    mixed_domain = [{'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20,160,20))},
          {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001,0.002,0.003,0.004)},
          {'name': 'num_layers', 'type': 'discrete', 'domain': (1,2,3,4)},
          {'name': 'rnn_type', 'type': 'discrete', 'domain': (0,1,2)},
          {'name': 'learning_period', 'type': 'discrete', 'domain': (10,20,30,40)},
          {'name': 'prediction_period', 'type': 'discrete', 'domain': (1,2,3,5,10)},
          {'name': 'n_repeats', 'type': 'discrete', 'domain': (3,5,10,20,30,40)},
          {'name': 'beta', 'type': 'discrete', 'domain': (99, 98)},
          {'name': 'ema', 'type': 'discrete', 'domain': (1,5,10,20)},
          {'name': 'time_format', 'type': 'discrete', 'domain': (0,1,2)}, #1 for stepofday, 2 for stepofweek
          {'name': 'volume_input', 'type': 'discrete', 'domain': (0,1)},
          {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (0,1)},
          {'name': 'split_daily_data', 'type': 'discrete', 'domain': (0,1)}
         ]
    
    mixed_domain_test = [{'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20,160,20))},
          {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001,0.002,0.003,0.004)},
          {'name': 'num_layers', 'type': 'discrete', 'domain': (1,2,3,4)},
          {'name': 'rnn_type', 'type': 'discrete', 'domain': (0,1,2)},
          {'name': 'learning_period', 'type': 'discrete', 'domain': (10,20)},
          {'name': 'prediction_period', 'type': 'discrete', 'domain': (5,10)},
          {'name': 'n_repeats', 'type': 'discrete', 'domain': (3,5)},
          {'name': 'beta', 'type': 'discrete', 'domain': (99, 98)},
          {'name': 'ema', 'type': 'discrete', 'domain': (1,5,10,20)},
          {'name': 'time_format', 'type': 'discrete', 'domain': (0,1,2)}, #1 for stepofday, 2 for stepofweek
          {'name': 'volume_input', 'type': 'discrete', 'domain': (0,1)},
          {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (0,1)},
          {'name': 'split_daily_data', 'type': 'discrete', 'domain': (0,1)}
         ]
    
    
    def __init__(self, stock_name, stock_index, n_training_days):
        self.stock_name = stock_name
        self.stock_index = stock_index
        self.n_training_days = n_training_days
        self.save_path = "model_{}_{}".format(stock_name, n_training_days)
        self.last_training_date = None
        self.model = None
        self.max_profit = -999.0
        return
    
    def get_parameter_str(self, X):
        parameter_str = ""
        for i in range(len(self.mixed_domain)):
            parameter_str += self.mixed_domain[i]["name"]
            parameter_str += ':'
            parameter_str += str(X[i])
            parameter_str += ','
        return parameter_str
    
    def get_max_steps(self, groups):
        max_steps = 0
        for index, df in groups:
            df_len = len(df)
            if df_len > max_steps:
                max_steps = df_len
        return max_steps

    
    def get_data_prep_desc_filename(self, path):
        return path + '/data_prep_desc.pkl'
    
    def optimize(self, input_csv_path, max_iter=300, is_test=False):
        if is_test == True:
            mixed_domain = self.mixed_domain_test
        else:
            mixed_domain = self.mixed_domain
        
        opt_handler = GPyOpt.methods.BayesianOptimization(f=self.opt_func,  # Objective function       
                                     domain=mixed_domain,          # Box-constraints of the problem
                                     initial_design_numdata = 20,   # Number data initial design
                                     acquisition_type='EI',        # Expected Improvement
                                     exact_feval = True)           # True evaluations, no sample noise
        opt_handler.run_optimization(max_iter, eps=0)
    
    def save(self):
        self.model.save(self.save_path, self.last_training_date)
        
    def load(self):
        save_path = self.save_path
        
        # iterate the path, and find out the latest date
        
    
    def opt_func(self, X_list):
        answer = np.zeros((X_list.shape[0], 1))
        for i in range(len(X_list)):
            print(self.get_parameter_str(X_list[i]))
            features = X_list[i]
            error, model, value_price = self.get_value_result(features)
            
            strategy_model = StrategyModel()
            strategy_model.optimize(value_price)
            profit, is_hold = strategy_model.get_best_result()
            profit_daily = (profit-1) / len(value_price)
            print("total profit={}, profit/day={} error={}".format(profit, profit_daily, error))
            #self.draw_step_profit_graph(self.step_profit_list, "step_profit_{}".format(answer[i][0]))
            #self.step_profit_list = []
            if profit > self.max_profit:
                print("find new opt:{}, {}".format(profit, self.get_parameter_str(X_list[i])))
                self.model = model
                self.save()
                self.max_profit = profit
                
                # save the data into file for further analysis
                np_data_to_save = np.concatenate((value_price, is_hold), axis=2)
                print("np_data_to_save")
                print(np_data_to_save.shape)
                np.save(self.save_path + '/best_policy_data.npy', np_data_to_save)
                
                # check the optimized strategy for this model
            answer[i][0] = -profit_daily
        return answer

    def sma(self, data, window):
        """
        Calculates Simple Moving Average
        http://fxtrade.oanda.com/learn/forex-indicators/simple-moving-average
        """
        if len(data) < window:
            return None
        return sum(data[-window:]) / float(window)
    
    def ema(self, data, window):
        if len(data) < 2 * window:
            raise ValueError("data is too short")
        c = 2.0 / (window + 1)
        current_ema = self.sma(data[-window*2:-window], window)
        for value in data[-window:]:
            current_ema = (c * value) + ((1 - c) * current_ema)
        return current_ema
    
    
    def get_value_result(self, features):
        n_neurons = int(features[0])
        learning_rate = features[1]
        num_layers = int(features[2])
        rnn_type = int(features[3])
        learning_period = int(features[4])
        prediction_period = int(features[5])
        n_repeats = int(features[6])
        beta = int(features[7])
        ema = int(features[8])
        time_format = int(features[9])
        volume_input = int(features[10])
        use_centralized_bid = int(features[11])
        #split_daily_data = int(features[12])
        split_daily_data = 0
        data_manipulator = DataManipulator(beta, ema, 
                                           time_format, 
                                           volume_input, 
                                           use_centralized_bid, 
                                           split_daily_data, 
                                           self.n_training_days)
        npy_path = 'npy_files'
        data_training_input, data_training_output, timestamps, price \
            = data_manipulator.prep_training_data(npy_path, self.stock_index)
        
        # get the date list.
        date_list = []
        for i in range(len(timestamps)):
            date = timestamps[i][0].strftime("%y%m%d")
            date_list.append(date)
        

        
        # now define the network
        model = StatefulLstmModel(n_neurons, learning_rate, num_layers, rnn_type, n_repeats)
        
        assert(self.n_training_days % prediction_period == 0)
        
        n_training_seq = self.n_training_days
        n_learning_seq = learning_period
        n_prediction_seq = prediction_period
        if split_daily_data == 1:
            n_training_seq *= 2
            n_learning_seq *= 2
            n_prediction_seq *= 2
            
        self.last_training_date = date_list[-1]
        daily_errors = []
        all_outputs = []
        print("start training: training_seq:{}, learning_seq:{}, prediction_seq:{} last_training_date:{}".format(n_training_seq, 
                                                                                           n_learning_seq, 
                                                                                           n_prediction_seq,
                                                                                           self.last_training_date))
        for i in range(0, n_training_seq-n_learning_seq+1, n_prediction_seq):
            learning_end = i + n_learning_seq
            print("start training from seq:{}({}) - seq:{}({})".format(i, date_list[i], learning_end-1, date_list[learning_end-1]))
            model.fit(data_training_input[i:learning_end], data_training_output[:learning_end], n_prediction_seq)
            prediction_end = learning_end + n_prediction_seq
            if prediction_end > n_training_seq:
                break
            
            print("start predicting from seq:{}({}) - seq:{}({})".format(learning_end, date_list[learning_end], 
                                                                       prediction_end-1, date_list[prediction_end-1]))
            
            outputs = model.predict_and_verify(data_training_input[learning_end:prediction_end], 
                                     data_training_output[learning_end:prediction_end])
            print("output.shape")
            print(outputs.shape)
            all_outputs.append(outputs)
            # calculate the error for every day
            y = data_training_output[learning_end:prediction_end]
            # error is a 1-D array for the every day error
            error = np.mean(np.square(outputs-y), axis=(1,2))
        
            daily_errors += error.tolist()
            
        np_all_outputs = np.array(all_outputs)
        print("np_all_outputs.shape")
        print(np_all_outputs.shape)
        shape = np_all_outputs.shape
        
        n_predicted_days = self.n_training_days - learning_period
        if split_daily_data == 1:
            steps_per_day = data_training_input.shape[1] * 2
        else:
            steps_per_day = data_training_input.shape[1]
        
        
        np_all_outputs = np_all_outputs.reshape((n_predicted_days, steps_per_day,1))
        np_all_outputs = data_manipulator.inverse_transform_output(np_all_outputs)
        
        print("np_all_outputs.shape")
        print(np_all_outputs.shape)
        shape = timestamps.shape
        timestamps = timestamps.reshape((self.n_training_days, steps_per_day, 1))
        price = price.reshape((self.n_training_days, steps_per_day, 1))
        
        print("timestamps.shape")
        print(timestamps.shape)
        value_with_timestamp_price = np.concatenate((timestamps[learning_period:],
                                               np_all_outputs,
                                               price[learning_period:]), axis=2)
        print("value_with_timestamp_price")
        print(value_with_timestamp_price.shape)
        ema = self.ema(daily_errors, int(len(daily_errors)/2))
        print("test finished, the ema of testing error:{}".format(ema))
        
        return ema, model, value_with_timestamp_price
    

In [36]:
class StrategyModel:
    mixed_domain = [{'name': 'buy_threshold', 'type': 'continuous', 'domain': (0.0, 0.005)},
                 {'name': 'sell_threshold', 'type': 'continuous', 'domain': (-0.005, 0.0)},
                 {'name': 'stop_loss', 'type': 'continuous', 'domain': (-0.01,-0.003)},
                 {'name': 'stop_gain', 'type': 'continuous', 'domain': (0.002, 0.01)},
                 {'name': 'min_hold_steps', 'type': 'discrete', 'domain': range(10,100)},
         ]
    def __init__(self):
        self.max_profit = -999.0
        return

    def optimize(self, input_data):
        self.input_data = input_data
        
        myBopt = GPyOpt.methods.BayesianOptimization(self.get_profit,  # Objective function       
                                             domain=self.mixed_domain,          # Box-constraints of the problem
                                             initial_design_numdata = 30,   # Number data initial design
                                             acquisition_type='EI',        # Expected Improvement
                                             exact_feval = True)           # True evaluations, no sample noise

        myBopt.run_optimization(100,eps=0)
        return 0
        
    # the input data is in shape (days, steps, [timestamp, value, price])
    def get_profit(self, X_list):    
        buy_threshold = X_list[0][0]
        sell_threshold = X_list[0][1]
        stop_loss = X_list[0][2]
        stop_gain = X_list[0][3]
        min_hold_steps = int(X_list[0][4])
        tot_profit = 1
        tot_stock_profit = 1
        buy_step = None
        max_trades = 3
        cost = 0.00015
        n_tot_trades = 0
        
        # to prepare the result data
        shape = self.input_data.shape
        is_hold = np.zeros((shape[0], shape[1], 1))
        
        for day_idx in range(len(self.input_data)):
            #print("starting day {}".format(day_idx))
            n_trades = 0
            daily_profit = 1
            state = 0
            daily_data = self.input_data[day_idx]
            for step in range(len(daily_data)):
                value = daily_data[step][1]
                price = daily_data[step][2]
                time = daily_data[step][0]
                
                if state == 0 and time.time().hour >= 9 and \
                    n_trades < max_trades and step < len(daily_data)-min_hold_steps:
                    if value > buy_threshold:
                        buy_price = price
                        buy_step = step
                        #print("buy at step {} price:{}".format(step, price))
                        state = 1

                elif state == 1:
                    profit = (price - buy_price)/buy_price
                    if (value < sell_threshold and 
                        step - buy_step > min_hold_steps) or step == len(daily_data)-1 or \
                        profit < stop_loss or \
                        profit > stop_gain:
                        
                        if profit < stop_loss:
                            n_trades = max_trades
                        
                        #print("sell at step {} price:{}".format(step, price))
                        profit -= cost
                        tot_profit *= (1+profit)
                        daily_profit *= (1 + profit)
                        state = 0
                        n_trades += 1
                
                if state == 1:
                    is_hold[day_idx][step] = 1
                else:
                    is_hold[day_idx][step] = 0
                    
            n_tot_trades += n_trades
            last = daily_data[-1][2]
            open = daily_data[0][2]
            stock_profit = (last - open) / open
            tot_stock_profit *= (1+stock_profit)
            #print("finishing day {}, daily_profit:{}".format(day_idx, daily_profit))
        #print("{}, n_tot_trades:{} profit:{}".format(X_list, n_tot_trades, tot_profit))
        if tot_profit > self.max_profit:
            print("find best profit:{}".format(tot_profit))
            self.max_profit = tot_profit
            self.is_hold = is_hold
        return -tot_profit
    
    def get_best_result(self):
        return self.max_profit, self.is_hold
        

In [37]:
value_model = ValueModel('Nordea', 5, 60)
value_model.optimize('.', is_test=False)

n_neurons:20.0,learning_rate:0.004,num_layers:4.0,rnn_type:2.0,learning_period:20.0,prediction_period:5.0,n_repeats:5.0,beta:98.0,ema:1.0,time_format:2.0,volume_input:0.0,use_centralized_bid:1.0,split_daily_data:1.0,
start training: training_seq:60, learning_seq:20, prediction_seq:5 last_training_date:190423
start training from seq:0(190128) - seq:19(190222)
17:11:43.325681 repeat=0 training finished, training MSE=0.08591298521641874
17:11:46.811481 repeat=1 training finished, training MSE=0.04363865963969147
17:11:50.473813 repeat=2 training finished, training MSE=0.02931775751724975
17:11:54.941420 repeat=3 training finished, training MSE=0.022120492982503494
17:11:58.283220 repeat=4 training finished, training MSE=0.017796424895350355
start predicting from seq:20(190225) - seq:24(190301)
Predicting seq:0 testing MSE: 0.00010310367360943928
Predicting seq:1 testing MSE: 0.0001566145510878414
Predicting seq:2 testing MSE: 0.00018916990666184574
Predicting seq:3 testing MSE: 0.00015729

17:15:08.657051 repeat=4 training finished, training MSE=0.015418190032360144
start predicting from seq:20(190225) - seq:24(190301)
Predicting seq:0 testing MSE: 0.00048753945156931877
Predicting seq:1 testing MSE: 0.0007488138508051634
Predicting seq:2 testing MSE: 0.0010324331233277917
Predicting seq:3 testing MSE: 0.0008387695415876806
Predicting seq:4 testing MSE: 0.0005623998586088419
output.shape
(5, 504, 1)
start training from seq:5(190204) - seq:24(190301)
17:15:11.028167 repeat=0 training finished, training MSE=0.07458765821065753
17:15:12.642415 repeat=1 training finished, training MSE=0.03858051346469438
17:15:14.236955 repeat=2 training finished, training MSE=0.026153383724643695
17:15:15.845798 repeat=3 training finished, training MSE=0.019916730369368453
17:15:17.489532 repeat=4 training finished, training MSE=0.016177692081546412
start predicting from seq:25(190304) - seq:29(190308)
Predicting seq:0 testing MSE: 0.009836184792220592
Predicting seq:1 testing MSE: 0.000851

Predicting seq:9 testing MSE: 0.00022183466353453696
output.shape
(10, 504, 1)
start training from seq:10(190211) - seq:29(190308)
17:17:26.522698 repeat=0 training finished, training MSE=0.14942620475194418
17:17:29.541241 repeat=1 training finished, training MSE=0.0767465175835241
17:17:32.595689 repeat=2 training finished, training MSE=0.05146716673043557
17:17:35.567777 repeat=3 training finished, training MSE=0.03872728354217543
17:17:38.571839 repeat=4 training finished, training MSE=0.031071463625266915
start predicting from seq:30(190311) - seq:39(190322)
Predicting seq:0 testing MSE: 0.0002920893603004515
Predicting seq:1 testing MSE: 0.00019121057994198054
Predicting seq:2 testing MSE: 0.0001480031933169812
Predicting seq:3 testing MSE: 0.00028734773513861
Predicting seq:4 testing MSE: 0.00028906442457810044
Predicting seq:5 testing MSE: 0.000393193302443251
Predicting seq:6 testing MSE: 0.00019304215675219893
Predicting seq:7 testing MSE: 0.00016250509361270815
Predicting se

Predicting seq:2 testing MSE: 0.0003487591748125851
Predicting seq:3 testing MSE: 0.000478649657452479
Predicting seq:4 testing MSE: 0.014023997820913792
output.shape
(5, 516, 1)
start training from seq:25(190304) - seq:44(190329)
17:21:16.508685 repeat=0 training finished, training MSE=0.21322879669605754
17:21:20.119957 repeat=1 training finished, training MSE=0.10827844351370004
17:21:23.678141 repeat=2 training finished, training MSE=0.07239910926728044
17:21:27.319529 repeat=3 training finished, training MSE=0.05438750607781913
17:21:30.919709 repeat=4 training finished, training MSE=0.043574553953149006
start predicting from seq:45(190401) - seq:49(190405)
Predicting seq:0 testing MSE: 0.0002696144219953567
Predicting seq:1 testing MSE: 0.0003413139493204653
Predicting seq:2 testing MSE: 0.00029737441218458116
Predicting seq:3 testing MSE: 0.0003637782938312739
Predicting seq:4 testing MSE: 8.637505379738286e-05
output.shape
(5, 516, 1)
start training from seq:30(190311) - seq:49

17:24:51.293502 repeat=0 training finished, training MSE=0.5069475691765547
17:24:54.451237 repeat=1 training finished, training MSE=0.3003378002438694
17:24:57.598552 repeat=2 training finished, training MSE=0.2055285201058723
17:25:00.748703 repeat=3 training finished, training MSE=0.15568023072555662
17:25:03.933993 repeat=4 training finished, training MSE=0.12494057256641099
start predicting from seq:40(190325) - seq:44(190329)
Predicting seq:0 testing MSE: 0.0003083236515522003
Predicting seq:1 testing MSE: 9.666087862569839e-05
Predicting seq:2 testing MSE: 0.00037484095082618296
Predicting seq:3 testing MSE: 0.0006783487624488771
Predicting seq:4 testing MSE: 0.010314013808965683
output.shape
(5, 516, 1)
start training from seq:35(190318) - seq:44(190329)
17:25:08.309213 repeat=0 training finished, training MSE=0.4651166057214141
17:25:11.441447 repeat=1 training finished, training MSE=0.28981407507089896
17:25:14.594136 repeat=2 training finished, training MSE=0.198786551779873

17:28:21.892208 repeat=0 training finished, training MSE=0.28936239283066245
17:28:25.734526 repeat=1 training finished, training MSE=0.1486713263395359
17:28:29.581121 repeat=2 training finished, training MSE=0.09938208156575759
17:28:33.431737 repeat=3 training finished, training MSE=0.07470520695242158
17:28:37.289111 repeat=4 training finished, training MSE=0.05985889703661087
np_all_outputs.shape
(4, 10, 516, 1)
np_all_outputs.shape
(40, 516, 1)
timestamps.shape
(60, 516, 1)
value_with_timestamp_price
(40, 516, 3)
test finished, the ema of testing error:0.0013968840873942426
find best profit:1.043697284475395
find best profit:1.0456650804428365
find best profit:1.047690326537571
find best profit:1.0488498903966634
find best profit:1.0488521006690785
find best profit:1.0514211795316666
find best profit:1.0537260892679265
find best profit:1.0555503989439337
find best profit:1.0565535664645422
find best profit:1.0636693241025745
find best profit:1.0639110730077521
find best profit:1.

17:31:00.557798 repeat=0 training finished, training MSE=0.6124646037817001
17:31:01.048472 repeat=1 training finished, training MSE=0.4186674620956182
17:31:01.542663 repeat=2 training finished, training MSE=0.2885798576123004
17:31:02.031858 repeat=3 training finished, training MSE=0.21924693344553817
17:31:02.517740 repeat=4 training finished, training MSE=0.17616844449803465
start predicting from seq:55(190415) - seq:59(190423)
Predicting seq:0 testing MSE: 0.0005187239148654044
Predicting seq:1 testing MSE: 0.0002877707884181291
Predicting seq:2 testing MSE: 0.0002268296229885891
Predicting seq:3 testing MSE: 0.004245702642947435
Predicting seq:4 testing MSE: 0.0005293034482747316
output.shape
(5, 516, 1)
start training from seq:50(190408) - seq:59(190423)
17:31:03.553748 repeat=0 training finished, training MSE=0.6164904087781906
17:31:04.045760 repeat=1 training finished, training MSE=0.42259890474379064
17:31:04.548284 repeat=2 training finished, training MSE=0.2916436142540382

17:34:18.164683 repeat=2 training finished, training MSE=0.04769706413972017
17:34:19.722906 repeat=3 training finished, training MSE=0.03590002228706908
17:34:21.317547 repeat=4 training finished, training MSE=0.02881669224381767
start predicting from seq:20(190225) - seq:29(190308)
Predicting seq:0 testing MSE: 0.0001175946817966178
Predicting seq:1 testing MSE: 0.00020444148685783148
Predicting seq:2 testing MSE: 0.00024192560522351414
Predicting seq:3 testing MSE: 0.0001821154437493533
Predicting seq:4 testing MSE: 0.00011354620073689148
Predicting seq:5 testing MSE: 0.0037860586307942867
Predicting seq:6 testing MSE: 0.0002411086024949327
Predicting seq:7 testing MSE: 0.00013221766857896
Predicting seq:8 testing MSE: 0.0002465071447659284
Predicting seq:9 testing MSE: 9.258355566998944e-05
output.shape
(10, 516, 1)
start training from seq:10(190211) - seq:29(190308)
17:34:24.358188 repeat=0 training finished, training MSE=0.1364977958088275
17:34:25.893316 repeat=1 training finish

17:37:43.177620 repeat=0 training finished, training MSE=0.060468524345196784
17:37:48.360666 repeat=1 training finished, training MSE=0.03351239761104807
17:37:53.501146 repeat=2 training finished, training MSE=0.022965912976845478
17:37:58.668096 repeat=3 training finished, training MSE=0.017551105005986757
17:38:03.817419 repeat=4 training finished, training MSE=0.014274097016023007
start predicting from seq:40(190325) - seq:44(190329)
Predicting seq:0 testing MSE: 0.000894807861186564
Predicting seq:1 testing MSE: 0.0006000215653330088
Predicting seq:2 testing MSE: 0.001771773910149932
Predicting seq:3 testing MSE: 0.002678525634109974
Predicting seq:4 testing MSE: 0.003171261865645647
output.shape
(5, 504, 1)
start training from seq:25(190304) - seq:44(190329)
17:38:10.541449 repeat=0 training finished, training MSE=0.06450763120083139
17:38:15.735323 repeat=1 training finished, training MSE=0.035398804261785696
17:38:20.911425 repeat=2 training finished, training MSE=0.0242327219

17:41:20.228679 repeat=0 training finished, training MSE=0.15460554565070198
17:41:21.306024 repeat=1 training finished, training MSE=0.08225607518397737
17:41:22.379911 repeat=2 training finished, training MSE=0.055653653022212285
17:41:23.427009 repeat=3 training finished, training MSE=0.04208157596694946
17:41:24.482351 repeat=4 training finished, training MSE=0.03392536621307954
start predicting from seq:45(190401) - seq:49(190405)
Predicting seq:0 testing MSE: 0.0008117593242786825
Predicting seq:1 testing MSE: 0.0010254856897518039
Predicting seq:2 testing MSE: 0.0005827951245009899
Predicting seq:3 testing MSE: 0.0008189201471395791
Predicting seq:4 testing MSE: 0.00045021457481198013
output.shape
(5, 504, 1)
start training from seq:30(190311) - seq:49(190405)
17:41:26.123676 repeat=0 training finished, training MSE=0.15474274776061064
17:41:27.190449 repeat=1 training finished, training MSE=0.08247008532343898
17:41:28.237178 repeat=2 training finished, training MSE=0.055790248

Predicting seq:4 testing MSE: 0.00013151433086022735
Predicting seq:5 testing MSE: 0.00024548560031689703
Predicting seq:6 testing MSE: 6.998430035309866e-05
Predicting seq:7 testing MSE: 6.735475471941754e-05
Predicting seq:8 testing MSE: 0.00026667071506381035
Predicting seq:9 testing MSE: 0.0001307867787545547
output.shape
(10, 516, 1)
start training from seq:50(190408) - seq:59(190423)
17:43:03.945204 repeat=0 training finished, training MSE=0.23516016881912946
17:43:04.812386 repeat=1 training finished, training MSE=0.12606531381570676
17:43:05.667664 repeat=2 training finished, training MSE=0.08593587820820782
17:43:06.537053 repeat=3 training finished, training MSE=0.06490012675258186
17:43:07.399565 repeat=4 training finished, training MSE=0.05208423426331137
np_all_outputs.shape
(5, 10, 516, 1)
np_all_outputs.shape
(50, 516, 1)
timestamps.shape
(60, 516, 1)
value_with_timestamp_price
(50, 516, 3)
test finished, the ema of testing error:0.00029197106225104175
find best profit:0

17:48:17.941657 repeat=3 training finished, training MSE=0.16411546639974403
17:48:18.551003 repeat=4 training finished, training MSE=0.13215501392347506
start predicting from seq:10(190211) - seq:19(190222)
Predicting seq:0 testing MSE: 0.0017737431917339563
Predicting seq:1 testing MSE: 0.0019612275063991547
Predicting seq:2 testing MSE: 0.0017645584885030985
Predicting seq:3 testing MSE: 0.0016691311029717326
Predicting seq:4 testing MSE: 0.0023453901521861553
Predicting seq:5 testing MSE: 0.002043679356575012
Predicting seq:6 testing MSE: 0.0020249022636562586
Predicting seq:7 testing MSE: 0.001860428717918694
Predicting seq:8 testing MSE: 0.0017433989560231566
Predicting seq:9 testing MSE: 0.002316598081961274
output.shape
(10, 516, 1)
start training from seq:10(190211) - seq:19(190222)
17:48:19.984957 repeat=0 training finished, training MSE=0.5342005610466003
17:48:20.590632 repeat=1 training finished, training MSE=0.31268641784845386
17:48:21.203311 repeat=2 training finished, 

17:49:22.014220 repeat=3 training finished, training MSE=0.06799943847654503
17:49:22.526296 repeat=4 training finished, training MSE=0.05499910035403446
start predicting from seq:30(190311) - seq:39(190322)
Predicting seq:0 testing MSE: 0.0027389791794121265
Predicting seq:1 testing MSE: 0.0002394871844444424
Predicting seq:2 testing MSE: 0.0005596081027761102
Predicting seq:3 testing MSE: 0.0009532097610644996
Predicting seq:4 testing MSE: 0.0014072532067075372
Predicting seq:5 testing MSE: 0.002754481742158532
Predicting seq:6 testing MSE: 0.0002352683077333495
Predicting seq:7 testing MSE: 0.0005378362257033587
Predicting seq:8 testing MSE: 0.0010908767580986023
Predicting seq:9 testing MSE: 0.0017063779523596168
output.shape
(10, 516, 1)
start training from seq:30(190311) - seq:39(190322)
17:49:23.691020 repeat=0 training finished, training MSE=0.21405537379905581
17:49:24.202507 repeat=1 training finished, training MSE=0.12781998253194615
17:49:24.715112 repeat=2 training finishe

17:52:12.827965 repeat=0 training finished, training MSE=0.0775915474339854
17:52:14.776561 repeat=1 training finished, training MSE=0.04110940377504448
17:52:16.710532 repeat=2 training finished, training MSE=0.027902760488844554
17:52:18.633617 repeat=3 training finished, training MSE=0.021179498198944203
17:52:20.567520 repeat=4 training finished, training MSE=0.017118459977791645
np_all_outputs.shape
(4, 10, 504, 1)
np_all_outputs.shape
(40, 504, 1)
timestamps.shape
(60, 504, 1)
value_with_timestamp_price
(40, 504, 3)
test finished, the ema of testing error:0.0016652897214987578
find best profit:1.0442879605973878
find best profit:1.0539812680792022
find best profit:1.0662909310197282
find best profit:1.074389951659687
find best profit:1.076952194337183
find best profit:1.079506285786905
find best profit:1.0799540329118782
find best profit:1.0841374802464643
total profit=1.0841374802464643, profit/day=0.002103437006161607 error=0.0016652897214987578
n_neurons:140.0,learning_rate:0.

find best profit:1.0749199516320425
find best profit:1.0756575811097027
find best profit:1.0927248131872414
total profit=1.0927248131872414, profit/day=0.001854496263744827 error=0.0024316136902913115
n_neurons:140.0,learning_rate:0.001,num_layers:3.0,rnn_type:0.0,learning_period:20.0,prediction_period:10.0,n_repeats:3.0,beta:98.0,ema:5.0,time_format:2.0,volume_input:0.0,use_centralized_bid:0.0,split_daily_data:1.0,
start training: training_seq:60, learning_seq:20, prediction_seq:10 last_training_date:190423
start training from seq:0(190128) - seq:19(190222)
17:56:37.072731 repeat=0 training finished, training MSE=0.047715575058828105
17:56:43.235206 repeat=1 training finished, training MSE=0.025390159209928244
17:56:49.372603 repeat=2 training finished, training MSE=0.017520658176120682
start predicting from seq:20(190225) - seq:29(190308)
Predicting seq:0 testing MSE: 0.000542889058124274
Predicting seq:1 testing MSE: 0.0007212423370219767
Predicting seq:2 testing MSE: 0.000799234607

17:59:51.584650 repeat=0 training finished, training MSE=0.3132318152114749
17:59:53.851509 repeat=1 training finished, training MSE=0.1798866025870666
17:59:56.121132 repeat=2 training finished, training MSE=0.12333664008571456
17:59:58.395152 repeat=3 training finished, training MSE=0.09327175679318316
18:00:00.667134 repeat=4 training finished, training MSE=0.07502775141736492
start predicting from seq:30(190311) - seq:34(190315)
Predicting seq:0 testing MSE: 0.00019011979748029262
Predicting seq:1 testing MSE: 0.0005811742739751935
Predicting seq:2 testing MSE: 0.0011020353995263577
Predicting seq:3 testing MSE: 0.0012121782638132572
Predicting seq:4 testing MSE: 0.0021443692967295647
output.shape
(5, 504, 1)
start training from seq:25(190304) - seq:34(190315)
18:00:03.962857 repeat=0 training finished, training MSE=0.33354726489633324
18:00:06.241535 repeat=1 training finished, training MSE=0.19560840562917292
18:00:08.546957 repeat=2 training finished, training MSE=0.135131969582

18:02:51.332548 repeat=0 training finished, training MSE=0.16530055198818444
18:02:52.073082 repeat=1 training finished, training MSE=0.08743605558993295
18:02:52.802598 repeat=2 training finished, training MSE=0.05961482172424439
18:02:53.539900 repeat=3 training finished, training MSE=0.04502064872031042
18:02:54.281978 repeat=4 training finished, training MSE=0.036157948661712
start predicting from seq:25(190304) - seq:29(190308)
Predicting seq:0 testing MSE: 0.002098113065585494
Predicting seq:1 testing MSE: 0.0002909913891926408
Predicting seq:2 testing MSE: 0.00014127236499916762
Predicting seq:3 testing MSE: 0.00024369728635065258
Predicting seq:4 testing MSE: 0.00019929884001612663
output.shape
(5, 516, 1)
start training from seq:20(190225) - seq:29(190308)
18:02:55.761947 repeat=0 training finished, training MSE=0.16477076448500155
18:02:56.489971 repeat=1 training finished, training MSE=0.08705234763910993
18:02:57.215222 repeat=2 training finished, training MSE=0.05923687648

18:04:38.930076 repeat=0 training finished, training MSE=0.08497368478856515
18:04:40.348237 repeat=1 training finished, training MSE=0.04352289222697436
18:04:41.771490 repeat=2 training finished, training MSE=0.02927836272477483
start predicting from seq:35(190318) - seq:39(190322)
Predicting seq:0 testing MSE: 0.0002511246711947024
Predicting seq:1 testing MSE: 0.00029718439327552915
Predicting seq:2 testing MSE: 0.0002563342859502882
Predicting seq:3 testing MSE: 0.0002749679842963815
Predicting seq:4 testing MSE: 0.00034500687615945935
output.shape
(5, 504, 1)
start training from seq:20(190225) - seq:39(190322)
18:04:43.747908 repeat=0 training finished, training MSE=0.08256643564236583
18:04:45.154701 repeat=1 training finished, training MSE=0.04221173604892101
18:04:46.568130 repeat=2 training finished, training MSE=0.028391440388319703
start predicting from seq:40(190325) - seq:44(190329)
Predicting seq:0 testing MSE: 0.0003295988426543772
Predicting seq:1 testing MSE: 0.000230

18:08:05.494791 repeat=0 training finished, training MSE=0.08479112525819801
18:08:06.907157 repeat=1 training finished, training MSE=0.04339987909443153
18:08:08.331694 repeat=2 training finished, training MSE=0.029211841809592444
start predicting from seq:50(190408) - seq:54(190412)
Predicting seq:0 testing MSE: 0.00017541018314659595
Predicting seq:1 testing MSE: 0.00023201224394142628
Predicting seq:2 testing MSE: 0.0002742143697105348
Predicting seq:3 testing MSE: 0.00022095527674537152
Predicting seq:4 testing MSE: 0.00026398932095617056
output.shape
(5, 504, 1)
start training from seq:35(190318) - seq:54(190412)
18:08:10.325106 repeat=0 training finished, training MSE=0.08505078099769889
18:08:11.751608 repeat=1 training finished, training MSE=0.043508880940134984
18:08:13.176096 repeat=2 training finished, training MSE=0.02926056104382345
start predicting from seq:55(190415) - seq:59(190423)
Predicting seq:0 testing MSE: 0.0005710856057703495
Predicting seq:1 testing MSE: 0.000