In [9]:
import numpy as np
import tensorflow as tf
import copy
from sklearn import preprocessing
import datetime
import pickle

In [10]:
class NetAttributes:
    def __init__(self, n_neurons = 100, 
                 learning_rate = 0.003, 
                 num_layers = 1,
                 rnn_type = 2,
                 n_repeats = 2):
        self.n_neurons = n_neurons;
        self.learning_rate = learning_rate;
        self.num_layers = num_layers;
        self.rnn_type = rnn_type;
        self.n_repeats = n_repeats
        self.n_steps = None
        self.n_inputs = None
        self.n_outputs = 1
        
    def set_input_dimension(self, n_steps, n_inputs):
        self.n_steps = n_steps
        self.n_inputs = n_inputs


In [11]:
class NetStates:
    def __init__(self):
        self.prediction_states = None
        self.training_states = None
    

In [12]:
class StatefulLstmModel:
    def __init__(self,
                n_neurons=100,
                learning_rate=0.002,
                num_layers=2,
                rnn_type=1,
                n_repeats=30):

        self.net_attributes = NetAttributes(n_neurons,
                                   learning_rate,
                                   num_layers,
                                   rnn_type,
                                   n_repeats)
        self.net_states = NetStates()
        self.model_initialized = False
        self.sess = None
    
    def __del__(self):
        if self.sess != None:
            self.sess.close()
    
    def get_batch(self, seq_index, data_train_input, data_train_output):
        X_batch = data_train_input[seq_index:seq_index+1]
        y_batch = data_train_output[seq_index:seq_index+1]
        return X_batch, y_batch
    
    
    def initialize_layers(self):
        layers = None
        net_attributes = self.net_attributes
        if net_attributes.rnn_type == 0:
            layers = [tf.nn.rnn_cell.BasicLSTMCell(net_attributes.n_neurons) 
              for _ in range(net_attributes.num_layers)]
        elif net_attributes.rnn_type == 1:
            layers = [tf.nn.rnn_cell.LSTMCell(net_attributes.n_neurons, use_peepholes=False) 
              for _ in range(net_attributes.num_layers)]
        elif net_attributes.rnn_type == 2:
            layers = [tf.nn.rnn_cell.LSTMCell(net_attributes.n_neurons, use_peepholes=True) 
              for _ in range(net_attributes.num_layers)]
        else:
            print("WRONG")
        return layers
    
    def reset_graph(self, seed=42):
        tf.reset_default_graph()
        tf.set_random_seed(seed)
        np.random.seed(seed)
    
    def create_model(self):
        net_attributes = self.net_attributes
        self.X = tf.placeholder(tf.float32, [None, net_attributes.n_steps, net_attributes.n_inputs])
        self.y = tf.placeholder(tf.float32, [None, net_attributes.n_steps, net_attributes.n_outputs])
        layers = self.initialize_layers()
        cell = tf.nn.rnn_cell.MultiRNNCell(layers)
        self.init_state = tf.placeholder(tf.float32, [net_attributes.num_layers, 2, 1, net_attributes.n_neurons])
        
        state_per_layer_list = tf.unstack(self.init_state, axis=0)
        rnn_tuple_state = tuple(
            [tf.nn.rnn_cell.LSTMStateTuple(state_per_layer_list[idx][0], state_per_layer_list[idx][1])
             for idx in range(net_attributes.num_layers)]
        )
        
        rnn_outputs, self.new_states = tf.nn.dynamic_rnn(cell, self.X, dtype=tf.float32, 
                                                    initial_state=rnn_tuple_state)
        
        stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, net_attributes.n_neurons])
        stacked_outputs = tf.layers.dense(stacked_rnn_outputs, net_attributes.n_outputs)
        self.outputs = tf.reshape(stacked_outputs, [-1, net_attributes.n_steps, net_attributes.n_outputs])
        
        self.loss = tf.reduce_mean(tf.square(self.outputs - self.y))
        optimizer = tf.train.AdamOptimizer(learning_rate=net_attributes.learning_rate)
        self.training_op = optimizer.minimize(self.loss)

        self.init = tf.global_variables_initializer()
        self.model_initialized = True
    
    # train the model, input is the training data for one cycle
    # input is in the shape: [days, steps, features], the features are 
    # 1. diff, 2. volume. 3. timesteps.
    def fit(self, data_train_input, data_train_output, prediction_period):
        net_attributes = self.net_attributes
        net_states = self.net_states
        n_inputs = data_train_input.shape[2]
        n_steps = data_train_input.shape[1]

        net_attributes.set_input_dimension(n_steps, n_inputs)
        batch_size = 1
        days = data_train_input.shape[0]
        
        self.reset_graph()
        self.create_model()
        my_loss_train_list = []
        sess = tf.Session()
        # TODO: load from file.

        self.init.run(session=sess)
        # if this is the first time of fit?
        if self.net_states.training_states == None:
            init_states = np.zeros((net_attributes.num_layers, 2, 1, net_attributes.n_neurons))
        else:
            init_states = self.net_states.training_states
            
        for repeat in range(net_attributes.n_repeats):
            rnn_states = copy.deepcopy(init_states)
            for seq in range(days):
                X_batch, y_batch = self.get_batch(seq, data_train_input, data_train_output)
                feed_dict = {
                        self.X: X_batch,
                        self.y: y_batch,
                        self.init_state: rnn_states}
                my_op, rnn_states, my_loss_train, my_outputs = sess.run([self.training_op, 
                          self.new_states, 
                          self.loss, 
                          self.outputs], feed_dict=feed_dict)

                my_loss_train_list.append(my_loss_train)
                # last repeat , remember the sates
                if seq+1 == prediction_period and repeat == net_attributes.n_repeats-1:
                    # next training loop starts from here
                    training_states = copy.deepcopy(rnn_states)
                my_loss_train_avg = sum(my_loss_train_list) / len(my_loss_train_list)

            print("{} repeat={} training finished, training MSE={}".format(
                datetime.datetime.now().time(),
                repeat, my_loss_train_avg))
        
        self.net_states.training_states = training_states
        self.net_states.prediction_states = rnn_states
        self.sess = sess
        return
    
    def predict_base(self, data_test_input, data_test_output=None):
        net_attributes = self.net_attributes
        net_states = self.net_states
        days = data_test_input.shape[0]
        
        rnn_states = copy.deepcopy(net_states.prediction_states)
        #X, y, init_state, init, training_op, new_states, loss, outputs = self.create_model()
        sess = self.sess
        
        my_loss_test_list = []
        input_shape = data_test_input.shape
        outputs_all_days = np.zeros((input_shape[0], input_shape[1], 1))
        for seq in range(days):
            if data_test_output is None:
                feed_dict = {
                    self.X: data_test_input[seq:seq+1],
                    self.init_state: rnn_states,
                }

                rnn_states, my_outputs = sess.run([self.new_states, self.outputs], feed_dict=feed_dict)
            else:
                feed_dict = {
                    self.X: data_test_input[seq:seq+1],
                    self.y: data_test_output[seq:seq+1],
                    self.init_state: rnn_states,
                }

                rnn_states, my_outputs, my_loss_test = sess.run([self.new_states, 
                                                                 self.outputs, self.loss], feed_dict=feed_dict)
                print("Predicting seq:{} testing MSE: {}".format(seq, my_loss_test))
            outputs_all_days[seq] = my_outputs
            
        
        return outputs_all_days
    
    def predict(self, data_test_input):
        return self.predict_base(data_test_input)
        
    def predict_and_verify(self, data_test_input, data_test_output):
        return self.predict_base(data_test_input, data_test_output)
      
    def get_attributes_filename(self, path):
        if path[-1] != '/':
            path += '/'
        return path + 'net_attributes.pkl'
    
    def get_path(self, path, date):
        if path[-1] != '/':
            path += '/'
        return path + date + '/'
    
    def get_states_filename(self, path, date):
        return self.get_path(path, date) + 'net_states.pkl'
    
    def get_model_filename(self, path, date):
        return self.get_path(path, date) + '/tf_session.ckpt'
    
    def save(self, path, date):
        saver = tf.train.Saver()
        save_path = saver.save(self.sess, self.get_model_filename(path, date))
        with open(self.get_attributes_filename(path), 'wb') as f:
            # Pickle the 'data' dictionary using the highest protocol available.
            pickle.dump(self.net_attributes, f, pickle.HIGHEST_PROTOCOL)
        with open(self.get_states_filename(path, date), 'wb') as f:
            pickle.dump(self.net_states, f, pickle.HIGHEST_PROTOCOL)
        print("Model saved in path: %s" % path)
        
            
    def load(self, path, date):
        # TODO: if date is none, load the latest.
        
        # restore hyper-params
        with open(self.get_attributes_filename(path), 'rb') as f:
            self.net_attributes = pickle.load(f)

        # restore states
        with open(self.get_states_filename(path), 'rb') as f:
            self.net_states = pickle.load(f)
        
        # 2. restore graph
        if self.model_initialized == False:
            self.reset_graph()
            self.create_model()
        
        # 3. restore session
        saver = tf.train.Saver()
        self.sess = tf.Session()
        saver.restore(self.sess, self.get_model_filename(path))
        print("Model restored.")

In [17]:
class TimeFormat:
    NONE = 0
    DAY = 1
    WEEK = 2

class DataManipulator:
    def __init__(self, beta, ema, time_format, volume_input, use_centralized_bid, 
                split_daily_data, n_training_days):
        self.beta = beta
        self.ema = ema
        self.time_format = time_format
        self.volume_input = volume_input
        self.use_centralized_bid = use_centralized_bid
        self.split_daily_data = split_daily_data
        self.n_training_days = n_training_days
        self.scaler_input = None
        self.scaler_output = None
        
    def volume_transform(self, volume_series):
        # all the volumes must bigger than 0
        assert(np.all(volume_series>=0))
        return  np.log(volume_series.astype('float')+1)

    def inverse_transform_output(self, scaled_outputs):
        ori_shape = scaled_outputs.shape
        outputs_reshaped = scaled_outputs.reshape((ori_shape[0]*ori_shape[1], 
                                                   ori_shape[2]))
        #outputs = np.exp(self.scaler_output.inverse_transform(outputs_reshaped)) - 1
        outputs = self.scaler_output.inverse_transform(outputs_reshaped)
        return outputs.reshape(ori_shape)
    
    
    def transform(self, data_all, n_inputs, n_outputs):
        orig_shape = data_all.shape
        data_train_reshape = data_all.astype('float').reshape((orig_shape[0] * orig_shape[1], orig_shape[2]))
        
        self.scaler_input = preprocessing.MinMaxScaler().fit(data_train_reshape[:,:n_inputs])
        data_train_input_scaled = self.scaler_input.transform(data_train_reshape[:,:n_inputs])
        
        # the invalid step, we change it to zero!
        data_train_input_scaled[~np.any(data_train_reshape, axis=1)] = 0
        data_train_input = data_train_input_scaled.reshape(orig_shape[0], orig_shape[1], n_inputs)
        
        self.scaler_output = preprocessing.MinMaxScaler().fit(data_train_reshape[:,-n_outputs:])
        data_train_output_scaled = self.scaler_output.transform(data_train_reshape[:,-n_outputs:])
        # the invalid step, we change it to zero!
        data_train_output_scaled[~np.any(data_train_reshape, axis=1)] = 0
        data_train_output = data_train_output_scaled.reshape(orig_shape[0], orig_shape[1], n_outputs)
        
        return data_train_input, data_train_output

    def prep_test_data(self, input_path):
        return
    
    def prep_training_data(self, input_path, stock_index):
        # load numpy file
        npy_file_name = input_path + "/ema{}_beta{}_{}.npy".format(self.ema, self.beta, stock_index)
        input_np_data = np.load(npy_file_name, allow_pickle=True)
        
        # date list
        date_list = []
        for i in range(self.n_training_days):    
            date = input_np_data[i][0][5].date().strftime("%y%m%d")
            date_list.append(date_list)
        
        
        # check if we have days more than training period
        assert(input_np_data.shape[0] >= self.n_training_days)
        # the diff is the mandatory
        input_columns = [2]
        
        time_format = self.time_format
        
        if time_format == TimeFormat.DAY:
            input_columns += [0]
        elif time_format == TimeFormat.WEEK:
            input_columns += [1]
        
        if self.volume_input == 1:
            input_columns += [3]
        
        output_columns = [4]
        timestamp_column = [5]
        price_column = [6]
        input_np_data = input_np_data[:,:,input_columns + output_columns + timestamp_column + price_column]
        
        # we must tranform the volume for it is too big.
        if self.volume_input == 1:
            input_np_data[:,:,-4] = self.volume_transform(input_np_data[:,:,-4])
        
        if self.use_centralized_bid == 0:
            # remove all the rows for centralized bid. it should be from 9.01 to 17.24, which is 516-12=504 steps
            input_np_data = input_np_data[:,7:-5,:]
            
        shape = input_np_data.shape
        n_training_sequences = self.n_training_days
        if self.split_daily_data == 1:
            assert(shape[1] % 2 == 0)
            input_np_data = input_np_data.reshape((shape[0]*2, 
                                                  int(shape[1]/2), 
                                                  shape[2]))
            # get the first date and last date
            n_training_sequences *= 2
            
        # to scale the data, but not the timestamp and price
        data_train_input, data_train_output = self.transform(input_np_data[:n_training_sequences,:,:-2], len(input_columns), 1)
        return data_train_input, data_train_output, input_np_data[:n_training_sequences,:,-2], input_np_data[:n_training_sequences,:,-1]

In [18]:
import numpy as np
from pathlib import Path
import pandas as pd
import GPy
import GPyOpt

class ValueModel:
    mixed_domain = [{'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20,160,20))},
          {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001,0.002,0.003,0.004)},
          {'name': 'num_layers', 'type': 'discrete', 'domain': (1,2,3,4)},
          {'name': 'rnn_type', 'type': 'discrete', 'domain': (0,1,2)},
          {'name': 'learning_period', 'type': 'discrete', 'domain': (10,20,30,40)},
          {'name': 'prediction_period', 'type': 'discrete', 'domain': (1,2,3,5,10)},
          {'name': 'n_repeats', 'type': 'discrete', 'domain': (5,10,20,30,40)},
          {'name': 'beta', 'type': 'discrete', 'domain': (99, 98)},
          {'name': 'ema', 'type': 'discrete', 'domain': (1,5,10,20)},
          {'name': 'time_format', 'type': 'discrete', 'domain': (0,1,2)}, #1 for stepofday, 2 for stepofweek
          {'name': 'volume_input', 'type': 'discrete', 'domain': (0,1)},
          {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (0,1)},
          {'name': 'split_daily_data', 'type': 'discrete', 'domain': (0,1)}
         ]
    
    mixed_domain_test = [{'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20,160,20))},
          {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001,0.002,0.003,0.004)},
          {'name': 'num_layers', 'type': 'discrete', 'domain': (1,2,3,4)},
          {'name': 'rnn_type', 'type': 'discrete', 'domain': (0,1,2)},
          {'name': 'learning_period', 'type': 'discrete', 'domain': (10,20)},
          {'name': 'prediction_period', 'type': 'discrete', 'domain': (5,10)},
          {'name': 'n_repeats', 'type': 'discrete', 'domain': (3,5)},
          {'name': 'beta', 'type': 'discrete', 'domain': (99, 98)},
          {'name': 'ema', 'type': 'discrete', 'domain': (1,5,10,20)},
          {'name': 'time_format', 'type': 'discrete', 'domain': (0,1,2)}, #1 for stepofday, 2 for stepofweek
          {'name': 'volume_input', 'type': 'discrete', 'domain': (0,1)},
          {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (0,1)},
          {'name': 'split_daily_data', 'type': 'discrete', 'domain': (0,1)}
         ]
    
    
    def __init__(self, stock_name, stock_index, n_training_days):
        self.stock_name = stock_name
        self.stock_index = stock_index
        self.n_training_days = n_training_days
        self.save_path = "model_{}_{}".format(stock_name, n_training_days)
        self.last_training_date = None
        self.model = None
        self.max_profit = -999.0
        return
    
    def get_parameter_str(self, X):
        parameter_str = ""
        for i in range(len(self.mixed_domain)):
            parameter_str += self.mixed_domain[i]["name"]
            parameter_str += ':'
            parameter_str += str(X[i])
            parameter_str += ','
        return parameter_str
    
    def get_max_steps(self, groups):
        max_steps = 0
        for index, df in groups:
            df_len = len(df)
            if df_len > max_steps:
                max_steps = df_len
        return max_steps

    
    def get_data_prep_desc_filename(self, path):
        return path + '/data_prep_desc.pkl'
    
    def optimize(self, input_csv_path, max_iter=300, is_test=False):
        if is_test == True:
            mixed_domain = self.mixed_domain_test
        else:
            mixed_domain = self.mixed_domain
        
        opt_handler = GPyOpt.methods.BayesianOptimization(f=self.opt_func,  # Objective function       
                                     domain=mixed_domain,          # Box-constraints of the problem
                                     initial_design_numdata = 20,   # Number data initial design
                                     acquisition_type='EI',        # Expected Improvement
                                     exact_feval = True)           # True evaluations, no sample noise
        opt_handler.run_optimization(max_iter, eps=0)
    
    def save(self):
        self.model.save(self.save_path, self.last_training_date)
        
    def load(self):
        save_path = self.save_path
        
        # iterate the path, and find out the latest date
        
    
    def opt_func(self, X_list):
        answer = np.zeros((X_list.shape[0], 1))
        for i in range(len(X_list)):
            print(self.get_parameter_str(X_list[i]))
            features = X_list[i]
            error, model, value_price = self.get_value_result(features)
            
            strategy_model = StrategyModel()
            strategy_model.optimize(value_price)
            profit = strategy_model.get_max_profit()
            profit_daily = (profit-1) / len(value_price)
            print("total profit={}, profit/day={} error={}".format(profit, profit_daily, error))
            #self.draw_step_profit_graph(self.step_profit_list, "step_profit_{}".format(answer[i][0]))
            #self.step_profit_list = []
            if profit > self.max_profit:
                print("find new opt:{}, {}".format(profit, self.get_parameter_str(X_list[i])))
                self.model = model
                self.save()
                self.max_profit = profit
                # check the optimized strategy for this model
            answer[i][0] = -profit_daily
        return answer

    def sma(self, data, window):
        """
        Calculates Simple Moving Average
        http://fxtrade.oanda.com/learn/forex-indicators/simple-moving-average
        """
        if len(data) < window:
            return None
        return sum(data[-window:]) / float(window)
    
    def ema(self, data, window):
        if len(data) < 2 * window:
            raise ValueError("data is too short")
        c = 2.0 / (window + 1)
        current_ema = self.sma(data[-window*2:-window], window)
        for value in data[-window:]:
            current_ema = (c * value) + ((1 - c) * current_ema)
        return current_ema
    
    
    def get_value_result(self, features):
        n_neurons = int(features[0])
        learning_rate = features[1]
        num_layers = int(features[2])
        rnn_type = int(features[3])
        learning_period = int(features[4])
        prediction_period = int(features[5])
        n_repeats = int(features[6])
        beta = int(features[7])
        ema = int(features[8])
        time_format = int(features[9])
        volume_input = int(features[10])
        use_centralized_bid = int(features[11])
        #split_daily_data = int(features[12])
        split_daily_data = 0
        data_manipulator = DataManipulator(beta, ema, 
                                           time_format, 
                                           volume_input, 
                                           use_centralized_bid, 
                                           split_daily_data, 
                                           self.n_training_days)
        npy_path = 'npy_files'
        data_training_input, data_training_output, timestamps, price \
            = data_manipulator.prep_training_data(npy_path, self.stock_index)
        
        # get the date list.
        date_list = []
        for i in range(len(timestamps)):
            date = timestamps[i][0].strftime("%y%m%d")
            date_list.append(date)
        

        
        # now define the network
        model = StatefulLstmModel(n_neurons, learning_rate, num_layers, rnn_type, n_repeats)
        
        assert(self.n_training_days % prediction_period == 0)
        
        n_training_seq = self.n_training_days
        n_learning_seq = learning_period
        n_prediction_seq = prediction_period
        if split_daily_data == 1:
            n_training_seq *= 2
            n_learning_seq *= 2
            n_prediction_seq *= 2
            
        self.last_training_date = date_list[-1]
        daily_errors = []
        all_outputs = []
        print("start training: training_seq:{}, learning_seq:{}, prediction_seq:{} last_training_date:{}".format(n_training_seq, 
                                                                                           n_learning_seq, 
                                                                                           n_prediction_seq,
                                                                                           self.last_training_date))
        for i in range(0, n_training_seq-n_learning_seq+1, n_prediction_seq):
            learning_end = i + n_learning_seq
            print("start training from seq:{}({}) - seq:{}({})".format(i, date_list[i], learning_end-1, date_list[learning_end-1]))
            model.fit(data_training_input[i:learning_end], data_training_output[:learning_end], n_prediction_seq)
            prediction_end = learning_end + n_prediction_seq
            if prediction_end > n_training_seq:
                break
            
            print("start predicting from seq:{}({}) - seq:{}({})".format(learning_end, date_list[learning_end], 
                                                                       prediction_end-1, date_list[prediction_end-1]))
            
            outputs = model.predict_and_verify(data_training_input[learning_end:prediction_end], 
                                     data_training_output[learning_end:prediction_end])
            print("output.shape")
            print(outputs.shape)
            all_outputs.append(outputs)
            # calculate the error for every day
            y = data_training_output[learning_end:prediction_end]
            # error is a 1-D array for the every day error
            error = np.mean(np.square(outputs-y), axis=(1,2))
        
            daily_errors += error.tolist()
            
        np_all_outputs = np.array(all_outputs)
        print("np_all_outputs.shape")
        print(np_all_outputs.shape)
        shape = np_all_outputs.shape
        
        n_predicted_days = self.n_training_days - learning_period
        if split_daily_data == 1:
            steps_per_day = data_training_input.shape[1] * 2
        else:
            steps_per_day = data_training_input.shape[1]
        
        
        np_all_outputs = np_all_outputs.reshape((n_predicted_days, steps_per_day,1))
        np_all_outputs = data_manipulator.inverse_transform_output(np_all_outputs)
        
        print("np_all_outputs.shape")
        print(np_all_outputs.shape)
        shape = timestamps.shape
        timestamps = timestamps.reshape((self.n_training_days, steps_per_day, 1))
        price = price.reshape((self.n_training_days, steps_per_day, 1))
        print("timestamps.shape")
        print(timestamps.shape)
        value_with_timestamp_price = np.concatenate((timestamps[learning_period:],
                                               np_all_outputs,
                                               price[learning_period:]), axis=2)
        print("value_with_timestamp_price")
        print(value_with_timestamp_price.shape)
        ema = self.ema(daily_errors, int(len(daily_errors)/2))
        print("test finished, the ema of testing error:{}".format(ema))
        
        return ema, model, value_with_timestamp_price
    

In [19]:
class StrategyModel:
    mixed_domain = [{'name': 'buy_threshold', 'type': 'continuous', 'domain': (0.0, 0.005)},
                 {'name': 'sell_threshold', 'type': 'continuous', 'domain': (-0.005, 0.0)},
                 {'name': 'stop_loss', 'type': 'continuous', 'domain': (-0.01,-0.003)},
                 {'name': 'stop_gain', 'type': 'continuous', 'domain': (0.002, 0.01)},
                 {'name': 'min_hold_steps', 'type': 'discrete', 'domain': range(10,100)},
         ]
    def __init__(self):
        self.max_profit = -999.0
        return

    def optimize(self, input_data):
        self.input_data = input_data
        
        myBopt = GPyOpt.methods.BayesianOptimization(self.get_profit,  # Objective function       
                                             domain=self.mixed_domain,          # Box-constraints of the problem
                                             initial_design_numdata = 30,   # Number data initial design
                                             acquisition_type='EI',        # Expected Improvement
                                             exact_feval = True)           # True evaluations, no sample noise

        myBopt.run_optimization(100,eps=0)
        return 0
        
    # the input data is in shape (days, steps, [timestamp, value, price])
    def get_profit(self, X_list):    
        buy_threshold = X_list[0][0]
        sell_threshold = X_list[0][1]
        stop_loss = X_list[0][2]
        stop_gain = X_list[0][3]
        min_hold_steps = int(X_list[0][4])
        tot_profit = 1
        tot_stock_profit = 1
        buy_step = None
        max_trades = 3
        cost = 0.00015
        n_tot_trades = 0
        for day_idx in range(len(self.input_data)):
            #print("starting day {}".format(day_idx))
            n_trades = 0
            daily_profit = 1
            state = 0
            daily_data = self.input_data[day_idx]
            for step in range(len(daily_data)):
                value = daily_data[step][1]
                price = daily_data[step][2]
                if state == 0 and n_trades<max_trades and step < len(daily_data)-min_hold_steps:
                    if value > buy_threshold:
                        buy_price = price
                        buy_step = step
                        #print("buy at step {} price:{}".format(step, price))
                        state = 1

                elif state == 1:
                    profit = (price - buy_price)/buy_price
                    if (value < sell_threshold and 
                        step - buy_step > min_hold_steps) or step == len(daily_data)-1 or \
                        profit < stop_loss or \
                        profit > stop_gain:
                        #print("sell at step {} price:{}".format(step, price))
                        profit -= cost
                        tot_profit *= (1+profit)
                        daily_profit *= (1 + profit)
                        state = 0
                        n_trades += 1
            n_tot_trades += n_trades
            last = daily_data[-1][2]
            open = daily_data[0][2]
            stock_profit = (last - open) / open
            tot_stock_profit *= (1+stock_profit)
            #print("finishing day {}, daily_profit:{}".format(day_idx, daily_profit))
        #print("{}, n_tot_trades:{} profit:{}".format(X_list, n_tot_trades, tot_profit))
        if tot_profit > self.max_profit:
            print("find new opt profit:{}".format(tot_profit))
            self.max_profit = tot_profit
        return -tot_profit
    
    def get_max_profit(self):
        return self.max_profit
    
        

In [20]:
value_model = ValueModel('Nordea', 5, 60)
value_model.optimize('.', is_test=True)

n_neurons:140.0,learning_rate:0.002,num_layers:4.0,rnn_type:1.0,learning_period:20.0,prediction_period:10.0,n_repeats:3.0,beta:98.0,ema:10.0,time_format:1.0,volume_input:1.0,use_centralized_bid:1.0,split_daily_data:0.0,
start training: training_seq:60, learning_seq:20, prediction_seq:10 last_training_date:190423
start training from seq:0(190128) - seq:19(190222)
10:13:54.034732 repeat=0 training finished, training MSE=0.3594173719291575
10:14:02.560032 repeat=1 training finished, training MSE=0.18303415074551593
10:14:11.086698 repeat=2 training finished, training MSE=0.12245971533799699
start predicting from seq:20(190225) - seq:29(190308)
Predicting seq:0 testing MSE: 0.00019021553453058004
Predicting seq:1 testing MSE: 0.00032685702899470925
Predicting seq:2 testing MSE: 0.0003697482170537114
Predicting seq:3 testing MSE: 0.0002692380512598902
Predicting seq:4 testing MSE: 0.0001827759697334841
Predicting seq:5 testing MSE: 0.004807967692613602
Predicting seq:6 testing MSE: 0.000297

Predicting seq:4 testing MSE: 0.008534597232937813
output.shape
(5, 504, 1)
start training from seq:25(190304) - seq:44(190329)
10:18:06.697282 repeat=0 training finished, training MSE=0.09241953162127174
10:18:09.695567 repeat=1 training finished, training MSE=0.047401609972439473
10:18:12.700299 repeat=2 training finished, training MSE=0.03183394663525784
start predicting from seq:45(190401) - seq:49(190405)
Predicting seq:0 testing MSE: 0.00020294563728384674
Predicting seq:1 testing MSE: 0.0004285624891053885
Predicting seq:2 testing MSE: 0.0002846477145794779
Predicting seq:3 testing MSE: 0.0003325385332573205
Predicting seq:4 testing MSE: 9.075166599359363e-05
output.shape
(5, 504, 1)
start training from seq:30(190311) - seq:49(190405)
10:18:16.935264 repeat=0 training finished, training MSE=0.09204887151136062
10:18:19.945079 repeat=1 training finished, training MSE=0.04734457044396549
10:18:22.931511 repeat=2 training finished, training MSE=0.031797271901935646
start predicting

start training: training_seq:60, learning_seq:20, prediction_seq:10 last_training_date:190423
start training from seq:0(190128) - seq:19(190222)
10:21:40.760588 repeat=0 training finished, training MSE=0.2892441611547838
10:21:44.503323 repeat=1 training finished, training MSE=0.145419438126919
10:21:48.252127 repeat=2 training finished, training MSE=0.09743149594651186
start predicting from seq:20(190225) - seq:29(190308)
Predicting seq:0 testing MSE: 0.0002835031773429364
Predicting seq:1 testing MSE: 0.0003453318204265088
Predicting seq:2 testing MSE: 0.00034150900319218636
Predicting seq:3 testing MSE: 0.0003745431313291192
Predicting seq:4 testing MSE: 0.00027142366161569953
Predicting seq:5 testing MSE: 0.0036306921392679214
Predicting seq:6 testing MSE: 0.00017707240476738662
Predicting seq:7 testing MSE: 0.00020366912940517068
Predicting seq:8 testing MSE: 0.0006065879133529961
Predicting seq:9 testing MSE: 0.00028447265503928065
output.shape
(10, 516, 1)
start training from se

10:24:56.041023 repeat=0 training finished, training MSE=0.11803870905423537
10:24:56.743024 repeat=1 training finished, training MSE=0.0673767104264698
10:24:57.445604 repeat=2 training finished, training MSE=0.04630111439910252
start predicting from seq:35(190318) - seq:39(190322)
Predicting seq:0 testing MSE: 0.006885254755616188
Predicting seq:1 testing MSE: 0.00394984008744359
Predicting seq:2 testing MSE: 0.00249671982601285
Predicting seq:3 testing MSE: 0.0016682780114933848
Predicting seq:4 testing MSE: 0.0005172400269657373
output.shape
(5, 504, 1)
start training from seq:30(190311) - seq:39(190322)
10:24:58.911228 repeat=0 training finished, training MSE=0.1169148426677566
10:24:59.608002 repeat=1 training finished, training MSE=0.06659311891708057
10:25:00.317979 repeat=2 training finished, training MSE=0.04575989511795342
start predicting from seq:40(190325) - seq:44(190329)
Predicting seq:0 testing MSE: 0.005059524904936552
Predicting seq:1 testing MSE: 0.00324308662675321

10:26:24.970029 repeat=0 training finished, training MSE=0.1334903135197237
10:26:26.430161 repeat=1 training finished, training MSE=0.07164007471001241
10:26:27.873130 repeat=2 training finished, training MSE=0.0496801340651776
10:26:29.313197 repeat=3 training finished, training MSE=0.03776803098953678
10:26:30.758739 repeat=4 training finished, training MSE=0.030460862484178505
start predicting from seq:50(190408) - seq:59(190423)
Predicting seq:0 testing MSE: 0.0005174553953111172
Predicting seq:1 testing MSE: 0.000666281848680228
Predicting seq:2 testing MSE: 0.0005696075386367738
Predicting seq:3 testing MSE: 0.0006827941397204995
Predicting seq:4 testing MSE: 0.000791208993177861
Predicting seq:5 testing MSE: 0.0012175797019153833
Predicting seq:6 testing MSE: 0.0005928309983573854
Predicting seq:7 testing MSE: 0.0004274983948562294
Predicting seq:8 testing MSE: 0.004438231233507395
Predicting seq:9 testing MSE: 0.0010518148774281144
output.shape
(10, 504, 1)
start training from

10:29:50.555515 repeat=0 training finished, training MSE=0.14019589114468545
10:29:56.651553 repeat=1 training finished, training MSE=0.07138766928765108
10:30:02.765626 repeat=2 training finished, training MSE=0.04796624276107953
10:30:08.880931 repeat=3 training finished, training MSE=0.0361486394669555
10:30:14.989968 repeat=4 training finished, training MSE=0.029060167916177305
start predicting from seq:20(190225) - seq:29(190308)
Predicting seq:0 testing MSE: 0.00024829362519085407
Predicting seq:1 testing MSE: 0.00042596293496899307
Predicting seq:2 testing MSE: 0.0006469629006460309
Predicting seq:3 testing MSE: 0.0004666101885959506
Predicting seq:4 testing MSE: 0.00030104562756605446
Predicting seq:5 testing MSE: 0.0064827012829482555
Predicting seq:6 testing MSE: 0.0004807288642041385
Predicting seq:7 testing MSE: 0.0003361202252563089
Predicting seq:8 testing MSE: 0.00060937280068174
Predicting seq:9 testing MSE: 0.000249158387305215
output.shape
(10, 504, 1)
start training 

Predicting seq:4 testing MSE: 0.00012490195513237268
output.shape
(5, 516, 1)
start training from seq:20(190225) - seq:39(190322)
10:34:00.597484 repeat=0 training finished, training MSE=0.10713419780076947
10:34:02.774941 repeat=1 training finished, training MSE=0.055640578949532934
10:34:04.910982 repeat=2 training finished, training MSE=0.03740069177668678
10:34:07.078298 repeat=3 training finished, training MSE=0.028139834756075288
10:34:09.253692 repeat=4 training finished, training MSE=0.022569852069646002
start predicting from seq:40(190325) - seq:44(190329)
Predicting seq:0 testing MSE: 0.00024179833417292684
Predicting seq:1 testing MSE: 0.0001220790873048827
Predicting seq:2 testing MSE: 0.0003398105036467314
Predicting seq:3 testing MSE: 0.0005463162669911981
Predicting seq:4 testing MSE: 0.01411448698490858
output.shape
(5, 516, 1)
start training from seq:25(190304) - seq:44(190329)
10:34:12.878159 repeat=0 training finished, training MSE=0.11029956208949443
10:34:15.062841

Predicting seq:3 testing MSE: 0.0013730996288359165
Predicting seq:4 testing MSE: 0.004681467078626156
output.shape
(5, 504, 1)
start training from seq:25(190304) - seq:44(190329)
10:36:53.990022 repeat=0 training finished, training MSE=0.07355409081210382
10:36:56.511781 repeat=1 training finished, training MSE=0.03902514779183548
10:36:59.046583 repeat=2 training finished, training MSE=0.026460287013226964
10:37:01.591533 repeat=3 training finished, training MSE=0.02003441170691076
10:37:04.099365 repeat=4 training finished, training MSE=0.016162445579539053
start predicting from seq:45(190401) - seq:49(190405)
Predicting seq:0 testing MSE: 0.00031587007106281817
Predicting seq:1 testing MSE: 0.000705897284206003
Predicting seq:2 testing MSE: 0.000439518567873165
Predicting seq:3 testing MSE: 0.0005490379990078509
Predicting seq:4 testing MSE: 0.0003111683763563633
output.shape
(5, 504, 1)
start training from seq:30(190311) - seq:49(190405)
10:37:08.372413 repeat=0 training finished,

10:42:51.443623 repeat=1 training finished, training MSE=0.03137822803109884
10:42:53.897595 repeat=2 training finished, training MSE=0.021214456687448546
start predicting from seq:20(190225) - seq:29(190308)
Predicting seq:0 testing MSE: 0.00028613966424018145
Predicting seq:1 testing MSE: 0.0004612655029632151
Predicting seq:2 testing MSE: 0.0007161463727243245
Predicting seq:3 testing MSE: 0.0004903927911072969
Predicting seq:4 testing MSE: 0.00030588306253775954
Predicting seq:5 testing MSE: 0.00714652007445693
Predicting seq:6 testing MSE: 0.0005202619358897209
Predicting seq:7 testing MSE: 0.0003511423128657043
Predicting seq:8 testing MSE: 0.0006552453269250691
Predicting seq:9 testing MSE: 0.00025801791343837976
output.shape
(10, 504, 1)
start training from seq:10(190211) - seq:29(190308)
10:42:57.880334 repeat=0 training finished, training MSE=0.0747264098841697
10:43:00.329850 repeat=1 training finished, training MSE=0.038283529203545184
10:43:02.800934 repeat=2 training fini

10:44:30.453110 repeat=2 training finished, training MSE=0.08831880179107733
start predicting from seq:50(190408) - seq:54(190412)
Predicting seq:0 testing MSE: 0.0007463213405571878
Predicting seq:1 testing MSE: 0.0010003393981605768
Predicting seq:2 testing MSE: 0.0008334593148902059
Predicting seq:3 testing MSE: 0.0009623186197131872
Predicting seq:4 testing MSE: 0.0011298012686893344
output.shape
(5, 504, 1)
start training from seq:35(190318) - seq:54(190412)
10:44:32.684651 repeat=0 training finished, training MSE=0.25128244262887167
10:44:33.981368 repeat=1 training finished, training MSE=0.12868427070134203
10:44:35.284564 repeat=2 training finished, training MSE=0.0863873299327679
start predicting from seq:55(190415) - seq:59(190423)
Predicting seq:0 testing MSE: 0.001447435119189322
Predicting seq:1 testing MSE: 0.0007182176341302693
Predicting seq:2 testing MSE: 0.0006351826013997197
Predicting seq:3 testing MSE: 0.028771832585334778
Predicting seq:4 testing MSE: 0.0014100936

10:48:46.671940 repeat=4 training finished, training MSE=0.03754503011528868
start predicting from seq:20(190225) - seq:29(190308)
Predicting seq:0 testing MSE: 0.00031549533014185727
Predicting seq:1 testing MSE: 0.00044721312588080764
Predicting seq:2 testing MSE: 0.00040562456706538796
Predicting seq:3 testing MSE: 0.0005278371972963214
Predicting seq:4 testing MSE: 0.00037737746606580913
Predicting seq:5 testing MSE: 0.006160276010632515
Predicting seq:6 testing MSE: 0.0005611057276837528
Predicting seq:7 testing MSE: 0.0008463449194096029
Predicting seq:8 testing MSE: 0.0009173372527584434
Predicting seq:9 testing MSE: 0.0003318133531138301
output.shape
(10, 504, 1)
start training from seq:20(190225) - seq:29(190308)
10:48:47.938154 repeat=0 training finished, training MSE=0.17288571221288293
10:48:48.518616 repeat=1 training finished, training MSE=0.09033705008914694
10:48:49.097764 repeat=2 training finished, training MSE=0.06093835961073637
10:48:49.689048 repeat=3 training fin

Predicting seq:5 testing MSE: 0.0011169584468007088
Predicting seq:6 testing MSE: 0.0008119617705233395
Predicting seq:7 testing MSE: 0.0006709520821459591
Predicting seq:8 testing MSE: 0.002950282534584403
Predicting seq:9 testing MSE: 0.0009775531943887472
output.shape
(10, 516, 1)
start training from seq:40(190325) - seq:59(190423)
10:51:18.102725 repeat=0 training finished, training MSE=0.14237945063505322
10:51:20.874610 repeat=1 training finished, training MSE=0.07363481883949134
10:51:24.116808 repeat=2 training finished, training MSE=0.04931061596435029
np_all_outputs.shape
(4, 10, 516, 1)
np_all_outputs.shape
(40, 516, 1)
timestamps.shape
(60, 516, 1)
value_with_timestamp_price
(40, 516, 3)
test finished, the ema of testing error:0.0012668210468862776
find new opt profit:1
total profit=1, profit/day=0.0 error=0.0012668210468862776
n_neurons:120.0,learning_rate:0.003,num_layers:3.0,rnn_type:1.0,learning_period:10.0,prediction_period:5.0,n_repeats:3.0,beta:99.0,ema:20.0,time_for

start training: training_seq:60, learning_seq:20, prediction_seq:5 last_training_date:190423
start training from seq:0(190128) - seq:19(190222)
10:55:20.530242 repeat=0 training finished, training MSE=0.06565708721172996
10:55:21.860678 repeat=1 training finished, training MSE=0.033705349585216024
10:55:23.207468 repeat=2 training finished, training MSE=0.022962361767228382
start predicting from seq:20(190225) - seq:24(190301)
Predicting seq:0 testing MSE: 0.0007500892388634384
Predicting seq:1 testing MSE: 0.0008357809274457395
Predicting seq:2 testing MSE: 0.0013331506634131074
Predicting seq:3 testing MSE: 0.0017695151036605239
Predicting seq:4 testing MSE: 0.001900375704281032
output.shape
(5, 504, 1)
start training from seq:5(190204) - seq:24(190301)
10:55:25.369652 repeat=0 training finished, training MSE=0.059590019308961927
10:55:26.723380 repeat=1 training finished, training MSE=0.03078394609328825
10:55:28.038243 repeat=2 training finished, training MSE=0.021016063709006026
s

10:58:28.079554 repeat=0 training finished, training MSE=0.08427396152255824
10:58:29.073938 repeat=1 training finished, training MSE=0.04319467139721382
10:58:30.069508 repeat=2 training finished, training MSE=0.02904106243076967
start predicting from seq:35(190318) - seq:39(190322)
Predicting seq:0 testing MSE: 0.00021002143330406398
Predicting seq:1 testing MSE: 0.00045554054668173194
Predicting seq:2 testing MSE: 0.0003132759011350572
Predicting seq:3 testing MSE: 0.00022550941503141075
Predicting seq:4 testing MSE: 0.0003515668213367462
output.shape
(5, 504, 1)
start training from seq:20(190225) - seq:39(190322)
10:58:31.852165 repeat=0 training finished, training MSE=0.08407389709900599
10:58:32.871990 repeat=1 training finished, training MSE=0.04311364784189209
10:58:33.874630 repeat=2 training finished, training MSE=0.028997864274424502
start predicting from seq:40(190325) - seq:44(190329)
Predicting seq:0 testing MSE: 0.00029975903453305364
Predicting seq:1 testing MSE: 0.0001

KeyboardInterrupt: 