In [297]:
import numpy as np
import tensorflow as tf
import copy
from sklearn import preprocessing
import datetime
import pickle

In [512]:
class StatefulLstmModel:
    def __init__(self,
                 tf_model_filename = None, 
                 n_neurons = 100, 
                 learning_rate = 0.003, 
                 num_layers = 1,
                 rnn_type = 2,
                 prediction_period = 1,
                 n_repeats = 4,
                ):
        self.tf_model_filename = tf_model_filename
        self.n_neurons = n_neurons;
        self.learning_rate = learning_rate;
        self.num_layers = num_layers;
        self.rnn_type = rnn_type;
        self.prediction_period = prediction_period;
        self.n_repeats = n_repeats
        self.model_initialized = False
        
        return
    
    def __del__(self):
        if self.sess != None:
            self.sess.close()
    
    def get_batch(self, seq_index, data_train_input, data_train_output):
        X_batch = data_train_input[seq_index:seq_index+1]
        y_batch = data_train_output[seq_index:seq_index+1]
        return X_batch, y_batch
    
    
    def initialize_layers(self):
        layers = None
        if self.rnn_type == 0:
            layers = [tf.nn.rnn_cell.BasicLSTMCell(self.n_neurons) 
              for _ in range(self.num_layers)]
        elif self.rnn_type == 1:
            layers = [tf.nn.rnn_cell.LSTMCell(self.n_neurons, use_peepholes=False) 
              for _ in range(self.num_layers)]
        elif self.rnn_type == 2:
            layers = [tf.nn.rnn_cell.LSTMCell(self.n_neurons, use_peepholes=True) 
              for _ in range(self.num_layers)]
        else:
            print("WRONG")
        return layers
    
    def reset_graph(self, seed=42):
        tf.reset_default_graph()
        tf.set_random_seed(seed)
        np.random.seed(seed)
    
    def create_model(self):
        self.X = tf.placeholder(tf.float32, [None, self.steps, self.n_inputs])
        self.y = tf.placeholder(tf.float32, [None, self.steps, self.n_outputs])
        layers = self.initialize_layers()
        cell = tf.nn.rnn_cell.MultiRNNCell(layers)
        self.init_state = tf.placeholder(tf.float32, [self.num_layers, 2, 1, self.n_neurons])
        
        state_per_layer_list = tf.unstack(self.init_state, axis=0)
        rnn_tuple_state = tuple(
            [tf.nn.rnn_cell.LSTMStateTuple(state_per_layer_list[idx][0], state_per_layer_list[idx][1])
             for idx in range(self.num_layers)]
        )
        
        rnn_outputs, self.new_states = tf.nn.dynamic_rnn(cell, self.X, dtype=tf.float32, 
                                                    initial_state=rnn_tuple_state)
        
        stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, self.n_neurons])
        stacked_outputs = tf.layers.dense(stacked_rnn_outputs, self.n_outputs)
        self.outputs = tf.reshape(stacked_outputs, [-1, self.steps, self.n_outputs])
        
        self.loss = tf.reduce_mean(tf.square(self.outputs - self.y))
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.training_op = optimizer.minimize(self.loss)

        self.init = tf.global_variables_initializer()
        self.model_initialized = True
        return self.X, self.y, self.init_state, self.init, self.training_op, \
            self.new_states, self.loss, self.outputs
    
    # train the model, input is the training data for one cycle
    # input is in the shape: [days, steps, features], the features are 
    # 1. diff, 2. volume. 3. timesteps.
    def fit(self,data_train_input, data_train_output):
        n_outputs = 1
        n_inputs = data_train_input.shape[2]
        batch_size = 1
        days = data_train_input.shape[0]
        steps = data_train_input.shape[1]
        self.steps = steps
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.reset_graph()
        X, y, init_state, init, training_op, new_states, loss, outputs = self.create_model()
        my_loss_train_list = []
        sess = tf.Session()
        # TODO: load from file.
        if (self.tf_model_filename == None):
            init.run(session=sess)
            # batch size is 1
            rnn_states_before_training = np.zeros((self.num_layers, 2, 1, self.n_neurons))
        else:
            print("load from file, Not implemented")

        for repeat in range(self.n_repeats):
            rnn_states = copy.deepcopy(rnn_states_before_training)
            for seq in range(days):
                X_batch, y_batch = self.get_batch(seq, data_train_input, data_train_output)
                feed_dict = {
                        X: X_batch,
                        y: y_batch,
                        init_state: rnn_states}
                my_op, rnn_states, my_loss_train, my_outputs = sess.run([training_op, 
                          new_states, 
                          loss, 
                          outputs], feed_dict=feed_dict)

                my_loss_train_list.append(my_loss_train)
                # last repeat , remember the sates
                if seq+1 == self.prediction_period and repeat == self.n_repeats-1:
                    # next training loop starts from here
                    tmp_states = copy.deepcopy(rnn_states)
                my_loss_train_avg = sum(my_loss_train_list) / len(my_loss_train_list)

            print("{} repeat={} training finished, training MSE={}".format(
                datetime.datetime.now().time(),
                repeat, my_loss_train_avg))
            # backup the states after training.
        rnn_states_before_training = copy.deepcopy(tmp_states)
        
        self.rnn_states = rnn_states
        self.sess = sess
        return
    
    def predict(self, data_test_input, data_test_output):
        days = data_test_input.shape[0]
        rnn_states = copy.deepcopy(self.rnn_states)
        #X, y, init_state, init, training_op, new_states, loss, outputs = self.create_model()
        sess = self.sess
        
        my_loss_test_list = []
        for seq in range(days):
            feed_dict = {
                self.X: data_test_input[seq:seq+1],
                self.y: data_test_output[seq:seq+1],
                self.init_state: rnn_states,
            }
            rnn_states, my_loss_test, my_outputs = sess.run([self.new_states, self.loss, self.outputs], feed_dict=feed_dict)
            my_loss_test_list.append(my_loss_test)
            print("sequence:{} test finished, testing MSE={}".format(seq, my_loss_test))
    
    def get_nodes(self):
        return self.sess, self.X, self.y, self.init_state, self.new_states, self.loss, self.outputs
    
    def clear_nodes(self):
        self.sess = None
        self.X = None
        self.y = None
        self.init_state = None
        self.new_states = None
        self.loss = None
        self.outputs = None
    
    def set_nodes(self, sess, X, y, init_state, new_states, loss, outputs):
        self.sess = sess
        self.X = X
        self.y = y
        self.init_state = init_state
        self.new_states = new_states
        self.loss = loss
        self.outputs = outputs
    
    def set_hyper_params_and_states(self, hyper_params_and_states):
        self.__dict__.update(hyper_params_and_states)

    
    def get_hyper_params_and_states(self):
        return {
            'n_neurons': self.n_neurons,
            'learning_rate': self.learning_rate,
            'num_layers': self.num_layers,
            'rnn_type': self.rnn_type,
            'n_repeats': self.n_repeats,
            'rnn_states': self.rnn_states,
            'steps': self.steps,
            'n_inputs': self.n_inputs,
            'n_outputs': self.n_outputs
        }
    
    def save(self, filename_no_suffix):
        saver = tf.train.Saver()
        save_path = saver.save(self.sess, filename_no_suffix + '.ckpt')
        print("Model saved in path: %s" % save_path)
        hyper_params_and_states = self.get_hyper_params_and_states()
        with open(filename_no_suffix + '.pkl', 'wb') as f:
            # Pickle the 'data' dictionary using the highest protocol available.
            pickle.dump(hyper_params_and_states, f, pickle.HIGHEST_PROTOCOL)
        
            
    def load(self, filename_no_suffix):
        # 1. restore hyper-params
        with open(filename_no_suffix + '.pkl', 'rb') as f:
            hyper_params_and_states = pickle.load(f)
            print(hyper_params_and_states)
        self.set_hyper_params_and_states(hyper_params_and_states)
        
        # 2. restore graph
        if self.model_initialized == False:
            self.reset_graph()
            self.create_model()
        
        # 3. restore session
        saver = tf.train.Saver()
        self.sess = tf.Session()
        saver.restore(self.sess, filename_no_suffix + '.ckpt')
        print("Model restored.")



In [513]:
# test
model = StatefulLstmModel()

In [514]:
file_name = "np_ema{}_beta{}.npz".format(20, 99)
data_all = np.load(file_name)['arr_0']

In [515]:
def transform(data_all, n_inputs, n_outputs):
    orig_shape = data_all.shape
    data_train_reshape = data_all.reshape((orig_shape[0] * orig_shape[1], orig_shape[2]))

    scaler_input = preprocessing.MinMaxScaler().fit(data_train_reshape[:,:n_inputs])
    data_train_input_scaled = scaler_input.transform(data_train_reshape[:,:n_inputs])

    # the invalid step, we change it to zero!
    data_train_input_scaled[~np.any(data_train_reshape, axis=1)] = 0
    data_train_input = data_train_input_scaled.reshape(orig_shape[0], orig_shape[1], n_inputs)

    scaler_output = preprocessing.MinMaxScaler().fit(data_train_reshape[:,-n_outputs:])
    data_train_output_scaled = scaler_output.transform(data_train_reshape[:,-n_outputs:])
    # the invalid step, we change it to zero!
    data_train_output_scaled[~np.any(data_train_reshape, axis=1)] = 0
    data_train_output = data_train_output_scaled.reshape(orig_shape[0], orig_shape[1], n_outputs)
    return data_train_input, data_train_output, scaler_output



stock_index = 5
input_column_list = [30+stock_index]
output_column_list = [60+stock_index]
all_data = data_all[:,7:-5,input_column_list+output_column_list]
data_train_input, data_train_output, scaler_output = transform(all_data, 1, 1)



print(data_train_input.shape)
print(data_train_output.shape)
print(scaler_output.data_range_)

# TODO: do the scaling outside here!
model.fit(data_train_input[:30,:,:],data_train_output[:30,:,:] )

(65, 504, 1)
(65, 504, 1)
[0.05327712]
05:12:49.548395 repeat=0 training finished, training MSE=0.05920288419074495
05:12:55.881972 repeat=1 training finished, training MSE=0.030061603643116542
05:13:02.442431 repeat=2 training finished, training MSE=0.020167609837517374
05:13:08.370142 repeat=3 training finished, training MSE=0.015214760272829152


In [517]:
model.predict(data_train_input[30:32,:,:],data_train_output[30:32,:,:])

sequence:0 test finished, testing MSE=7.825874490663409e-05
sequence:1 test finished, testing MSE=0.00013719226990360767


In [518]:
model.save('save')

Model saved in path: save.ckpt


In [519]:
model.load('save')
model.predict(data_train_input[30:32,:,:],data_train_output[30:32,:,:])

{'n_neurons': 100, 'learning_rate': 0.003, 'num_layers': 1, 'rnn_type': 2, 'n_repeats': 4, 'rnn_states': (LSTMStateTuple(c=array([[ 0.14004055, -0.12276338,  0.1360079 ,  0.0773332 , -0.22763984,
        -0.01303453,  0.05920409, -0.03322243,  0.00050975,  0.00637852,
        -0.2481907 , -0.01804037,  0.10121535,  0.24544367, -0.09096131,
        -0.14827758,  0.2746837 , -0.09316803, -0.33776656, -0.39537546,
        -0.14279257, -0.17128304,  0.23614636,  0.06018716,  0.25628114,
         0.09274361,  0.13785239,  0.13119625,  0.0721456 ,  0.04170251,
         0.14618814, -0.00350116,  0.02993896, -0.1152178 , -0.05495078,
        -0.00412621, -0.41165295,  0.33623067,  0.10702051, -0.02188108,
        -0.1752245 , -0.07821145, -0.16271316,  0.00938086, -0.28486693,
         0.15096101, -0.22611402, -0.0671767 , -0.01272984, -0.07623836,
         0.27866238,  0.10276368,  0.1338001 , -0.20315565,  0.1383292 ,
        -0.15351345,  0.00588265,  0.02938608, -0.23711422, -0.07224143,
 

In [520]:
new_m = StatefulLstmModel()
new_m.load('save')


{'n_neurons': 100, 'learning_rate': 0.003, 'num_layers': 1, 'rnn_type': 2, 'n_repeats': 4, 'rnn_states': (LSTMStateTuple(c=array([[ 0.14004055, -0.12276338,  0.1360079 ,  0.0773332 , -0.22763984,
        -0.01303453,  0.05920409, -0.03322243,  0.00050975,  0.00637852,
        -0.2481907 , -0.01804037,  0.10121535,  0.24544367, -0.09096131,
        -0.14827758,  0.2746837 , -0.09316803, -0.33776656, -0.39537546,
        -0.14279257, -0.17128304,  0.23614636,  0.06018716,  0.25628114,
         0.09274361,  0.13785239,  0.13119625,  0.0721456 ,  0.04170251,
         0.14618814, -0.00350116,  0.02993896, -0.1152178 , -0.05495078,
        -0.00412621, -0.41165295,  0.33623067,  0.10702051, -0.02188108,
        -0.1752245 , -0.07821145, -0.16271316,  0.00938086, -0.28486693,
         0.15096101, -0.22611402, -0.0671767 , -0.01272984, -0.07623836,
         0.27866238,  0.10276368,  0.1338001 , -0.20315565,  0.1383292 ,
        -0.15351345,  0.00588265,  0.02938608, -0.23711422, -0.07224143,
 

In [521]:
new_m.predict(data_train_input[30:32,:,:],data_train_output[30:32,:,:])

sequence:0 test finished, testing MSE=7.825874490663409e-05
sequence:1 test finished, testing MSE=0.00013719226990360767
