In [41]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

%matplotlib inline
# Default (width, height) for figures created after this cell.
# comparison_plot() in lstm_model passes its own figsize=(18,8) and is not affected.
plt.rcParams['figure.figsize'] = (20,10)

# keras lib
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout, LSTM, Input, concatenate
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [56]:
# Load the price history from the local data/ folder.
# The tail() output further down shows the columns: Unnamed: 0, timestamp, open, high, low, close, volume.
# NOTE(review): the same file is read into `df` again in the "Testing" section below.
df = pd.read_csv('data/600297.SS.csv')

In [206]:
class lstm_model:
    """Data preparation, model building and evaluation for a two-input LSTM forecaster.

    The methods depend on attributes set by earlier steps, so call them in this order:

    1. ``create_features_data(seq, fs, ns, windows)`` -- windows over the engineered columns
    2. ``create_lstm_data(tar)``                      -- windows over the scaled raw columns + targets
    3. ``multi_input_model()``                        -- build and compile the Keras model
    4. ``train_multi_input()``                        -- fit on both inputs

    ``train()`` fits a single-input model (one passed to the constructor) on the
    raw-column windows only.
    """

    def __init__(self, stock: 'stock_data', model=None, scaler=None, tar_pos=None):
        """Store the data source and optional pre-built pieces, then print a status report.

        Args:
            stock: object exposing a ``data`` DataFrame and a
                ``preprocess(ns=..., windows=...)`` method; ``proc_data`` is read
                from it after ``preprocess`` has been called. The annotation is a
                string so the class can be defined even when ``stock_data`` is not
                in scope yet.
            model: optional ready-made model (anything with ``fit`` / ``predict``).
            scaler: optional scaler; ``create_lstm_data`` always replaces it.
            tar_pos: optional position of the target column in ``stock.data.columns``;
                ``create_lstm_data`` always replaces it.
        """
        # initial variables
        self.stock = stock
        self.model = model
        self.scaler = scaler
        self.tar_pos = tar_pos
        self.lstm_inputs = list(stock.data.columns)

        if model is None:
            print('Model not created')
        if scaler is None:
            print('lstm dataset not created')
        print('Data read: ')
        # DataFrame.info() writes its report itself and returns None; wrapping it
        # in print() would add a stray "None" line to the output.
        self.stock.data.info()
        print('please create lstm dataset using create_lstm_data()')

    def create_features_data(self, seq, fs, ns, windows, ts=0.1):
        """Build sliding windows over the engineered feature columns.

        Args:
            seq: number of consecutive rows in each input window.
            fs: number of rows reserved after each window for the target
                (the targets themselves are built by ``create_lstm_data``).
            ns: forwarded to ``stock.preprocess``; also used to size ``skip``.
            windows: forwarded to ``stock.preprocess``; also used to size ``skip``.
            ts: fraction of ``len(stock.data)`` kept out of the training slice.

        Sets ``seq``, ``fs``, ``trn_len``, ``skip``, ``n_features``,
        ``features_scaler``, ``features_xtrain`` and ``features_xtest``.

        Raises:
            ValueError: if ``ns`` or ``windows`` is empty.
        """
        import warnings  # only needed for the non-finite data check below

        self.seq = seq
        self.fs = fs
        self.trn_len = int(len(self.stock.data) * (1 - ts))
        # The first `skip` rows are dropped; presumably these are the rows the
        # look-back features cannot fill -- confirm against stock.preprocess.
        self.skip = max(max(ns), max(windows))

        # Forward the caller's settings instead of a hard-coded [1, 5].
        self.stock.preprocess(ns=ns, windows=windows)

        raw_cols = set(self.stock.data.columns)
        cols = [col for col in self.stock.proc_data.columns if col not in raw_cols]
        # Only engineered columns that are NOT returns get min-max scaled.
        scale_cols = [col for col in cols if 'return' not in col]
        self.n_features = len(cols)

        # astype() returns a new float frame, so stock.proc_data is left untouched.
        proc_data = self.stock.proc_data[cols].iloc[self.skip:].astype(float)
        scaler = MinMaxScaler()
        if scale_cols:
            proc_data[scale_cols] = scaler.fit_transform(proc_data[scale_cols])
        self.features_scaler = scaler

        values = proc_data.values
        bad_rows = int((~np.isfinite(values)).any(axis=1).sum())
        if bad_rows:
            # NaN/inf inputs make the training loss NaN; report them here rather
            # than letting fit() fail silently later.
            warnings.warn(
                '{} of {} feature rows contain NaN/inf values; '
                'training on them will produce a NaN loss'.format(bad_rows, len(values)),
                stacklevel=2,
            )

        x = [values[i - seq:i] for i in range(seq, len(values) - fs)]
        x = np.array(x).reshape(-1, seq, self.n_features)
        self.features_xtrain, self.features_xtest = x[:self.trn_len], x[self.trn_len:]

    def create_lstm_data(self, tar):
        """Build min-max scaled windows over the raw columns plus the target array.

        Window ``k`` covers the same rows as window ``k`` produced by
        ``create_features_data``, so the two inputs stay aligned.

        Args:
            tar: name of the column to predict; must be one of ``lstm_inputs``.

        Sets ``x``, ``y``, ``scaler``, ``tar_pos``, ``xtrain``, ``xtest``,
        ``ytrain`` and ``ytest``.

        Raises:
            AttributeError: if ``create_features_data`` has not been called yet.
            KeyError: if ``tar`` is not an input column.
        """
        if not hasattr(self, 'seq'):
            raise AttributeError(
                'create_features_data() must be called before create_lstm_data()')
        if tar not in self.lstm_inputs:
            raise KeyError(tar)

        if self.scaler is None:
            print('creating new lstm dataset')
        # Cast first: scaling writes floats, and integer columns (e.g. a volume
        # count) would otherwise need an implicit dtype conversion.
        df = self.stock.data[self.lstm_inputs].astype(float)
        scaler = MinMaxScaler()
        scaled = np.asarray(scaler.fit_transform(df))
        tar_pos = self.lstm_inputs.index(tar)

        x, y = [], []
        for i in range(self.seq + self.skip, len(scaled) - self.fs):
            x.append(scaled[i - self.seq:i])
            y.append(scaled[i:i + self.fs, tar_pos])

        # assigning class vars
        self.x = np.array(x).reshape(-1, self.seq, len(self.lstm_inputs))
        self.y = np.array(y).reshape(-1, self.fs)
        self.scaler = scaler
        self.tar_pos = tar_pos

        trn_len = self.trn_len
        self.xtrain, self.xtest = self.x[:trn_len], self.x[trn_len:]
        self.ytrain, self.ytest = self.y[:trn_len], self.y[trn_len:]

    def multi_input_model(self):
        """Build and compile the two-input network and store it in ``self.model``.

        One stacked-LSTM branch reads the engineered features, the other reads
        the raw columns; their 30-unit outputs are concatenated and mapped to
        ``fs`` linear outputs. Compiled with MSE loss and the Adam optimizer.

        Raises:
            AttributeError: if ``create_features_data`` has not been called yet.
        """
        if not hasattr(self, 'n_features'):
            raise AttributeError(
                'create_features_data() must be called before multi_input_model()')

        feature_inputs = Input(shape=(self.seq, self.n_features,), name='feat_inp')
        x = LSTM(2 * self.n_features, return_sequences=True)(feature_inputs)
        x = LSTM(10)(x)
        feature_outputs = Dense(30, activation='linear')(x)

        lstm_inputs = Input(shape=(self.seq, len(self.lstm_inputs)), name='lstm_inp')
        x = LSTM(32, return_sequences=True)(lstm_inputs)
        x = LSTM(16)(x)
        lstm_outputs = Dense(30)(x)

        join = concatenate([feature_outputs, lstm_outputs])
        final_output = Dense(self.fs, activation='linear')(join)
        model = Model(inputs=[feature_inputs, lstm_inputs], outputs=[final_output])
        model.compile(loss='mean_squared_error', optimizer='adam')
        self.model = model
        # summary() prints the table itself; print(model.summary()) adds "None".
        model.summary()

    def _build_callbacks(self):
        """Return the early-stopping and LR-reduction callbacks shared by both train methods."""
        es = EarlyStopping(monitor='val_loss', patience=5)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                      patience=3, min_lr=0.0000001, verbose=1)
        return [es, reduce_lr]

    def train_multi_input(self, bs=1, epochs=100):
        """Fit the two-input model on ``[features_xtrain, xtrain]`` against ``ytrain``.

        The last 20% of the training samples are used for validation and the
        samples are not shuffled.

        Args:
            bs: batch size.
            epochs: maximum number of epochs (early stopping may end sooner).
        """
        if self.model is None:
            print('Model has not been built')
            return
        self.model.fit([self.features_xtrain, self.xtrain], [self.ytrain],
                       validation_split=0.2, batch_size=bs, epochs=epochs,
                       callbacks=self._build_callbacks(), shuffle=False)

    def train(self, bs=1, epochs=100):
        """Fit a single-input model on ``xtrain`` / ``ytrain``.

        Uses the arrays produced by ``create_lstm_data``, holds out 10% for
        validation and shuffles the training samples.

        Args:
            bs: batch size.
            epochs: maximum number of epochs (early stopping may end sooner).
        """
        if self.model is None:
            print('Model has not been built')
            return
        self.model.fit(self.xtrain, self.ytrain, epochs=epochs, batch_size=bs,
                       validation_split=0.1, shuffle=True,
                       callbacks=self._build_callbacks())

    def predict(self, x, inverse=False):
        """Run the model on ``x``; with ``inverse=True`` map the output back to target units."""
        preds = self.model.predict(x)
        return self.inverse(preds) if inverse else preds

    def trend_accuracy(self, x, y):
        """Return the fraction of consecutive steps whose up/down direction is predicted correctly.

        Args:
            x: model input(s), passed to ``predict``.
            y: scaled true targets for the same samples.

        Returns:
            A float in [0, 1], or ``nan`` when fewer than two predictions are
            available (no direction can be formed).
        """
        preds = self.get_trends(self.predict(x, inverse=True))
        actuals = self.get_trends(self.inverse(y))
        if not preds:
            return float('nan')
        return sum(1 for a, b in zip(preds, actuals) if a == b) / len(preds)

    # helpers

    def inverse(self, vals):
        """Undo the min-max scaling of the target column, rounded to 4 decimals.

        Uses ``data_min_`` / ``data_max_`` of the fitted scaler at ``tar_pos``
        together with its ``feature_range``.
        """
        dmin = self.scaler.data_min_[self.tar_pos]
        dmax = self.scaler.data_max_[self.tar_pos]
        low, high = self.scaler.feature_range
        vals = np.asarray(vals, dtype=float)
        return np.round((vals - low) / (high - low) * (dmax - dmin) + dmin, 4)

    def get_trends(self, x):
        """Return ``1`` for every rise and ``-1`` otherwise between consecutive values.

        Args:
            x: array-like that squeezes to one dimension, e.g. shape ``(n, 1)``.

        Returns:
            A list of ``n - 1`` ints; empty when ``n < 2``.

        Raises:
            ValueError: if ``x`` still has more than one dimension after squeezing.
        """
        # atleast_1d keeps a single value from collapsing to a 0-d array.
        x = np.atleast_1d(np.squeeze(np.asarray(x)))
        if x.ndim != 1:
            raise ValueError(
                'get_trends expects data that squeezes to 1-D, got shape {}'.format(x.shape))
        return np.where(np.diff(x) > 0, 1, -1).tolist()

    def comparison_plot(self, predictions, real):
        """Plot predicted and real series on one figure and show it."""
        plt.figure(figsize=(18, 8))
        plt.plot(predictions, label='predictions')
        plt.plot(real, label='real')
        plt.legend(loc='best')
        plt.show()

# Testing

In [207]:
# Same read as the earlier loading cell (identical path); `df` is rebound to a fresh frame.
# NOTE(review): one of the two read_csv cells is probably redundant.
df = pd.read_csv('data/600297.SS.csv')

In [208]:
# Keep only rows dated after 2016-08-01. `timestamp` is compared with a date string; the
# tail() output below shows YYYY-MM-DD values, for which string order equals date order.
# .copy() gives an independent frame, so later changes to dfTest do not touch `df`.
# NOTE(review): rows stay in file order, which the tail() output shows is newest-first.
# lstm_model slices its windows by row position, so confirm this ordering is intended.
dfTest = df.query("timestamp>'2016-08-01'").copy()

In [209]:
# Show the last five rows; in the recorded output these are the oldest dates of the selection.
dfTest.tail()

Unnamed: 0,timestamp,open,high,low,close,volume
674,2016-08-08,7.0231,7.0538,6.9231,7.0461,31849307
675,2016-08-05,7.1539,7.2385,7.0538,7.0615,38740006
676,2016-08-04,7.1769,7.2,7.0615,7.1769,27544422
677,2016-08-03,7.2308,7.2769,7.1385,7.1692,33128103
678,2016-08-02,7.1077,7.3,7.0538,7.2615,39938440


In [210]:
# Wrap the filtered frame in stock_data (its definition is not among the cells shown here).
# The info() report printed by the next cell shows stock.data indexed by date with the
# columns open, high, low, close and volume.
stock = stock_data(dfTest)

In [211]:
# Create the model wrapper. No model or scaler is passed, so the constructor prints its
# "not created" notices followed by the info() report of stock.data.
lstm = lstm_model(stock)

Model not created
lstm dataset not created
Data read: 
<class 'pandas.core.frame.DataFrame'>
Index: 679 entries, 2019-05-22 to 2016-08-02
Data columns (total 5 columns):
open      679 non-null float64
high      679 non-null float64
low       679 non-null float64
close     679 non-null float64
volume    679 non-null int64
dtypes: float64(4), int64(1)
memory usage: 31.8+ KB
None
please create lstm dataset using create_lstm_data()


In [212]:
# seq=10 rows per input window, fs=1 future step per target, ns=[1,5], windows=[1,5];
# ts keeps its default of 0.1. This call must come before create_lstm_data(), which reads
# the seq / fs / skip / trn_len attributes set here.
lstm.create_features_data(10, 1, [1,5], [1,5])

In [213]:
# Build the scaled raw-column windows and the targets, predicting the 'close' column.
lstm.create_lstm_data('close')

creating new lstm dataset


  return self.partial_fit(X, y)


In [216]:
# Sanity check: does the engineered-feature training array contain any NaN?
# The recorded result is True, which is consistent with the "loss: nan" reported by the
# training run below -- the NaN rows should be located and handled before fitting.
np.isnan(lstm.features_xtrain).any()

True

In [204]:
# Build and compile the two-input Keras model and print its summary.
# NOTE(review): this cell's execution count (204) is lower than that of the class
# definition cell (206), so the recorded summary comes from an earlier run; re-execute
# the notebook top to bottom to refresh it.
lstm.multi_input_model()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
feat_inp (InputLayer)           (None, 10, 30)       0                                            
__________________________________________________________________________________________________
lstm_inp (InputLayer)           (None, 10, 5)        0                                            
__________________________________________________________________________________________________
lstm_34 (LSTM)                  (None, 10, 60)       21840       feat_inp[0][0]                   
__________________________________________________________________________________________________
lstm_36 (LSTM)                  (None, 10, 32)       4864        lstm_inp[0][0]                   
__________________________________________________________________________________________________
lstm_35 (L

In [205]:
# Fit with the defaults (batch size 1, up to 100 epochs).
# Recorded run: 488 training / 123 validation samples, the loss was already nan in
# epoch 2, and the run ended with a KeyboardInterrupt.
lstm.train_multi_input()

Train on 488 samples, validate on 123 samples
Epoch 1/100


  if self.monitor_op(current - self.min_delta, self.best):


Epoch 2/100
  3/488 [..............................] - ETA: 22s - loss: nan

  self.monitor_op = lambda a, b: np.less(a, b - self.min_delta)




KeyboardInterrupt: 