Import the necessary libraries

In [1]:
from src.loader import DataLoader, ModelLoder
from src.preprocessing import Preprocess
from src.model import CustomModel
from src.postprocessing import calculate_rmse

Provide a few variables

In [2]:
# Run configuration shared by the cells below.
ticker = 'IPGP'  # stock symbol; also handed to the model loader further down
rolling_window = 20  # passed to set_rolling_window() on the preprocessor and the model loader
forecast_horizon = 20  # passed to set_forecast_horizon() on the preprocessor and the model loader
test_size = 0.3 # passed to set_test_size(); a fraction (0.3 = 30 %), not a percentage

Instantiate data loader

In [3]:
# Build the loader from the configured `ticker` rather than repeating the
# literal 'IPGP', so the data always matches the symbol used for the model.
data = DataLoader(ticker=ticker)
data.set_datadir(dirname='data/')
# Read the locally stored, already preprocessed file for this ticker.
data.read_local(preprocessed=True)

Instantiating data loader
Reading from: data/Data_for_IPGP.csv


Instantiate preprocessor

In [4]:
# Create the preprocessor and hand it the loaded frame plus the split settings.
preprocessor = Preprocess()
preprocessor.set_df(data.df_processed)
preprocessor.dropna()  # presumably drops rows with missing values — confirm in src.preprocessing
preprocessor.set_rolling_window(rolling_window) 
preprocessor.set_test_size(test_size)

Instantiating preprocessor


Split dataframe

In [5]:
# Split the frame into train / test / predict parts. The result is indexed in
# later cells with the keys 'df_train', 'df_test' and 'df_predict'; the call
# also prints the date boundaries of each part.
data_split = preprocessor.generate_train_test_predict_split()

Training data starts at 2007-03-13 00:00:00
Training - test split at 2016-04-05 00:00:00
Testing data ends at 2020-02-25 00:00:00

prediction data starts at 2020-02-26 00:00:00
prediction data ends at 2020-03-24 00:00:00


Check the splits

In [6]:
# Preview the first rows of every split; each preview is preceded by a blank
# line and a caption.
split_previews = (
    ('first 5 rows from training split', 'df_train'),
    ('first 5 rows from test split', 'df_test'),
    ('first 5 rows from forecast split', 'df_predict'),
)
for caption, split_key in split_previews:
    print()
    print(caption)
    print(data_split[split_key].head(5))


first 5 rows from training split
            Adj Close    HL_PCT  PCT_change  Volume    MA_5    MA_20  \
Date                                                                   
2007-03-13  19.799999  2.290076    0.000000  119600  20.050  23.0290   
2007-03-14  19.090000  5.876131   -3.585854  265300  19.792  22.7305   
2007-03-15  18.020000  7.254459   -4.857450  482100  19.336  22.3795   
2007-03-16  18.230000  3.093358    1.277778  304200  19.002  22.0820   
2007-03-19  18.690001  4.547970    3.660566  231400  18.766  21.7790   

                MA_60      EMA_5    up_band  mid_band   low_band        ADX  \
Date                                                                          
2007-03-13  24.602833  22.412000  20.530665    20.050  19.569334  19.423748   
2007-03-14  24.494333  22.004000  20.576183    19.792  19.007817  20.829601   
2007-03-15  24.375500  21.538667  20.781246    19.336  17.890754  22.414110   
2007-03-16  24.269333  21.119333  20.540384    19.002  17.463616  

Set forecast horizon and calculate sequence length for training and validation

In [7]:
# Set the forecast horizon, then let the preprocessor derive the sequence length.
# NOTE(review): calculate_sequence_length() presumably combines rolling_window and
# forecast_horizon — confirm in src.preprocessing.
preprocessor.set_forecast_horizon(forecast_horizon)
preprocessor.calculate_sequence_length()

Prepare training set

In [8]:
# Normalise the training split and keep the scalers returned alongside it.
ndata_train = preprocessor.normalise_dataframe(
    df=data_split['df_train'],
    step=1,
    standard_norm=True,
)
scalers_train = ndata_train['scalers']

# Turn the normalised data into model inputs (features) and targets (labels).
pdata_train = preprocessor.prepare_feature_and_label(
    data_list=ndata_train['normalised_data'],
)
train_data, X_train, y_train = (
    pdata_train[key] for key in ('data', 'features', 'labels')
)

Prepare test set

In [9]:
# Normalise the test split and keep the scalers returned alongside it; they
# are handed to calculate_rmse later on.
ndata_test = preprocessor.normalise_dataframe(
    df=data_split['df_test'],
    step=1,
    standard_norm=True,
)
scalers_test = ndata_test['scalers']

# Turn the normalised data into model inputs (features) and targets (labels).
pdata_test = preprocessor.prepare_feature_and_label(
    data_list=ndata_test['normalised_data'],
)
test_data, X_test, y_test = (
    pdata_test[key] for key in ('data', 'features', 'labels')
)

Inspect shape of training and test set

In [10]:
# Report the shape of every prepared array, one group per split. The last
# entry of each group is the label array, which is also reported squeezed.
shape_groups = (
    (('train_data', train_data), ('X_train', X_train), ('y_train', y_train)),
    (('test_data', test_data), ('X_test', X_test), ('y_test', y_test)),
)
for group in shape_groups:
    print()
    for array_name, array in group:
        print(array_name + '.shape', array.shape)
    labels_name, labels = group[-1]
    print(labels_name + '.squeeze().shape', labels.squeeze().shape)


train_data.shape (2244, 40, 19)
X_train.shape (2244, 20, 18)
y_train.shape (2244, 20, 1)
y_train.squeeze().shape (2244, 20)

test_data.shape (940, 40, 19)
X_test.shape (940, 20, 18)
y_test.shape (940, 20, 1)
y_test.squeeze().shape (940, 20)


Load model

In [11]:
# Configure the loader with the model directory, ticker, rolling window and
# forecast horizon, then read the saved model from disk.
model_loader = ModelLoder()  # spelling matches the name imported from src.loader
model_loader.set_model_dir(dirname='model/')
model_loader.set_ticker(ticker=ticker)
model_loader.set_rolling_window(rolling_window)
model_loader.set_forecast_horizon(forecast_horizon)

# read_model_local() populates model_loader.model, which is used from here on.
model_loader.read_model_local()
model = model_loader.model

Loading model from model/LSTM_IPGP_20_20.h5


Print model summary

In [12]:
# Keras' summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the cell output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 400)               670400    
                                                                 
 repeat_vector_1 (RepeatVec  (None, 20, 400)           0         
 tor)                                                            
                                                                 
 lstm_2 (LSTM)               (None, 20, 400)           1281600   
                                                                 
 time_distributed_1 (TimeDi  (None, 20, 1)             401       
 stributed)                                                      
                                                                 
 dropout_1 (Dropout)         (None, 20, 1)             0         
                                                                 
 flatten_1 (Flatten)         (None, 20)               

Predict on test set

In [13]:
# Run the loaded model on the test features; the result is compared with the
# squeezed test labels in the following cells.
predicts = model.predict(X_test) # shape: (sample, output)



Calculate error on test set

In [14]:
# Squeeze the labels and print their shape next to the predictions' shape so
# the two can be compared before scoring.
y_test_sq = y_test.squeeze()
for shape_label, array in (('y_test_sq.shape', y_test_sq), ('predicts.shape', predicts)):
    print(shape_label, array.shape)

y_test_sq.shape (940, 20)
predicts.shape (940, 20)


In [15]:
# Score the test predictions; calculate_rmse also prints the average error.
rmse_in_usd = calculate_rmse(
    scalers=scalers_test,
    data=test_data,
    labels=y_test_sq,
    predicts=predicts,
    splitname='test',
)

The avearge root mse on test data is: 5.51


In [16]:
# NOTE(review): this whole cell is disabled (commented-out) plotting code.
# It refers to names that are not defined in the cells visible in this
# notebook (`plt`, `stock_symbol`, `invert_true_test`, `invert_predict_test`),
# so it would need those defined before it can be re-enabled.
# TODO: either restore it with the missing definitions or remove the cell.

# window_size = rolling_window + forecast_horizon

# # plot some prediction results of the validation data set.
# nrows = 4
# ncols = 3

# f, axes = plt.subplots(nrows, ncols, sharex=True, figsize = (10,8))
# f.suptitle("Price predictions for {} in the next 20 days".format(stock_symbol), fontsize=16)

# i = 0

# for ax in axes.flatten():
   
#    ax.plot(invert_true_test[i], 'b', label = "True data",linewidth=2)
#    ax.plot(invert_predict_test[i], 'r', label = 'Prediction',linewidth=2)
   
#    i += window_size
   
# # compare the price change in the next 20 days

# gain = []  # the gain/loss is percentage terms for the next 20 days

# for p in range(0, len(invert_predict_test), 20):
#    prices = invert_predict_test[p]
#    gain.append( (prices[-1]/prices[0] - 1 ) * 100)
   
# _ = plt.hist(gain, bins='auto') 

Train model

In [17]:
# NOTE(review): this cell is disabled (commented-out) training code; the
# notebook uses the model loaded from disk instead. If re-enabled as written it
# would rebind `model`, replacing the loaded model.
# TODO: complete the fitting step or remove the cell.

# model = CustomModel()

# drop_rate=0.1
# latent_n=400
# feature_n= X_train.shape[2]

# model = model.build_model(
#     input_n=rolling_window,
#     output_n=forecast_horizon,
#     drop_rate=drop_rate,
#     latent_n=latent_n,
#     feature_n=feature_n
#     )

# # fit model here