In [22]:
#@title Packages

import pandas as pd
import numpy as np
import random
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error  
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt  

In [63]:
#@title Read and Prepare Data

def read_prepare_data(symbol,
                      csv_path='/Users/pedroalexleite/Desktop/Tese/Dados/dataset4.csv',
                      max_rows=15):
    """Load one symbol's rows from the CSV, split them and scale them.

    Parameters
    ----------
    symbol : str
        Value matched against the 'Symbol' column of the CSV.
    csv_path : str, optional
        Location of the CSV file. Defaults to the path this notebook was
        written against, so existing calls keep working unchanged.
    max_rows : int or None, optional
        Keep only the first `max_rows` rows of the selected symbol (default 15,
        the size used in this walkthrough). Pass None to keep every row.

    Returns
    -------
    scaler : MinMaxScaler
        Scaler fitted on the training rows only, with feature_range (-1, 1).
    data, train, test : pandas.DataFrame
        The scaled frame, its first two thirds and the remaining rows. All are
        indexed by 'Date' and keep 'Close' as the last column.

    Raises
    ------
    ValueError
        If the selection leaves no rows to fit the scaler on.
    """
    #read
    raw = pd.read_csv(csv_path)

    #we're going to use only one symbol and only the price/volume variables;
    #'Close' stays last because create_dataset takes the target from the last column
    columns = ['Date', 'Open', 'High', 'Low', 'Volume', 'Close']
    data = raw.loc[raw['Symbol'] == symbol, columns].set_index('Date')

    #optionally keep only the first rows (slicing with None keeps everything)
    data = data[:max_rows]

    #split by row order (assumes the CSV rows are already sorted by date)
    train_size = int(len(data)*2/3)
    train = data[:train_size]
    test = data[train_size:]
    if train.empty:
        raise ValueError("no training rows available for symbol %r" % (symbol,))

    print("Data:")
    print(data.head(15))
    print("Train:")
    print(train.head())
    print("Test:")
    print(test)

    #normalize: fit on train only so no test information leaks into the scaling
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train = pd.DataFrame(scaler.fit_transform(train), columns=train.columns, index=train.index)
    test = pd.DataFrame(scaler.transform(test), columns=test.columns, index=test.index)
    data = pd.DataFrame(scaler.transform(data), columns=data.columns, index=data.index)

    return scaler, data, train, test

#prepare the AAPL series; the cells below reuse scaler, data, train and test
scaler, data, train, test = read_prepare_data('AAPL')

#show the scaled training rows that the label announces
print("Train (Norm):")
print(train.head())

Data:
              Open    High     Low      Volume   Close
Date                                                  
2021-11-01  148.99  149.70  147.80  18841481.0  148.96
2021-11-02  148.66  151.57  148.65  18841481.0  150.02
2021-11-03  150.39  151.97  149.82  18841481.0  151.49
2021-11-04  151.58  152.43  150.64  18841481.0  150.96
2021-11-05  151.89  152.20  150.06  18841481.0  151.28
2021-11-06  151.89  152.20  150.06  18841481.0  151.28
2021-11-07  151.89  152.20  150.06  18841481.0  151.28
2021-11-08  151.41  151.57  150.16  18841481.0  150.44
2021-11-09  150.20  151.43  150.06  18841481.0  150.81
2021-11-10  150.02  150.13  147.85  18841481.0  147.92
2021-11-11  148.96  149.43  147.68  18841481.0  147.87
2021-11-12  148.43  150.40  147.48  18841481.0  149.99
2021-11-13  148.43  150.40  147.48  18841481.0  149.99
2021-11-14  148.43  150.40  147.48  18841481.0  149.99
2021-11-15  150.37  151.88  149.43  18841481.0  150.00
Train:
              Open    High     Low      Volume   Clo

In [57]:
#@title Create Dataset

def create_dataset(dataframe, look_back):
    """Cut a frame into sliding windows and their next-step targets.

    Each window holds `look_back` consecutive rows with every column; its
    target is the last column of the row that immediately follows the window.

    Returns a pair of NumPy arrays: the stacked windows and the targets.
    """
    values = dataframe.values

    #the bound keeps the original "- 1", so the final possible window is not emitted
    starts = range(len(values) - look_back - 1)

    windows = [values[start:start + look_back] for start in starts]
    targets = [values[start + look_back, -1] for start in starts]

    return np.array(windows), np.array(targets)

In [58]:
#build two-step windows from the scaled training frame and show windows, a separator, then targets
trainX, trainY = create_dataset(train, 2)
print(trainX, "...", trainY, sep="\n")

[[[-0.79566563 -1.         -1.         -1.         -0.41736695]
  [-1.          0.36996337 -0.40140845 -1.          0.17647059]]

 [[-1.          0.36996337 -0.40140845 -1.          0.17647059]
  [ 0.07120743  0.66300366  0.42253521 -1.          1.        ]]

 [[ 0.07120743  0.66300366  0.42253521 -1.          1.        ]
  [ 0.80804954  1.          1.         -1.          0.70308123]]

 [[ 0.80804954  1.          1.         -1.          0.70308123]
  [ 1.          0.83150183  0.5915493  -1.          0.88235294]]

 [[ 1.          0.83150183  0.5915493  -1.          0.88235294]
  [ 1.          0.83150183  0.5915493  -1.          0.88235294]]

 [[ 1.          0.83150183  0.5915493  -1.          0.88235294]
  [ 1.          0.83150183  0.5915493  -1.          0.88235294]]

 [[ 1.          0.83150183  0.5915493  -1.          0.88235294]
  [ 0.70278638  0.36996337  0.66197183 -1.          0.41176471]]]
...
[1.         0.70308123 0.88235294 0.88235294 0.88235294 0.41176471
 0.61904762]


In [59]:
#@title Reshape

def reshape(train, test, look_back):
    """Window the train and test frames and return model-ready arrays.

    Calls create_dataset on each frame, then re-applies each window array's own
    three dimensions to it.

    Returns trainX, trainY, testX, testY in that order.
    """
    def as_3d(windows):
        #spell out the three axes explicitly, as the original reshape did
        dims = (windows.shape[0], windows.shape[1], windows.shape[2])
        return windows.reshape(dims)

    train_windows, train_targets = create_dataset(train, look_back)
    test_windows, test_targets = create_dataset(test, look_back)

    return as_3d(train_windows), train_targets, as_3d(test_windows), test_targets

In [60]:
#@title Forecast

def LSTM_forecast(testY, look_back, horizon, model, last_sequence):
    """Roll the model forward recursively and return `horizon` predictions.

    Parameters
    ----------
    testY : array-like
        Not used in the computation; kept so existing callers keep working.
    look_back : int
        Number of trailing rows fed to the model at every step.
    horizon : int
        Number of forecast values to return.
    model : object
        Anything with a Keras-style predict(batch, verbose=0) method whose
        output for a single window holds one value.
    last_sequence : numpy.ndarray
        2-D array (rows, features) used as the starting window; it is copied,
        never modified.

    Returns
    -------
    numpy.ndarray
        1-D float array with `horizon` values.

    Notes
    -----
    Every new row repeats the previous row's features and only replaces the
    last column with the prediction. The loop runs horizon + 1 times and the
    first prediction is discarded, exactly as the previous implementation did.
    """
    sequence = last_sequence.copy()
    n_features = sequence.shape[-1]
    predictions = []

    for _ in range(horizon + 1):
        window = np.reshape(sequence[-look_back:], (1, look_back, n_features))

        #take the scalar out of the prediction array explicitly: assigning a
        #length-1 array to a single element is deprecated in NumPy
        step = float(np.ravel(model.predict(window, verbose=0))[0])

        #carry the last known features forward and overwrite only the target column
        next_row = sequence[-1:].copy()
        next_row[0, -1] = step
        sequence = np.vstack([sequence, next_row])
        predictions.append(step)

    #drop the first step to keep the original output alignment
    return np.array(predictions[1:], dtype=float)

In [61]:
#@title Auxiliary Function for the LSTMs

def predict_forecast_plot(data, train, test, trainX, trainY, testX, testY, nepochs, look_back, horizon, plot_predictions, model):
    """Score a fitted model on the train/test windows, forecast ahead and optionally plot.

    Parameters
    ----------
    data, train : pandas.DataFrame
        Scaled full frame and scaled training frame; `train` supplies the
        number of columns the scaler expects.
    test, nepochs
        Not used here; kept so existing callers keep working.
    trainX, trainY, testX, testY : numpy.ndarray
        Windows and targets as produced by reshape().
    look_back, horizon : int
        Window length and number of steps to forecast.
    plot_predictions : bool
        When truthy, draw actual values, train/test predictions and forecast.
    model : object
        Fitted model exposing predict().

    Returns
    -------
    float
        Test RMSE, computed on the inverse-transformed last column.

    Notes
    -----
    Uses the notebook-level `scaler` created by read_prepare_data; that cell
    must have been run before this function is called.
    """
    n_features = train.shape[1]

    def to_original_scale(scaled_last_column):
        #the scaler was fitted on every column, so place the values in the last
        #column of a zero-filled array, invert it and keep only that column
        padded = np.zeros((len(scaled_last_column), n_features))
        padded[:, -1] = scaled_last_column.flatten()
        return scaler.inverse_transform(padded)[:, -1]

    #make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    #forecast (get the last sequence from testX for forecasting)
    last_sequence = testX[-1]
    forecast = LSTM_forecast(testY, look_back, horizon, model, last_sequence)

    #invert predictions and targets back to the original scale
    trainPredict = to_original_scale(trainPredict)
    trainY = to_original_scale(trainY)
    testPredict = to_original_scale(testPredict)
    testY = to_original_scale(testY)
    forecast = to_original_scale(forecast)

    #calculate root mean squared error
    trainScore = np.sqrt(mean_squared_error(trainY, trainPredict))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = np.sqrt(mean_squared_error(testY, testPredict))
    print('Test Score: %.2f RMSE' % (testScore))

    #plot predictions
    if plot_predictions:
        #get the original values of the last column
        original_data = scaler.inverse_transform(data)[:, -1]

        #shift train predictions for plotting: the first look_back rows have no prediction
        trainPredictPlot = np.full_like(original_data, np.nan)
        trainPredictPlot[look_back:len(trainPredict)+look_back] = trainPredict

        #shift test predictions for plotting: the offsets mirror the rows that
        #create_dataset leaves without a prediction in each split
        testPredictPlot = np.full_like(original_data, np.nan)
        testPredictPlot[len(trainPredict)+(look_back*2)+1:len(original_data)-1] = testPredict

        #shift forecast for plotting: it starts right after the known data
        forecastPlot = np.full((len(original_data) + len(forecast),), np.nan)
        forecastPlot[len(original_data):] = forecast

        #plot baseline, predictions and forecast
        plt.figure(figsize=(15,7))
        plt.plot(original_data, label='actual')
        plt.plot(trainPredictPlot, label='train set')
        plt.plot(testPredictPlot, label='test set')
        plt.plot(forecastPlot, label='forecast')
        plt.legend()
        plt.show()

    print(trainPredict)
    print("...")
    print(testPredict)

    return testScore

In [62]:
#@title Train and Predict (LSTM Model 1)

def LSTM_model_1(data, train, test, look_back=1, nepochs=10, horizon=10, plot_predictions=False):
    """Build, fit and evaluate a single-layer LSTM on the windowed data.

    Parameters
    ----------
    data, train, test : pandas.DataFrame
        Scaled full, training and test frames.
    look_back : int
        Number of rows per input window.
    nepochs : int
        Training epochs.
    horizon : int
        Number of steps to forecast after the test data.
    plot_predictions : bool
        Forwarded to predict_forecast_plot.

    Returns
    -------
    float
        Test score as returned by predict_forecast_plot.
    """
    #reshape
    trainX, trainY, testX, testY = reshape(train, test, look_back)

    #create the LSTM network: 1 LSTM layer with 8 units, 0.1 dropout, 1 output unit.
    #The input shape is declared with an explicit Input layer because passing
    #input_shape to the first layer is deprecated in recent Keras versions
    model = Sequential()
    model.add(tf.keras.Input(shape=(trainX.shape[1], trainX.shape[2])))
    model.add(LSTM(8))
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    #fit
    model.fit(trainX, trainY, epochs=nepochs, batch_size=1, verbose=1)

    #predict, forecast and plot
    testScore = predict_forecast_plot(data, train, test, trainX, trainY, testX, testY, nepochs, look_back, horizon, plot_predictions, model)

    return testScore

#fit model 1 on two-row windows and keep the score it returns
testScore = LSTM_model_1(
    data,
    train,
    test,
    look_back=2,
    nepochs=10,
    horizon=10,
    plot_predictions=False,
)

Epoch 1/10


  super().__init__(**kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 1.4706
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.2098 
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.2076 
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.0511 
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0683 
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.9749
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8768 
Epoch 8/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7411 
Epoch 9/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7388 
Epoch 10/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3849 
[1m1/1[0m [32m━━━━━━━━━━━━━━

  new_row[0, -1] = a_predict


Train Score: 0.73 RMSE
Test Score: 1.13 RMSE
[148.90760691 149.01934348 149.0987905  149.11927932 149.11799822
 149.11799822 149.08232397]
...
[148.86638979 148.84954151]
