In [21]:
# LSTM for international airline passengers problem with regression framing
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [22]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
	dataX, dataY = [], []
	for i in range(len(dataset)-look_back-1):
		a = dataset[i:(i+look_back), 0]
		dataX.append(a)
		dataY.append(dataset[i + look_back, 0])
	return np.array(dataX), np.array(dataY)

In [23]:
# fix random seed for reproducibility
tf.random.set_seed(7)

# load the dataset
dataframe = pd.read_csv('../data/history_data.csv', engine='python') #df["Close"]
dataset = dataframe[['Close']].values
dataset = dataset.astype('float32')
dataset[:5]

array([[24520.14],
       [24452.41],
       [24433.03],
       [24617.9 ],
       [24659.87]], dtype=float32)

In [24]:
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
dataset[:5]

array([[0.94028234],
       [0.92943263],
       [0.9263282 ],
       [0.95594215],
       [0.9626651 ]], dtype=float32)

In [25]:
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [26]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

2022-09-12 20:10:44.372125: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-09-12 20:10:44.372418: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-12 20:10:44.530341: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)


Epoch 1/100
119/119 - 1s - loss: 0.2234
Epoch 2/100
119/119 - 0s - loss: 0.1158
Epoch 3/100
119/119 - 0s - loss: 0.0573
Epoch 4/100
119/119 - 0s - loss: 0.0400
Epoch 5/100
119/119 - 0s - loss: 0.0349
Epoch 6/100
119/119 - 0s - loss: 0.0308
Epoch 7/100
119/119 - 0s - loss: 0.0270
Epoch 8/100
119/119 - 0s - loss: 0.0233
Epoch 9/100
119/119 - 0s - loss: 0.0196
Epoch 10/100
119/119 - 0s - loss: 0.0163
Epoch 11/100
119/119 - 0s - loss: 0.0130
Epoch 12/100
119/119 - 0s - loss: 0.0102
Epoch 13/100
119/119 - 0s - loss: 0.0078
Epoch 14/100
119/119 - 0s - loss: 0.0057
Epoch 15/100
119/119 - 0s - loss: 0.0041
Epoch 16/100
119/119 - 0s - loss: 0.0030
Epoch 17/100
119/119 - 0s - loss: 0.0023
Epoch 18/100
119/119 - 0s - loss: 0.0018
Epoch 19/100
119/119 - 0s - loss: 0.0015
Epoch 20/100
119/119 - 0s - loss: 0.0013
Epoch 21/100
119/119 - 0s - loss: 0.0013
Epoch 22/100
119/119 - 0s - loss: 0.0012
Epoch 23/100
119/119 - 0s - loss: 0.0012
Epoch 24/100
119/119 - 0s - loss: 0.0012
Epoch 25/100
119/119 - 0s

<tensorflow.python.keras.callbacks.History at 0x7f8aaaf527c0>

In [27]:
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

In [28]:
ms_in_hour = 3.6e6
last_date = dataframe.iloc[-1,0]

X = testX[-1].reshape(1,1,1)

y = np.array([])
dates = np.array([])
predictions = 6
n = 0

while(n<predictions):    
    y_pred = model.predict(X)
    y = np.append(y, y_pred)

    next_date = 4 * ms_in_hour + last_date
    dates = np.append(dates, next_date)    

    X=y_pred.reshape(1,1,1)
    last_date = next_date
    n+=1

In [29]:
y = np.expand_dims(y, axis=0)
f = scaler.inverse_transform(y)
f = np.reshape(f,-1)

In [30]:
pred = pd.DataFrame(np.vstack((dates, f))).T
pred.columns = ["Time", "Forecasted Price"]
#pred["Time"] = pd.to_datetime(pred.Time, unit='ms')
pred

Unnamed: 0,Time,Forecasted Price
0,1663013000000.0,21771.946985
1,1663027000000.0,21739.077566
2,1663042000000.0,21705.989542
3,1663056000000.0,21672.712309
4,1663070000000.0,21639.278052
5,1663085000000.0,21605.721934


In [31]:
pred.to_csv("../data/ForecastedPrices.csv")