Importing The Dependencies

In [1]:
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
tf.__version__
tf.random.set_seed(3)

In [2]:
dataDaily = pd.read_csv('daily.csv')
dataDaily.head()
dataWeekly = pd.read_csv('weekly.csv')
dataMonthly = pd.read_csv('monthly.csv')
dataMonthly['Day'] = dataMonthly['Day'].astype('datetime64[ns]')
dataWeekly['Day'] = dataWeekly['Day'].astype('datetime64[ns]')
dataDaily[5000:5700].tail()

Unnamed: 0,Day,Price
5695,20190826,2.23
5696,20190827,2.24
5697,20190828,2.24
5698,20190829,2.36
5699,20190830,2.33


In [3]:
df = dataMonthly['Price']
print(len(dataDaily))

6341


EDA:

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

dc = seasonal_decompose(df, model='multiplicative')
trend = dc.trend
seasonality = dc.seasonal
residual = dc.resid

plt.figure(figsize=(10, 6))
plt.grid(True)
plt.subplot(411)
plt.plot(df, label='Actual Data')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonality, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residual')
plt.legend(loc='best')

Prepping the datasets

In [4]:
s1 = MinMaxScaler(feature_range=(0, 1))

def dailySets(df, size):
  for i in range(len(df)-size):
    temp = [[item[0]] for item in df[i:i+size]]
    xData.append(temp)
    yData.append([df[i+size][0]])

def monthlySets(lookback, pred):
    dataTrain = dataDaily['Price'].to_numpy()
    xData = []
    yData = []

    for i in range(lookback, len(dataTrain)-lookback):
        temp = []
        for item in dataTrain[i-lookback:i]:
            temp.append([item])
        xData.append(temp)
        yData.append(dataTrain[i:i+pred])

    xData = tf.convert_to_tensor(xData, dtype=tf.float32)
    yData = tf.convert_to_tensor(yData, dtype=tf.float32)
    xData = s1.fit_transform(tf.reshape(xData, (-1, 1)))
    yData = s1.fit_transform(tf.reshape(yData, (-1, 1)))

    xData = tf.reshape(xData, (-1, lookback, 1))
    yData = tf.reshape(yData, (-1, pred))
    return (xData, yData)


-> LSTM Model:

Multi step:

In [5]:
model1 = tf.keras.Sequential()
model1.add(tf.keras.layers.LSTM(64, input_shape = (180,1), return_sequences = True))
model1.add(tf.keras.layers.Flatten())
model1.add(tf.keras.layers.Dense(30))
model1.compile(loss='mean_squared_error',optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=['mse', 'mae', 'mape'])
model1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 180, 64)           16896     
_________________________________________________________________
flatten (Flatten)            (None, 11520)             0         
_________________________________________________________________
dense (Dense)                (None, 30)                345630    
Total params: 362,526
Trainable params: 362,526
Non-trainable params: 0
_________________________________________________________________


In [6]:
xData, yData = monthlySets(180, 30)
xTrain = xData[:5000]
yTrain = yData[:5000]

xTest = xData[5000:]
yTest = yData[5000:]

print(xTest.shape, yTest.shape)

model1.fit(xTrain, yTrain, epochs = 5)
model1.save('monthlyModelLSTM.h5')

(981, 180, 1) (981, 30)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Single step:

In [227]:
model4 = tf.keras.Sequential()
model4.add(tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(100, 1)))
model4.add(tf.keras.layers.Flatten())
model4.add(tf.keras.layers.Dense(1))
model4.compile(loss='mean_squared_error',optimizer=tf.keras.optimizers.Adam(), metrics = ['mse', 'mae', 'mape'])
# model4.summary()

In [228]:
df = dataDaily['Price'][:6000]                 
print("Total Size:", len(df))
scaler = MinMaxScaler(feature_range=(0, 1))
dfPrice = scaler.fit_transform(np.array(df).reshape(-1, 1))
SIZE = 100

xData = []
yData = []

dailySets(dfPrice, SIZE)
xTrain = np.array(xData[:5500])
yTrain = np.array(yData[:5500])
xTest = np.array(xData[5500:])
yTest = np.array(yData[5500:])

model4.fit(xTrain, yTrain, epochs=5)
model4.save('dailyModelLSTM.h5')

Total Size: 6000
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


-> CNN Model:

Multi Step:

In [229]:
model2 = tf.keras.Sequential()
model2.add(tf.keras.layers.Conv1D(64, kernel_size=(4), input_shape=(180, 1)))
model2.add(tf.keras.layers.MaxPool1D(pool_size=3, strides=(1), padding='valid'))
model2.add(tf.keras.layers.Flatten())
model2.add(tf.keras.layers.Dense(30))
model2.compile(loss='mean_squared_error',optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=['mse', 'mae', 'mape'])
model2.summary()

Model: "sequential_56"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_35 (Conv1D)           (None, 177, 64)           320       
_________________________________________________________________
max_pooling1d_24 (MaxPooling (None, 175, 64)           0         
_________________________________________________________________
flatten_51 (Flatten)         (None, 11200)             0         
_________________________________________________________________
dense_57 (Dense)             (None, 30)                336030    
Total params: 336,350
Trainable params: 336,350
Non-trainable params: 0
_________________________________________________________________


In [254]:
xData, yData = monthlySets(180, 30)
xTrain = xData[:5000]
yTrain = yData[:5000]

xTest = xData[5000:]
yTest = yData[5000:]

print(xTest.shape, yTest.shape)


model2.fit(xTrain, yTrain, epochs = 5)
model2.save('monthlyModelCNN.h5')

(981, 180, 1) (981, 30)


Single step:

In [231]:
model5 = tf.keras.Sequential()
model5.add(tf.keras.layers.Conv1D(64, kernel_size=(4), input_shape=(100, 1)))
model5.add(tf.keras.layers.MaxPool1D(pool_size=3, strides=(1), padding='valid'))
model5.add(tf.keras.layers.Flatten())
model5.add(tf.keras.layers.Dense(1))
model5.compile(loss='mean_squared_error',optimizer=tf.keras.optimizers.Adam(), metrics = ['mse', 'mae', 'mape'])
model5.summary()

Model: "sequential_57"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_36 (Conv1D)           (None, 97, 64)            320       
_________________________________________________________________
max_pooling1d_25 (MaxPooling (None, 95, 64)            0         
_________________________________________________________________
flatten_52 (Flatten)         (None, 6080)              0         
_________________________________________________________________
dense_58 (Dense)             (None, 1)                 6081      
Total params: 6,401
Trainable params: 6,401
Non-trainable params: 0
_________________________________________________________________


In [232]:
df = dataDaily['Price'][:6000]                 
print("Total Size:", len(df))
scaler = MinMaxScaler(feature_range=(0, 1))
dfPrice = scaler.fit_transform(np.array(df).reshape(-1, 1))
SIZE = 100

xData = []
yData = []

dailySets(dfPrice, SIZE)
xTrain = np.array(xData[:5500])
yTrain = np.array(yData[:5500])
xTest = np.array(xData[5500:])
yTest = np.array(yData[5500:])

model5.fit(xTrain, yTrain, epochs=5)
model5.save('dailyModelCNN.h5')

Total Size: 6000
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


-> CNN-LSTM Hybrid Model

Multi Step

In [268]:
model3 = tf.keras.Sequential()
model3.add(tf.keras.layers.Conv1D(32, kernel_size=(3), input_shape=(180, 1)))
model3.add(tf.keras.layers.MaxPool1D(pool_size=3, strides=(1), padding='valid'))
model3.add(tf.keras.layers.LSTM(64, return_sequences=True))
model3.add(tf.keras.layers.Flatten())
model3.add(tf.keras.layers.Dense(30))
model3.compile(loss='mean_squared_error',optimizer=tf.keras.optimizers.Adam(), metrics=['mse', 'mae', 'mape'])
model3.summary()

Model: "sequential_66"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_45 (Conv1D)           (None, 178, 32)           128       
_________________________________________________________________
max_pooling1d_30 (MaxPooling (None, 176, 32)           0         
_________________________________________________________________
lstm_63 (LSTM)               (None, 176, 64)           24832     
_________________________________________________________________
flatten_61 (Flatten)         (None, 11264)             0         
_________________________________________________________________
dense_67 (Dense)             (None, 30)                337950    
Total params: 362,910
Trainable params: 362,910
Non-trainable params: 0
_________________________________________________________________


In [295]:
xData, yData = monthlySets(180, 30)
xTrain = xData[:5000]
yTrain = yData[:5000]

xTest = xData[5000:]
yTest = yData[5000:]

print(xTest.shape, yTest.shape)


model3.fit(xTrain, yTrain, epochs = 5, shuffle=True)
model3.save('monthlyModelCNN-LSTM.h5')

(981, 180, 1) (981, 30)


Single Step

In [250]:
model6 = tf.keras.Sequential()
model6.add(tf.keras.layers.Conv1D(32, kernel_size=(2), input_shape=(100, 1)))
model6.add(tf.keras.layers.LSTM(32, return_sequences=True))
model6.add(tf.keras.layers.Flatten())
model6.add(tf.keras.layers.Dense(1))
model6.compile(loss='mean_squared_error',optimizer=tf.keras.optimizers.Adam(), metrics=['mse', 'mae', 'mape'])
# model6.summary()

In [13]:
df = dataDaily['Price'][:6000]                 
print("Total Size:", len(df))
scaler = MinMaxScaler(feature_range=(0, 1))
dfPrice = scaler.fit_transform(np.array(df).reshape(-1, 1))
SIZE = 100

xData = []
yData = []

dailySets(dfPrice, SIZE)
xTrain = np.array(xData[:5500])
yTrain = np.array(yData[:5500])
xTest = np.array(xData[5500:])
yTest = np.array(yData[5500:])

model6.fit(xTrain, yTrain, epochs=5)
model6.save('dailyModelCNNLSTM.h5')

Total Size: 6000


NameError: name 'model6' is not defined

In [8]:
lstm = tf.keras.models.load_model('monthlyModelLSTM.h5')
cnn = tf.keras.models.load_model('monthlyModelCNN.h5')
cnnlstm = tf.keras.models.load_model('monthlyModelCNN-LSTM.h5')

# lstmSing = tf.keras.models.load_model('dailyModelLSTM.h5')
# cnnSing = tf.keras.models.load_model('dailyModelCNN.h5')
# cnnlstmSing = tf.keras.models.load_model('dailyModelCNNLSTM.h5')

In [16]:
from sklearn.metrics import mean_absolute_percentage_error
# plt.style.use('dark_background')

lstmPred = dataDaily['Price'].to_list()[5970:6000]
cnnPred = dataDaily['Price'].to_list()[5970:6000]
cnnlstmPred = dataDaily['Price'].to_list()[5970:6000]
truth = dataDaily['Price'].to_list()[5970:6000]

index = 10
lstmPred.extend(s1.inverse_transform(lstm.predict(np.expand_dims(xTest[index], axis=0))).flatten())
cnnPred.extend(s1.inverse_transform(cnn.predict(np.expand_dims(xTest[index], axis=0))).flatten())
cnnlstmPred.extend(s1.inverse_transform(cnnlstm.predict(np.expand_dims(xTest[index], axis=0))).flatten())
truth.extend(s1.inverse_transform(tf.reshape(yTest[index], (-1, 1))))
df = pd.DataFrame(data={'CNN': cnnPred, 'LSTM': lstmPred, 'CNN-LSTM': cnnlstmPred, 'Actual': truth})
plt.figure(figsize=(12, 8), dpi=108)
plt.grid(True)
plt.title('Multi-step Prediction Model Comparisons')
plt.plot(df, label=['CNN', 'LSTM', 'Hybrid', 'Actual Price'])
plt.legend()
plt.show()

# mapeObject = tf.keras.losses.MeanAbsolutePercentageError()
# mseObject = tf.keras.losses.MeanSquaredError()
# maeObject = tf.keras.losses.MeanAbsoluteError()
# mape = mapeObject(df['truth'], df['res']).numpy()
# mse = mseObject(df['truth'], df['res']).numpy()
# mae = maeObject(df['truth'], df['res']).numpy()



ValueError: in user code:

    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\training.py:1586 predict_function  *
        return step_function(self, iterator)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\training.py:1576 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\training.py:1569 run_step  **
        outputs = model.predict_step(data)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\training.py:1537 predict_step
        return self(x, training=False)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\sequential.py:369 call
        return super(Sequential, self).call(inputs, training=training, mask=mask)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\functional.py:414 call
        return self._run_internal_graph(
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\functional.py:550 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\007ra\.conda\envs\dataScience\lib\site-packages\keras\engine\input_spec.py:250 assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer dense is incompatible with the layer: expected axis -1 of input shape to have value 11520 but received input with shape (None, 6400)


In [10]:
end = 700

l = lstm.evaluate(xTest[:end], yTest[:end])
c = cnn.evaluate(xTest[:end], yTest[:end])
cl = cnnlstm.evaluate(xTest[:end], yTest[:end])



In [289]:
print(l, c, cl, sep = "\n")

[0.0003568771353457123, 0.0003568771353457123, 0.010468926280736923, 7.711786270141602]
[0.00038141122786328197, 0.00038141122786328197, 0.011915425769984722, 9.287092208862305]
[0.00031482407939620316, 0.00031482407939620316, 0.00955883227288723, 7.152036190032959]


Predicting 1 day into the Future:

In [11]:
def dailySets(df, size):
  for i in range(len(df)-size):
    temp = [[item[0]] for item in df[i:i+size]]
    xData.append(temp)
    yData.append([df[i+size][0]])

In [14]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error


dlstm = tf.keras.models.load_model('dailyModelLSTM.h5')
dcnn = tf.keras.models.load_model('dailyModelCNN.h5')
dcnnlstm = tf.keras.models.load_model('dailyModelCNNLSTM.h5')

analysis1 = scaler.inverse_transform(dlstm.predict(xTest)).flatten()
analysis2 = scaler.inverse_transform(dcnn.predict(xTest)).flatten()
analysis3 = scaler.inverse_transform(dcnnlstm.predict(xTest)).flatten()
truth = scaler.inverse_transform(yTest).flatten()

dfAnalysis = pd.DataFrame(data={'LSTM': analysis1[:300], 'CNN': analysis2[:300], 'CNNLSTM': analysis3[:300], 'Actual Value': truth[:300],})
plt.figure(figsize=(12, 8))
plt.grid(True)
plt.plot(dfAnalysis, label=['LSTM', 'CNN', 'Hybrid', 'Actual Price'])
plt.legend()
plt.show()

print(mean_absolute_percentage_error(truth[:300], analysis1[:300]))

NotFittedError: This MinMaxScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [305]:
ends = 300

lS = dlstm.evaluate(xTest[:ends], yTest[:ends])
cS = dcnn.evaluate(xTest[:ends], yTest[:ends])
clS = dcnnlstm.evaluate(xTest[:ends], yTest[:ends])

print(lS, cS, clS, sep='\n')

[5.1407052524155006e-05, 5.1407052524155006e-05, 0.005926220677793026, 5.115419864654541]
[3.789634502027184e-05, 3.789634502027184e-05, 0.004840700421482325, 4.173659801483154]
[5.573703674599528e-05, 5.573703674599528e-05, 0.006208572071045637, 5.385193347930908]


In [28]:
def getNextDayPred():
    lastData = dataDaily['Price'].to_numpy()[-100:]
    lastData = scaler.fit_transform(lastData.reshape(100, 1))
    day = dt.datetime.strptime(str(dataDaily['Day'].tolist()[-1]), '%Y%m%d').date()+dt.timedelta(days=1)
    return [day, scaler.inverse_transform(model4.predict(np.expand_dims(lastData, axis=0)))[0][0]]

print(getNextDayPred())

[datetime.date(2022, 3, 2), 4.353938]
