In [1]:
import numpy as np
import pandas as pd
from numpy import array
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import time

In [2]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    """Slice a 2-D series into overlapping (input window, target window) pairs.

    Parameters
    ----------
    sequences : 2-D array indexed as ``sequences[row, column]``.
    n_steps_in : number of consecutive rows in each input window.
    n_steps_out : number of rows, taken right after the input window, whose
        column 0 forms the target.

    Returns
    -------
    (X, y) : X stacks the input windows (all columns), y stacks the matching
        targets (column 0 only). Both are built with ``array`` from the
        collected windows, so they are empty 1-D arrays when no window fits.
    """
    total_rows = len(sequences)
    span = n_steps_in + n_steps_out
    # A window starting at row s fits while s + span <= total_rows; the start
    # row is additionally limited to the rows that exist in the sequence.
    n_windows = min(total_rows, total_rows - span + 1)
    inputs = [sequences[s:s + n_steps_in, :] for s in range(n_windows)]
    targets = [sequences[s + n_steps_in:s + span, 0] for s in range(n_windows)]
    return array(inputs), array(targets)

In [3]:
def evaluate_forecasts(actual, predicted):
    """Compute the RMSE of a forecast, overall and per forecast step.

    Parameters
    ----------
    actual : array-like of shape (n_samples, n_steps)
        Observed values; one row per forecast, one column per step ahead.
    predicted : array-like of shape (n_samples, n_steps) or (n_samples, n_steps, 1)
        Forecast values. A trailing axis of length 1 is dropped so that the
        two arrays line up element by element.

    Returns
    -------
    score : float
        RMSE over every element of the two arrays.
    scores : list of float
        RMSE of each column (forecast step), in column order.

    Raises
    ------
    ValueError
        If ``actual`` is not a non-empty 2-D array or the shapes do not match.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    if actual.ndim != 2 or actual.size == 0:
        raise ValueError(
            'actual must be a non-empty 2-D array, got shape %s' % (actual.shape,))
    # Drop a trailing singleton axis; without this the element-wise error
    # would broadcast and the overall score would stop being a scalar.
    if predicted.shape == actual.shape + (1,):
        predicted = predicted[..., 0]
    if predicted.shape != actual.shape:
        raise ValueError(
            'predicted shape %s does not match actual shape %s'
            % (predicted.shape, actual.shape))

    squared_error = (actual - predicted) ** 2
    # one RMSE per forecast step (column)
    scores = list(np.sqrt(squared_error.mean(axis=0)))
    # overall RMSE across all samples and steps
    score = np.sqrt(squared_error.mean())
    return score, scores

In [None]:
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
from numpy import split
from numpy import array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import concatenate
import tensorflow as tf
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.layers import TimeDistributed

# Input file and date boundaries, named once so the split below cannot drift
# out of sync with itself. All date bounds are inclusive (.loc label slicing).
DATA_PATH = 'D:\\Ethereum project\\Data Processing\\5-original2016-43.csv'
ANALYSIS_START = '2018-01-01'
TRAIN_END = '2020-12-27'
TEST_START = '2020-12-28'
TEST_END = '2021-05-30'

df = pd.read_csv(DATA_PATH, header=0, parse_dates=['Date'], index_col=['Date'])

# starting time (the evaluation code reports elapsed time relative to this)
start = time.time()

# keep only the rows from ANALYSIS_START onwards
df = df.loc[ANALYSIS_START:, :]

# Perform Log Transformation: log(x + 1) on every column
df = np.log(df + 1)

n_input = 14   # rows of history fed to the models per sample
n_out = 1      # rows forecast per sample

# Fit the scaler on the training period only, so that the test period's
# minimum/maximum never influence the scaling the models are trained on,
# then apply the same scaling to the whole frame.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df.loc[ANALYSIS_START:TRAIN_END])
df_norm = pd.DataFrame(scaler.transform(df), columns=df.columns, index=df.index)

train_df = df_norm.loc[ANALYSIS_START:TRAIN_END]
test_df = df_norm.loc[TEST_START:TEST_END]
train_index = train_df.index
test_index = test_df.index

# Restructure into consecutive windows of n_out rows each:
# resulting shape is (n_windows, n_out, n_columns).
if len(train_df) % n_out or len(test_df) % n_out:
    raise ValueError('train and test lengths must both be multiples of n_out')
train = train_df.to_numpy().reshape(-1, n_out, train_df.shape[1])
test = test_df.to_numpy().reshape(-1, n_out, test_df.shape[1])

# column 0 of the training windows as one flat series
y_train = train[:, :, 0].reshape(train.shape[0] * train.shape[1])

# convert history into inputs and outputs
# flatten the windows back into one row-per-step table first
sequences = train.reshape((train.shape[0] * train.shape[1], train.shape[2]))
train_x, train_y = split_sequences(sequences, n_input, n_out)
n_features = train.shape[2]


#--------------------------
# Multi-headed MLP
#--------------------------
ModelType='Multi-headed MLP'
# One 2-D slice of train_x per feature; each slice feeds its own input head
X = [train_x[:, :, feature] for feature in range(train_x.shape[2])]

# One input head of length n_input per feature, each followed by its own hidden layer
visible = [Input(shape=(n_input,)) for _ in range(n_features)]
dense = [Dense(50, activation='relu')(head) for head in visible]

# Join the per-feature branches and project onto the n_out forecast values
merge = concatenate(dense)
output = Dense(n_out)(merge)
model_mlp = Model(inputs=visible, outputs=output)
model_mlp.compile(optimizer='Adam', loss='mse')
# Train on the list of per-feature inputs
model_mlp.fit(X, train_y, epochs=2000, verbose=0)

#--------------------------
# Vanilla LSTM
#--------------------------
# Single LSTM layer followed by a dense output layer
model_vlstm = Sequential([
    LSTM(50, activation='relu', input_shape=(n_input, n_features)),
    Dense(n_out),
])
model_vlstm.compile(optimizer='adam', loss='mse')
model_vlstm.fit(train_x, train_y, epochs=200, verbose=0)

#--------------------------
# Stacked LSTM
#--------------------------
# Two LSTM layers; the first returns the full sequence so the second can consume it
model_slstm = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(n_input, n_features)),
    LSTM(50, activation='relu'),
    Dense(n_out),
])
model_slstm.compile(optimizer='adam', loss='mse')
model_slstm.fit(train_x, train_y, epochs=200, verbose=0)


#--------------------------
# Bidirectional LSTM
#--------------------------
# One LSTM wrapped in Bidirectional, followed by a dense output layer
model_bdlstm = Sequential([
    Bidirectional(LSTM(50, activation='relu'), input_shape=(n_input, n_features)),
    Dense(n_out),
])
model_bdlstm.compile(optimizer='adam', loss='mse')
model_bdlstm.fit(train_x, train_y, epochs=200, verbose=0)

#--------------------------
# Encoder-Decoder LSTM
#--------------------------
# Encoder LSTM -> vector repeated n_out times -> decoder LSTM -> one Dense(1) per output step
model_edlstm = Sequential([
    LSTM(100, activation='relu', input_shape=(n_input, n_features)),
    RepeatVector(n_out),
    LSTM(100, activation='relu', return_sequences=True),
    TimeDistributed(Dense(1)),
])
model_edlstm.compile(optimizer='adam', loss='mse')
model_edlstm.fit(train_x, train_y, epochs=200, verbose=0)


# Evaluate every trained model with walk-forward validation: predict the next
# n_out rows from the last n_input known rows, then reveal the real rows and
# move on to the next test window.
Model_Names= [('mlp', 'Multi-Headed-MLP'), ('vlstm', 'Vanilla-LSTM'), ('slstm', 'Stacked-LSTM'), 
            ('bdlstm', 'Bi-Directional-LSTM'), ('edlstm', 'Encoder-Decoder-LSTM')]
# Resolve each short key in Model_Names to the fitted model it refers to.
trained_models = {
    'mlp': model_mlp,
    'vlstm': model_vlstm,
    'slstm': model_slstm,
    'bdlstm': model_bdlstm,
    'edlstm': model_edlstm,
}

# Ground-truth series mapped back through the scaler. They do not depend on
# the model, so they are computed once instead of once per model.
# NOTE(review): the frame was log-transformed (np.log(df+1)) before scaling,
# so these "rescaled" values are still log values, not dollar prices; apply
# np.expm1 here if the plot and error metrics are meant to be in dollars.
y_trainrev = scaler.inverse_transform(train.reshape(train.shape[0]*train.shape[1], train.shape[2]))[:, 0]
y_testrev = scaler.inverse_transform(test.reshape(test.shape[0]*test.shape[1], test.shape[2]))[:, 0]

for model_key, model_label in Model_Names:
    model = trained_models[model_key]
    # Start every model from the training windows only; sharing one history
    # across models would let later models see the whole test period up front.
    history = list(train)
    predictions = list()
    for i in range(len(test)):
        # flatten the known windows into one row-per-step table
        data = array(history)
        data = data.reshape((data.shape[0] * data.shape[1], data.shape[2]))
        # retrieve last observations for input data
        input_x = data[-n_input:, :]
        if model_key == 'mlp':
            # the multi-headed MLP takes one (1, n_input) array per feature
            input_x = [input_x[:, j].reshape((1, n_input)) for j in range(n_features)]
        else:
            # the LSTM models take a single (1, n_input, n_features) array
            input_x = input_x.reshape((1, n_input, n_features))
        yhat = model.predict(input_x, verbose=0)
        # Keep only the forecast of the single sample, flattened to n_out
        # values so models with a trailing unit axis produce the same shape.
        predictions.append(np.reshape(yhat[0], n_out))
        # get real observation and add to history for predicting the next window
        history.append(test[i, :])

    # score the forecasts against column 0 of the test windows
    predictions = array(predictions)
    score, scores = evaluate_forecasts(test[:, :, 0], predictions)

    # summarize scores: overall RMSE followed by the RMSE of each forecast step
    s_scores = ', '.join(['%.1f' % s for s in scores])
    print('%s: [%.3f] %s' % (model_label, score, s_scores))

    # The scaler expects one column per feature: place the predictions in
    # column 0, leave the other columns at zero, and keep only column 0 of
    # the inverse-transformed result.
    padded = np.zeros((predictions.size, test.shape[2]))
    padded[:, 0] = predictions.reshape(-1)
    y_predrev = scaler.inverse_transform(padded)[:, 0]

    # Plot rescaled outputs
    fig, ax = plt.subplots(figsize=(16, 8), dpi=100)
    ax.plot(train_index, y_trainrev, label='Training')
    ax.plot(test_index, y_predrev, label='Test Prediction')
    ax.plot(test_index, y_testrev, label='Test Targets')
    ax.set(title='Prediction Using: %s ( Log Transformation, %d Day Ahead) ' % (model_label, n_out),
           xlabel='Date', ylabel='Ether Price $')
    ax.legend()
    plt.show()

    # end time (elapsed time is cumulative since `start`, not per model)
    end = time.time()

    rmse = np.sqrt(np.mean((y_testrev-y_predrev)**2))
    ape = np.mean(np.abs((y_testrev-y_predrev)*100/y_testrev))
    print('Total RMSE is: ' , rmse)
    print('Total APE is: ' , ape)
    # total time taken
    print(f"Runtime of the program is {end - start}")