In [1]:
import pandas as pd
from scipy.io import arff
import time
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
import math
from keras.layers import Conv1D, LSTM, Lambda, Dropout,Bidirectional
from keras_tuner import RandomSearch
from keras.optimizers import Adam
from sklearn.model_selection import TimeSeriesSplit
from keras.callbacks import EarlyStopping
from keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import Callback
import os
import random
import numpy as np
import tensorflow as tf
from pymannkendall import seasonal_test
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import re
from numpy import array
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras import regularizers
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from keras_tuner import HyperParameters

In [2]:
def build_model(hp,look_back=24):
    # Input layer   
    model = Sequential()  
    model.add(Conv1D(filters=hp.Int('input_units',min_value=32,max_value=256,step=32)
                     ,kernel_size=(look_back),activation='relu',input_shape=[look_back, 1]))
    model.add(LSTM(units=hp.Int('input_units',min_value=32,max_value=256,step=16),input_shape=(look_back, 1)))
    model.add(Flatten())
    model.add(Dense(1))
    # Output layer    
    model.compile(optimizer=Adam(hp.Choice('learning_rate', [1e-1, 1e-2, 1e-3])),loss='mean_absolute_error')
    from keras.utils import plot_model
    tf.keras.utils.plot_model(model, to_file='Functional_API_model.png')
   
    return model

In [3]:
class StopAtThreshold(Callback):
    def __init__(self, monitor='loss', threshold=0.01):
        super(StopAtThreshold, self).__init__()
        self.monitor = monitor
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        current = logs.get(self.monitor)
        if current is not None and current < self.threshold:
            self.model.stop_training = True

In [4]:
def set_seeds(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # If you are using CUDA, uncomment the following 2 lines
    # os.environ['TF_DETERMINISTIC_OPS'] = '1'
    # os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

In [5]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=24):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

In [6]:
def LSTMTrain(data_file,training_ratio,look_back,batch_size):
    start_time = time.time()
    # Load arff file
    data, meta = arff.loadarff((f'../Data/WekaData/{data_file}.arff'))
    data_df = pd.DataFrame(data)
    
    close_index = data_df.columns.get_loc('Close')
    dataset = data_df.iloc[:, close_index:close_index+1].values  # numpy array
    dataset = dataset.astype('float32')


    # define early stopping
    early_stopping5 = EarlyStopping(monitor='val_loss', patience=5, verbose=0)


    # create an instance of our custom callback
    stop_at_threshold = StopAtThreshold(monitor='val_loss', threshold=0.015)
    

    set_seeds(1234)  

    # result = seasonal_test(data_df['Close'])
    # print(result)
    # trend, h, p, z, Tau, s, var_s, slope, intercept = result

    # params=re.split(r'\s*,\s', "trend, h, p, z, Tau, s, var_s, slope, intercept")
    # for pr in params:
    #     print(f'{pr}={eval(pr)}')




    # because it's multiplicative, so apply np.log
    dataset = np.log(dataset)

    # Initialize a scaler for the dataset
    #scaler = MinMaxScaler(feature_range=(0, 1))
    # Z-score normalization is useful when the data has outliers or when the distribution of the data is not known. 
    scaler = StandardScaler() 

    # Fit and transform the data to the scaler
    # Split into train and test sets

    training_ratio = training_ratio
    train_data, test_data = train_test_split(dataset, train_size=training_ratio, shuffle=False)

    # Fit the scaler to the training data and transform the training data
    train = scaler.fit_transform(train_data)

    # Use the same scaler to transform the test data
    test = scaler.transform(test_data)
    print(train.shape, test.shape)### Using Multiple Layer Perceptron



    # reshape dataset
    look_back = look_back
    trainX, trainY = create_dataset(train, look_back)
    test_data_with_look_back = np.concatenate((train[-look_back:], test))

    # Create testing data, starting with the end of the training data
    testX, testY = create_dataset(test_data_with_look_back, look_back)

    # create a TimeSeriesSplit object
    tscv = TimeSeriesSplit(n_splits=5)

    tuner = RandomSearch(
        build_model,
        objective='val_loss',
        max_trials=5,
        executions_per_trial=3,
        project_name='bitcoin')

    # define early stopping
    early_stopping15 = EarlyStopping(monitor='val_loss', patience=15, verbose=0)
    # create an instance of our custom callback
    stop_at_threshold = StopAtThreshold(monitor='val_loss', threshold=0.01)
    # perform hyperparameter tuning with time series cross-validation
    for train_index, val_index in tscv.split(trainX):
        X_train, X_val = trainX[train_index], trainX[val_index]
        y_train, y_val = trainY[train_index], trainY[val_index]
        tuner.search(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=10,
            callbacks=[early_stopping15]
            #callbacks=[stop_at_threshold]
        )

    # tuner.search_space_summary()
    # get the best hyperparameters
    best_hp = tuner.get_best_hyperparameters()[0]

    # get the best trial
    best_trial = tuner.oracle.get_best_trials()[0]



    # get the score of the best trial
    best_score = best_trial.score

    # print the score of the best trial
    print(f"Best score: {best_score}")

    # print the values of the best hyperparameters
    for hp in best_hp.values:
        print(f"{hp}: {best_hp.get(hp)}")


    # define early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=15, verbose=0)
    # create an instance of our custom callback
    stop_at_threshold = StopAtThreshold(monitor='val_loss', threshold=0.01)



    ntrainX, valX, ntrainY, valY = train_test_split(trainX, trainY, test_size=0.1, shuffle=False)

    #start_time = time.time()

    # create a new HyperParameters object
    new_hp = HyperParameters()

    # set the hyperparameters to the desired values
    # new_hp.Fixed('input_units', 228)
    # new_hp.Fixed('learning_rate', 0.01)
    new_hp.Fixed('input_units', best_hp.values['input_units'])
    new_hp.Fixed('learning_rate', best_hp.values['learning_rate'])

    # build a new model with the specified hyperparameters
    model = build_model(new_hp,look_back)

    # build the best model
    # model = build_model(best_hp)

    # fit the model with early stopping
    history = model.fit(
        ntrainX, ntrainY,
        validation_data=(valX, valY),
        epochs=1000,
        batch_size=batch_size, 
        verbose=0,
        callbacks=[early_stopping15]
        #callbacks=[stop_at_threshold]
    )

    # generate predictions for training
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    # Inverse transform the predictions to original scale
    trainPredict_orig = np.exp(scaler.inverse_transform(trainPredict))
    trainY_orig = np.exp(scaler.inverse_transform([trainY]))
    train_mse = mean_squared_error(trainY_orig[0], trainPredict_orig[:,0])
    train_mae = mean_absolute_error(trainY_orig[0], trainPredict_orig[:,0])
    print(f'train MSE: {train_mse:.4f}, RMSE: {math.sqrt(train_mse):.4f}, MAE: {train_mae:.4f}')

    testPredict_orig = np.exp(scaler.inverse_transform(testPredict))
    testY_orig = np.exp(scaler.inverse_transform([testY]))

    # Now you can calculate your evaluation metrics on the original scale
    test_mse = mean_squared_error(testY_orig[0], testPredict_orig[:,0])
    test_mae = mean_absolute_error(testY_orig[0], testPredict_orig[:,0])
    print(f'test MSE: {test_mse:.4f}, RMSE: {math.sqrt(test_mse):.4f}, MAE: {test_mae:.4f}')
    end_time = time.time()
    elapse = end_time-start_time
    print(f'Total time: {elapse//60} minutes, {elapse%60:.4f} seconds.')
    return best_score,best_hp.values['input_units'],best_hp.values['learning_rate'],math.sqrt(train_mse),train_mae,math.sqrt(test_mse),test_mae

In [7]:
# load a dataset
# data_file=['BTCUSD-all','BTCUSD-N2Y','BTCUSD-N4Y',
#            'ETHUSD-all','ETHUSD-N2Y','ETHUSD-N4Y',           
#           'USDTUSD-all','USDTUSD-N2Y','USDTUSD-N4Y',  
#            'BNBUSD-all','BNBUSD-N2Y','BNBUSD-N4Y']
# data_file=['BTCUSD-1m1h','ETHUSD-1m1h','USDTUSD-1m1h','BNBUSD-1m1h']
data_file=['BTCUSD-1m1h']

training_ratio=[0.7]
look_backs=[24]
batch_size=[4]


data_file_list= list()
training_ratio_list= list()
look_back_list= list()
batch_size_list= list()

best_score_list= list()
input_units_list= list()
learning_rate_list= list()

train_rmse_list= list()
train_mae_list= list()
test_rmse_list= list()
test_mae_list= list()

for df in data_file:
    for tr in training_ratio:
        for lb in look_backs:
            for bs in batch_size:
                data_file_list.append(df)
                training_ratio_list.append(tr)
                look_back_list.append(lb)
                batch_size_list.append(bs)
                resultLSTM=LSTMTrain(df,tr,lb,bs)
                best_score_list.append(resultLSTM[0])
                input_units_list.append(resultLSTM[1])
                learning_rate_list.append(resultLSTM[2])
                train_rmse_list.append(resultLSTM[3])
                train_mae_list.append(resultLSTM[4])
                test_rmse_list.append(resultLSTM[5])
                test_mae_list.append(resultLSTM[6])


Trial 5 Complete [00h 00m 09s]
val_loss: 0.12834186851978302

Best val_loss So Far: 0.12834186851978302
Total elapsed time: 00h 00m 44s
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Oracle triggered exit
Best score: 0.12834186851978302
input_units: 192
learning_rate: 0.01
train MSE: 39789.8176, RMSE: 199.4739, MAE: 144.6048
test MSE: 398841.4326, RMSE: 631.5389, MAE: 558.4357
Total time: 0.0 minutes, 51.7375 seconds.


In [67]:
Summary={'Data':data_file_list,'training ratio':training_ratio_list,'look back':look_back_list,
         'batch_size_list':batch_size_list,
            'train_mae':train_mae_list,          
            'train_rmse':train_rmse_list,          
            'test_mae':test_mae_list,           
            'test_rmse':test_rmse_list
            
         }
df_Summary = pd.DataFrame(Summary)

In [68]:

df_Summary.to_excel("Summary-CNN+LSTM-(1m1h-24lb).xlsx",index=False)
df_Summary

Unnamed: 0,Data,training ratio,look back,batch_size_list,train_mae,train_rmse,test_mae,test_rmse
0,BTCUSD-1m1h,0.7,24,4,119.642141,173.820768,429.995627,523.569952
1,BTCUSD-1m1h,0.7,24,8,179.109317,257.474806,400.895784,480.396552
2,BTCUSD-1m1h,0.7,24,16,146.209726,194.865382,305.811004,373.974934
3,BTCUSD-1m1h,0.7,24,32,115.172986,166.544759,358.444281,422.973337
4,BTCUSD-1m1h,0.8,24,4,186.508771,259.66284,311.29375,365.268243
5,BTCUSD-1m1h,0.8,24,8,124.355534,174.517744,236.112293,276.622973
6,BTCUSD-1m1h,0.8,24,16,145.245197,200.220728,456.220211,499.232285
7,BTCUSD-1m1h,0.8,24,32,128.581993,184.425455,355.189009,410.407185
8,BTCUSD-1m1h,0.9,24,4,94.605889,138.858014,158.322582,199.099637
9,BTCUSD-1m1h,0.9,24,8,100.840937,145.805369,120.82314,156.201525
