## Data preparation

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import os
import kerastuner as kt
from contextlib import redirect_stdout
import shutil
import locale

locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') #This is needed to convert string values of type 1,999.99 
                                               #to float 1999.99

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

#Load locally stored data:

In [4]:
#Locations of the locally stored datasets

#Market data. The name of the last folder ('coinmarket' or 'datasets') tells dataset_creation which file format to read.
#path = f"{os.getcwd()}/exper_files/datasets/" #Datasets downloaded from yahoo
path = f"{os.getcwd()}/exper_files/coinmarket/" #Datasets downloaded from coinmarket

#Social media data downloaded from cryptocompare
social_path = f"{os.getcwd()}/exper_files/social_datasets/cryptocompare/"

Create the dataset for each input layer (which means for each cryptocurrency):

In [5]:
def _read_coinmarket_txt(file_path):
    """Parse a tab-separated coinmarket export into a DataFrame sorted by ascending 'Date'.

    The first line holds the column names (optionally followed by '*' markers); every other
    line holds one day. Cells containing a '$' are parsed as numbers with ``locale.atof``
    (so the active locale decides how thousands separators are handled); cells without a '$'
    are parsed as dates.
    """
    with open(file_path) as f:
        lines = f.readlines()

    columns = []
    for col in lines[0].split("\t"):
        clean_col = col.split('*')[0].split('\n')[0] #Strip the '*' / '**' markers and the trailing newline
        if clean_col == 'Market Cap':
            clean_col = 'Market_Cap' #Space will cause troubles
        columns.append(clean_col)

    rows = []
    for line in lines[1:]:
        if not line.strip():
            continue #Ignore blank lines (e.g. a trailing newline at the end of the file)

        row = []
        for elem in line.split("\t"):
            parts = elem.split('$')
            if len(parts) == 1:
                row.append(pd.to_datetime(parts[0])) #No '$' --> this is the date
            else:
                row.append(locale.atof(parts[1].split('\n')[0]))
        rows.append(row)

    #Build the frame once instead of appending row by row (DataFrame.append is quadratic and
    #no longer exists in pandas 2)
    df = pd.DataFrame(rows, columns=columns)

    #The file lists the most recent day first, so invert the sorting to ascending 'Date'
    return df.sort_values(by=['Date']).reset_index(drop=True)


def dataset_creation(crypto_list, pth, social_pth):
    """Load, clean and align the market (and, when available, social media) data of each coin.

    Parameters
    ----------
    crypto_list : list of str
        Names of the cryptocurrencies; the market file ``<name>.txt`` / ``<name>.csv`` and the
        social file ``<name>.csv`` are looked up for each of them.
    pth : str
        Folder with the market data. Its last component selects the format:
        'coinmarket' --> tab-separated .txt files, 'datasets' --> yahoo .csv files.
    social_pth : str
        Folder with the social media .csv files (they must have a 'time' column).

    Returns
    -------
    dict
        ``{coin: DataFrame}``. Each frame has a 'Date' column followed by the market columns
        and the social columns of that coin, all prefixed with ``'<coin>_'``. Coins without a
        market file are left out; coins without a social file only get the market columns.

    Raises
    ------
    ValueError
        If the last folder of `pth` is neither 'coinmarket' nor 'datasets', or if no market
        dataset was found for any requested coin.
    """
    week_window = 7
    month_window = 30

    #Use the `pth` argument (not a notebook global) to find out which source is being read
    source_kind = os.path.basename(os.path.normpath(pth))
    if source_kind == 'coinmarket':
        type_of_file = '.txt'
    elif source_kind == 'datasets':
        type_of_file = '.csv'
    else:
        raise ValueError("Unknown market data folder '" + source_kind +
                         "': expected 'coinmarket' or 'datasets'")

    datasets = {} #Store all datasets here
    social_datasets = {} #Store all datasets with social media information here
    min_dates = {} #First usable date of each coin
    max_dates = {} #Last usable date of each coin

    for coin in crypto_list:

        market_file = os.path.join(pth, coin + type_of_file)
        if not os.path.isfile(market_file):
            continue

        if source_kind == 'datasets':
            market = pd.read_csv(market_file) #Dataset from yahoo is stored in a .csv file
            market['Date'] = pd.to_datetime(market['Date'])
            market = market.rename(columns={'Adj Close': 'Adj_Close'}) #Space will cause troubles
        else:
            market = _read_coinmarket_txt(market_file)

        #There are a few missing values, so fill them with the previous value
        market = market.ffill()

        #Create new columns 'close_off_high' and 'volatility' in order to make predictions more accurate
        market = market.assign(
            close_off_high=lambda x: 2 * (x['High'] - x['Close']) / (x['High'] - x['Low']) - 1,
            volatility=lambda x: (x['High'] - x['Low']) / (x['Open']))

        #Fill possible NaN close_off_high values with -1, because they are caused by (0/0)-1
        market = market.fillna(-1)

        datasets[coin] = market
        min_dates[coin] = pd.to_datetime(market['Date'].iloc[0])
        max_dates[coin] = pd.to_datetime(market['Date'].iloc[-1])

        #Get social media information for this coin from the corresponding csv (if there is one)
        social_file = os.path.join(social_pth, coin + '.csv')
        if not os.path.isfile(social_file):
            continue

        social = pd.read_csv(social_file)
        social['time'] = pd.to_datetime(social['time'])
        social = social.iloc[:, 1:] #Drop first column which is an unused index

        #There are many zero rows at the beginning of the social media datasets. For every column
        #find the position of its first non-zero value (0 when the whole column is zero) and keep
        #the smallest of these positions.
        value_cols = [col for col in social.columns if col != 'time']
        first_non_zero_row = min(
            (next((pos for pos, val in enumerate(social[col]) if val != 0), 0) for col in value_cols),
            default=0)

        social_datasets[coin] = social

        #Shrink the usable range of the coin to the range covered by its social data
        min_dates[coin] = max(min_dates[coin], social['time'].iloc[first_non_zero_row])
        max_dates[coin] = min(max_dates[coin], social['time'].iloc[-1])

    if not datasets:
        raise ValueError("No market dataset was found in '" + str(pth) + "' for " + str(crypto_list))

    #The common range starts at the latest 'first date' and ends at the earliest 'last date'
    common_start = max(min_dates.values())
    common_end = min(max_dates.values())

    currencies_data = {}
    date_col = None

    for coin, market in datasets.items():

        #Drop all the data which are prior to common_start and later than common_end
        market = market[(market['Date'] >= common_start) & (market['Date'] <= common_end)]

        #Average and standard deviation of 'Close' for the last 7 days and the last 30 days (month).
        #The first 29 rows are dropped afterwards because their monthly window is incomplete.
        close = market['Close']
        market = market.assign(
            mean_7days_Close=close.rolling(window=week_window).mean(),
            mean_month_Close=close.rolling(window=month_window).mean(),
            std_7days_Close=close.rolling(window=week_window).std(),
            std_month_Close=close.rolling(window=month_window).std()).iloc[month_window - 1:]

        #Prefix every column with the coin name, but keep 'Date' as it is
        market = market.rename(columns=lambda x: coin+'_'+x).rename(columns={coin+'_Date': 'Date'})

        if date_col is None:
            #The 'Date' column of the first coin is shared by all the returned frames
            date_col = market[['Date']].reset_index(drop=True)

        parts = [date_col, market.drop(['Date'], axis=1).reset_index(drop=True)]

        if coin in social_datasets:
            social = social_datasets[coin]
            social = social[(social['time'] >= common_start) & (social['time'] <= common_end)]
            social = social.iloc[month_window - 1:] #Keep the social data aligned with the market data
            social = social.rename(columns=lambda x: coin+'_'+x)
            parts.append(social.drop([coin+'_time'], axis=1).reset_index(drop=True))

        #Concatenate market and social datasets for each cryptocurrency separately
        currencies_data[coin] = pd.concat(parts, axis=1)

    return currencies_data

Split data into training, validation and test set:

In [6]:
def split_data(perc_train_set, perc_val_set, dict_currency_data):
    """Split the data of every coin chronologically into training, validation and test sets.

    `perc_train_set` is the fraction of rows that goes to the training set; `perc_val_set` is the
    fraction of the remaining rows that goes to the validation set (the rest is the test set).
    Rows are assigned by comparing their 'Date' with the date found at the split position.

    Returns three dictionaries ``{coin: DataFrame}`` (training, validation, test; 'Date' column
    removed and rows renumbered from 0) followed by the two split dates computed for the last
    coin of `dict_currency_data`.
    """
    def _without_date(frame):
        #Remove the 'Date' column and renumber the rows from 0
        return frame.drop(columns=['Date']).reset_index().drop(columns=['index'])

    train_by_coin, val_by_coin, test_by_coin = {}, {}, {}

    for coin, currency_data in dict_currency_data.items():

        #Date at the position given by 'perc_train_set': everything before it is training data
        train_cut = round(currency_data.shape[0] * perc_train_set)
        splt_date_train = currency_data.iloc[train_cut]['Date']

        train_part = currency_data[currency_data['Date'] < splt_date_train]
        holdout_part = currency_data[currency_data['Date'] >= splt_date_train]

        #Date at the position given by 'perc_val_set' inside the remaining rows:
        #everything before it is validation data, the rest is test data
        val_cut = round(holdout_part.shape[0] * perc_val_set)
        splt_date_val = holdout_part.iloc[val_cut]['Date']

        val_part = holdout_part[holdout_part['Date'] < splt_date_val]
        test_part = holdout_part[holdout_part['Date'] >= splt_date_val]

        train_by_coin[coin] = _without_date(train_part)
        val_by_coin[coin] = _without_date(val_part)
        test_by_coin[coin] = _without_date(test_part)

    return train_by_coin, val_by_coin, test_by_coin, splt_date_train, splt_date_val

Normalize training, validation and test inputs and outputs with MinMaxScaler:

In [7]:
def _sliding_windows(scaled_data, feature_cols, wind_len):
    """Return every window of `wind_len` consecutive rows of `scaled_data[feature_cols]` as one array."""
    values = scaled_data[feature_cols].values #Select the used features once instead of once per window
    return np.array([values[i:(i + wind_len)] for i in range(len(scaled_data) - wind_len)])


def _ranged_targets(close_values, wind_len, prd_range):
    """Return, for each position after the first window, the next `prd_range` values of `close_values`."""
    return np.array([close_values[i:i+prd_range]
                     for i in range(wind_len, len(close_values) - prd_range)])


def normalize_in_out(prd_range, wind_len, dict_tr_set, dict_val_set, dict_tst_set, target_feats, coin_targ, crypto_list):
    """Scale the data of every coin to [0, 1] and cut it into model inputs and outputs.

    For each coin of `crypto_list` a MinMaxScaler is fitted on the training set only and then
    applied to the validation and test sets. The inputs of a coin are all the windows of
    `wind_len` consecutive rows, restricted to the columns ``'<coin>_<feat>'`` for every feat of
    `target_feats`. The outputs are the next `prd_range` scaled 'Close' values of `coin_targ`
    that follow a window.

    Parameters
    ----------
    prd_range : int
        Number of future 'Close' values to predict.
    wind_len : int
        Number of consecutive rows in one input window.
    dict_tr_set, dict_val_set, dict_tst_set : dict
        ``{coin: DataFrame}`` with the training, validation and test set of every coin.
    target_feats : list of str
        Feature names without the coin prefix (e.g. 'Close').
    coin_targ : str
        Coin whose 'Close' value is predicted; it must be one of `crypto_list`.
    crypto_list : list of str
        Coins to create inputs for.

    Returns
    -------
    tuple
        ``(train_outputs, val_outputs, dict_train_inputs, dict_val_inputs, dict_test_inputs,
        close_scaler)``. `close_scaler` is fitted only on the training 'Close' values of
        `coin_targ`. Each inputs array has `prd_range` more windows than its outputs array has
        rows; no test outputs are produced.

    Raises
    ------
    ValueError
        If `coin_targ` is not in `crypto_list`.
    """
    if coin_targ not in crypto_list:
        raise ValueError("Target coin '" + str(coin_targ) + "' must be one of " + str(crypto_list))

    dict_LSTM_tr_in = {}
    dict_LSTM_val_in = {}
    dict_LSTM_test_in = {}

    for crypto in crypto_list:

        tr_set = dict_tr_set[crypto]
        val_set = dict_val_set[crypto]
        tst_set = dict_tst_set[crypto]

        #One scaler for all the columns of the coin: fit on the training set, only transform the others
        scaler = MinMaxScaler(feature_range=(0, 1))

        train_scaled_data = pd.DataFrame(scaler.fit_transform(tr_set.values), columns=tr_set.columns,
                                         index=tr_set.index)
        val_scaled_data = pd.DataFrame(scaler.transform(val_set.values), columns=val_set.columns,
                                       index=val_set.index)
        test_scaled_data = pd.DataFrame(scaler.transform(tst_set.values), columns=tst_set.columns,
                                        index=tst_set.index)

        if crypto == coin_targ:
            #Scaler for the 'Close' column only, fitted on the training values of the target coin
            close_scaler = MinMaxScaler(feature_range=(0, 1))
            close_scaler.fit((tr_set[coin_targ+'_Close'].values).reshape(-1, 1))

            coin_target_train_scaled_data = train_scaled_data
            coin_target_val_scaled_data = val_scaled_data

        #Features in the appropriate format (e.g 'Close' --> 'BTC-USD_Close'), kept in the
        #column order of the training set
        feats = [crypto+"_"+feat for feat in target_feats]
        used_cols = [col for col in tr_set.columns if col in feats]

        dict_LSTM_tr_in[crypto] = _sliding_windows(train_scaled_data, used_cols, wind_len)
        dict_LSTM_val_in[crypto] = _sliding_windows(val_scaled_data, used_cols, wind_len)
        dict_LSTM_test_in[crypto] = _sliding_windows(test_scaled_data, used_cols, wind_len)

    #Training and validation outputs: scaled 'Close' values of the target coin
    LSTM_rangd_train_out = _ranged_targets(coin_target_train_scaled_data[coin_targ+'_Close'].values,
                                           wind_len, prd_range)
    LSTM_rangd_val_out = _ranged_targets(coin_target_val_scaled_data[coin_targ+'_Close'].values,
                                         wind_len, prd_range)

    return LSTM_rangd_train_out, LSTM_rangd_val_out, dict_LSTM_tr_in, dict_LSTM_val_in, dict_LSTM_test_in, close_scaler

Define and build model:

In [12]:
######## Multi-input model: one GRU branch per coin, concatenated and followed by Dense layers ########
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM, GRU
from keras.layers import Dropout
import tensorflow as tf
from keras.layers import Conv1D
from keras.layers import MaxPool1D
from keras.layers import AveragePooling1D
from keras.layers import BatchNormalization
from keras.layers import Concatenate
from keras.models import Model
from keras import Input


def model_tuning(coin_list, inputs, outputs, output_size, validation_inputs, validation_outputs, epochs, early_stop_patience, 
                 neurons, dropout, prd_range, batch_size, keras_tuner_dir, activ_func="linear", loss="mae", 
                 optimizer="adam"):
    """Search the hyperparameters of the multi-input GRU model with the Hyperband tuner.

    The tuned model has one input and one GRU layer per coin (all GRU layers share the same
    number of units), concatenates the GRU outputs and passes them through two 'relu' Dense
    layers and a final Dense layer with `output_size` units. It is compiled with an Adam
    optimizer whose learning rate, beta_1, beta_2 and epsilon are tuned as well.

    Parameters
    ----------
    coin_list : list of str
        Coins to use; defines the order of the model inputs.
    inputs, validation_inputs : dict
        ``{coin: array}`` with the windows of every coin. The last `prd_range` windows are
        dropped before fitting so that the inputs line up with the outputs.
    outputs, validation_outputs : array
        Target values for training and validation.
    output_size : int
        Number of units of the output layer.
    epochs : int
        Used both as ``max_epochs`` of Hyperband and as the ``epochs`` of every trial.
    early_stop_patience : int
        Patience of the EarlyStopping callback that monitors 'val_loss'.
    neurons : list of int
        Candidate numbers of GRU units.
    dropout : list of float
        Currently unused (no Dropout layer is part of the model); kept for compatibility.
    prd_range : int
        Number of predicted steps (see `inputs`).
    batch_size : int
        Batch size used during the search.
    keras_tuner_dir : str
        Folder where the tuner stores its trials (project name 'keras_tuner').
    activ_func : str
        Activation of the output layer.
    loss : str
        Loss the model is compiled with.
    optimizer : str
        Currently unused (the model is always compiled with a tuned Adam); kept for compatibility.

    Returns
    -------
    tuple
        ``(best_hps, tuner)``: the best hyperparameters found and the tuner itself.
    """
    def build_model(hp):
        #The number of GRU units is shared by the branches of all the coins
        neurGRU1 = hp.Choice('neurons_1_GRU', values=neurons)

        coin_inputs = []
        coin_branches = []
        for coin in coin_list:
            #Input layer for each coin with its own data, followed by its own GRU layer
            coin_input = Input(shape=((inputs[coin].shape[1], inputs[coin].shape[2])),
                               name="input_"+coin)
            coin_inputs.append(coin_input)
            coin_branches.append(GRU(neurGRU1)(coin_input))

        #Merge all available features into a single large vector via concatenation
        #(nothing to concatenate when only one coin is used)
        if len(coin_branches) == 1:
            conc_layer_output = coin_branches[0]
        else:
            conc_layer_output = Concatenate()(coin_branches)

        neurD1 = hp.Choice('neurons_1_Dense', values=[32, 64, 128, 256])
        dense1_output = Dense(units=neurD1, activation='relu')(conc_layer_output)

        neurD2 = hp.Choice('neurons_2_Dense', values=[32, 64, 128, 256])
        dense2_output = Dense(units=neurD2, activation='relu')(dense1_output)

        final_output = Dense(units=output_size, activation=activ_func)(dense2_output)

        overall_model = Model(coin_inputs, final_output)

        hp_learning_rate = hp.Float(name='learning_rate', min_value=0.0001, max_value=0.05, step=0.0005)
        hp_beta_1 = hp.Float('beta_1', min_value=0.85, max_value=0.95, step=0.01)
        hp_beta_2 = hp.Float('beta_2', min_value=0.98, max_value=0.999, step=0.001)

        #Bounds given in increasing order (they were swapped) and sampling spelled as documented
        hp_epsilon = hp.Float('epsilon', min_value=1e-08, max_value=1e-07, sampling='log')

        #Named 'adam' so that it does not shadow the (unused) `optimizer` argument of model_tuning
        adam = tf.keras.optimizers.Adam(
            learning_rate=hp_learning_rate, beta_1=hp_beta_1, beta_2=hp_beta_2, epsilon=hp_epsilon)

        overall_model.compile(loss=loss, optimizer=adam)
        return overall_model

    #Create tuner
    #NOTE(review): the directory and project name are fixed and `overwrite` is not set, so a second
    #search in the same folder presumably reloads the previous trials - confirm before tuning several
    #feature sets / batch sizes in one run.
    tuner = kt.Hyperband(build_model,
                 objective='val_loss',
                 max_epochs=epochs,
                 factor=3,
                 directory=keras_tuner_dir,
                 project_name='keras_tuner')

    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=early_stop_patience)

    #Drop the last `prd_range` windows: there are no outputs for them
    tuner.search([inputs[coin][:-prd_range] for coin in coin_list], outputs, 
                 validation_data=([validation_inputs[coin][:-prd_range] for coin in coin_list], validation_outputs), 
                 epochs=epochs, batch_size=batch_size, callbacks=[stop_early],
                 use_multiprocessing=True,
                 workers=8)

    #Get the optimal hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=3)[0]

    return best_hps, tuner

Create MAE plot:

In [9]:
def create_plot(rangd_h, path_to_sav, target_cn, loss='loss'):
    """Plot the training and validation curves of a fit history and save them as a .png file.

    `rangd_h` must provide `epoch` and `history` (with the keys `loss` and ``'val_' + loss``).
    The figure is written to ``<path_to_sav>/<target_cn>_MAE.png`` and closed afterwards.
    """
    epoch_axis = rangd_h.epoch
    curves = ((loss, 'b', 'Loss'), ('val_'+loss, 'orange', 'Val_loss'))

    fig, ax1 = plt.subplots(1, 1, figsize=(25, 10))

    for history_key, curve_color, curve_label in curves:
        ax1.plot(epoch_axis, rangd_h.history[history_key], color=curve_color, label=curve_label)

    #One tick per epoch
    ax1.set_xticks(range(0, len(epoch_axis)))
    ax1.set_title('Training Error')
    ax1.set_ylabel('Mean Absolute Error (MAE)', fontsize=12)
    ax1.set_xlabel('#Epoch', fontsize=12)
    plt.legend()

    output_file = path_to_sav+"/"+target_cn+'_MAE.png'
    plt.savefig(output_file)
    plt.close(fig)

Tune model and train it:

In [13]:
#Get data
crypto_list = [['BTC-USD', 'ETH-USD', 'DOGE-USD', 'USDT-USD']] #Define the sets of cryptocurrencies to be tested
                #, 'XRP-USD', 'BNB-USD', 'ADA-USD'

#Every log and result file of this cell is written below the current working directory
cur_path = os.getcwd()+'/'

#Path to save its checkpoints the keras_tuner because it raises an error when it is in the current working
#directory or in a sub-folder.
#Raw string: the value is the same as before, but '\k' inside a normal string is an invalid escape sequence.
keras_tuner_dir = r'C:\keras_tuner'
if not os.path.exists(keras_tuner_dir):
    os.makedirs(keras_tuner_dir)


for cryptocurrency_list in crypto_list:

    cryptocurrency_list_output = 'Using cryptocurrencies: '+str(cryptocurrency_list)
    print(cryptocurrency_list_output)
    data = dataset_creation(cryptocurrency_list, path, social_path)

    #Split data
    percent_train_set = 0.8
    percent_val_set = 0.5

    training_set, validation_set, test_set, split_date_train, split_date_valid = split_data(percent_train_set,
                                                                                           percent_val_set, data)

    #Define the sets of features to be tested. Other sets that can be used:
    #[['Close'], ['Close', 'Volume'], ['Close', 'Open', 'High'],
    # ['Close', 'close_off_high', 'volatility'],
    # ['Close', 'mean_7days_Close', 'mean_month_Close'],
    # ['Close', 'std_7days_Close', 'std_month_Close']]
    features_list = [['Close']]

    for featurs in features_list:

        featurs_output = '\tUsing the features: '+str(featurs)
        print(featurs_output)

        #Create inputs and outputs for the model training, validation and testing
        pred_range = 5
        window_len = 10
        features = featurs
        coin_target = 'BTC-USD'

        (LSTM_ranged_training_outputs, LSTM_ranged_validation_outputs, LSTM_training_inputs,
         LSTM_validation_inputs, LSTM_test_inputs, clse_scaler) = normalize_in_out(pred_range, window_len,
                                                                                   training_set,
                                                                                   validation_set,
                                                                                   test_set,
                                                                                   features, coin_target,
                                                                                   cryptocurrency_list)

        batch_size_list = [64] #[1, 32, 64]
        neuron_list = [20, 40, 60, 100, 128, 256, 512]
        dropout_list = [0.0, 0.1, 0.2, 0.25, 0.3, 0.4]
        epochs = 100
        early_stop_patience = 10
        shuffle = True #Not passed to fit(); True is the default of Keras anyway
        verbose = 0 #Not passed to fit()

        #One results file per (cryptocurrencies, features) combination. The 'with' block closes it
        #even when the tuning or the training fails.
        txt_log_dir = cur_path + "logs/fit/" + str(cryptocurrency_list) + '/' + str(featurs) + '/'
        os.makedirs(txt_log_dir, exist_ok=True)

        with open(txt_log_dir+coin_target+"_results.txt", "w") as f:

            f.write(cryptocurrency_list_output)
            f.write("\n"+featurs_output)

            for bat_s in batch_size_list:

                #Build and train model
                batch_size = bat_s

                exper_params_output = '\t\tBatch_size: '+str(batch_size)
                print(exper_params_output)

                #Path to write log files
                log_dir = txt_log_dir + 'Batch_size='+str(batch_size)
                os.makedirs(log_dir, exist_ok=True)

                best_hps, tuner = model_tuning(cryptocurrency_list, inputs=LSTM_training_inputs,
                                               outputs=LSTM_ranged_training_outputs,
                                               output_size=pred_range,
                                               validation_inputs=LSTM_validation_inputs,
                                               validation_outputs=LSTM_ranged_validation_outputs,
                                               epochs=epochs, early_stop_patience=early_stop_patience,
                                               neurons=neuron_list, dropout=dropout_list,
                                               prd_range=pred_range, batch_size=batch_size,
                                               keras_tuner_dir=keras_tuner_dir)

                exper_outputs = "\t\tThe hyperparameter search is complete. The optimal parameter were found to be: \n" + \
                                str(tuner.oracle.get_best_trials(num_trials=1)[0].hyperparameters.values)

                #Build the model with the optimal hyperparameters and train it with the same batch size
                #that was used for the search (it was left to the Keras default before, although the
                #results are logged per batch size)
                best_model = tuner.hypermodel.build(best_hps)
                mod_history = best_model.fit([LSTM_training_inputs[coin][:-pred_range] for coin in cryptocurrency_list],
                                             LSTM_ranged_training_outputs,
                                             validation_data=([LSTM_validation_inputs[coin][:-pred_range]
                                                               for coin in cryptocurrency_list],
                                                              LSTM_ranged_validation_outputs),
                                             epochs=epochs, batch_size=batch_size)

                #Get the epoch with the best validation loss
                val_loss_per_epoch = mod_history.history['val_loss']
                best_val_loss = min(val_loss_per_epoch)
                best_epoch = val_loss_per_epoch.index(best_val_loss) + 1
                exper_outputs += '\n\n\t\tBest epoch: '+ str(best_epoch) + ' with validation loss: ' + str(best_val_loss)

                #Print results to the .txt file
                f.write("\n"+exper_params_output)
                f.write("\n"+exper_outputs)

                f.write("\n\nFull keras tuner results: \n")
                with redirect_stdout(f):
                    tuner.results_summary()

                create_plot(mod_history, log_dir, coin_target)


#Write the summary of the last trained model to a txt file
with open(cur_path + 'logs/fit/modelsummary.txt', 'w') as f_sum:
    with redirect_stdout(f_sum):
        best_model.summary()


#Remove the folder which is created by keras tuner
shutil.rmtree(keras_tuner_dir)

Trial 254 Complete [00h 00m 24s]
val_loss: 0.027513325214385986

Best val_loss So Far: 0.02575628273189068
Total elapsed time: 00h 56m 24s
INFO:tensorflow:Oracle triggered exit
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100


Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
