# CNN Best Model

Let's test the best model from the 1-month grid search on the full data, with and without the matrix profile (MP) feature, for comparison.

## Load data, functions

In [1]:
import pandas as pd
import numpy as np
from numpy import array
from numpy import mean
from numpy import std
import tensorflow as tf
from tensorflow import keras
from keras import optimizers
from keras.models import Sequential
from keras import callbacks
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed, Conv1D, MaxPooling1D, AveragePooling1D,  GlobalMaxPooling1D, Flatten, Bidirectional, Input, Flatten, Activation, Reshape, RepeatVector, Concatenate
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import pywt
import matrixprofile as mp

In [2]:
# Print the build metadata TensorFlow was compiled with (the printed mapping
# includes the CUDA / cuDNN build fields).
# NOTE(review): this imports from a non-public tensorflow.python.* path —
# confirm it exists in the installed TensorFlow version.
import tensorflow.python.platform.build_info as build
print(build.build_info)

OrderedDict([('cpu_compiler', 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.27.29110/bin/HostX64/x64/cl.exe'), ('cuda_compute_capabilities', ['sm_35', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'compute_80']), ('cuda_version', '64_112'), ('cudart_dll_name', 'cudart64_112.dll'), ('cudnn_dll_name', 'cudnn64_8.dll'), ('cudnn_version', '64_8'), ('is_cuda_build', True), ('is_rocm_build', False), ('is_tensorrt_build', False), ('msvcp_dll_names', 'msvcp140.dll,msvcp140_1.dll'), ('nvcuda_dll_name', 'nvcuda.dll')])


In [3]:
def split_sequence(sequence, n_steps_in, n_steps_out, step_interval, n_step_lookahead):
    """Slice a sequence into (input window, target window) pairs.

    Window ``k`` starts at index ``k * step_interval`` and covers
    ``n_steps_in`` items. Its target covers ``n_steps_out`` items and begins
    ``n_step_lookahead - 1`` positions after the input window ends, so a
    lookahead of 1 places the target directly after the input.

    Returns a tuple ``(X, y)`` of numpy arrays built from the collected
    input and target windows. Generation stops at the first window whose
    target would run past the end of ``sequence``.
    """
    total_len = len(sequence)
    input_windows = []
    target_windows = []
    for window_no in range(int(total_len / step_interval)):
        in_start = window_no * step_interval
        in_stop = in_start + n_steps_in
        # Targets begin (n_step_lookahead - 1) positions past the input window.
        target_start = in_stop + n_step_lookahead - 1
        target_stop = in_stop + n_steps_out + n_step_lookahead - 1
        if target_stop > total_len:
            # This target would overrun the sequence; stop generating windows.
            break
        input_windows.append(sequence[in_start:in_stop])
        target_windows.append(sequence[target_start:target_stop])
    return array(input_windows), array(target_windows)

In [4]:
# Demonstrate split_sequence on the toy sequence 0..12: each example uses
# 5 input values and a single target, and with n_step_lookahead=5 the target
# is the value 5 positions after the last input value.
# (The original cell assigned n_steps_in = 1 and immediately overwrote it
# with 5; the dead assignment has been dropped.)
sequence = range(0,13)
n_steps_in = 5
n_steps_out =1
step_interval =1
n_step_lookahead=5
split_sequence(sequence, n_steps_in, n_steps_out, step_interval, n_step_lookahead)

(array([[0, 1, 2, 3, 4],
        [1, 2, 3, 4, 5],
        [2, 3, 4, 5, 6],
        [3, 4, 5, 6, 7]]),
 array([[ 9],
        [10],
        [11],
        [12]]))

In [5]:
# Block gas price percentiles: parse the block timestamp, use it as the index,
# average into 5-minute bins and divide by 1e9 (the notebook notes this as the
# wei -> gwei conversion).
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
percentile_data = (
    pd.read_csv(r'C:\Users/conal/Desktop/MCM/Practicum - Copy/data/block gas price percentile data.csv', header=0)
    .assign(datetime=lambda frame: pd.to_datetime(frame['block_timestamp'], format='%Y-%m-%d %H:%M:%S UTC'))
    .sort_values(by='datetime', ascending=False)
    .set_index('datetime')
    .resample('5T')
    .mean()
    .div(1000000000)
)

In [6]:
# Merged ETH / gas / usage series: parse the datetime column, index by it,
# cast the values to float and average into 5-minute bins.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
usage_data = (
    pd.read_csv(r'C:\Users\conal\Desktop\MCM\Practicum - Copy\data\ETH,gas,usage merged 11-26 to 05-26.csv', header=0)
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime'], format='%Y-%m-%d %H:%M:%S'))
    .set_index('datetime')
    .squeeze()
    .astype('float')
    .resample('5T')
    .mean()
)

In [7]:
# Contract counts: parse block_timestamp into a datetime index, drop the raw
# timestamp column, cast to float and sum (not average) into 5-minute bins.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
usage_data2 = (
    pd.read_csv(r'C:\Users\conal\Desktop\MCM\Practicum - Copy\data\Contract counts 2021-11-26 to 2022-05-26.csv', header=0, index_col=0)
    .assign(datetime=lambda frame: pd.to_datetime(frame['block_timestamp'], format='%Y-%m-%d %H:%M:%S'))
    .set_index('datetime')
    .drop(columns=['block_timestamp'])
    .squeeze()
    .astype('float')
    .resample('5T')
    .sum()
)

In [8]:
# Join the three 5-minute frames on their datetime index (merge's default join
# type is used, as in the original two-step version).
data = (
    usage_data
    .merge(percentile_data, left_index=True, right_index=True)
    .merge(usage_data2, left_index=True, right_index=True)
)

Load the data, set the datetime as the index, downsample to 5-minute bins (left-edge labels), and convert wei to gwei.

In [9]:
def generate_training_val_examples(data):
    """Build standardised train/validation windows with multivariate targets.

    Reads the notebook globals ``inputs``, ``start_date``, ``end_date``,
    ``n_steps_in``, ``n_steps_out``, ``step_interval`` and
    ``n_step_lookahead``.

    Parameters
    ----------
    data : DataFrame containing at least the columns listed in ``inputs``;
        it is sliced with ``data[start_date:end_date]``.

    Returns
    -------
    (X_train, y_train, X_val, y_val, scaler) where the first 70% of the
    generated windows form the training set, the remainder the validation
    set, and ``scaler`` is the fitted ``StandardScaler``. Targets keep every
    column in ``inputs``.
    """
    # Take an explicit copy of the selected columns: assigning into the bare
    # column selection is what triggered pandas' SettingWithCopyWarning.
    data = data[inputs].copy()
    scaler = StandardScaler()
    # NOTE(review): the scaler is fitted on every row passed in, i.e. before
    # the date filter and before the train/validation split — confirm that
    # using validation-period statistics here is acceptable.
    data[inputs] = scaler.fit_transform(data[inputs])

    # Create input:output examples from the requested date range
    data = data[start_date:end_date].to_numpy()
    X, y = split_sequence(data, n_steps_in, n_steps_out, step_interval, n_step_lookahead)

    # Chronological 70/30 split, same cut point for inputs and targets
    split_at = int(0.7 * len(X))
    X_train, X_val = np.split(X, [split_at])
    y_train, y_val = np.split(y, [split_at])

    # Reshape to 3D (examples, timesteps, features) for the network
    n_features = len(inputs)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], n_features))
    X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], n_features))
    y_val = y_val.reshape((y_val.shape[0], y_val.shape[1], n_features))

    return X_train, y_train, X_val, y_val, scaler



In [10]:
def LSTM_model():
    """Build the sequential Conv1D -> LSTM forecaster and its checkpoint callback.

    Reads the notebook globals ``n_steps_in``, ``n_steps_out`` and ``inputs``.
    Returns ``(model, model_checkpoint_callback)``; the callback writes the
    weights with the lowest ``val_loss`` to ``./cnn/checkpoint``.
    """
    best_weights_callback = keras.callbacks.ModelCheckpoint(
        filepath='./cnn/checkpoint',
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)

    feature_count = len(inputs)
    network = Sequential([
        # Convolutional encoder over the input window
        Conv1D(filters=64, kernel_size=9, activation='tanh', input_shape=(n_steps_in, feature_count)),
        Conv1D(filters=64, kernel_size=11, activation='tanh'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        # Repeat the encoding once per forecast step, then decode with an LSTM
        RepeatVector(n_steps_out),
        LSTM(200, activation='tanh', return_sequences=True),
        TimeDistributed(Dense(100, activation='tanh')),
        # One output per input feature at every forecast step
        TimeDistributed(Dense(feature_count)),
    ])
    network.compile(loss='mse', optimizer='adam')
    return network, best_weights_callback

In [11]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error


In [12]:
def descale_y_retrun_metrics(yhat, y_val2):
    """Undo standard scaling and score every output feature at every forecast step.

    Reads the notebook globals ``scaler``, ``n_steps_out`` and ``inputs``.

    Parameters
    ----------
    yhat : predictions, indexed as (examples, forecast step, feature).
    y_val2 : ground truth with the same layout as ``yhat``.

    Returns
    -------
    dict mapping ``'Lookahead<j>'`` (``j`` counted from 0) to a DataFrame
    indexed by ``inputs`` with columns RMSE, MSE, MAE, MAPE and R2.
    """
    metrics_dict = {}
    for j in range(n_steps_out):
        # The inverse transform does not depend on the feature being scored,
        # so it is done once per forecast step rather than once per feature.
        pred_step = scaler.inverse_transform(yhat[:, j:j+1, :].reshape(yhat.shape[0], yhat.shape[2]))
        truth_step = scaler.inverse_transform(y_val2[:, j:j+1, :].reshape(y_val2.shape[0], y_val2.shape[2]))

        RMSE_list, MAE_list, MAPE_list, R2_list, MSE_list = [], [], [], [], []
        for i in range(len(inputs)):
            pred_descaled = pred_step[:, i:i+1]
            groud_truth_descaled = truth_step[:, i:i+1]
            # `squared=` is deprecated in recent scikit-learn releases, so the
            # RMSE is derived from the MSE instead of requested directly.
            MSE = mean_squared_error(groud_truth_descaled, pred_descaled)
            MSE_list.append(MSE)
            RMSE_list.append(np.sqrt(MSE))
            MAE_list.append(mean_absolute_error(groud_truth_descaled, pred_descaled))
            MAPE_list.append(mean_absolute_percentage_error(groud_truth_descaled, pred_descaled))
            R2_list.append(r2_score(groud_truth_descaled, pred_descaled))

        metrics_dict['Lookahead' + str(j)] = pd.DataFrame(
            {'RMSE': RMSE_list, 'MSE': MSE_list, 'MAE': MAE_list, 'MAPE': MAPE_list, 'R2': R2_list},
            index=inputs)
    return metrics_dict

In [13]:
def generate_training_val_examples_univariate_output(data):
    """Build standardised train/validation windows whose target is the first input only.

    Reads the notebook globals ``inputs``, ``start_date``, ``end_date``,
    ``n_steps_in``, ``n_steps_out``, ``step_interval`` and
    ``n_step_lookahead``.

    Parameters
    ----------
    data : DataFrame containing at least the columns listed in ``inputs``;
        it is sliced with ``data[start_date:end_date]``.

    Returns
    -------
    (X_train, y_train, X_val, y_val, scaler). Inputs keep every column in
    ``inputs``; targets keep only the first one (last axis of length 1).
    The first 70% of the generated windows form the training set.
    """
    # Take an explicit copy of the selected columns: assigning into the bare
    # column selection is what triggered pandas' SettingWithCopyWarning.
    data = data[inputs].copy()
    scaler = StandardScaler()
    # NOTE(review): the scaler is fitted on every row passed in, i.e. before
    # the date filter and before the train/validation split — confirm that
    # using validation-period statistics here is acceptable.
    data[inputs] = scaler.fit_transform(data[inputs])

    # Create input:output examples from the requested date range
    data = data[start_date:end_date].to_numpy()
    X, y = split_sequence(data, n_steps_in, n_steps_out, step_interval, n_step_lookahead)

    # Chronological 70/30 split, same cut point for inputs and targets
    split_at = int(0.7 * len(X))
    X_train, X_val = np.split(X, [split_at])
    y_train, y_val = np.split(y, [split_at])

    # Reshape to 3D (examples, timesteps, features); targets keep feature 0 only
    n_features = len(inputs)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], n_features))[:, :, :1]
    X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], n_features))
    y_val = y_val.reshape((y_val.shape[0], y_val.shape[1], n_features))[:, :, :1]

    return X_train, y_train, X_val, y_val, scaler

In [14]:
def descale_y_retrun_metrics_univariate_y(yhat, y_val2):
    """Undo standard scaling and score the single target at every forecast step.

    Reads the notebook globals ``scaler``, ``n_steps_out`` and ``inputs``.

    Parameters
    ----------
    yhat : predictions, indexed as (examples, forecast step, channel); only
        the first ``len(inputs)`` channels are passed to the scaler and only
        channel 0 is scored.
    y_val2 : ground truth, indexed as (examples, forecast step, channel);
        only channel 0 is read.

    Returns
    -------
    DataFrame indexed 1..n_steps_out with columns RMSE, MSE, MAE, MAPE, R2.
    """
    n_features = len(inputs)
    RMSE_list, MAE_list, MAPE_list, R2_list, MSE_list = [], [], [], [], []
    for j in range(n_steps_out):
        pred_descaled = scaler.inverse_transform(
            yhat[:, j:j+1, :n_features].reshape(yhat.shape[0], n_features))[:, :1]
        # The scaler expects one column per input, so the single target column
        # is repeated across all of them and only the first is kept afterwards.
        truth_column = y_val2[:, j:j+1, 0].reshape(y_val2.shape[0])
        groud_truth_descaled = scaler.inverse_transform(
            array([truth_column] * n_features).transpose())[:, :1]

        # `squared=` is deprecated in recent scikit-learn releases, so the
        # RMSE is derived from the MSE instead of requested directly.
        MSE = mean_squared_error(groud_truth_descaled, pred_descaled)
        MSE_list.append(MSE)
        RMSE_list.append(np.sqrt(MSE))
        MAE_list.append(mean_absolute_error(groud_truth_descaled, pred_descaled))
        MAPE_list.append(mean_absolute_percentage_error(groud_truth_descaled, pred_descaled))
        R2_list.append(r2_score(groud_truth_descaled, pred_descaled))

    metrics_df = pd.DataFrame(
        {'RMSE': RMSE_list, 'MSE': MSE_list, 'MAE': MAE_list, 'MAPE': MAPE_list, 'R2': R2_list},
        index=range(1, (n_steps_out + 1)))
    return metrics_df

In [None]:
def add_mp(data, window):
    """Append a standardised matrix-profile channel computed from feature 0.

    ``data`` is indexed as a 3-D array (examples, timesteps, features). For
    each example the matrix profile of feature 0 is computed with the given
    ``window``, left-padded with its own mean up to the original number of
    timesteps, and z-scored using the mean and standard deviation of all
    profiles in this call. The result has one extra feature and the first
    ``window`` timesteps removed.
    """
    example_count, step_count = data.shape[0], data.shape[1]

    padded_profiles = []
    for series in data[:, :, 0]:
        profile = mp.compute(series, window, n_jobs=4)['mp']
        # The profile is shorter than the series (per the original note), so
        # the FRONT is padded with the profile mean to restore the length.
        front_pad = [mean(profile)] * (step_count - len(profile))
        padded_profiles.append(np.append(front_pad, profile))

    mp_array = np.array(padded_profiles).reshape(example_count, step_count)
    # z-score with statistics from this array only
    # NOTE(review): train and validation sets are each standardised with
    # their own statistics when this is called on them separately.
    centred = mp_array - mean(mp_array)
    std_array = (centred / np.std(mp_array)).reshape(example_count, step_count, 1)

    # Attach the new channel, then drop the first `window` timesteps
    with_profile = np.concatenate((data, std_array), axis=2)
    return with_profile[:, window:, :]

## Extend Lookahead to 10 timesteps

In [25]:
def LSTM_model(filters, kernel_size):
    """Build the multi-head Conv1D -> LSTM -> bidirectional-LSTM forecaster.

    Reads the notebook globals ``n_steps_in``, ``n_steps_out`` and
    ``X_train`` (only its last-axis size is used).

    Returns ``(model, model_checkpoint_callback, checkpoint_filepath)``; the
    callback saves the weights with the lowest ``val_loss`` under
    ``checkpoint_filepath``.
    """
    checkpoint_filepath = './cnn/'
    best_val_checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)

    channel_count = X_train.shape[2]
    model_input = Input(shape=(n_steps_in, channel_count))

    def build_head():
        # Two stacked convolutions over the input, flattened to a vector.
        first_conv = Conv1D(filters=filters, kernel_size=kernel_size, activation='tanh')(model_input)
        second_conv = Conv1D(filters=filters, kernel_size=kernel_size, activation='tanh')(first_conv)
        return Flatten()(second_conv)

    # One head per input channel.
    # NOTE(review): every head receives the full multi-channel input rather
    # than a single channel — confirm this is the intended "multi-head" design.
    heads = [build_head() for _ in range(channel_count)]

    merged = Concatenate(axis=1)(heads)
    per_head_width = heads[0].shape[1]
    as_sequence = Reshape((per_head_width, channel_count))(merged)
    encoded = LSTM(100, activation='tanh')(as_sequence)
    repeated = RepeatVector(n_steps_out)(encoded)
    decoded = Bidirectional(LSTM(100, activation='tanh', return_sequences=True))(repeated)
    regularised = Dropout(0.2)(decoded)
    # Output width equals the number of input channels at every forecast step.
    outputs = Dense(channel_count, activation='linear')(regularised)

    model = Model(inputs=model_input, outputs=outputs)
    model.compile(loss='mse', optimizer='adam')
    return model, best_val_checkpoint, checkpoint_filepath

In [26]:
#Create Training Examples for all lookaheads
# Trains one model per monthly window WITH the matrix-profile channel and
# writes weights, metrics and loss curves under mp_cnn_best_10lookahead/.
# NOTE(review): resample_rate and y_hat_list are assigned here but not read in this cell.
resample_rate = '5T'
# Start_dates[k] pairs with end_dates[k].
# NOTE(review): the first two start dates are at 00:00:00 while the later ones
# are at 23:55:00 — confirm the mismatch is intended.
end_dates = ['2021-12-26 23:55:00', '2022-01-26 23:55:00', '2022-02-26 23:55:00', '2022-03-26 23:55:00', '2022-04-26 23:55:00'  ]
Start_dates = ['2021-11-26 00:00:00', '2021-12-26 00:00:00', '2022-01-26 23:55:00', '2022-02-26 23:55:00', '2022-03-26 23:55:00']

#end_dates = ['2022-01-26 23:55:00', '2022-03-26 23:55:00']
#Start_dates = ['2021-11-26 00:00:00', '2022-01-26 00:00:00']
inputs = ['min_gas_price', 'block_gas_5th_percentile', 'block_gas_95th_percentile', 'gas_used', 'base_fee_per_gas', 'transaction_count', 'size', 'Open', 'contracts']
#No of timesteps behind to forecast on, no of timesteps to forecast ahead
# The extra 288 steps equal mp_window; add_mp drops that many leading timesteps.
n_steps_in = 4032+288
n_steps_out = 10
#How many timesteps between start of training examples
step_interval = 1
n_step_lookahead = 1
mp_window = 288


y_hat_list=[]
train_loss_list=[]
val_loss_list=[]
training_metrics_dicts=[]
valdiation_metrics_dicts=[]
parameter_index=[]
month=0
# The outer loops hold a single (filters, kernel_size) combination.
for filters in [9]:
    for kernel_size in [7]:
            for month in [0, 1, 2, 3, 4]:
                # Windows are generated 288 steps longer than the model input
                # because add_mp removes the first mp_window timesteps.
                n_steps_in = 4032+288
                n_step_lookahead = 1
                # The generate_* helper reads start_date / end_date as globals.
                start_date=Start_dates[month]
                end_date=end_dates[month]
                X_train, y_train, X_val, y_val, scaler = generate_training_val_examples_univariate_output(data)
                X_train = add_mp(X_train, mp_window)
                X_val = add_mp(X_val, mp_window)
                # LSTM_model reads the global n_steps_in for its input shape,
                # so reset it to the post-crop window length first.
                n_steps_in = 4032




                model, model_checkpoint_callback, checkpoint_filepath = LSTM_model(filters, kernel_size)
                train_history = model.fit(X_train, y_train,validation_data=(X_val, y_val), epochs=15, verbose=1, callbacks=[model_checkpoint_callback])
                train_loss_list.append(train_history.history['loss'])
                val_loss_list.append(train_history.history['val_loss'])

                # Restore the weights written by the checkpoint callback
                # (lowest val_loss) before predicting.
                model.load_weights(checkpoint_filepath)
                yhat_train=model.predict(X_train, verbose=1)
                yhat_val = model.predict(X_val, verbose=1)
                model.save_weights('mp_cnn_best_10lookahead/' +str(month)+'/')

                # The metric helper reads the global `scaler` set above.
                training_metrics_dicts.append(descale_y_retrun_metrics_univariate_y(yhat_train, y_train))
                valdiation_metrics_dicts.append(descale_y_retrun_metrics_univariate_y(yhat_val, y_val))
                # Files are rewritten every month with the results so far.
                # NOTE(review): these are lists of DataFrames; presumably they
                # are stored as pickled object arrays — confirm how they are loaded.
                np.save("mp_cnn_best_10lookahead/training_metrics.npy", training_metrics_dicts)
                np.save("mp_cnn_best_10lookahead/val_metrics.npy", valdiation_metrics_dicts)

                parameter_index.append([filters, kernel_size])

                keras.backend.clear_session()

                pd.DataFrame(train_loss_list).to_csv('mp_cnn_best_10lookahead/train_loss')
                pd.DataFrame(val_loss_list).to_csv('mp_cnn_best_10lookahead/val_loss')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [27]:
# Display the validation metric tables from the matrix-profile run
# (one DataFrame per month, one row per forecast step).
valdiation_metrics_dicts

[         RMSE         MSE        MAE      MAPE        R2
 1   14.399236  207.338004   9.984202  0.153011  0.625049
 2   15.459951  239.010081  10.330912  0.152429  0.567725
 3   16.075388  258.418090  10.736479  0.155109  0.532628
 4   16.707183  279.129976  11.069232  0.158046  0.495104
 5   17.009718  289.330490  11.249533  0.159771  0.476587
 6   17.413791  303.240107  11.594301  0.165363  0.451313
 7   17.636058  311.030551  11.938548  0.172115  0.437213
 8   17.741563  314.763068  12.194381  0.177828  0.430208
 9   17.921318  321.173654  12.550755  0.186349  0.419034
 10  18.210865  331.635598  13.156697  0.199449  0.400502,
          RMSE          MSE        MAE      MAPE        R2
 1   42.071518  1770.012613  26.903243  0.193842  0.544768
 2   44.878347  2014.066021  28.782214  0.206614  0.481117
 3   46.200776  2134.511733  29.726407  0.212939  0.449250
 4   46.994716  2208.503334  30.038223  0.213533  0.429571
 5   47.522918  2258.427718  30.224780  0.213359  0.416672
 6   48

## No MP

In [16]:
def LSTM_model(filters, kernel_size):
    """Build the multi-head Conv1D -> LSTM -> bidirectional-LSTM forecaster.

    Reads the notebook globals ``n_steps_in``, ``n_steps_out`` and
    ``X_train`` (only its last-axis size is used).

    Returns ``(model, model_checkpoint_callback, checkpoint_filepath)``; the
    callback saves the weights with the lowest ``val_loss`` under
    ``checkpoint_filepath``.
    """
    checkpoint_filepath = './cnn/'
    best_val_checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)

    channel_count = X_train.shape[2]
    model_input = Input(shape=(n_steps_in, channel_count))

    # One convolutional head per input channel, built in order.
    # NOTE(review): every head receives the full multi-channel input rather
    # than a single channel — confirm this is the intended "multi-head" design.
    heads = []
    for _ in range(channel_count):
        first_conv = Conv1D(filters=filters, kernel_size=kernel_size, activation='tanh')(model_input)
        second_conv = Conv1D(filters=filters, kernel_size=kernel_size, activation='tanh')(first_conv)
        heads.append(Flatten()(second_conv))

    merged = Concatenate(axis=1)(heads)
    as_sequence = Reshape((heads[0].shape[1], channel_count))(merged)
    encoded = LSTM(100, activation='tanh')(as_sequence)
    repeated = RepeatVector(n_steps_out)(encoded)
    decoded = Bidirectional(LSTM(100, activation='tanh', return_sequences=True))(repeated)
    regularised = Dropout(0.2)(decoded)
    # Output width equals the number of input channels at every forecast step.
    outputs = Dense(channel_count, activation='linear')(regularised)

    model = Model(inputs=model_input, outputs=outputs)
    model.compile(loss='mse', optimizer='adam')
    return model, best_val_checkpoint, checkpoint_filepath

In [20]:
#Create Training Examples for all lookaheads
# Trains one model per monthly window WITHOUT the matrix-profile channel and
# writes weights, metrics and loss curves under cnn_best_10lookahead/.
# NOTE(review): resample_rate and y_hat_list are assigned here but not read in this cell.
resample_rate = '5T'
# Start_dates[k] pairs with end_dates[k].
# NOTE(review): the first two start dates are at 00:00:00 while the later ones
# are at 23:55:00 — confirm the mismatch is intended.
end_dates = ['2021-12-26 23:55:00', '2022-01-26 23:55:00', '2022-02-26 23:55:00', '2022-03-26 23:55:00', '2022-04-26 23:55:00'  ]
Start_dates = ['2021-11-26 00:00:00', '2021-12-26 00:00:00', '2022-01-26 23:55:00', '2022-02-26 23:55:00', '2022-03-26 23:55:00']

#end_dates = ['2022-01-26 23:55:00', '2022-03-26 23:55:00']
#Start_dates = ['2021-11-26 00:00:00', '2022-01-26 00:00:00']
inputs = ['min_gas_price', 'block_gas_5th_percentile', 'block_gas_95th_percentile', 'gas_used', 'base_fee_per_gas', 'transaction_count', 'size', 'Open', 'contracts']
#No of timesteps behind to forecast on, no of timesteps to forecast ahead
n_steps_in = 4032
n_steps_out = 10
#How many timesteps between start of training examples
step_interval = 1
n_step_lookahead = 1



y_hat_list=[]
train_loss_list=[]
val_loss_list=[]
training_metrics_dicts=[]
valdiation_metrics_dicts=[]
parameter_index=[]
month=0
# The outer loops hold a single (filters, kernel_size) combination.
for filters in [9]:
    for kernel_size in [7]:
            for month in [0, 1, 2, 3, 4]:
                n_steps_in = 4032
                n_step_lookahead = 1
                # The generate_* helper reads start_date / end_date as globals.
                start_date=Start_dates[month]
                end_date=end_dates[month]
                X_train, y_train, X_val, y_val, scaler = generate_training_val_examples_univariate_output(data)
                




                model, model_checkpoint_callback, checkpoint_filepath = LSTM_model(filters, kernel_size)
                train_history = model.fit(X_train, y_train,validation_data=(X_val, y_val), epochs=15, verbose=1, callbacks=[model_checkpoint_callback])
                train_loss_list.append(train_history.history['loss'])
                val_loss_list.append(train_history.history['val_loss'])

                # Restore the weights written by the checkpoint callback
                # (lowest val_loss) before predicting.
                model.load_weights(checkpoint_filepath)
                yhat_train=model.predict(X_train, verbose=1)
                yhat_val = model.predict(X_val, verbose=1)
                model.save_weights('cnn_best_10lookahead/' +str(month)+'/')

                # The metric helper reads the global `scaler` set above.
                training_metrics_dicts.append(descale_y_retrun_metrics_univariate_y(yhat_train, y_train))
                valdiation_metrics_dicts.append(descale_y_retrun_metrics_univariate_y(yhat_val, y_val))
                # Files are rewritten every month with the results so far.
                # NOTE(review): these are lists of DataFrames; presumably they
                # are stored as pickled object arrays — confirm how they are loaded.
                np.save("cnn_best_10lookahead/training_metrics.npy", training_metrics_dicts)
                np.save("cnn_best_10lookahead/val_metrics.npy", valdiation_metrics_dicts)

                parameter_index.append([filters, kernel_size])

                keras.backend.clear_session()

                pd.DataFrame(train_loss_list).to_csv('cnn_best_10lookahead/train_loss')
                pd.DataFrame(val_loss_list).to_csv('cnn_best_10lookahead/val_loss')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [21]:
# Display the validation metric tables from the run without the matrix profile
# (one DataFrame per month, one row per forecast step).
valdiation_metrics_dicts

[         RMSE         MSE        MAE      MAPE        R2
 1   17.156416  294.342598  12.293846  0.186768  0.485840
 2   17.332388  300.411689  12.300291  0.183775  0.475217
 3   17.534323  307.452499  12.347169  0.182476  0.462948
 4   17.793027  316.591811  12.623060  0.185456  0.446830
 5   18.022723  324.818541  12.846932  0.188649  0.432033
 6   18.264434  333.589561  13.116082  0.193610  0.416443
 7   18.438039  339.961294  13.434818  0.200742  0.405371
 8   18.790016  353.064718  13.947682  0.211944  0.382604
 9   19.298107  372.416946  14.674824  0.227697  0.349026
 10  20.009536  400.381529  15.653866  0.248524  0.300454,
          RMSE          MSE        MAE      MAPE        R2
 1   39.871278  1589.718802  21.935246  0.137405  0.583111
 2   42.194250  1780.354755  24.276039  0.155406  0.533239
 3   43.686297  1908.492557  25.809123  0.167490  0.499568
 4   45.051703  2029.655945  26.930740  0.175913  0.467908
 5   46.081476  2123.502393  28.005436  0.183963  0.443462
 6   46

In [19]:
# Re-saves the metric lists and validation losses to cnn_best_10lookahead/
# outside the training loop, using whatever the globals currently hold.
# NOTE(review): this repeats the tail of the training loop, appends to
# parameter_index again on every run, and its execution count (In [19]) is
# lower than the training cell's (In [20]) — presumably a leftover; confirm
# whether it is still needed.
np.save("cnn_best_10lookahead/training_metrics.npy", training_metrics_dicts)
np.save("cnn_best_10lookahead/val_metrics.npy", valdiation_metrics_dicts)

parameter_index.append([filters, kernel_size])

keras.backend.clear_session()

pd.DataFrame(val_loss_list).to_csv('cnn_best_10lookahead/val_loss')

In [None]:
# Display the current validation metric tables (this cell has no recorded execution).
valdiation_metrics_dicts

In [26]:
def LSTM_model(filters, kernel_size):
    """Build the multi-head Conv1D -> LSTM -> bidirectional-LSTM forecaster.

    Same architecture as the earlier definitions of this name, except that the
    checkpoint callback is created with ``save_weights_only=False``.

    Reads the notebook globals ``n_steps_in``, ``n_steps_out`` and
    ``X_train`` (only its last-axis size is used).

    Returns ``(model, model_checkpoint_callback, checkpoint_filepath)``.
    """
    checkpoint_filepath = './cnn/'
    best_val_checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=False,
        monitor='val_loss',
        mode='min',
        save_best_only=True)

    channel_count = X_train.shape[2]
    model_input = Input(shape=(n_steps_in, channel_count))

    def build_head():
        # Two stacked convolutions over the input, flattened to a vector.
        first_conv = Conv1D(filters=filters, kernel_size=kernel_size, activation='tanh')(model_input)
        second_conv = Conv1D(filters=filters, kernel_size=kernel_size, activation='tanh')(first_conv)
        return Flatten()(second_conv)

    # One head per input channel.
    # NOTE(review): every head receives the full multi-channel input rather
    # than a single channel — confirm this is the intended "multi-head" design.
    heads = [build_head() for _ in range(channel_count)]

    merged = Concatenate(axis=1)(heads)
    per_head_width = heads[0].shape[1]
    as_sequence = Reshape((per_head_width, channel_count))(merged)
    encoded = LSTM(100, activation='tanh')(as_sequence)
    repeated = RepeatVector(n_steps_out)(encoded)
    decoded = Bidirectional(LSTM(100, activation='tanh', return_sequences=True))(repeated)
    regularised = Dropout(0.2)(decoded)
    # Output width equals the number of input channels at every forecast step.
    outputs = Dense(channel_count, activation='linear')(regularised)

    model = Model(inputs=model_input, outputs=outputs)
    model.compile(loss='mse', optimizer='adam')
    return model, best_val_checkpoint, checkpoint_filepath

In [None]:
def add_mp_reversed(data, window):
    """Append a standardised matrix-profile channel computed on the time-reversed feature 0.

    ``data`` is indexed as a 3-D array (examples, timesteps, features). For
    each example, feature 0 is flipped along time, its matrix profile is
    computed with the given ``window``, and the profile is padded at the END
    with its own mean up to the original number of timesteps. The profiles
    are z-scored with the mean and standard deviation of all profiles in this
    call and attached as an extra feature; no timesteps are removed.

    NOTE(review): the profile is not flipped back after being computed on the
    reversed series, so its time order may not line up with the other
    features — confirm this is intended.
    """
    example_count, step_count = data.shape[0], data.shape[1]

    padded_profiles = []
    for series in data[:, :, 0]:
        reversed_profile = mp.compute(np.flip(series, axis=0), window, n_jobs=4)['mp']
        # The profile is shorter than the series (per the original note), so
        # the tail is filled with the profile mean to restore the length.
        tail_pad = [mean(reversed_profile)] * (step_count - len(reversed_profile))
        padded_profiles.append(np.append(reversed_profile, tail_pad))

    mp_array = np.array(padded_profiles).reshape(example_count, step_count)
    # z-score with statistics from this array only
    centred = mp_array - mean(mp_array)
    std_array = (centred / np.std(mp_array)).reshape(example_count, step_count, 1)

    # Attach the profile as one more feature channel
    return np.concatenate((data, std_array), axis=2)

In [55]:
# Reset Keras' global state.
# NOTE(review): this cell's execution count (In [55]) is higher than that of
# the training cell below it (In [54]), i.e. it was run out of document order.
keras.backend.clear_session()

In [54]:
#Create Training Examples for all lookaheads
# Single three-month window, 5-step forecast, with the reversed matrix-profile
# channel; artefacts are written under mp_cnn_best2/.
# NOTE(review): the recorded output shows this run ended in a KeyboardInterrupt
# during epoch 2, so the files below may not have been written by this run.
# NOTE(review): resample_rate and y_hat_list are assigned here but not read in this cell.
resample_rate = '5T'
end_dates = ['2022-02-26 23:55:00'  ]
Start_dates = ['2021-11-26 00:00:00']

#end_dates = ['2022-01-26 23:55:00', '2022-03-26 23:55:00']
#Start_dates = ['2021-11-26 00:00:00', '2022-01-26 00:00:00']
inputs = ['min_gas_price', 'block_gas_5th_percentile', 'block_gas_95th_percentile', 'gas_used', 'base_fee_per_gas', 'transaction_count', 'size', 'Open', 'contracts']
#No of timesteps behind to forecast on, no of timesteps to forecast ahead
# add_mp_reversed does not drop timesteps, so no extra window length is added here.
n_steps_in = 4032
n_steps_out = 5
#How many timesteps between start of training examples
step_interval = 1
n_step_lookahead = 1
mp_window = 288


y_hat_list=[]
train_loss_list=[]
val_loss_list=[]
training_metrics_dicts=[]
valdiation_metrics_dicts=[]
parameter_index=[]
month=0
# The outer loops hold a single (filters, kernel_size) combination.
for filters in [9]:
    for kernel_size in [7]:
            for month in [0]:
                n_step_lookahead = 1
                # The generate_* helper reads start_date / end_date as globals.
                start_date=Start_dates[month]
                end_date=end_dates[month]
                X_train, y_train, X_val, y_val, scaler = generate_training_val_examples_univariate_output(data)
                X_train = add_mp_reversed(X_train, mp_window)
                X_val = add_mp_reversed(X_val, mp_window)




                model, model_checkpoint_callback, checkpoint_filepath = LSTM_model(filters, kernel_size)
                train_history = model.fit(X_train, y_train,validation_data=(X_val, y_val), epochs=15, verbose=1, callbacks=[model_checkpoint_callback])
                train_loss_list.append(train_history.history['loss'])
                val_loss_list.append(train_history.history['val_loss'])

                # Reload from the checkpoint path written by the callback
                # (lowest val_loss) before predicting.
                model.load_weights(checkpoint_filepath)
                yhat_train=model.predict(X_train, verbose=1)
                yhat_val = model.predict(X_val, verbose=1)
                # Unlike the earlier training cells, the whole model is saved, not just the weights.
                model.save('mp_cnn_best2/' +str(month)+'/')

                # The metric helper reads the global `scaler` set above.
                training_metrics_dicts.append(descale_y_retrun_metrics_univariate_y(yhat_train, y_train))
                valdiation_metrics_dicts.append(descale_y_retrun_metrics_univariate_y(yhat_val, y_val))
                # NOTE(review): these are lists of DataFrames; presumably they
                # are stored as pickled object arrays — confirm how they are loaded.
                np.save("mp_cnn_best2/training_metrics.npy", training_metrics_dicts)
                np.save("mp_cnn_best2/val_metrics.npy", valdiation_metrics_dicts)

                parameter_index.append([filters, kernel_size])

                keras.backend.clear_session()

                pd.DataFrame(train_loss_list).to_csv('mp_cnn_best2/train_loss')
                pd.DataFrame(val_loss_list).to_csv('mp_cnn_best2/val_loss')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 1/15



INFO:tensorflow:Assets written to: ./cnn\assets


INFO:tensorflow:Assets written to: ./cnn\assets


Epoch 2/15
 29/498 [>.............................] - ETA: 2:18 - loss: 0.4889

KeyboardInterrupt: 

In [50]:
def LSTM_model(filters, kernel_size):
    """Build the multi-head Conv1D -> pooling -> LSTM -> bidirectional-LSTM forecaster.

    Variant of the earlier definitions of this name: each head is average
    pooled (size 4, stride 4) before flattening, dropout is 0.3, and the
    checkpoint callback is created with ``save_weights_only=False``.

    Reads the notebook globals ``n_steps_in``, ``n_steps_out`` and
    ``X_train`` (only its last-axis size is used).

    Parameters
    ----------
    filters : number of filters for both Conv1D layers of every head.
    kernel_size : kernel size for both Conv1D layers of every head.

    Returns
    -------
    (model, model_checkpoint_callback, checkpoint_filepath)
    """
    checkpoint_filepath='./cnn/'
    model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_loss',
    mode='min',
    save_best_only=True)

    n_channels = X_train.shape[2]
    input_layer = Input(shape=(n_steps_in, n_channels))
    head_list = []
    # NOTE(review): every head receives the full multi-channel input rather
    # than a single channel — confirm this is the intended "multi-head" design.
    for _ in range(n_channels):
        conv_layer_head = Conv1D(filters=filters, kernel_size=kernel_size, activation='tanh')(input_layer)
        conv_layer_head_2 = Conv1D(filters=filters, kernel_size=kernel_size, activation='tanh')(conv_layer_head)

        # Fix: pool the output of the SECOND convolution. The original pooled
        # conv_layer_head, which left conv_layer_head_2 disconnected from the
        # model graph (built but never used).
        pooled = AveragePooling1D(pool_size=4, strides=4)(conv_layer_head_2)
        conv_layer_flatten = Flatten()(pooled)
        head_list.append(conv_layer_flatten)

    concat_cnn = Concatenate(axis=1)(head_list)
    reshape = Reshape((head_list[0].shape[1], n_channels))(concat_cnn)
    lstm = LSTM(100, activation='tanh')(reshape)
    repeat = RepeatVector(n_steps_out)(lstm)
    lstm_2 = (Bidirectional(LSTM(100, activation='tanh', return_sequences=True)))(repeat)
    dropout = Dropout(0.3)(lstm_2)
    # Output width equals the number of input channels at every forecast step.
    dense = Dense(n_channels, activation='linear')(dropout)
    model = Model(inputs=input_layer, outputs=dense)
    model.compile(loss='mse', optimizer='adam')
    return model, model_checkpoint_callback, checkpoint_filepath

In [48]:
# Build a model with the current LSTM_model definition and train it on the
# X_train / y_train / X_val / y_val, filters and kernel_size left in the
# kernel by the previous training cell.
# NOTE(review): this cell's execution count (In [48]) is lower than that of
# the LSTM_model cell above it (In [50]), and the recorded run ended in a
# KeyboardInterrupt — it will not reproduce under Restart & Run All as-is.
model, model_checkpoint_callback, checkpoint_filepath = LSTM_model(filters, kernel_size)
train_history = model.fit(X_train, y_train,validation_data=(X_val, y_val), epochs=15, verbose=1, callbacks=[model_checkpoint_callback])

Epoch 1/15












INFO:tensorflow:Assets written to: ./cnn\assets


INFO:tensorflow:Assets written to: ./cnn\assets


Epoch 2/15
Epoch 3/15
Epoch 4/15

KeyboardInterrupt: 