In [2]:
import tensorflow as tf
from tensorflow import keras
from keras import models, layers
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization, Conv3D
import yaml


In [20]:
with open('configs/config_file.yaml') as file:
    params = yaml.safe_load(file)

print(params)

window_size = params['training']['window_size']
h_step = params['forecast']['h_step']
patience = params['training']['patience']
epsilon = params['training']['epsilon']
batch_size = params['training']['batch_size']
epochs = params['training']['epochs']

run = params['model']['run']
learning_rate = params['model']['lr']
covariate_columns = params['model']['covariates']

{'training': {'window_size': 21, 'batch_size': 32, 'patience': 15, 'epsilon': 1e-06, 'random_state': 42, 'epochs': 100, 'covariates': 'None'}, 'model': {'run': 'long_ttm', 'filters': 2, 'kernel_size': [2, 2], 'strides': 1, 'kernel_initializer': 'glorot_uniform', 'recurrent_initializer': 'orthogonal', 'optimizer': 'adam', 'lr': 0.001, 'covariates': ['VIX', 'VVIX', 'SKEW', 'RVOL', 'TMS', 'CRS', 'EPU', 'ADS']}, 'forecast': {'h_step': 1}}


In [30]:
covar_df = pd.read_excel('data/final/covariates_train.xlsx')
covar_df = covar_df.rename(columns={'Date':'date'})
# covars_val = pd.read_excel('data/final/covariates_validation.xlsx')
covar_df = covar_df[['date'] + covariate_columns]
# Difference between the two is, that the train file uses standardization calculated based on the train set
# validation file standardizes using calculation based on train + validation set
# Here, we need the train set

In [31]:
# Let's reshape our input data of the thing... we are going to need labels, and we are going to need train surface.
# The labels will be, the smoothed IVs of our data
# The train will be the dimensions, with time x ttm x moneyness encoders
# If we have covariates, the channels will be larger? -> Yes, starting channels will be added to the layers

# Load the data first
if run == 'short_ttm':
    data_train = pd.read_csv('data/final/smoothed/data_train.csv')
    data_val = pd.read_csv('data/final/evaluation/validation_set.csv')
    data_test = pd.read_csv('data/final/evaluation/test_set.csv')
elif run == 'long_ttm':
    data_train = pd.read_csv('data/final/smoothed/data_train_long.csv')
    data_val = pd.read_csv('data/final/evaluation/validation_set_long.csv')
    data_test = pd.read_csv('data/final/evaluation/test_set_long.csv')
else:
    print('Select a dataset')


In [32]:
# add the covariates to the columns of the datasets, based on date
# covar_df['date'] = pd.to_datetime(covar_df['date'])
data_train['date'] = pd.to_datetime(data_train['date'])
data_train = pd.merge(data_train, covar_df, on='date', how='left')

In [None]:
print(set(data_train['date'])- set(covar_df['date']))
print(len(set(data_train['date'])- set(covar_df['date'])))

#Things to check; which days are missing, how we can include them, and if the covars are recorded in the morning or evening
# Looks like 93 dates missing, but how can this be?

{Timestamp('2014-07-17 00:00:00'), Timestamp('2014-11-20 00:00:00'), Timestamp('2013-04-12 00:00:00'), Timestamp('2013-01-11 00:00:00'), Timestamp('2013-03-14 00:00:00'), Timestamp('2015-06-18 00:00:00'), Timestamp('2015-01-09 00:00:00'), Timestamp('2012-11-15 00:00:00'), Timestamp('2012-08-10 00:00:00'), Timestamp('2013-10-17 00:00:00'), Timestamp('2013-11-14 00:00:00'), Timestamp('2013-12-19 00:00:00'), Timestamp('2015-11-19 00:00:00'), Timestamp('2014-12-12 00:00:00'), Timestamp('2014-06-19 00:00:00'), Timestamp('2014-06-13 00:00:00'), Timestamp('2015-04-02 00:00:00'), Timestamp('2013-05-16 00:00:00'), Timestamp('2015-07-16 00:00:00'), Timestamp('2013-01-17 00:00:00'), Timestamp('2015-07-02 00:00:00'), Timestamp('2012-10-12 00:00:00'), Timestamp('2013-09-19 00:00:00'), Timestamp('2012-09-14 00:00:00'), Timestamp('2012-01-19 00:00:00'), Timestamp('2012-12-20 00:00:00'), Timestamp('2012-07-13 00:00:00'), Timestamp('2013-10-11 00:00:00'), Timestamp('2013-07-12 00:00:00'), Timestamp('20

In [35]:
data_train[covariate_columns].isna().sum()

VIX      88204
VVIX     88938
SKEW     97518
RVOL     95239
TMS     126410
CRS      88204
EPU      88204
ADS      88204
dtype: int64

In [None]:
print(covar_df[covariate_columns].isna().sum()) # Not that many nans here

VIX      0
VVIX     1
SKEW     3
RVOL     2
TMS     21
CRS      0
EPU      0
ADS      0
dtype: int64


In [34]:
def process(data):
    data = data.drop(columns="Unnamed: 0")

    # Let's implement the thing, where deep OTM, OTM, ATM, ITM, deep ITM is a thing

    # we have to discriminate between calls and puts
    # Coding; deep OTM = 1, OTM =2, ATM =3, ITM = 4, deep ITM=5 
    # outliers, sort of?
    print(data.shape)
    data = data[data['moneyness'] >= 0.8]
    data = data[data['moneyness'] <= 1.6]
    print(data.shape)

    # Also consider what to do with low volume... probably include them and acknowledge them as a limitation

    data.loc[(data['cp_flag']=='C') & (data['moneyness'] <0.90), 'moneyness_enc'] = 1
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.97), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.97) & (data['moneyness'] < 1.03), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.03) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.10), 'moneyness_enc'] = 5

    data.loc[(data['cp_flag']=='P') & (data['moneyness'] <0.90), 'moneyness_enc'] = 5
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.97), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.97) & (data['moneyness'] < 1.03), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.03) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.10), 'moneyness_enc'] = 1
    return data

data_train = process(data_train)
data_val = process(data_val)
data_test = process(data_test)

# Thing to fix: The moneyness encoded, results in multiple impl volatility values for the same moneyness, maturity
# combination. To fix this, take the average, and omit the others

(5673528, 29)
(5517184, 29)
(300251, 28)
(294479, 28)
(1618161, 28)
(1553959, 28)


In [35]:
def frame_to_numpy(data, eval=False):
    # Convert 'time_step' to datetime
    data['time_step'] = pd.to_datetime(data['date'])

    # Create a time_step index (e.g., from the first unique date)
    time_step_index = pd.to_datetime(data['time_step']).dt.strftime('%Y-%m-%d').unique()

    # Map time_step dates to integer index
    data['time_step_idx'] = data['time_step'].apply(lambda x: np.where(time_step_index == x.strftime('%Y-%m-%d'))[0][0])
    print(data['time_step_idx'])

    maturity_values = np.sort(data['maturity'].unique())
    maturity_to_idx = {mat: i for i, mat in enumerate(maturity_values)}


    time_steps = len(time_step_index)
    money_dim = len(data['moneyness_enc'].unique())
    ttm_dim = len(data['maturity'].unique())

    # Create an empty numpy array with the shape (time_steps, height_dim, width_dim)
    IV_array = np.zeros((time_steps, money_dim, ttm_dim, ), dtype=np.float32)

    # Populate the numpy array with values from the DataFrame
    for idx, row in data.iterrows():
        time_step_idx = row['time_step_idx']
        height = int(row['moneyness_enc']) - 1 
        width = maturity_to_idx[row['maturity']]
        
        if eval==False:
            value = row['IV_smooth']
        else:
            value = row['impl_volatility']
            
        # print(time_step_idx, height, width, value)
        # Assign the value to the corresponding position in the numpy array
        IV_array[time_step_idx, height, width] = value
        
    IV_array = IV_array.reshape((IV_array.shape[0], money_dim, ttm_dim, 1))
    return IV_array

In [36]:
IV_train = frame_to_numpy(data_train)
IV_val = frame_to_numpy(data_val, eval=True)
IV_test = frame_to_numpy(data_test, eval=True)
# 7 min on pc, for long set
# Write array to data folder?

0             0
1             0
2             0
3             0
4             0
           ... 
5673523    2415
5673524    2415
5673525    2415
5673526    2415
5673527    2415
Name: time_step_idx, Length: 5517184, dtype: int64
0           0
1           0
2           0
3           0
4           0
         ... 
300246    217
300247    217
300248    217
300249    217
300250    217
Name: time_step_idx, Length: 294479, dtype: int64
0            0
1            0
2            0
3            0
4            0
          ... 
1618156    252
1618157    252
1618158    252
1618159    252
1618160    252
Name: time_step_idx, Length: 1553959, dtype: int64


In [37]:
print(IV_train.shape, IV_val.shape, IV_test.shape)

(2416, 5, 260, 1) (218, 5, 260, 1) (253, 5, 260, 1)


In [38]:
# # Convert 'time_step' to datetime
# data_train['time_step'] = pd.to_datetime(data_train['date'])

# # Create a time_step index (e.g., from the first unique date)
# time_step_index = pd.to_datetime(data_train['time_step']).dt.strftime('%Y-%m-%d').unique()

# # Map time_step dates to integer index
# data_train['time_step_idx'] = data_train['time_step'].apply(lambda x: np.where(time_step_index == x.strftime('%Y-%m-%d'))[0][0])
# print(data_train['time_step_idx'])
# ttm_dim = 5
# money_dim = 5
# time_steps = len(time_step_index)

# # Create an empty numpy array with the shape (time_steps, height_dim, width_dim)
# IV_array = np.zeros((time_steps, ttm_dim, money_dim))

# # Populate the numpy array with values from the DataFrame
# for idx, row in data_train.iterrows():
#     time_step_idx = row['time_step_idx']
#     width = row['maturity'] - 1 
#     height = int(row['moneyness_enc']) - 1 
#     value = row['IV_smooth']
#     print(time_step_idx, width, height, value)
#     # Assign the value to the corresponding position in the numpy array
#     IV_array[time_step_idx, height, width] = value

In [39]:
# print(IV_array.shape)

In [40]:
# IV_array = IV_array.reshape((IV_array.shape[0], 5, 5, 1))
# print(IV_array.shape)

In [41]:
# window_size = 21 # about one month
# labels = IV_array[1:]
# X = IV_array[:-1]

dataset_train = tf.keras.utils.timeseries_dataset_from_array(
    data=IV_train[:-1],
    targets=IV_train[window_size:],
    sequence_length=window_size,
    batch_size=batch_size
)

# Add the last timepoints of the dataset to the validation set, for the computation of the
# validation set performance is calculated within the window size too
# So the validation set should start from the end of the training set

IV_val_input = np.concatenate((IV_train[-window_size:], IV_val), axis=0)

dataset_val = tf.keras.utils.timeseries_dataset_from_array(
    data=IV_val_input[:-1],
    targets=IV_val_input[window_size:],
    sequence_length=window_size,
    batch_size=batch_size
)

IV_test_input = np.concatenate((IV_val[-window_size:], IV_test), axis=0)

dataset_test= tf.keras.utils.timeseries_dataset_from_array(
    data=IV_test_input[:-1],
    targets=IV_test_input[window_size:],
    sequence_length=window_size,
    batch_size=batch_size
)

In [42]:
# def create_model(n_params, 
#                  dropout, 
#                  recurrent_dropout, 
#                  n_convlstm_layers = 2,
#                  hidden_activation =  tf.keras.activations.tanh, 
#                  optimizer = keras.optimizers.Adam()):

#     # input layer
#     input_layer = layers.Input(shape= (None,5,5,1) )
    
#     # lstm layers
#     lstm = input_layer
#     for i in range( n_convlstm_layers ):
#         lstm =  layers.ConvLSTM2D( 
#             kernel_size= (1,1), 
#             filters=n_params, 
#             data_format= 'channels_last', 
#             return_sequences = i<n_convlstm_layers-1,
#             activation=hidden_activation,
#             padding = "same",
#             dropout=dropout, 
#             recurrent_dropout=recurrent_dropout
#         )( lstm )
#         lstm = layers.BatchNormalization()(lstm)    

#     output = layers.Conv2D(
#         filters=1, kernel_size=(1, 1), activation="linear", padding="same"
#     )( lstm )
#     output_layer = layers.Reshape((5,5))(output)

#     # compile
#     model = models.Model( input_layer, output_layer )
#     model.compile(
#         loss= "MAE",
#         optimizer=optimizer, 
#     ) 
    
#     print(model.summary())
#     return model
# model = create_model(n_params=10,dropout=0.1,recurrent_dropout=0.1,n_convlstm_layers=2)


In [43]:
# def train_model(model, 
#                 x_train, 
#                 y_train,
#                 verbose = True, 
#                 save : "dir" = False,
#                 training_kwarg_overwrites : "dict" = {} ):
    
#     # train until we run out of improvement
#     callbacks = [
#         keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=5),
#         keras.callbacks.EarlyStopping(monitor="val_loss", patience=15),
#     ]
    
#     # train model
#     training_kwargs = {
#         "x" : x_train,
#         "y" : y_train, #dataset["train"]["y_scaled"],
#         "epochs" : 200,
#         "batch_size" : 64,
#         "verbose" : verbose,
#         "validation_split" : 0.2,
#         "callbacks" : callbacks,
#     } 
#     training_kwargs.update(training_kwarg_overwrites)
#     train_hist = model.fit( **training_kwargs )
    
    
#     if save:
#         Path(save).mkdir(parents=True, exist_ok=True) # make a home for the models
#         train_start, train_end = [ f( dataset["dates"]["train"] ) for f in (min,max) ]
#         model_name = "-".join( date.strftime("%Y%m%d") for date in [train_start, train_end] )
#         model.save( save+model_name )
        
#     return model, train_hist



In [44]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization, Flatten, Dense

# time_steps = window_size
# height = 5
# width = 5
# channels = 1

# # Model definition
# model = Sequential([
#     ConvLSTM2D(filters=64, kernel_size=(3,3), activation='relu', 
#                return_sequences=True, input_shape=(time_steps, height, width, channels)),
#     BatchNormalization(),
#     ConvLSTM2D(filters=32, kernel_size=(3,3), activation='relu', return_sequences=False),
#     Flatten(),
#     Dense(128, activation='relu'),
#     Dense(1)  # Predicting IV at a future time
# ])

# # Compile model
# model.compile(optimizer='adam', loss='mse')
# print(model.summary())

In [45]:
TIME_STEPS = window_size
HEIGHT = len(data_train['moneyness_enc'].unique())
WIDTH = len(data_train['maturity'].unique())
CHANNELS = 1

model = Sequential()

# ConvLSTM2D expects 5D input: (batch, time, height, width, channels)
model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3),
                     padding='same', return_sequences=True,
                     input_shape=(TIME_STEPS, HEIGHT, WIDTH, CHANNELS)))
model.add(BatchNormalization())

model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3),
                     padding='same', return_sequences=False))
model.add(BatchNormalization())

# Final 3D convolution to map to the next frame
model.add(tf.keras.layers.Conv2D(filters=1, kernel_size=(1, 1),
                                 activation='sigmoid', padding='same'))

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=epsilon)
model.compile(loss='mse', optimizer=optimizer)

# Double check the architecture, and the activaiton function
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d_2 (ConvLSTM2D)  (None, 21, 5, 260, 64)    150016    
                                                                 
 batch_normalization_2 (Batc  (None, 21, 5, 260, 64)   256       
 hNormalization)                                                 
                                                                 
 conv_lstm2d_3 (ConvLSTM2D)  (None, 5, 260, 64)        295168    
                                                                 
 batch_normalization_3 (Batc  (None, 5, 260, 64)       256       
 hNormalization)                                                 
                                                                 
 conv2d_1 (Conv2D)           (None, 5, 260, 1)         65        
                                                                 
Total params: 445,761
Trainable params: 445,505
Non-tr

In [46]:
model.fit(dataset_train, epochs=epochs, validation_data=dataset_val, callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    patience=patience,
                                                    mode='min')])

# Takes about 40 min on dell xps laptop, 6.5 min on PC for short term
# 24 min on PC for long term

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x157c19f00d0>

In [47]:
# Next step; functions for the IVRMSE and R_oos!!!!!
# H-step ahead performance!!!!!!
# WRITE IT ALL TO RESULTS!!!!!!
# Lower learning rate, beneficial, or leave it? Probably leave it
# All the HYPERPARAMETERS!!!!!! kernel strides, window sizes, parameters, layers, ALL OF THEM
# COVARIATES!!!!!!, Do all the hyperparameters again, and then only save the TEST PERFORMANCE
# MODEL ARCHITECTURE!!!!
# Investigate what happens, if you leave it as 0... We cannot do the interpolation properly, to be frank.

#THEN LONG TERM !!!!!!
# Transformer models!!!!!!!!!!!!!!!!
pred_val = model.predict(dataset_val)
pred_test = model.predict(dataset_test)



In [48]:
# Put it in the compile, but also call it afterward
# double check formula and also make it for R_oos
# Should also work per h-step ahead 
def calculate_ivrmse(y_true, y_pred, all_points=False):
    if not all_points:
        ivrmse = tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))
    else:
        sq_error = tf.square(y_true - y_pred)
        error_surface = tf.reduce_mean(sq_error, axis=[1 , 2])
        ivrmse = tf.sqrt(error_surface)

    return ivrmse.numpy()

# model.compile(optimizer='adam', loss='mse', metrics=[ivrmse_metric])


In [49]:
print(calculate_ivrmse(IV_val, pred_val))
print(calculate_ivrmse(IV_val, pred_val, all_points=False))
# The loss is less, than in the optimization.. probably a different metric, or formula than mse
# check values of the papers that are like yours

0.026743365
0.026743365


In [50]:
def calculate_r_oos(y_true, y_pred, all_points=False):
    if not all_points:
        ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
        mean_IV = tf.reduce_mean(y_true, axis=[1, 2], keepdims=True) # should be shape of 114 long
        # print(mean_IV[0], mean_IV[1], mean_IV[2])
        # print(y_true[0], y_true[1], y_true[2])
        ss_tot = tf.reduce_sum(tf.square(y_true - mean_IV))
        # print((y_true - mean_IV)[0])
        r2 = 1 - ss_res/ss_tot
    else:
        ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=[1, 2])
        mean_IV = tf.reduce_mean(y_true, axis=[1, 2], keepdims=True)
        ss_tot = tf.reduce_sum(tf.square(y_true - mean_IV), axis=[1, 2])
        r2 = 1 - ss_res/ss_tot
    return r2.numpy()

print(calculate_r_oos(IV_val, pred_val))

0.6964496


In [51]:
# Train model again on BOTH train and validation, and then investigate the TEST PERFORMANCE!!

In [52]:
print(calculate_ivrmse(IV_test, pred_test))
print(calculate_r_oos(IV_test, pred_test))

0.05695225
0.67053


In [53]:
def get_results(y_real, y_pred):
    ivrmse = calculate_ivrmse(y_real, y_pred)
    ivrmse_h = calculate_ivrmse(y_real, y_pred, all_points=True)
    r_oos = calculate_r_oos(y_real, y_pred)
    r_oos_h = calculate_r_oos(y_real, y_pred, all_points=True)

    return ivrmse, ivrmse_h, r_oos, r_oos_h

In [54]:
def write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, surface, surface_pred):

    ivrmse_path = folder_path / Path("ivrmse")
    r_oos_path = folder_path / Path("r_oos")
    ivrmse_h_path = folder_path / Path("ivrmse_h")
    r_oos_h_path = folder_path / Path("r_oos_h")
    surface_path = folder_path / Path("surface")
    surface_pred_path = folder_path / Path("surface_pred")

    if not ivrmse_path.exists():
        ivrmse_path.mkdir(parents=True, exist_ok=True)

    if not r_oos_path.exists():
        r_oos_path.mkdir(parents=True, exist_ok=True)

    if not ivrmse_h_path.exists():
        ivrmse_h_path.mkdir(parents=True, exist_ok=True)

    if not r_oos_h_path.exists():
        r_oos_h_path.mkdir(parents=True, exist_ok=True)

    if not surface_path.exists():
        surface_path.mkdir(parents=True, exist_ok=True)

    if not surface_pred_path.exists():
        surface_pred_path.mkdir(parents=True, exist_ok=True)

    np.save(ivrmse_path / f"{window_size}_{h_step}.npy", ivrmse)
    np.save(r_oos_path / f"{window_size}_{h_step}.npy", r_oos)
    np.save(ivrmse_h_path / f"{window_size}_{h_step}.npy", ivrmse_h)
    np.save(r_oos_h_path / f"{window_size}_{h_step}.npy", r_oos_h)
    np.save(surface_path/ f"{window_size}_{h_step}.npy", surface)
    np.save(surface_pred_path / f"{window_size}_{h_step}.npy", surface_pred)

In [55]:
folder_path = Path(f"results/test_{run}")
ivrmse, ivrmse_h, r_oos, r_oos_h = get_results(IV_test, pred_test)
write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, IV_test, pred_test)

In [56]:
print(r_oos_h)

[[ 0.2536705 ]
 [ 0.8199241 ]
 [ 0.800799  ]
 [ 0.85355824]
 [ 0.804582  ]
 [ 0.7611677 ]
 [ 0.84622806]
 [ 0.7885027 ]
 [ 0.8726183 ]
 [ 0.83219767]
 [ 0.8029318 ]
 [ 0.86571825]
 [ 0.77316964]
 [ 0.7527638 ]
 [ 0.76095337]
 [ 0.78598344]
 [ 0.8546557 ]
 [ 0.86272913]
 [ 0.7356323 ]
 [ 0.79146326]
 [ 0.8553258 ]
 [ 0.7752081 ]
 [ 0.64446795]
 [ 0.5794525 ]
 [ 0.81432474]
 [ 0.85043055]
 [ 0.84998024]
 [ 0.7686269 ]
 [ 0.79091096]
 [ 0.56344134]
 [ 0.82608384]
 [ 0.83274114]
 [ 0.73001575]
 [ 0.7314482 ]
 [ 0.82104015]
 [-0.29882216]
 [ 0.7059878 ]
 [ 0.7372205 ]
 [ 0.5900607 ]
 [ 0.7503256 ]
 [ 0.83801675]
 [ 0.6299157 ]
 [ 0.82373875]
 [ 0.6074254 ]
 [ 0.8452396 ]
 [ 0.75253874]
 [ 0.811813  ]
 [ 0.90128154]
 [ 0.74461067]
 [ 0.8762873 ]
 [ 0.69524664]
 [ 0.81041044]
 [ 0.74541485]
 [ 0.73105246]
 [ 0.8134964 ]
 [ 0.7025858 ]
 [ 0.6236203 ]
 [ 0.54182684]
 [ 0.84359205]
 [ 0.73678946]
 [ 0.70554686]
 [ 0.7227696 ]
 [ 0.7085861 ]
 [ 0.8282721 ]
 [ 0.7123053 ]
 [ 0.06441087]
 [ 0.68039

In [57]:
folder_path = Path(f"results/validation_{run}")
ivrmse, ivrmse_h, r_oos, r_oos_h = get_results(IV_val, pred_val)
write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, IV_test, pred_test)

In [58]:
# For the h-step ahead forecast, an autoregressive approach is used
# Predict the one step ahead forecast (done now)
# After, use this one step ahead forecast in the predict command. 

In [59]:
print(pred_test.shape)

(253, 5, 260, 1)
