In [1]:
import tensorflow as tf
from tensorflow import keras
from keras import models, layers
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization, Conv3D
import yaml


In [2]:
# Load the experiment configuration; every tunable value used below comes from this file.
with open('configs/config_file.yaml') as config_file:
    params = yaml.safe_load(config_file)

print(params)

training_cfg = params['training']
model_cfg = params['model']

window_size = training_cfg['window_size']
h_step = params['forecast']['h_step']
patience = training_cfg['patience']
epsilon = training_cfg['epsilon']
batch_size = training_cfg['batch_size']
epochs = training_cfg['epochs']

run = model_cfg['run']
learning_rate = model_cfg['lr']
covariate_columns = model_cfg['covariates']

# Normalise "no covariates" to None. The config file spells it as the string 'None' in places,
# which would slip past the `is not None` checks used further down and break
# `['date'] + covariate_columns`.
if covariate_columns is None or covariate_columns == 'None' or len(covariate_columns) == 0:
    covariate_columns = None
elif isinstance(covariate_columns, str):
    # A single covariate given as a bare string is treated as a one-element list.
    covariate_columns = [covariate_columns]

{'training': {'window_size': 21, 'batch_size': 32, 'patience': 15, 'epsilon': 1e-06, 'random_state': 42, 'epochs': 100, 'covariates': 'None'}, 'model': {'run': 'long_ttm', 'filters': 2, 'kernel_size': [2, 2], 'strides': 1, 'kernel_initializer': 'glorot_uniform', 'recurrent_initializer': 'orthogonal', 'optimizer': 'adam', 'lr': 0.001, 'covariates': ['VIX', 'VVIX', 'SKEW']}, 'forecast': {'h_step': 1}}


In [3]:
# Reshape the input data into the tensors the model needs: labels and a training surface.
# The labels are the smoothed IVs of our data.
# The training surface is indexed by time, time-to-maturity (ttm) and encoded moneyness.
# If we have covariates, they are added as extra input channels, so the first layer sees more channels.

# Load the data first
# Both runs read the same set of files; the long-maturity variants only differ by a "_long" suffix.
RUN_SUFFIXES = {'short_ttm': '', 'long_ttm': '_long'}

if run not in RUN_SUFFIXES:
    # Stop here with a clear message instead of letting a later cell fail with a NameError.
    raise ValueError(f"Unknown run {run!r}; select one of {sorted(RUN_SUFFIXES)}")

suffix = RUN_SUFFIXES[run]

data_train = pd.read_csv(f'data/final/smoothed/data_train{suffix}.csv')
data_val = pd.read_csv(f'data/final/evaluation/validation_set{suffix}.csv')
data_test = pd.read_csv(f'data/final/evaluation/test_set{suffix}.csv')


def _load_covariates(path):
    """Read a covariate workbook and keep only the date key plus the configured covariates."""
    frame = pd.read_excel(path).rename(columns={'Date': 'date'})
    return frame[['date'] + list(covariate_columns)]


if covariate_columns is not None:
    covar_df = _load_covariates(f'data/final/covariates/covariates_train{suffix}.xlsx')
    covar_df_val = _load_covariates(f'data/final/covariates/covariates_validation{suffix}.xlsx')
    # NOTE(review): no covariate file is loaded for the test split — confirm where the
    # test-period covariates come from.
else:
    # Defined explicitly so later cells can tell "no covariates" apart from "cell not run".
    covar_df = None
    covar_df_val = None


In [4]:
# Parse the date columns so they can be used as merge keys and, later, as the time axis.
data_train['date'] = pd.to_datetime(data_train['date'])
data_val['date'] = pd.to_datetime(data_val['date'])
data_test['date'] = pd.to_datetime(data_test['date'])


def _attach_covariates(frame, covariates):
    """Left-join the daily covariates onto the option rows by date.

    Covariate columns already present in `frame` (from a previous run of this cell) are
    dropped first, so re-running the cell does not create `_x` / `_y` duplicates.
    """
    covariates = covariates.assign(date=pd.to_datetime(covariates['date']))
    stale = [col for col in covariates.columns if col != 'date' and col in frame.columns]
    return pd.merge(frame.drop(columns=stale), covariates, on='date', how='left')


# Only merge when covariates are configured; without them the frames are used as they are.
if covariate_columns is not None:
    data_train = _attach_covariates(data_train, covar_df)
    # The validation split gets the validation covariates (previously the train covariates
    # were merged here and covar_df_val was never used).
    data_val = _attach_covariates(data_val, covar_df_val)
    # FIXME(review): no test covariates are loaded anywhere in this notebook, so the test split
    # is still joined against the train covariates; dates missing from covar_df give NaN channels.
    data_test = _attach_covariates(data_test, covar_df)

In [5]:
def process(data):
    """Filter option rows by moneyness and add the 1-5 `moneyness_enc` bucket.

    Rows with moneyness outside [0.8, 1.6] are dropped. The remaining rows are bucketed at
    0.90 / 0.97 / 1.03 / 1.10; calls are coded 1..5 from lowest to highest moneyness and puts
    are coded in the reverse direction (5..1). Rows whose `cp_flag` is neither 'C' nor 'P'
    get NaN. The frame shape is printed before and after the moneyness filter.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain `moneyness` and `cp_flag`; an `Unnamed: 0` column is dropped if present.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with a float `moneyness_enc` column.
    """
    # errors="ignore" keeps the function re-runnable: the column is gone after the first pass.
    data = data.drop(columns="Unnamed: 0", errors="ignore")

    print(data.shape)
    in_range = (data['moneyness'] >= 0.8) & (data['moneyness'] <= 1.6)
    # .copy() so the column assignment below writes to an independent frame, not a slice view.
    data = data[in_range].copy()
    print(data.shape)

    # Also consider what to do with low volume... probably include them and acknowledge them as a limitation

    # digitize returns 0..4 for the five half-open intervals (lower edge inclusive); +1 gives 1..5.
    bucket = np.digitize(data['moneyness'].to_numpy(), [0.90, 0.97, 1.03, 1.10]) + 1
    is_call = (data['cp_flag'] == 'C').to_numpy()
    is_put = (data['cp_flag'] == 'P').to_numpy()

    # Puts use the mirrored code, so a given code means the same bucket label for both option types.
    data['moneyness_enc'] = np.select([is_call, is_put], [bucket, 6 - bucket], default=np.nan)
    return data

# Apply the same moneyness filter and encoding to all three splits; each call prints the
# frame shape before and after the filter.
data_train = process(data_train)
data_val = process(data_val)
data_test = process(data_test)

# TODO: the moneyness encoding maps several implied-volatility values onto the same
# (moneyness, maturity) combination. To fix this, take their average and omit the individual values.

(5673528, 32)
(5517184, 32)
(300251, 31)
(294479, 31)
(1618161, 31)
(1553959, 31)


In [6]:
# def frame_to_numpy(data, eval=False):
#     # Convert 'time_step' to datetime
#     data['time_step'] = pd.to_datetime(data['date'])

#     # Create a time_step index (e.g., from the first unique date)
#     time_step_index = pd.to_datetime(data['time_step']).dt.strftime('%Y-%m-%d').unique()

#     # Map time_step dates to integer index
#     data['time_step_idx'] = data['time_step'].apply(lambda x: np.where(time_step_index == x.strftime('%Y-%m-%d'))[0][0])
#     print(data['time_step_idx'])

#     maturity_values = np.sort(data['maturity'].unique())
#     maturity_to_idx = {mat: i for i, mat in enumerate(maturity_values)}

#     time_steps = len(time_step_index)
#     money_dim = len(data['moneyness_enc'].unique())
#     ttm_dim = len(data['maturity'].unique())

#     # Create an empty numpy array with the shape (time_steps, height_dim, width_dim)
#     IV_array = np.zeros((time_steps, money_dim, ttm_dim, ), dtype=np.float32)

#     # Populate the numpy array with values from the DataFrame
#     for idx, row in data.iterrows():
#         time_step_idx = row['time_step_idx']
#         height = int(row['moneyness_enc']) - 1 
#         width = maturity_to_idx[row['maturity']]
        
#         if eval==False:
#             value = row['IV_smooth']
#         else:
#             value = row['impl_volatility']
            
#         # print(time_step_idx, height, width, value)
#         # Assign the value to the corresponding position in the numpy array
#         IV_array[time_step_idx, height, width] = value
        
#     IV_array = IV_array.reshape((IV_array.shape[0], money_dim, ttm_dim, 1))
#     return IV_array

In [7]:
def frame_to_numpy(data, covariate_cols=None, eval=False):
    """Pivot long-format option rows into a dense (time, moneyness, maturity, channel) array.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns `date`, `maturity`, `moneyness_enc` (integer codes starting at 1),
        the value column selected by `eval`, and every name in `covariate_cols`.
    covariate_cols : list of str, optional
        Columns appended as extra channels after the IV channel, in the given order.
    eval : bool
        False reads `IV_smooth`; True reads `impl_volatility`.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (T, H, W, 1 + C). T is the number of distinct calendar days,
        numbered in order of first appearance in `data`; H is the highest moneyness code;
        W is the number of distinct maturities in ascending order. Cells without a row stay 0.
        When several rows fall in the same cell, the last row wins.

    Raises
    ------
    ValueError
        If `date`, `maturity` or `moneyness_enc` contain missing values, or a code is below 1.

    Notes
    -----
    `data` is not modified (no helper columns are added to it), and the work is vectorised
    instead of iterating over rows.
    """
    # dict.fromkeys drops repeated names while keeping the caller's order.
    covariate_cols = list(dict.fromkeys(covariate_cols)) if covariate_cols else []
    value_col = 'impl_volatility' if eval else 'IV_smooth'
    channel_cols = [value_col] + covariate_cols

    # Calendar day of each row -> position on the time axis (order of first appearance).
    days = pd.to_datetime(data['date']).dt.normalize()
    time_idx, unique_days = pd.factorize(days)

    # Maturities are laid out in ascending order along the width axis.
    maturity_idx, maturity_values = pd.factorize(data['maturity'], sort=True)

    moneyness_enc = data['moneyness_enc'].to_numpy(dtype=np.float64)

    # factorize marks missing keys with -1, which would silently index the last slot.
    if (time_idx < 0).any() or (maturity_idx < 0).any() or np.isnan(moneyness_enc).any():
        raise ValueError("date, maturity and moneyness_enc must not contain missing values")

    money_idx = moneyness_enc.astype(np.int64) - 1
    if (money_idx < 0).any():
        raise ValueError("moneyness_enc codes must be >= 1")

    time_steps = len(unique_days)
    ttm_dim = len(maturity_values)
    # Sized by the highest code rather than the count of distinct codes, so a split that
    # happens to miss a low bucket cannot index past the end of the axis.
    money_dim = int(money_idx.max()) + 1 if len(money_idx) else 0

    tensor = np.zeros((time_steps, money_dim, ttm_dim, len(channel_cols)))

    # Keep only the last row per cell: fancy-index assignment with repeated indices has no
    # guaranteed write order, so "last row wins" is made explicit here.
    cells = pd.DataFrame({'t': time_idx, 'h': money_idx, 'w': maturity_idx})
    is_last = ~cells.duplicated(keep='last').to_numpy()

    values = data[channel_cols].to_numpy(dtype=np.float64)
    tensor[time_idx[is_last], money_idx[is_last], maturity_idx[is_last]] = values[is_last]

    return tensor


In [8]:
# Build one tensor per split. Training uses the default value column (IV_smooth);
# validation and test pass eval=True and therefore read impl_volatility.
IV_train = frame_to_numpy(data_train, covariate_columns)
IV_val = frame_to_numpy(data_val, covariate_columns, eval=True)
IV_test = frame_to_numpy(data_test, covariate_columns, eval=True)

In [9]:
# IV_train = frame_to_numpy(data_train)
# IV_val = frame_to_numpy(data_val, eval=True)
# IV_test = frame_to_numpy(data_test, eval=True)
# 7 min on pc, for long set
# Write array to data folder?

In [10]:
# Sanity check: shapes of the train, validation and test tensors.
print(IV_train.shape, IV_val.shape, IV_test.shape)

(2416, 5, 260, 4) (218, 5, 260, 4) (253, 5, 260, 4)


In [11]:
# # Convert 'time_step' to datetime
# data_train['time_step'] = pd.to_datetime(data_train['date'])

# # Create a time_step index (e.g., from the first unique date)
# time_step_index = pd.to_datetime(data_train['time_step']).dt.strftime('%Y-%m-%d').unique()

# # Map time_step dates to integer index
# data_train['time_step_idx'] = data_train['time_step'].apply(lambda x: np.where(time_step_index == x.strftime('%Y-%m-%d'))[0][0])
# print(data_train['time_step_idx'])
# ttm_dim = 5
# money_dim = 5
# time_steps = len(time_step_index)

# # Create an empty numpy array with the shape (time_steps, height_dim, width_dim)
# IV_array = np.zeros((time_steps, ttm_dim, money_dim))

# # Populate the numpy array with values from the DataFrame
# for idx, row in data_train.iterrows():
#     time_step_idx = row['time_step_idx']
#     width = row['maturity'] - 1 
#     height = int(row['moneyness_enc']) - 1 
#     value = row['IV_smooth']
#     print(time_step_idx, width, height, value)
#     # Assign the value to the corresponding position in the numpy array
#     IV_array[time_step_idx, height, width] = value

In [12]:
# print(IV_array.shape)

In [13]:
# IV_array = IV_array.reshape((IV_array.shape[0], 5, 5, 1))
# print(IV_array.shape)

In [14]:
# Shape of the training tensor without its last frame.
print(IV_train[:-1].shape)

(2415, 5, 260, 4)


In [15]:
# Shape of the training targets: frames from index window_size onward, first channel only.
print(IV_train[window_size:][:,:,:,0:1].shape)

(2395, 5, 260, 1)


In [16]:
# Targets are the first channel (the IV, not the covariates) of every frame from index
# window_size onward, so target i is the frame that follows the window starting at frame i.
train_targets = IV_train[window_size:, :, :, 0:1]

dataset_train = tf.keras.utils.timeseries_dataset_from_array(
    IV_train,
    train_targets,
    sequence_length=window_size,
    batch_size=batch_size,
)

In [17]:
# Show the element spec (input and target shapes/dtypes) of the training dataset.
print(dataset_train)

<BatchDataset element_spec=(TensorSpec(shape=(None, None, 5, 260, 4), dtype=tf.float64, name=None), TensorSpec(shape=(None, 5, 260, 1), dtype=tf.float64, name=None))>


In [18]:
# window_size = 21 # about one month
# labels = IV_array[1:]
# X = IV_array[:-1]

# dataset_train = tf.keras.utils.timeseries_dataset_from_array(
#     data=IV_train[:-1],
#     targets=IV_train[window_size:],
#     sequence_length=window_size,
#     batch_size=batch_size
# )

# Prepend the last `window_size` time points of the preceding split to the validation set,
# because validation performance is also computed from a full input window.
# The validation input therefore starts at the end of the training set.

def _one_step_dataset(frames):
    """Window `frames` into (window_size past frames, IV channel of the following frame) pairs."""
    return tf.keras.utils.timeseries_dataset_from_array(
        data=frames,
        targets=frames[window_size:, :, :, 0:1],
        sequence_length=window_size,
        batch_size=batch_size,
    )


# Validation input: tail of the training tensor followed by the validation tensor.
IV_val_input = np.concatenate([IV_train[-window_size:], IV_val], axis=0)
dataset_val = _one_step_dataset(IV_val_input)

# Test input: tail of the validation tensor followed by the test tensor.
IV_test_input = np.concatenate([IV_val[-window_size:], IV_test], axis=0)
dataset_test = _one_step_dataset(IV_test_input)

In [19]:
# def create_model(n_params, 
#                  dropout, 
#                  recurrent_dropout, 
#                  n_convlstm_layers = 2,
#                  hidden_activation =  tf.keras.activations.tanh, 
#                  optimizer = keras.optimizers.Adam()):

#     # input layer
#     input_layer = layers.Input(shape= (None,5,5,1) )
    
#     # lstm layers
#     lstm = input_layer
#     for i in range( n_convlstm_layers ):
#         lstm =  layers.ConvLSTM2D( 
#             kernel_size= (1,1), 
#             filters=n_params, 
#             data_format= 'channels_last', 
#             return_sequences = i<n_convlstm_layers-1,
#             activation=hidden_activation,
#             padding = "same",
#             dropout=dropout, 
#             recurrent_dropout=recurrent_dropout
#         )( lstm )
#         lstm = layers.BatchNormalization()(lstm)    

#     output = layers.Conv2D(
#         filters=1, kernel_size=(1, 1), activation="linear", padding="same"
#     )( lstm )
#     output_layer = layers.Reshape((5,5))(output)

#     # compile
#     model = models.Model( input_layer, output_layer )
#     model.compile(
#         loss= "MAE",
#         optimizer=optimizer, 
#     ) 
    
#     print(model.summary())
#     return model
# model = create_model(n_params=10,dropout=0.1,recurrent_dropout=0.1,n_convlstm_layers=2)


In [20]:
# def train_model(model, 
#                 x_train, 
#                 y_train,
#                 verbose = True, 
#                 save : "dir" = False,
#                 training_kwarg_overwrites : "dict" = {} ):
    
#     # train until we run out of improvement
#     callbacks = [
#         keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=5),
#         keras.callbacks.EarlyStopping(monitor="val_loss", patience=15),
#     ]
    
#     # train model
#     training_kwargs = {
#         "x" : x_train,
#         "y" : y_train, #dataset["train"]["y_scaled"],
#         "epochs" : 200,
#         "batch_size" : 64,
#         "verbose" : verbose,
#         "validation_split" : 0.2,
#         "callbacks" : callbacks,
#     } 
#     training_kwargs.update(training_kwarg_overwrites)
#     train_hist = model.fit( **training_kwargs )
    
    
#     if save:
#         Path(save).mkdir(parents=True, exist_ok=True) # make a home for the models
#         train_start, train_end = [ f( dataset["dates"]["train"] ) for f in (min,max) ]
#         model_name = "-".join( date.strftime("%Y%m%d") for date in [train_start, train_end] )
#         model.save( save+model_name )
        
#     return model, train_hist



In [21]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization, Flatten, Dense

# time_steps = window_size
# height = 5
# width = 5
# channels = 1

# # Model definition
# model = Sequential([
#     ConvLSTM2D(filters=64, kernel_size=(3,3), activation='relu', 
#                return_sequences=True, input_shape=(time_steps, height, width, channels)),
#     BatchNormalization(),
#     ConvLSTM2D(filters=32, kernel_size=(3,3), activation='relu', return_sequences=False),
#     Flatten(),
#     Dense(128, activation='relu'),
#     Dense(1)  # Predicting IV at a future time
# ])

# # Compile model
# model.compile(optimizer='adam', loss='mse')
# print(model.summary())

In [None]:
# Input geometry is read from the training tensor, so the network always matches the data it
# is fed — including the covariate channels, and the case where no covariates are configured.
TIME_STEPS = window_size
_, HEIGHT, WIDTH, CHANNELS = IV_train.shape

# NOTE(review): filters and kernel size are hard-coded below; the `filters`, `kernel_size`,
# `strides` and initializer entries under `model` in the config file are not read here.
model = Sequential()

# ConvLSTM2D expects 5D input: (batch, time, height, width, channels)
model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3),
                     padding='same', return_sequences=True,
                     input_shape=(TIME_STEPS, HEIGHT, WIDTH, CHANNELS)))
model.add(BatchNormalization())

# return_sequences=False: only the final time step is passed on.
model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3),
                     padding='same', return_sequences=False))
model.add(BatchNormalization())

# Final 2D convolution maps the last hidden state to a single-channel next frame.
# NOTE(review): sigmoid bounds every prediction to (0, 1) — confirm the targets stay in that range.
model.add(tf.keras.layers.Conv2D(filters=1, kernel_size=(1, 1),
                                 activation='sigmoid', padding='same'))

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=epsilon)
model.compile(loss='mse', optimizer=optimizer)

# Double check the architecture and the activation function.
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d_4 (ConvLSTM2D)  (None, 21, 5, 260, 64)    156928    
                                                                 
 batch_normalization_4 (Batc  (None, 21, 5, 260, 64)   256       
 hNormalization)                                                 
                                                                 
 conv_lstm2d_5 (ConvLSTM2D)  (None, 5, 260, 64)        295168    
                                                                 
 batch_normalization_5 (Batc  (None, 5, 260, 64)       256       
 hNormalization)                                                 
                                                                 
 conv2d_2 (Conv2D)           (None, 5, 260, 1)         65        
                                                                 
Total params: 452,673
Trainable params: 452,417
Non-tr

In [29]:
# Show the element spec (input and target shapes/dtypes) of the validation dataset.
print(dataset_val)

<BatchDataset element_spec=(TensorSpec(shape=(None, None, 5, 260, 4), dtype=tf.float64, name=None), TensorSpec(shape=(None, 5, 260, 1), dtype=tf.float64, name=None))>


In [30]:
# restore_best_weights=True: without it the model keeps the weights of the last epoch run,
# i.e. `patience` epochs after the best validation loss, and the predictions below use those.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=patience,
    mode='min',
    restore_best_weights=True,
)

# Keep the History object so the loss curves can be inspected after training.
history = model.fit(
    dataset_train,
    epochs=epochs,
    validation_data=dataset_val,
    callbacks=[early_stopping],
)

# Takes about 40 min on dell xps laptop, 6.5 min on PC for short term
# 24 min on PC for long term

Epoch 1/100

KeyboardInterrupt: 

In [None]:
# Next step; functions for the IVRMSE and R_oos!!!!!
# H-step ahead performance!!!!!!
# WRITE IT ALL TO RESULTS!!!!!!
# Lower learning rate, beneficial, or leave it? Probably leave it
# All the HYPERPARAMETERS!!!!!! kernel strides, window sizes, parameters, layers, ALL OF THEM
# COVARIATES!!!!!!, Do all the hyperparameters again, and then only save the TEST PERFORMANCE
# MODEL ARCHITECTURE!!!!
# Investigate what happens, if you leave it as 0... We cannot do the interpolation properly, to be frank.

#THEN LONG TERM !!!!!!
# Transformer models!!!!!!!!!!!!!!!!
# Predictions for every window of the validation and test datasets.
pred_val = model.predict(dataset_val)
pred_test = model.predict(dataset_test)



In [None]:
# Put it in the compile, but also call it afterward
# double check formula and also make it for R_oos
# Should also work per h-step ahead 
def calculate_ivrmse(y_true, y_pred, all_points=False):
    """Root mean squared error between `y_true` and `y_pred`.

    With `all_points=False` the squared errors are averaged over every element and a single
    value is returned. With `all_points=True` they are averaged over axes 1 and 2 only, so
    one value remains per entry of the other axes. The result is returned via `.numpy()`.
    """
    squared_error = tf.square(y_true - y_pred)

    if all_points:
        mean_squared_error = tf.reduce_mean(squared_error, axis=[1, 2])
    else:
        mean_squared_error = tf.reduce_mean(squared_error)

    return tf.sqrt(mean_squared_error).numpy()

# model.compile(optimizer='adam', loss='mse', metrics=[ivrmse_metric])


In [None]:
# Compare the validation tensor with its predictions, and the test input with and without
# its last window_size frames.
print(IV_val.shape, pred_val.shape, IV_test_input.shape, IV_test_input[:-window_size].shape)

(218, 5, 260, 1) (218, 5, 260, 1) (274, 5, 260, 1) (253, 5, 260, 1)


In [None]:
# Show the element spec of the validation dataset once more.
print(dataset_val)

<BatchDataset element_spec=(TensorSpec(shape=(None, None, 5, 260, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 5, 260, 1), dtype=tf.float32, name=None))>


In [None]:
# Inspect the first validation batch: concrete input and target shapes.
for x_batch, y_batch in dataset_val.take(1):
    print("Input batch shape:", x_batch.shape)
    print("Target batch shape:", y_batch.shape)

Input batch shape: (32, 21, 5, 260, 1)
Target batch shape: (32, 5, 260, 1)


In [None]:
# Score only the IV channel. With covariates enabled IV_val carries extra channels that would
# otherwise broadcast against the single-channel prediction and be scored as if they were IV.
iv_val_true = IV_val[..., :1]
print(calculate_ivrmse(iv_val_true, pred_val))
# NOTE(review): all_points=False is the default, so this repeats the line above —
# all_points=True may have been intended.
print(calculate_ivrmse(iv_val_true, pred_val, all_points=False))
# The loss is less, than in the optimization.. probably a different metric, or formula than mse
# check values of the papers that are like yours

0.027347255
0.027347255


In [None]:
def calculate_r_oos(y_true, y_pred, all_points=False):
    """R-squared of `y_pred` against `y_true`, as 1 - SS_res / SS_tot.

    The baseline in SS_tot is the mean of `y_true` over axes 1 and 2 (kept as singleton axes).
    With `all_points=False` both sums run over every element and one value is returned; with
    `all_points=True` they run over axes 1 and 2 only. The result is returned via `.numpy()`.
    """
    # axis=None sums over all elements.
    sum_axes = [1, 2] if all_points else None

    residual_sq = tf.square(y_true - y_pred)
    surface_mean = tf.reduce_mean(y_true, axis=[1, 2], keepdims=True)
    deviation_sq = tf.square(y_true - surface_mean)

    ss_res = tf.reduce_sum(residual_sq, axis=sum_axes)
    ss_tot = tf.reduce_sum(deviation_sq, axis=sum_axes)

    return (1 - ss_res / ss_tot).numpy()

# Score only the IV channel; covariate channels in IV_val would otherwise broadcast against
# the single-channel prediction.
print(calculate_r_oos(IV_val[..., :1], pred_val))

0.6825859


In [None]:
# Train model again on BOTH train and validation, and then investigate the TEST PERFORMANCE!!

In [None]:
# Score only the IV channel; covariate channels in IV_test would otherwise broadcast against
# the single-channel prediction.
iv_test_true = IV_test[..., :1]
print(calculate_ivrmse(iv_test_true, pred_test))
print(calculate_r_oos(iv_test_true, pred_test))

0.05364826
0.70764846


In [None]:
def get_results(y_real, y_pred):
    """Compute both metrics in both modes.

    Returns the tuple (ivrmse, ivrmse_h, r_oos, r_oos_h), where the `_h` entries are the
    `all_points=True` variants of `calculate_ivrmse` and `calculate_r_oos`.
    """
    return (
        calculate_ivrmse(y_real, y_pred),
        calculate_ivrmse(y_real, y_pred, all_points=True),
        calculate_r_oos(y_real, y_pred),
        calculate_r_oos(y_real, y_pred, all_points=True),
    )

In [None]:
def write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, surface, surface_pred):
    """Save each result array under its own sub-folder of `folder_path`.

    One `.npy` file is written per argument, in a sub-folder named after it, with the file
    name `{window_size}_{h_step}.npy` built from the notebook-level `window_size` and `h_step`.
    Missing folders are created first.
    """
    # Sub-folder name -> array to store there (insertion order is the write order).
    artifacts = {
        "ivrmse": ivrmse,
        "r_oos": r_oos,
        "ivrmse_h": ivrmse_h,
        "r_oos_h": r_oos_h,
        "surface": surface,
        "surface_pred": surface_pred,
    }
    directories = {name: folder_path / Path(name) for name in artifacts}

    # Create every target folder before anything is written.
    for directory in directories.values():
        if not directory.exists():
            directory.mkdir(parents=True, exist_ok=True)

    file_name = f"{window_size}_{h_step}.npy"
    for name, payload in artifacts.items():
        np.save(directories[name] / file_name, payload)

In [None]:
folder_path = Path(f"results/test_{run}")
# Only the IV channel is scored and stored; covariate channels in IV_test would otherwise
# broadcast against the single-channel prediction and distort the metrics.
iv_test_true = IV_test[..., :1]
ivrmse, ivrmse_h, r_oos, r_oos_h = get_results(iv_test_true, pred_test)
write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, iv_test_true, pred_test)

In [None]:
# Print the all_points=True R-squared values returned by get_results.
print(r_oos_h)

[[ 0.227813  ]
 [ 0.83448577]
 [ 0.82664824]
 [ 0.86329985]
 [ 0.82719207]
 [ 0.82161885]
 [ 0.859445  ]
 [ 0.8038464 ]
 [ 0.82923454]
 [ 0.8613956 ]
 [ 0.8831786 ]
 [ 0.8605362 ]
 [ 0.80928403]
 [ 0.7487365 ]
 [ 0.82359606]
 [ 0.8038391 ]
 [ 0.870738  ]
 [ 0.879892  ]
 [ 0.88754183]
 [ 0.715389  ]
 [ 0.8423885 ]
 [ 0.83931565]
 [ 0.71127   ]
 [ 0.7170203 ]
 [ 0.83064073]
 [ 0.88917625]
 [ 0.8844631 ]
 [ 0.8380521 ]
 [ 0.78856057]
 [ 0.7900923 ]
 [ 0.8124328 ]
 [ 0.81729823]
 [ 0.8404102 ]
 [ 0.802576  ]
 [ 0.83177733]
 [-0.4169848 ]
 [ 0.808445  ]
 [ 0.76371753]
 [ 0.6336098 ]
 [ 0.7836803 ]
 [ 0.84073335]
 [ 0.6562091 ]
 [ 0.8399776 ]
 [ 0.7637213 ]
 [ 0.92217815]
 [ 0.7660601 ]
 [ 0.7994304 ]
 [ 0.87237227]
 [ 0.7656763 ]
 [ 0.8554688 ]
 [ 0.7668767 ]
 [ 0.85255224]
 [ 0.76106083]
 [ 0.793423  ]
 [ 0.8994974 ]
 [ 0.7147583 ]
 [ 0.7361706 ]
 [ 0.67779636]
 [ 0.7745245 ]
 [ 0.761217  ]
 [ 0.76068795]
 [ 0.8049597 ]
 [ 0.7416584 ]
 [ 0.8276144 ]
 [ 0.7777232 ]
 [-0.09736204]
 [ 0.72919

In [None]:
folder_path = Path(f"results/validation_{run}")
# Only the IV channel is scored and stored; covariate channels in IV_val would otherwise
# broadcast against the single-channel prediction and distort the metrics.
iv_val_true = IV_val[..., :1]
ivrmse, ivrmse_h, r_oos, r_oos_h = get_results(iv_val_true, pred_val)
# Store the validation surfaces next to the validation metrics (the test surfaces were
# previously written into this folder).
write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, iv_val_true, pred_val)

In [None]:
# For the h-step-ahead forecast, an autoregressive approach is used:
# first predict the one-step-ahead forecast (done now),
# then feed that one-step-ahead forecast back in as input to the next predict call.

In [None]:
# Shape of the test predictions.
print(pred_test.shape)

(253, 5, 260, 1)
