In [1]:
import tensorflow as tf
from tensorflow import keras
from keras import models, layers
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization, Conv3D, Conv2D
import yaml
from tensorflow.keras.layers import Input, ConvLSTM2D, BatchNormalization
from tensorflow.keras.layers import Flatten, Dense, RepeatVector, Reshape, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM



In [2]:
with open('configs/config_file_covs.yaml') as file:
    params = yaml.safe_load(file)

print(params)

window_size = params['training']['window_size']
h_step = params['forecast']['h_step']
patience = params['training']['patience']
epsilon = params['training']['epsilon']
batch_size = params['training']['batch_size']
epochs = params['training']['epochs']

run = params['model']['run']
learning_rate = params['model']['lr']
covariate_columns = params['model']['covariates']
option_type = params['model']['option']

{'training': {'window_size': 21, 'batch_size': 32, 'patience': 15, 'epsilon': 1e-06, 'random_state': 42, 'epochs': 100, 'covariates': 'None'}, 'model': {'run': 'short_ttm', 'option': 'put', 'filters': 2, 'kernel_size': [2, 2], 'strides': 1, 'kernel_initializer': 'glorot_uniform', 'recurrent_initializer': 'orthogonal', 'optimizer': 'adam', 'lr': 0.001, 'covariates': ['VIX', 'VVIX', 'SKEW', 'RVOL']}, 'forecast': {'h_step': 1}}


In [3]:
# Let's reshape our input data of the thing... we are going to need labels, and we are going to need train surface.
# The labels will be, the smoothed IVs of our data
# The train will be the dimensions, with time x ttm x moneyness encoders
# If we have covariates, the channels will be larger? -> Yes, starting channels will be added to the layers

# Load the data first
if run == 'short_ttm':
    data_train = pd.read_csv('data/final/smoothed/data_train.csv')
    data_val = pd.read_csv('data/final/evaluation/validation_set.csv')
    data_test = pd.read_csv('data/final/evaluation/test_set.csv')

    if covariate_columns is not None:
        covar_df = pd.read_excel('data/final/covariates/covariates_train.xlsx')
        covar_df_val = pd.read_excel('data/final/covariates/covariates_validation.xlsx')

        covar_df = covar_df.rename(columns={'Date':'date'})
        covar_df_val = covar_df_val.rename(columns={'Date':'date'})
        covar_df = covar_df[['date'] + covariate_columns]
        covar_df_val = covar_df_val[['date'] + covariate_columns]

elif run == 'long_ttm':
    data_train = pd.read_csv('data/final/smoothed/data_train_long.csv')
    data_val = pd.read_csv('data/final/evaluation/validation_set_long.csv')
    data_test = pd.read_csv('data/final/evaluation/test_set_long.csv')

    if covariate_columns is not None:
        covar_df = pd.read_excel('data/final/covariates/covariates_train_long.xlsx')
        covar_df_val = pd.read_excel('data/final/covariates/covariates_validation_long.xlsx')

        covar_df = covar_df.rename(columns={'Date':'date'})
        covar_df_val = covar_df_val.rename(columns={'Date':'date'})
        covar_df = covar_df[['date'] + covariate_columns]
        covar_df_val = covar_df_val[['date'] + covariate_columns]
   
else:
    print('Select a dataset')


In [4]:
print(covar_df.isna().sum())
covar_df = covar_df.sort_values('date').ffill()
print(covar_df.isna().sum())

date    0
VIX     0
VVIX    1
SKEW    3
RVOL    2
dtype: int64
date    0
VIX     0
VVIX    0
SKEW    0
RVOL    0
dtype: int64


In [5]:
data_train['date'] = pd.to_datetime(data_train['date'])
data_val['date'] = pd.to_datetime(data_val['date'])
data_test['date'] = pd.to_datetime(data_test['date'])
data_train = pd.merge(data_train, covar_df, on='date', how='left')
data_val = pd.merge(data_val, covar_df, on='date', how='left')
data_test = pd.merge(data_test, covar_df, on='date', how='left')

In [6]:
pd.set_option('display.max_row', None)

In [7]:
def process(data, eval=False, option_type='both'):
    data = data.drop(columns="Unnamed: 0")

    if option_type =='put':
        data = data[data['cp_flag']=='P']
    elif option_type =='call':
        data = data[data['cp_flag']=='C']
    # Let's implement the thing, where deep OTM, OTM, ATM, ITM, deep ITM is a thing

    # we have to discriminate between calls and puts
    # Coding; deep OTM = 1, OTM =2, ATM =3, ITM = 4, deep ITM=5 
    # outliers, sort of?
    print(data.shape)
    data = data[data['moneyness'] >= 0.8]
    data = data[data['moneyness'] <= 1.6]
    print(data.shape)

    # Also consider what to do with low volume... probably include them and acknowledge them as a limitation

    data.loc[(data['cp_flag']=='C') & (data['moneyness'] <0.90), 'moneyness_enc'] = 1
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.97), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.97) & (data['moneyness'] < 1.03), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.03) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.10), 'moneyness_enc'] = 5

    data.loc[(data['cp_flag']=='P') & (data['moneyness'] <0.90), 'moneyness_enc'] = 5
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.97), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.97) & (data['moneyness'] < 1.03), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.03) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.10), 'moneyness_enc'] = 1

    # multiple values of the same moneyness and maturity encoding, if that's the case, we take the average
    if eval:
        # data_avg = data.groupby(['date', 'moneyness_enc', 'maturity'], as_index=False)['impl_volatility'].mean()
        # print(data.shape)
        # print(data.groupby(['date','maturity', 'moneyness_enc']).size())
        # data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
        # print(data.shape)
        # data = data.drop(columns='impl_volatility')

        # data = pd.merge(data, data_avg, on=['date','moneyness_enc', 'maturity'], how='left')
        print('eval')
    else:
        data_avg = data.groupby(['date', 'moneyness_enc', 'maturity'], as_index=False)['IV_smooth'].mean()
        print(data.shape)
        print(data.groupby(['date', 'maturity', 'moneyness_enc']).size())
        data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
        print(data.shape)
        data = data.drop(columns='IV_smooth')

        data = pd.merge(data, data_avg, on=['date','moneyness_enc', 'maturity'], how='left')

    # data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
    print(data.shape)
    return data

# data_train = process(data_train, False, option_type=option_type)
# data_val = process(data_val, eval=True, option_type=option_type)
# data_test = process(data_test, eval=True, option_type=option_type)

# Thing to fix: The moneyness encoded, results in multiple impl volatility values for the same moneyness, maturity
# combination. To fix this, take the average, and omit the others

In [8]:
def process(data, eval=False, option_type='both'):
    data = data.drop(columns="Unnamed: 0")

    if option_type =='put':
        data = data[data['cp_flag']=='P']
    elif option_type =='call':
        data = data[data['cp_flag']=='C']
    # Let's implement the thing, where deep OTM, OTM, ATM, ITM, deep ITM is a thing

    # we have to discriminate between calls and puts
    # Coding; deep OTM = 1, OTM =2, ATM =3, ITM = 4, deep ITM=5 
    # outliers, sort of?
    print(data.shape)
    data = data[data['moneyness'] >= 0.8]
    data = data[data['moneyness'] <= 1.6]
    print(data.shape)

    # Also consider what to do with low volume... probably include them and acknowledge them as a limitation

    data.loc[(data['cp_flag']=='C') & (data['moneyness'] <0.85), 'moneyness_enc'] = 1
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.85) & (data['moneyness'] < 0.90), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.95), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.95) & (data['moneyness'] < 1.00), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.00) & (data['moneyness'] < 1.05), 'moneyness_enc'] = 5
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.05) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 6
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.10) & (data['moneyness'] < 1.15), 'moneyness_enc'] = 7
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.15), 'moneyness_enc'] = 8

    data.loc[(data['cp_flag']=='P') & (data['moneyness'] <0.85), 'moneyness_enc'] = 8
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.85) & (data['moneyness'] < 0.90), 'moneyness_enc'] = 7
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.95), 'moneyness_enc'] = 6
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.95) & (data['moneyness'] < 1.00), 'moneyness_enc'] = 5
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.00) & (data['moneyness'] < 1.05), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.05) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.10) & (data['moneyness'] < 1.15), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.15), 'moneyness_enc'] = 1
    # multiple values of the same moneyness and maturity encoding, if that's the case, we take the average
    if eval:
        # data_avg = data.groupby(['date', 'moneyness_enc', 'maturity'], as_index=False)['impl_volatility'].mean()
        # print(data.shape)
        # print(data.groupby(['date','maturity', 'moneyness_enc']).size())
        # data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
        # print(data.shape)
        # data = data.drop(columns='impl_volatility')

        # data = pd.merge(data, data_avg, on=['date','moneyness_enc', 'maturity'], how='left')
        print('eval')
    else:
        data_avg = data.groupby(['date', 'moneyness_enc', 'maturity'], as_index=False)['IV_smooth'].mean()
        print(data.shape)
        print(data.groupby(['date', 'maturity', 'moneyness_enc']).size())
        data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
        print(data.shape)
        data = data.drop(columns='IV_smooth')

        data = pd.merge(data, data_avg, on=['date','moneyness_enc', 'maturity'], how='left')

    # data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
    print(data.shape)
    return data

data_train = process(data_train, False, option_type=option_type)
data_val = process(data_val, eval=True, option_type=option_type)
data_test = process(data_test, eval=True, option_type=option_type)

# Thing to fix: The moneyness encoded, results in multiple impl volatility values for the same moneyness, maturity
# combination. To fix this, take the average, and omit the others

(356419, 33)
(355364, 33)
(355364, 34)
date        maturity  moneyness_enc
2012-01-03  3         3.0                5
                      4.0               12
                      5.0                9
2012-01-04  2         3.0                2
                      4.0               12
                      5.0                4
2012-01-05  1         4.0               11
                      5.0                5
2012-01-06  5         2.0                3
                      3.0               11
                      4.0               12
                      5.0                7
2012-01-09  4         3.0                4
                      4.0               13
                      5.0                6
2012-01-10  3         3.0                5
                      4.0               12
                      5.0                9
                      6.0                1
2012-01-11  2         3.0                1
                      4.0               12
                      

In [9]:
def frame_to_numpy(data, covariate_cols=None, eval=False):
    
    data['time_step'] = data['date']
    time_step_index = pd.to_datetime(data['time_step']).dt.strftime('%Y-%m-%d').unique()
    date_to_index = {date: idx for idx, date in enumerate(time_step_index)}

    data['time_step_str'] = data['time_step'].dt.strftime('%Y-%m-%d')
    data['time_step_idx'] = data['time_step_str'].map(date_to_index)
    #data['time_step_idx'] = data['time_step'].apply(lambda x: np.where(time_step_index == x.strftime('%Y-%m-%d'))[0][0])

    maturity_values = np.sort(data['maturity'].unique())
    maturity_to_idx = {mat: i for i, mat in enumerate(maturity_values)}

    time_steps = len(time_step_index)
    money_dim = len(data['moneyness_enc'].unique())
    ttm_dim = len(maturity_values)

    # Base IV tensor
    IV_array = np.zeros((time_steps, money_dim, ttm_dim, 1))
    cov_array = np.zeros((time_steps, len(covariate_cols)))

    for idx, row in data.iterrows():
        time_step_idx = row['time_step_idx']
        height = int(row['moneyness_enc']) - 1 
        width = maturity_to_idx[row['maturity']]
        value = row['IV_smooth'] if not eval else row['impl_volatility']
        IV_array[time_step_idx, height, width, 0] = value

        for i, cov in enumerate(covariate_cols):
            cov_array[time_step_idx,i] = row[cov]

    return IV_array, cov_array


In [10]:
IV_train, cov_train = frame_to_numpy(data_train, covariate_columns)
IV_val, cov_val = frame_to_numpy(data_val, covariate_columns, eval=True)
IV_test, cov_test = frame_to_numpy(data_test, covariate_columns, eval=True)

In [11]:
def create_rolling_window_dataset(iv_array, cov_array, window_size):

    T = iv_array.shape[0]
    N = T - window_size

    x_iv = np.zeros((N, window_size, *iv_array.shape[1:]))        # (N, window_size, H, W, 1)
    x_cov = np.zeros((N, window_size, cov_array.shape[1]))        # (N, window_size, C)
    y = np.zeros((N, *iv_array.shape[1:]))                   # (N, H, W, 1)

    for i in range(N):
        x_iv[i] = iv_array[i:i+window_size]
        x_cov[i] = cov_array[i:i+window_size]
        y[i] = iv_array[i+window_size]

    return x_iv, x_cov, y
x_iv_train, x_cov_train, target_train = create_rolling_window_dataset(IV_train, cov_train, window_size)

IV_val_input = np.concatenate((IV_train[-window_size:], IV_val), axis=0)
cov_val_input = np.concatenate((cov_train[-window_size:], cov_val), axis=0)
x_iv_val, x_cov_val, target_val = create_rolling_window_dataset(IV_val_input, cov_val_input, window_size)

IV_test_input = np.concatenate((IV_val[-window_size:], IV_test), axis=0)
cov_test_input = np.concatenate((cov_val[-window_size:], cov_test))
x_iv_test, x_cov_test, target_test = create_rolling_window_dataset(IV_test_input, cov_test_input, window_size)




In [12]:
time_steps = window_size
height = len(data_train['moneyness_enc'].unique())
width = len(data_train['maturity'].unique())
num_covariates = len(covariate_columns)

In [13]:
from tensorflow.keras.layers import Input

iv_input = Input(shape=(window_size, height, width, 1), name="iv_input")
cov_input = Input(shape=(window_size, num_covariates), name="cov_input")


In [14]:
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Dropout, Add

def transformer_block(inputs, num_heads=4, ff_dim=64, dropout=0.1):
    attention = MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)(inputs, inputs)
    attention = Dropout(dropout)(attention)
    out1 = LayerNormalization(epsilon=1e-6)(Add()([inputs, attention]))

    ffn = Dense(ff_dim, activation="relu")(out1)
    ffn = Dense(inputs.shape[-1])(ffn)
    ffn = Dropout(dropout)(ffn)
    return LayerNormalization(epsilon=1e-6)(Add()([out1, ffn]))


In [15]:
from tensorflow.keras.layers import TimeDistributed

x_cov_transformed = transformer_block(cov_input)
x_cov_flat = Dense(128, activation='relu')(x_cov_transformed[:, -1])  # Use last timestep


In [16]:
from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization

x_iv = ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same', return_sequences=False)(iv_input)
x_iv = BatchNormalization()(x_iv)


In [17]:
from tensorflow.keras.layers import Reshape, Concatenate, RepeatVector, Permute

x_cov_reshaped = Dense(height * width * 1)(x_cov_flat)
x_cov_reshaped = Reshape((height, width, 1))(x_cov_reshaped)

x = Concatenate()([x_iv, x_cov_reshaped])

x = Conv2D(64, (3,3), activation='relu', padding='same')(x)
output = Conv2D(1, (1,1), activation='linear', padding='same')(x)


In [18]:
from tensorflow.keras.models import Model

model = Model(inputs=[iv_input, cov_input], outputs=output)
model.compile(optimizer='adam', loss='mse')


In [19]:
IV_train[window_size:].shape

(2385, 8, 5, 1)

In [20]:
model.fit([x_iv_train, x_cov_train], target_train,
          validation_data=([x_iv_val, x_cov_val], target_val),
          epochs=epochs, batch_size=batch_size, 
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    patience=patience,
                                                    mode='min')])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


<keras.callbacks.History at 0x1daa97ffa30>

In [21]:
# Next step; functions for the IVRMSE and R_oos!!!!!
# H-step ahead performance!!!!!!
# WRITE IT ALL TO RESULTS!!!!!!
# Lower learning rate, beneficial, or leave it? Probably leave it
# All the HYPERPARAMETERS!!!!!! kernel strides, window sizes, parameters, layers, ALL OF THEM
# COVARIATES!!!!!!, Do all the hyperparameters again, and then only save the TEST PERFORMANCE
# MODEL ARCHITECTURE!!!!
# Investigate what happens, if you leave it as 0... We cannot do the interpolation properly, to be frank.

#THEN LONG TERM !!!!!!
# Transformer models!!!!!!!!!!!!!!!!
pred_val = model.predict([x_iv_val, x_cov_val])
pred_test = model.predict([x_iv_test, x_cov_test])



In [22]:
# Put it in the compile, but also call it afterward
# double check formula and also make it for R_oos
# Should also work per h-step ahead 
def calculate_ivrmse(y_true, y_pred, all_points=False):
    if not all_points:
        ivrmse = tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))
    else:
        sq_error = tf.square(y_true - y_pred)
        error_surface = tf.reduce_mean(sq_error, axis=[1 , 2])
        ivrmse = tf.sqrt(error_surface)

    return ivrmse.numpy()

# model.compile(optimizer='adam', loss='mse', metrics=[ivrmse_metric])


In [23]:
print(calculate_ivrmse(IV_val, pred_val))
print(calculate_ivrmse(IV_val, pred_val, all_points=False))
# The loss is less, than in the optimization.. probably a different metric, or formula than mse
# check values of the papers that are like yours

0.1785035872144709
0.1785035872144709


In [24]:
def calculate_r_oos(y_true, y_pred, all_points=False):
    if not all_points:
        ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
        mean_IV = tf.reduce_mean(y_true, axis=[1, 2], keepdims=True) # should be shape of 114 long
        # print(mean_IV[0], mean_IV[1], mean_IV[2])
        # print(y_true[0], y_true[1], y_true[2])
        ss_tot = tf.reduce_sum(tf.square(y_true - mean_IV))
        # print((y_true - mean_IV)[0])
        r2 = 1 - ss_res/ss_tot
    else:
        ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=[1, 2])
        mean_IV = tf.reduce_mean(y_true, axis=[1, 2], keepdims=True)
        ss_tot = tf.reduce_sum(tf.square(y_true - mean_IV), axis=[1, 2])
        r2 = 1 - ss_res/ss_tot
    return r2.numpy()

print(calculate_r_oos(IV_val, pred_val))

0.2844973292965991


In [25]:
# Train model again on BOTH train and validation, and then investigate the TEST PERFORMANCE!!

In [26]:
print(calculate_ivrmse(IV_test, pred_test))
print(calculate_r_oos(IV_test, pred_test))

0.2684094861313546
-0.1623549586771389


In [27]:
def get_results(y_real, y_pred):
    ivrmse = calculate_ivrmse(y_real, y_pred)
    ivrmse_h = calculate_ivrmse(y_real, y_pred, all_points=True)
    r_oos = calculate_r_oos(y_real, y_pred)
    r_oos_h = calculate_r_oos(y_real, y_pred, all_points=True)

    return ivrmse, ivrmse_h, r_oos, r_oos_h

In [28]:
def write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, surface, surface_pred, covariate_columns, option_type):

    ivrmse_path = folder_path / Path("ivrmse")
    r_oos_path = folder_path / Path("r_oos")
    ivrmse_h_path = folder_path / Path("ivrmse_h")
    r_oos_h_path = folder_path / Path("r_oos_h")
    surface_path = folder_path / Path("surface")
    surface_pred_path = folder_path / Path("surface_pred")

    if not ivrmse_path.exists():
        ivrmse_path.mkdir(parents=True, exist_ok=True)

    if not r_oos_path.exists():
        r_oos_path.mkdir(parents=True, exist_ok=True)

    if not ivrmse_h_path.exists():
        ivrmse_h_path.mkdir(parents=True, exist_ok=True)

    if not r_oos_h_path.exists():
        r_oos_h_path.mkdir(parents=True, exist_ok=True)

    if not surface_path.exists():
        surface_path.mkdir(parents=True, exist_ok=True)

    if not surface_pred_path.exists():
        surface_pred_path.mkdir(parents=True, exist_ok=True)

    cov = ""
    for i in covariate_columns:
        cov = cov+ "_" +i
    np.save(ivrmse_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", ivrmse)
    np.save(r_oos_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", r_oos)
    np.save(ivrmse_h_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", ivrmse_h)
    np.save(r_oos_h_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", r_oos_h)
    np.save(surface_path/ f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", surface)
    np.save(surface_pred_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", surface_pred)

In [29]:
folder_path = Path(f"results/test_{run}")
ivrmse, ivrmse_h, r_oos, r_oos_h = get_results(IV_test, pred_test)
write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, IV_test, pred_test, covariate_columns, option_type)

In [30]:
print(r_oos_h)

[[-3.52141013e-01]
 [-8.92296636e-02]
 [-7.46177134e-02]
 [ 4.02201342e-01]
 [ 4.83886297e-01]
 [ 3.40993492e-01]
 [-7.24503267e-01]
 [-1.70409985e+00]
 [ 4.33097346e-01]
 [-6.83335190e-02]
 [-1.15640829e-01]
 [-8.07673070e-01]
 [ 2.27183467e-02]
 [-3.90266849e-01]
 [-4.66023562e-02]
 [-9.49493843e-01]
 [-2.26711713e-01]
 [ 4.26626198e-01]
 [-6.42511284e-01]
 [-1.45136471e+00]
 [-5.53537819e-01]
 [-2.72180338e+00]
 [ 3.98433839e-01]
 [-5.38769727e-01]
 [-2.35298840e-01]
 [-1.59989779e+00]
 [ 7.89004183e-02]
 [-1.13815324e-01]
 [-1.33773291e-01]
 [ 1.10512139e-01]
 [ 5.74582009e-01]
 [-1.17796486e+00]
 [-1.90946864e-01]
 [-4.64464065e-01]
 [-1.20316919e+00]
 [ 2.12705735e-01]
 [ 1.87825821e-01]
 [ 1.41524035e-02]
 [-7.44042983e-01]
 [ 3.21760759e-01]
 [-2.42505054e-01]
 [ 1.31491019e-01]
 [-1.20005648e-01]
 [-1.20339463e-01]
 [-4.54215018e-01]
 [-8.39298607e-01]
 [-4.71053916e-02]
 [-4.49965993e-01]
 [ 4.17599690e-01]
 [-1.13684240e-01]
 [ 2.98745573e-01]
 [-4.16278340e-02]
 [-6.7161967

In [31]:
folder_path = Path(f"results/validation_{run}")
ivrmse, ivrmse_h, r_oos, r_oos_h = get_results(IV_val, pred_val)
write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, IV_test, pred_test, covariate_columns, option_type)

In [32]:
# For the h-step ahead forecast, an autoregressive approach is used
# Predict the one step ahead forecast (done now)
# After, use this one step ahead forecast in the predict command. 

In [33]:
print(pred_test.shape)

(194, 8, 5, 1)
