In [22]:
import tensorflow as tf
from tensorflow import keras
from keras import models, layers
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import ConvLSTM2D, LSTM, BatchNormalization, Conv3D, Conv2D,Input, Dense, Reshape, Concatenate
import yaml
from scipy.interpolate import bisplrep, bisplev, griddata, interp1d

In [23]:
with open('configs/config_file_covs.yaml') as file:
    params = yaml.safe_load(file)

print(params)

window_size = params['training']['window_size']
h_step = params['forecast']['h_step']
patience = params['training']['patience']
epsilon = params['training']['epsilon']
batch_size = params['training']['batch_size']
epochs = params['training']['epochs']

run = params['model']['run']
learning_rate = params['model']['lr']
covariate_columns = params['model']['covariates']
option_type = params['model']['option']

{'training': {'window_size': 21, 'batch_size': 32, 'patience': 15, 'epsilon': 1e-06, 'random_state': 42, 'epochs': 100, 'covariates': 'None'}, 'model': {'run': 'short_ttm', 'option': 'put', 'filters': 2, 'kernel_size': [2, 2], 'strides': 1, 'kernel_initializer': 'glorot_uniform', 'recurrent_initializer': 'orthogonal', 'optimizer': 'adam', 'lr': 0.001, 'covariates': ['VIX', 'VVIX', 'SKEW', 'RVOL', 'TMS', 'CRS', 'EPU', 'ADS']}, 'forecast': {'h_step': 1}}


In [24]:
# Let's reshape our input data of the thing... we are going to need labels, and we are going to need train surface.
# The labels will be, the smoothed IVs of our data
# The train will be the dimensions, with time x ttm x moneyness encoders
# If we have covariates, the channels will be larger? -> Yes, starting channels will be added to the layers

# Load the data first
if run == 'short_ttm':
    data_train = pd.read_csv('data/final/smoothed/data_train.csv')
    data_val = pd.read_csv('data/final/evaluation/validation_set.csv')
    data_test = pd.read_csv('data/final/evaluation/test_set.csv')

    if covariate_columns is not None:
        covar_df = pd.read_excel('data/final/covariates/covariates_train.xlsx')
        covar_df_val = pd.read_excel('data/final/covariates/covariates_validation.xlsx')

        covar_df = covar_df.rename(columns={'Date':'date'})
        covar_df_val = covar_df_val.rename(columns={'Date':'date'})
        covar_df = covar_df[['date'] + covariate_columns]
        covar_df_val = covar_df_val[['date'] + covariate_columns]

elif run == 'long_ttm':
    data_train = pd.read_csv('data/final/smoothed/data_train_long.csv')
    data_val = pd.read_csv('data/final/evaluation/validation_set_long.csv')
    data_test = pd.read_csv('data/final/evaluation/test_set_long.csv')

    if covariate_columns is not None:
        covar_df = pd.read_excel('data/final/covariates/covariates_train_long.xlsx')
        covar_df_val = pd.read_excel('data/final/covariates/covariates_validation_long.xlsx')

        covar_df = covar_df.rename(columns={'Date':'date'})
        covar_df_val = covar_df_val.rename(columns={'Date':'date'})
        covar_df = covar_df[['date'] + covariate_columns]
        covar_df_val = covar_df_val[['date'] + covariate_columns]
   
else:
    print('Select a dataset')


In [25]:
moneyness_grid = np.round(np.arange(0.80, 1.21, 0.05), 2)

def interpolate_moneyness_per_maturity(df_day):
    """
    Perform 1D interpolation on moneyness per maturity level for a single day.
    """
    date = df_day['date'].iloc[0]
    results = []

    for maturity in sorted(df_day['maturity'].unique()):
        df_maturity = df_day[df_day['maturity'] == maturity]
        x = df_maturity['moneyness'].values
        y = df_maturity['impl_volatility'].values

        if len(np.unique(x)) < 2:
            # Not enough points to interpolate; skip this maturity
            continue

        try:
            # You can use kind='linear', 'quadratic', 'cubic' as you wish
            f_interp = interp1d(x, y, kind='cubic', bounds_error=False, fill_value="extrapolate")
            ivs_interp = f_interp(moneyness_grid)

            df_interp = pd.DataFrame({
                'date': date,
                'maturity': maturity,
                'moneyness': moneyness_grid,
                'impl_volatility': ivs_interp
            })
            results.append(df_interp)
        except Exception as e:
            print(f"Interpolation error at maturity={maturity}, date={date}: {e}")

    if results:
        return pd.concat(results, ignore_index=True)
    else:
        return pd.DataFrame(columns=['datetime', 'maturity', 'moneyness', 'impl_volatility'])

data_train['date'] = pd.to_datetime(data_train['date'])
data_val['date'] = pd.to_datetime(data_val['date'])
data_test['date'] = pd.to_datetime(data_test['date'])
ivs_interp_train = pd.concat([
    interpolate_moneyness_per_maturity(day)
    for _, day in data_train.groupby('date')
], ignore_index=True)


Interpolation error at maturity=3, date=2012-01-03 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=2, date=2012-01-04 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=1, date=2012-01-05 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=5, date=2012-01-06 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=4, date=2012-01-09 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=3, date=2012-01-10 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=2, date=2012-01-11 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=1, date=2012-01-12 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=3, date=2012-01-17 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=2, date=2012-01-18 00:00:00: Expect x to not have duplicates
Interpolation error at maturity=5, date=2012-01-20 00:00:00: Expect x 

In [None]:

data_train = pd.merge(data_train, covar_df, on='date', how='left')
data_val = pd.merge(data_val, covar_df, on='date', how='left')
data_test = pd.merge(data_test, covar_df, on='date', how='left')

In [20]:
for i in data_train['date'].unique():
    print(i, (data_train[data_train['date']==i]['maturity'].unique()))

2012-01-03 00:00:00 [3]
2012-01-04 00:00:00 [2]
2012-01-05 00:00:00 [1]
2012-01-06 00:00:00 [5]
2012-01-09 00:00:00 [4]
2012-01-10 00:00:00 [3]
2012-01-11 00:00:00 [2]
2012-01-12 00:00:00 [1]
2012-01-17 00:00:00 [3]
2012-01-18 00:00:00 [2]
2012-01-20 00:00:00 [5]
2012-01-23 00:00:00 [4]
2012-01-24 00:00:00 [3]
2012-01-25 00:00:00 [2]
2012-01-26 00:00:00 [1]
2012-01-27 00:00:00 [5]
2012-01-30 00:00:00 [4]
2012-01-31 00:00:00 [3]
2012-02-01 00:00:00 [2]
2012-02-02 00:00:00 [1]
2012-02-03 00:00:00 [5]
2012-02-06 00:00:00 [4]
2012-02-07 00:00:00 [3]
2012-02-08 00:00:00 [2]
2012-02-09 00:00:00 [1]
2012-02-13 00:00:00 [4]
2012-02-14 00:00:00 [3]
2012-02-15 00:00:00 [2]
2012-02-17 00:00:00 [5]
2012-02-21 00:00:00 [3]
2012-02-22 00:00:00 [2]
2012-02-23 00:00:00 [1]
2012-02-24 00:00:00 [5]
2012-02-27 00:00:00 [4]
2012-02-28 00:00:00 [3]
2012-02-29 00:00:00 [2]
2012-03-01 00:00:00 [1]
2012-03-02 00:00:00 [5]
2012-03-05 00:00:00 [4]
2012-03-06 00:00:00 [3]
2012-03-07 00:00:00 [2]
2012-03-08 00:00

In [18]:
# Sample setup
maturity_grid = np.array([1, 2, 3, 4, 5])  # in days
moneyness_grid = np.round(np.arange(0.80, 1.21, 0.05), 2)
maturity_grid_mesh, moneyness_grid_mesh = np.meshgrid(maturity_grid, moneyness_grid)

# Flattened grid coordinates for interpolation
interp_points = np.column_stack([
    maturity_grid_mesh.ravel(),
    moneyness_grid_mesh.ravel()
])

# A helper function to interpolate for a single date
def interpolate_single_day(df_day):
    # Get raw inputs
    points = df_day[['maturity', 'moneyness']].values
    values = df_day['impl_volatility'].values

    # Interpolate using 'linear' method
    interpolated = griddata(points, values, interp_points, method='linear')

    # Construct a DataFrame with the same shape
    interpolated_df = pd.DataFrame({
        'date': df_day['date'].iloc[0],  # assuming there's a datetime column
        'maturity': interp_points[:, 0],
        'moneyness': interp_points[:, 1],
        'impl_volatility': interpolated
    })

    return interpolated_df.dropna()  # drop grid points that couldn't be interpolated

# Apply over time
def interpolate_full_surface(df):
    grouped = df.groupby('date')  # or 'date' if you’ve pre-processed to daily
    interpolated_list = [interpolate_single_day(group) for _, group in grouped]
    return pd.concat(interpolated_list, ignore_index=True)

# Example usage:
ivs_train_interp = interpolate_full_surface(data_train)
ivs_val_interp = interpolate_full_surface(data_val)
ivs_test_interp = interpolate_full_surface(data_test)


QhullError: QH6013 qhull input error: input is less than 3-dimensional since all points have the same x coordinate    3

While executing:  | qhull d Qc Qt Qbb Qz Q12
Options selected for Qhull 2019.1.r 2019/06/21:
  run-id 231245261  delaunay  Qcoplanar-keep  Qtriangulate  Qbbound-last
  Qz-infinity-point  Q12-allow-wide  _pre-merge  _zero-centrum  Qinterior-keep
  Pgood  _max-width 0.11  Error-roundoff 4.2e-15  _one-merge 2.9e-14
  Visible-distance 8.3e-15  U-max-coplanar 8.3e-15  Width-outside 1.7e-14
  _wide-facet 5e-14  _maxoutside 3.3e-14


In [6]:
pd.set_option('display.max_row', None)

In [None]:
def process(data, eval=False, option_type='both'):
    data = data.drop(columns="Unnamed: 0")

    if option_type =='put':
        data = data[data['cp_flag']=='P']
    elif option_type =='call':
        data = data[data['cp_flag']=='C']
    # Let's implement the thing, where deep OTM, OTM, ATM, ITM, deep ITM is a thing

    # we have to discriminate between calls and puts
    # Coding; deep OTM = 1, OTM =2, ATM =3, ITM = 4, deep ITM=5 
    # outliers, sort of?
    print(data.shape)
    data = data[data['moneyness'] >= 0.8]
    data = data[data['moneyness'] <= 1.6]
    print(data.shape)

    # Also consider what to do with low volume... probably include them and acknowledge them as a limitation

    data.loc[(data['cp_flag']=='C') & (data['moneyness'] <0.90), 'moneyness_enc'] = 1
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.97), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.97) & (data['moneyness'] < 1.03), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.03) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.10), 'moneyness_enc'] = 5

    data.loc[(data['cp_flag']=='P') & (data['moneyness'] <0.90), 'moneyness_enc'] = 5
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.97), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.97) & (data['moneyness'] < 1.03), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.03) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.10), 'moneyness_enc'] = 1

    # multiple values of the same moneyness and maturity encoding, if that's the case, we take the average
    if eval:
        data_avg = data.groupby(['date', 'moneyness_enc', 'maturity'], as_index=False)['impl_volatility'].mean()
        print(data.shape)
        print(data.groupby(['date','maturity', 'moneyness_enc']).size())
        data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
        print(data.shape)
        data = data.drop(columns='impl_volatility')

        data = pd.merge(data, data_avg, on=['date','moneyness_enc', 'maturity'], how='left')
        print('eval')
    else:
        data_avg = data.groupby(['date', 'moneyness_enc', 'maturity'], as_index=False)['IV_smooth'].mean()
        print(data.shape)
        print(data.groupby(['date', 'maturity', 'moneyness_enc']).size())
        data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
        print(data.shape)
        data = data.drop(columns='IV_smooth')

        data = pd.merge(data, data_avg, on=['date','moneyness_enc', 'maturity'], how='left')

    # data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
    print(data.shape)
    return data

# data_train = process(data_train, False, option_type=option_type)
# data_val = process(data_val, eval=True, option_type=option_type)
# data_test = process(data_test, eval=True, option_type=option_type)

# Thing to fix: The moneyness encoded, results in multiple impl volatility values for the same moneyness, maturity
# combination. To fix this, take the average, and omit the others

In [8]:
def process(data, eval=False, option_type='both'):
    data = data.drop(columns="Unnamed: 0")

    if option_type =='put':
        data = data[data['cp_flag']=='P']
    elif option_type =='call':
        data = data[data['cp_flag']=='C']
    # Let's implement the thing, where deep OTM, OTM, ATM, ITM, deep ITM is a thing

    # we have to discriminate between calls and puts
    # Coding; deep OTM = 1, OTM =2, ATM =3, ITM = 4, deep ITM=5 
    # outliers, sort of?
    print(data.shape)
    data = data[data['moneyness'] >= 0.8]
    data = data[data['moneyness'] <= 1.6]
    print(data.shape)

    # Also consider what to do with low volume... probably include them and acknowledge them as a limitation

    data.loc[(data['cp_flag']=='C') & (data['moneyness'] <0.85), 'moneyness_enc'] = 1
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.85) & (data['moneyness'] < 0.90), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.95), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=0.95) & (data['moneyness'] < 1.00), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.00) & (data['moneyness'] < 1.05), 'moneyness_enc'] = 5
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.05) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 6
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.10) & (data['moneyness'] < 1.15), 'moneyness_enc'] = 7
    data.loc[(data['cp_flag']=='C') & (data['moneyness'] >=1.15), 'moneyness_enc'] = 8

    data.loc[(data['cp_flag']=='P') & (data['moneyness'] <0.85), 'moneyness_enc'] = 8
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.85) & (data['moneyness'] < 0.90), 'moneyness_enc'] = 7
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.90) & (data['moneyness'] < 0.95), 'moneyness_enc'] = 6
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=0.95) & (data['moneyness'] < 1.00), 'moneyness_enc'] = 5
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.00) & (data['moneyness'] < 1.05), 'moneyness_enc'] = 4
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.05) & (data['moneyness'] < 1.10), 'moneyness_enc'] = 3
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.10) & (data['moneyness'] < 1.15), 'moneyness_enc'] = 2
    data.loc[(data['cp_flag']=='P') & (data['moneyness'] >=1.15), 'moneyness_enc'] = 1
    # multiple values of the same moneyness and maturity encoding, if that's the case, we take the average
    if eval:
        # data_avg = data.groupby(['date', 'moneyness_enc', 'maturity'], as_index=False)['impl_volatility'].mean()
        # print(data.shape)
        # print(data.groupby(['date','maturity', 'moneyness_enc']).size())
        # data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
        # print(data.shape)
        # data = data.drop(columns='impl_volatility')

        # data = pd.merge(data, data_avg, on=['date','moneyness_enc', 'maturity'], how='left')
        print('eval')
    else:
        data_avg = data.groupby(['date', 'moneyness_enc', 'maturity'], as_index=False)['IV_smooth'].mean()
        print(data.shape)
        print(data.groupby(['date', 'maturity', 'moneyness_enc']).size())
        data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
        print(data.shape)
        data = data.drop(columns='IV_smooth')

        data = pd.merge(data, data_avg, on=['date','moneyness_enc', 'maturity'], how='left')

    # data = data.drop_duplicates(subset=['date', 'moneyness_enc', 'maturity'])
    print(data.shape)
    return data

data_train = process(data_train, False, option_type=option_type)
data_val = process(data_val, eval=True, option_type=option_type)
data_test = process(data_test, eval=True, option_type=option_type)

# Thing to fix: The moneyness encoded, results in multiple impl volatility values for the same moneyness, maturity
# combination. To fix this, take the average, and omit the others

(356419, 37)
(355364, 37)
(355364, 38)
date        maturity  moneyness_enc
2012-01-03  3         3.0                5
                      4.0               12
                      5.0                9
2012-01-04  2         3.0                2
                      4.0               12
                      5.0                4
2012-01-05  1         4.0               11
                      5.0                5
2012-01-06  5         2.0                3
                      3.0               11
                      4.0               12
                      5.0                7
2012-01-09  4         3.0                4
                      4.0               13
                      5.0                6
2012-01-10  3         3.0                5
                      4.0               12
                      5.0                9
                      6.0                1
2012-01-11  2         3.0                1
                      4.0               12
                      

In [9]:
# def frame_to_numpy(data, eval=False):
#     # Convert 'time_step' to datetime
#     data['time_step'] = pd.to_datetime(data['date'])

#     # Create a time_step index (e.g., from the first unique date)
#     time_step_index = pd.to_datetime(data['time_step']).dt.strftime('%Y-%m-%d').unique()

#     # Map time_step dates to integer index
#     data['time_step_idx'] = data['time_step'].apply(lambda x: np.where(time_step_index == x.strftime('%Y-%m-%d'))[0][0])
#     print(data['time_step_idx'])

#     maturity_values = np.sort(data['maturity'].unique())
#     maturity_to_idx = {mat: i for i, mat in enumerate(maturity_values)}

#     time_steps = len(time_step_index)
#     money_dim = len(data['moneyness_enc'].unique())
#     ttm_dim = len(data['maturity'].unique())

#     # Create an empty numpy array with the shape (time_steps, height_dim, width_dim)
#     IV_array = np.zeros((time_steps, money_dim, ttm_dim, ), dtype=np.float32)

#     # Populate the numpy array with values from the DataFrame
#     for idx, row in data.iterrows():
#         time_step_idx = row['time_step_idx']
#         height = int(row['moneyness_enc']) - 1 
#         width = maturity_to_idx[row['maturity']]
        
#         if eval==False:
#             value = row['IV_smooth']
#         else:
#             value = row['impl_volatility']
            
#         # print(time_step_idx, height, width, value)
#         # Assign the value to the corresponding position in the numpy array
#         IV_array[time_step_idx, height, width] = value
        
#     IV_array = IV_array.reshape((IV_array.shape[0], money_dim, ttm_dim, 1))
#     return IV_array

In [10]:
# def frame_to_numpy(data, covariate_cols=None, eval=False):
    
#     data['time_step'] = pd.to_datetime(data['date'])
#     time_step_index = pd.to_datetime(data['time_step']).dt.strftime('%Y-%m-%d').unique()
#     data['time_step_idx'] = data['time_step'].apply(lambda x: np.where(time_step_index == x.strftime('%Y-%m-%d'))[0][0])

#     maturity_values = np.sort(data['maturity'].unique())
#     maturity_to_idx = {mat: i for i, mat in enumerate(maturity_values)}

#     time_steps = len(time_step_index)
#     money_dim = len(data['moneyness_enc'].unique())
#     ttm_dim = len(maturity_values)

#     # Base IV tensor
#     IV_array = np.zeros((time_steps, money_dim, ttm_dim))

#     # If covariates provided, create tensor to hold them
#     covariate_arrays = {}
#     if covariate_cols:
#         for cov in covariate_cols:
#             covariate_arrays[cov] = np.zeros((time_steps, money_dim, ttm_dim), dtype=np.float32)

#     for idx, row in data.iterrows():
#         time_step_idx = row['time_step_idx']
#         height = int(row['moneyness_enc']) - 1 
#         width = maturity_to_idx[row['maturity']]
#         value = row['IV_smooth'] if not eval else row['impl_volatility']
#         IV_array[time_step_idx, height, width] = value

#         # Also fill in covariates
#         if covariate_cols:
#             for cov in covariate_cols:
#                 covariate_arrays[cov][time_step_idx, height, width] = row[cov]

#     # Reshape and concatenate
#     IV_array = IV_array.reshape((time_steps, money_dim, ttm_dim, 1))

#     if covariate_cols:
#         covariate_stack = [arr.reshape((time_steps, money_dim, ttm_dim, 1)) for arr in covariate_arrays.values()]
#         covariate_stack = np.concatenate(covariate_stack, axis=-1)  # shape: (T, H, W, C)
#         IV_array = np.concatenate([IV_array, covariate_stack], axis=-1)  # final shape: (T, H, W, 1+C)

#     return IV_array


In [11]:
pd.set_option('display.max_row', 10)
print(data_train)

            date               symbol      exdate   last_date cp_flag  \
0     2012-01-03  SPXW 120106P1195000  2012-01-06  03/01/2012       P   
1     2012-01-03  SPXW 120106P1220000  2012-01-06  03/01/2012       P   
2     2012-01-03  SPXW 120106P1280000  2012-01-06  03/01/2012       P   
3     2012-01-04  SPXW 120106P1210000  2012-01-06  04/01/2012       P   
4     2012-01-04  SPXW 120106P1220000  2012-01-06  04/01/2012       P   
...          ...                  ...         ...         ...     ...   
21257 2021-12-06  SPXW 211213P4000000  2021-12-13  06/12/2021       P   
21258 2021-12-06  SPXW 211213P4175000  2021-12-13  06/12/2021       P   
21259 2021-12-06  SPXW 211213P4375000  2021-12-13  06/12/2021       P   
21260 2021-12-06  SPXW 211213P4595000  2021-12-13  06/12/2021       P   
21261 2021-12-06  SPXW 211213P4840000  2021-12-13  06/12/2021       P   

       strike_price  best_bid  best_offer  volume  open_interest  ...  \
0              1195      0.10        0.20     479 

In [12]:
def frame_to_numpy(data, covariate_cols=None, eval=False):
    
    data['time_step'] = data['date']
    time_step_index = pd.to_datetime(data['time_step']).dt.strftime('%Y-%m-%d').unique()
    date_to_index = {date: idx for idx, date in enumerate(time_step_index)}

    data['time_step_str'] = data['time_step'].dt.strftime('%Y-%m-%d')
    data['time_step_idx'] = data['time_step_str'].map(date_to_index)
    #data['time_step_idx'] = data['time_step'].apply(lambda x: np.where(time_step_index == x.strftime('%Y-%m-%d'))[0][0])

    maturity_values = np.sort(data['maturity'].unique())
    maturity_to_idx = {mat: i for i, mat in enumerate(maturity_values)}

    time_steps = len(time_step_index)
    money_dim = len(data['moneyness_enc'].unique())
    ttm_dim = len(maturity_values)

    # Base IV tensor
    IV_array = np.zeros((time_steps, money_dim, ttm_dim, 1))
    cov_array = np.zeros((time_steps, len(covariate_cols)))

    for idx, row in data.iterrows():
        time_step_idx = row['time_step_idx']
        height = int(row['moneyness_enc']) - 1 
        width = maturity_to_idx[row['maturity']]
        value = row['IV_smooth'] if not eval else row['impl_volatility']
        IV_array[time_step_idx, height, width, 0] = value

        for i, cov in enumerate(covariate_cols):
            cov_array[time_step_idx,i] = row[cov]

    return IV_array, cov_array


In [13]:
IV_train, cov_train = frame_to_numpy(data_train, covariate_columns)
IV_val, cov_val = frame_to_numpy(data_val, covariate_columns, eval=True)
IV_test, cov_test = frame_to_numpy(data_test, covariate_columns, eval=True)

In [14]:
def create_rolling_window_dataset(iv_array, cov_array, window_size):

    T = iv_array.shape[0]
    N = T - window_size

    x_iv = np.zeros((N, window_size, *iv_array.shape[1:]))        # (N, window_size, H, W, 1)
    x_cov = np.zeros((N, window_size, cov_array.shape[1]))        # (N, window_size, C)
    y = np.zeros((N, *iv_array.shape[1:]))                   # (N, H, W, 1)

    for i in range(N):
        x_iv[i] = iv_array[i:i+window_size]
        x_cov[i] = cov_array[i:i+window_size]
        y[i] = iv_array[i+window_size]

    return x_iv, x_cov, y
x_iv_train, x_cov_train, target_train = create_rolling_window_dataset(IV_train, cov_train, window_size)

IV_val_input = np.concatenate((IV_train[-window_size:], IV_val), axis=0)
cov_val_input = np.concatenate((cov_train[-window_size:], cov_val), axis=0)
x_iv_val, x_cov_val, target_val = create_rolling_window_dataset(IV_val_input, cov_val_input, window_size)

IV_test_input = np.concatenate((IV_val[-window_size:], IV_test), axis=0)
cov_test_input = np.concatenate((cov_val[-window_size:], cov_test))
x_iv_test, x_cov_test, target_test = create_rolling_window_dataset(IV_test_input, cov_test_input, window_size)




In [15]:
time_steps = window_size
height = len(data_train['moneyness_enc'].unique())
width = len(data_train['maturity'].unique())
num_covariates = len(covariate_columns)

In [16]:

# 1. IV input (spatio-temporal)
iv_input = Input(shape=(time_steps, height, width, 1), name="iv_input")

x_iv = ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same', return_sequences=True)(iv_input)
x_iv = BatchNormalization()(x_iv)

x_iv = ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same', return_sequences=False)(x_iv)
x_iv = BatchNormalization()(x_iv)


In [17]:
# 2. Covariate input (temporal only)
cov_input = Input(shape=(time_steps, num_covariates), name="cov_input")

# Option 1: Use an LSTM to capture temporal pattern

x_cov = LSTM(units=64, return_sequences=False)(cov_input)   # (batch_size, 64)
x_cov = Dense(units=height * width, activation='relu')(x_cov)  # Make it spatial
x_cov = Reshape((height, width, 1))(x_cov)                   # (batch_size, H, W, 1)


In [18]:
x = Concatenate(axis=-1)([x_iv, x_cov])  # Combine along channel axis -> (H, W, 65)

x = tf.keras.layers.Conv2D(filters=1, kernel_size=(1, 1), activation='sigmoid', padding='same')(x)


In [19]:
model = Model(inputs=[iv_input, cov_input], outputs=x)

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=epsilon)
model.compile(loss='mse', optimizer=optimizer)
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 iv_input (InputLayer)          [(None, 21, 8, 5, 1  0           []                               
                                )]                                                                
                                                                                                  
 conv_lstm2d (ConvLSTM2D)       (None, 21, 8, 5, 64  150016      ['iv_input[0][0]']               
                                )                                                                 
                                                                                                  
 cov_input (InputLayer)         [(None, 21, 8)]      0           []                               
                                                                                              

In [20]:
IV_train[window_size:].shape

(2385, 8, 5, 1)

In [21]:
model.fit([x_iv_train, x_cov_train], target_train,
          validation_data=([x_iv_val, x_cov_val], target_val),
          epochs=epochs, batch_size=batch_size, 
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    patience=patience,
                                                    mode='min')])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100


<keras.callbacks.History at 0x1d8880af9d0>

In [22]:
# Next step; functions for the IVRMSE and R_oos!!!!!
# H-step ahead performance!!!!!!
# WRITE IT ALL TO RESULTS!!!!!!
# Lower learning rate, beneficial, or leave it? Probably leave it
# All the HYPERPARAMETERS!!!!!! kernel strides, window sizes, parameters, layers, ALL OF THEM
# COVARIATES!!!!!!, Do all the hyperparameters again, and then only save the TEST PERFORMANCE
# MODEL ARCHITECTURE!!!!
# Investigate what happens, if you leave it as 0... We cannot do the interpolation properly, to be frank.

#THEN LONG TERM !!!!!!
# Transformer models!!!!!!!!!!!!!!!!
pred_val = model.predict([x_iv_val, x_cov_val])
pred_test = model.predict([x_iv_test, x_cov_test])



In [23]:
# Put it in the compile, but also call it afterward
# double check formula and also make it for R_oos
# Should also work per h-step ahead 
def calculate_ivrmse(y_true, y_pred, all_points=False):
    if not all_points:
        ivrmse = tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))
    else:
        sq_error = tf.square(y_true - y_pred)
        error_surface = tf.reduce_mean(sq_error, axis=[1 , 2])
        ivrmse = tf.sqrt(error_surface)

    return ivrmse.numpy()

# model.compile(optimizer='adam', loss='mse', metrics=[ivrmse_metric])


In [24]:
print(calculate_ivrmse(IV_val, pred_val))
print(calculate_ivrmse(IV_val, pred_val, all_points=False))
# The loss is less, than in the optimization.. probably a different metric, or formula than mse
# check values of the papers that are like yours

0.18825847408726074
0.18825847408726074


In [25]:
def calculate_r_oos(y_true, y_pred, all_points=False):
    if not all_points:
        ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
        mean_IV = tf.reduce_mean(y_true, axis=[1, 2], keepdims=True) # should be shape of 114 long
        # print(mean_IV[0], mean_IV[1], mean_IV[2])
        # print(y_true[0], y_true[1], y_true[2])
        ss_tot = tf.reduce_sum(tf.square(y_true - mean_IV))
        # print((y_true - mean_IV)[0])
        r2 = 1 - ss_res/ss_tot
    else:
        ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=[1, 2])
        mean_IV = tf.reduce_mean(y_true, axis=[1, 2], keepdims=True)
        ss_tot = tf.reduce_sum(tf.square(y_true - mean_IV), axis=[1, 2])
        r2 = 1 - ss_res/ss_tot
    return r2.numpy()

print(calculate_r_oos(IV_val, pred_val))

0.20415877633435853


In [26]:
# Train model again on BOTH train and validation, and then investigate the TEST PERFORMANCE!!

In [27]:
print(calculate_ivrmse(IV_test, pred_test))
print(calculate_r_oos(IV_test, pred_test))

0.2815185088980103
-0.2786655323880638


In [28]:
def get_results(y_real, y_pred):
    ivrmse = calculate_ivrmse(y_real, y_pred)
    ivrmse_h = calculate_ivrmse(y_real, y_pred, all_points=True)
    r_oos = calculate_r_oos(y_real, y_pred)
    r_oos_h = calculate_r_oos(y_real, y_pred, all_points=True)

    return ivrmse, ivrmse_h, r_oos, r_oos_h

In [29]:
def write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, surface, surface_pred, covariate_columns, option_type):

    ivrmse_path = folder_path / Path("ivrmse")
    r_oos_path = folder_path / Path("r_oos")
    ivrmse_h_path = folder_path / Path("ivrmse_h")
    r_oos_h_path = folder_path / Path("r_oos_h")
    surface_path = folder_path / Path("surface")
    surface_pred_path = folder_path / Path("surface_pred")

    if not ivrmse_path.exists():
        ivrmse_path.mkdir(parents=True, exist_ok=True)

    if not r_oos_path.exists():
        r_oos_path.mkdir(parents=True, exist_ok=True)

    if not ivrmse_h_path.exists():
        ivrmse_h_path.mkdir(parents=True, exist_ok=True)

    if not r_oos_h_path.exists():
        r_oos_h_path.mkdir(parents=True, exist_ok=True)

    if not surface_path.exists():
        surface_path.mkdir(parents=True, exist_ok=True)

    if not surface_pred_path.exists():
        surface_pred_path.mkdir(parents=True, exist_ok=True)

    cov = ""
    for i in covariate_columns:
        cov = cov+ "_" +i
    np.save(ivrmse_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", ivrmse)
    np.save(r_oos_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", r_oos)
    np.save(ivrmse_h_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", ivrmse_h)
    np.save(r_oos_h_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", r_oos_h)
    np.save(surface_path/ f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", surface)
    np.save(surface_pred_path / f"{option_type}_ws_{window_size}_h_{h_step}{cov}.npy", surface_pred)

In [30]:
folder_path = Path(f"results/test_{run}")
ivrmse, ivrmse_h, r_oos, r_oos_h = get_results(IV_test, pred_test)
write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, IV_test, pred_test, covariate_columns, option_type)

In [31]:
print(r_oos_h)

[[-1.37796267e-01]
 [-1.05386481e-01]
 [ 3.26984500e-03]
 [ 1.44451985e-01]
 [ 3.62870498e-01]
 [ 4.09372865e-01]
 [-4.73957587e-01]
 [-1.35121595e+00]
 [ 5.87006216e-01]
 [ 6.61699308e-02]
 [ 1.19558728e-01]
 [-8.29125796e-01]
 [-2.99449339e-01]
 [-2.41969633e-01]
 [ 8.92049789e-02]
 [-1.04583891e+00]
 [ 2.70376560e-01]
 [-8.82175002e-02]
 [-9.95909273e-01]
 [-8.39125156e-01]
 [-8.31803702e-01]
 [-2.40402244e+00]
 [-1.18826130e-01]
 [-5.64469701e-01]
 [-3.45249066e-01]
 [-2.06131306e+00]
 [-5.09991585e-01]
 [-2.47066761e-01]
 [ 5.79050510e-01]
 [-4.51236313e-01]
 [-1.59884520e-01]
 [-1.42953466e+00]
 [-5.61610298e-01]
 [-7.30780791e-01]
 [-4.13066844e-01]
 [ 2.76239248e-02]
 [-3.15226049e+00]
 [ 1.29811957e-01]
 [-6.31014530e-01]
 [-2.87632133e-01]
 [-4.41010000e-01]
 [-2.69905008e-01]
 [-3.38917243e-01]
 [-3.48978275e-01]
 [-4.68534623e-01]
 [-7.25046391e-01]
 [-4.05778899e-01]
 [-4.20117241e-01]
 [ 8.34372390e-02]
 [-6.04086853e-02]
 [-3.62312976e-02]
 [ 1.07787327e-01]
 [-5.1266941

In [32]:
folder_path = Path(f"results/validation_{run}")
ivrmse, ivrmse_h, r_oos, r_oos_h = get_results(IV_val, pred_val)
write_results(folder_path, ivrmse, r_oos, ivrmse_h, r_oos_h, IV_test, pred_test, covariate_columns, option_type)

In [33]:
# For the h-step ahead forecast, an autoregressive approach is used
# Predict the one step ahead forecast (done now)
# After, use this one step ahead forecast in the predict command. 

In [34]:
print(pred_test.shape)

(194, 8, 5, 1)


In [None]:
def recursive_forecast(model, initial_iv, covariates_seq, h):
    """
    Generates h-step ahead forecast for a given input (initial_iv) and covariates sequence (covariates_seq).
    """
    preds = []
    x_iv = initial_iv.copy()  # shape: (window_size, height, width, 1)
    
    # Iterate over the forecast horizon (h-step ahead)
    for t in range(h):
        cov_step = covariates_seq[t:t+1]  # shape: (1, cov_dim)
        
        # Add batch dimension for the model's input
        x_iv_input = x_iv[np.newaxis, ...]  # shape: (1, window_size, height, width, 1)
        x_cov_input = cov_step.reshape(1, 1, -1)  # reshape to (1, 1, cov_dim) for a single timestep
        
        # Make the prediction
        pred = model.predict([x_iv_input, x_cov_input], verbose=0)
        
        # Append the prediction (remove batch dimension)
        preds.append(pred[0])  # shape: (height, width, 1)
        
        # Update x_iv by rolling the window and adding the prediction
        pred_squeezed = pred.squeeze(axis=0)  # shape: (height, width, 1)
        x_iv = np.concatenate([x_iv[1:], pred_squeezed[np.newaxis, ...]], axis=0)

    return np.array(preds)  # shape: (h, height, width, 1)


def generate_forecasts_for_test_set(model, x_iv_test, cov_test, h):
    """
    Generates h-step forecasts for the entire test set.
    
    Args:
    - model: The trained model.
    - x_iv_test: The test data for IV inputs (shape: (num_samples, window_size, height, width, 1)).
    - cov_test: The covariate data for the test set (shape: (num_samples, num_covariates)).
    - h: The number of steps to predict ahead.
    
    Returns:
    - forecasts: A list of predicted IV surfaces for all test samples, each of shape (h, height, width, 1).
    """
    all_preds = []
    
    for i in range(len(x_iv_test)):  # Iterate over all samples in the test set
        initial_iv = x_iv_test[i]  # shape: (window_size, height, width, 1)
        covariates_seq = cov_test[i]  # shape: (num_covariates,)
        
        # Get the h-step forecast for this sample
        preds = recursive_forecast(model, initial_iv, covariates_seq, h)
        
        # Store the predictions for this sample
        all_preds.append(preds)
    
    return np.array(all_preds)

h = 5  # Forecast horizon (5 steps ahead)
forecasts = generate_forecasts_for_test_set(model, x_iv_test, cov_test, h)


InvalidArgumentError: Graph execution error:

Detected at node 'model/conv_lstm2d/transpose' defined at (most recent call last):
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request
      await super().execute_request(stream, ident, parent)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute
      res = shell.run_cell(
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3077, in run_cell
      result = self._run_cell(
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3132, in _run_cell
      result = runner(coro)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3336, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3519, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3579, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Deepm\AppData\Local\Temp\ipykernel_18216\2453847007.py", line 56, in <module>
      forecasts = generate_forecasts_for_test_set(model, x_iv_test, cov_test, h)
    File "C:\Users\Deepm\AppData\Local\Temp\ipykernel_18216\2453847007.py", line 48, in generate_forecasts_for_test_set
      preds = recursive_forecast(model, initial_iv, covariates_seq, h)
    File "C:\Users\Deepm\AppData\Local\Temp\ipykernel_18216\2453847007.py", line 18, in recursive_forecast
      pred = model.predict([x_iv_input, x_cov_input])
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 2253, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 2041, in predict_function
      return step_function(self, iterator)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 2027, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 2015, in run_step
      outputs = model.predict_step(data)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
      return self(x, training=False)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\layers\rnn\base_rnn.py", line 553, in __call__
      return super().__call__(inputs, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\layers\rnn\base_conv_lstm.py", line 506, in call
      return super().call(
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\layers\rnn\base_conv_rnn.py", line 327, in call
      last_output, outputs, states = backend.rnn(
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\backend.py", line 4783, in rnn
      inputs = tf.nest.map_structure(swap_batch_timestep, inputs)
    File "c:\Users\Deepm\anaconda3\envs\tf_gpu\lib\site-packages\keras\backend.py", line 4780, in swap_batch_timestep
      return tf.compat.v1.transpose(input_t, axes)
Node: 'model/conv_lstm2d/transpose'
transpose expects a vector of size 4. But input(1) is a vector of size 5
	 [[{{node model/conv_lstm2d/transpose}}]] [Op:__inference_predict_function_89746]

In [None]:
print(forecasts.shape)
print(IV_test.shape)

(194, 5, 8, 5, 1)
(194, 8, 5, 1)


In [None]:
print(forecasts[0][0])

[[[0.16486967]
  [0.57145566]
  [0.48652163]
  [0.6856394 ]
  [0.69946826]]

 [[0.6313992 ]
  [0.78946275]
  [0.7236876 ]
  [0.749447  ]
  [0.6089316 ]]

 [[0.28329638]
  [0.7100642 ]
  [0.42562798]
  [0.67632073]
  [0.6045381 ]]

 [[0.42057732]
  [0.69108444]
  [0.37215552]
  [0.6370163 ]
  [0.6606845 ]]

 [[0.4099188 ]
  [0.45772284]
  [0.43094543]
  [0.42072645]
  [0.78371346]]

 [[0.85129285]
  [0.3531753 ]
  [0.91894907]
  [0.5364034 ]
  [0.5345526 ]]

 [[0.7131472 ]
  [0.09219097]
  [0.75762695]
  [0.8334897 ]
  [0.9054322 ]]

 [[0.7161368 ]
  [0.01688491]
  [0.64517736]
  [0.76551414]
  [0.6663528 ]]]


In [None]:
print(pred_test[0])

[[[0.01862592]
  [0.19200858]
  [0.48652163]
  [0.38989863]
  [0.2503377 ]]

 [[0.12925439]
  [0.31987518]
  [0.34741732]
  [0.31093243]
  [0.16580123]]

 [[0.28329638]
  [0.3125413 ]
  [0.22858293]
  [0.2531191 ]
  [0.13199875]]

 [[0.19599621]
  [0.2632183 ]
  [0.17080551]
  [0.16857839]
  [0.15079342]]

 [[0.22710778]
  [0.18509802]
  [0.43094543]
  [0.10276678]
  [0.20051493]]

 [[0.53172   ]
  [0.08196539]
  [0.6815705 ]
  [0.09997155]
  [0.5345526 ]]

 [[0.2838202 ]
  [0.0354126 ]
  [0.30491433]
  [0.14313084]
  [0.31286046]]

 [[0.11301403]
  [0.00079787]
  [0.09002484]
  [0.15843824]
  [0.6663528 ]]]


In [None]:
print(pred_test.shape)

(194, 8, 5, 1)


In [None]:
print(calculate_ivrmse(IV_test, forecasts[:,0,:,:,:]))
print(calculate_r_oos(IV_test, forecasts[:,0,:,:,:]))

0.4183882341231208
-1.8242424545792209


In [None]:
print(calculate_ivrmse(IV_test, pred_test))
print(calculate_r_oos(IV_test, pred_test))