# Evaluate End-Of-Day Options Data (OptionMetrics via WRDS)

In [54]:
import os

import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['savefig.bbox'] = 'tight'
import torch

from op_ds.gno.gno import GNOLayer, GNO
from op_ds.gno.kernel import NonlinearKernelTransformWithSkip
from op_ds.utils.fnn import FNN
from volatility_smoothing.utils.options_data import OptionsDataset
from volatility_smoothing.utils.gno.train import Trainer
from volatility_smoothing.utils.gno.dataset import GNOOptionsDataset

## Create Dataset Class and Load

In [55]:
class WRDSOptionsDataset(OptionsDataset):
    """Dataset for OptionMetrics end-of-day options data as provided through WRDS.

    When downloading the options chain (for a specified index and a certain date range) from WRDS, be sure to include the following fields:

    * ``date``
    * ``exdate``
    * ``cp_flag``
    * ``strike_price``
    * ``best_bid``
    * ``best_offer``
    * ``am_settlement`` (to be able to discard weekly options)

    Then, place the generated csv-file into a dedicated directory, and provide that directory as ``data_dir`` during init.
    """

    @classmethod
    def load_data(cls, data_dir: str) -> pd.DataFrame:
        """Load the options data contained in the WRDS csv-file

        Only rows with ``am_settlement == 1`` are kept, ``strike_price`` is divided by 1000, and the WRDS
        field names are mapped to ``quote_datetime``, ``expiry_datetime``, ``strike``, ``option_type``,
        ``bid`` and ``ask``.

        Parameters
        ----------
        data_dir
            The directory in which the csv-file is placed (must be the only csv-file in this directory)

        Returns
        -------
        pd.DataFrame
            The data frame with columns as described in the documentation of :meth:`OptionsDataset.load_data`

        Raises
        ------
        FileNotFoundError
            If no csv-file was found in ``data_dir``
        ValueError
            If there were multiple csv-files in ``data_dir``
        KeyError
            If one of the required fields is missing from the csv-file
        """

        csv_files = [file for file in os.listdir(data_dir) if file.endswith('.csv')]
        if not csv_files:
            raise FileNotFoundError(f"No csv files found in {data_dir}")
        if len(csv_files) > 1:
            raise ValueError(f"Multiple csv files found in {data_dir}")
        filepath = os.path.join(data_dir, csv_files[0])

        # WRDS field name -> column name used by the rest of the pipeline
        col_names = {
            'date': 'quote_datetime',
            'exdate': 'expiry_datetime',
            'strike_price': 'strike',
            'cp_flag': 'option_type',
            'best_bid': 'bid',
            'best_offer': 'ask'
        }
        data = (pd.read_csv(filepath, engine='python')
            .query('am_settlement == 1')
            # presumably OptionMetrics reports the strike multiplied by 1000 -- verify against the WRDS manual
            .assign(strike_price=lambda df: df['strike_price'] / 1000)
            .rename(columns=col_names)
            # NOTE(review): ``astype`` reads integer-coded dates (e.g. 20210104) as nanoseconds since the
            # epoch -- confirm the export uses a string date format
            .astype({'quote_datetime': 'datetime64[ns]', 'expiry_datetime': 'datetime64[ns]'}))

        # Explicit selection instead of ``DataFrame.get``: ``get`` returns ``None`` when a column is
        # missing, which would silently hand ``None`` to the caller instead of failing here.
        return data[list(col_names.values())]


In [None]:
# Directory that must contain exactly one WRDS csv export (see WRDSOptionsDataset.load_data).
spx_dir = "../data/wrds/spx"
spx_dataset = WRDSOptionsDataset(spx_dir)
# Dataset handed to the GNO trainer below, built from the options dataset with subsampling disabled.
spx_gno_dataset = GNOOptionsDataset(spx_dataset, subsample=False)

## Model and Optimizer

### Instantiate

In [57]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [58]:
# Architecture hyperparameters. The checkpoint loaded further below is restored with
# ``load_state_dict``, so the network built here has to match the one that was trained.
in_channels = 1
out_channels = 1
channels = (in_channels, 16, 16, 16, out_channels)
spatial_dim = 2
gno_channels = 16
hidden_channels = 64

# One GNO layer per consecutive pair of entries in ``channels``.
m = len(channels) - 1
gno_layers = []

for i, (layer_in, layer_out) in enumerate(zip(channels[:-1], channels[1:])):
    is_last = i == m - 1

    # Every layer lifts its input to ``gno_channels``; only the last layer gets a projection.
    lifting = FNN.from_config(
        (layer_in, hidden_channels, gno_channels),
        hidden_activation='gelu',
        batch_norm=False,
    )
    if is_last:
        projection = FNN.from_config(
            (gno_channels, hidden_channels, layer_out),
            hidden_activation='gelu',
            batch_norm=False,
        )
    else:
        projection = None
    transform = NonlinearKernelTransformWithSkip(
        in_channels=gno_channels,
        out_channels=gno_channels,
        skip_channels=in_channels,
        spatial_dim=spatial_dim,
        hidden_channels=(hidden_channels, hidden_channels),
        hidden_activation='gelu',
        batch_norm=False,
    )

    # The first layer is built without the local linear term, all later layers with it.
    local_linear = i > 0

    # GELU between layers, Softplus on the final layer.
    if is_last:
        activation = torch.nn.Softplus(beta=0.5)
    else:
        activation = torch.nn.GELU()

    gno_layers.append(
        GNOLayer(
            gno_channels,
            transform=transform,
            local_linear=local_linear,
            local_bias=True,
            activation=activation,
            lifting=lifting,
            projection=projection,
        )
    )

gno = GNO(*gno_layers, in_channels=in_channels)

In [59]:
# AdamW with its default hyperparameters; its state is replaced when the checkpoint is loaded below.
optimizer = torch.optim.AdamW(gno.parameters())

### Load Checkpoint

In [60]:
def load_checkpoint(model, optimizer, path):
    """Restore model and optimizer state from a checkpoint file.

    The file is read with ``torch.load`` and mapped onto the module-level ``device``. It must
    contain the keys ``'model'`` and ``'optimizer'``, each holding a state dict for the
    respective object.

    Parameters
    ----------
    model
        Object whose ``load_state_dict`` receives the ``'model'`` entry
    optimizer
        Object whose ``load_state_dict`` receives the ``'optimizer'`` entry
    path
        Location of the checkpoint file

    Returns
    -------
        The same ``model`` and ``optimizer`` objects that were passed in, after loading
    """
    state = torch.load(path, map_location=device)
    # The model is restored first, then the optimizer.
    for target, key in ((model, 'model'), (optimizer, 'optimizer')):
        target.load_state_dict(state[key])
    return model, optimizer

In [61]:
# Restore the weights and optimizer state stored in ``checkpoint_final.pt`` of a saved training run.
path = "../train/store/9448705/checkpoints/checkpoint_final.pt"
gno, optimizer = load_checkpoint(gno, optimizer, path)

## Evaluation

In [62]:
# Step sizes passed to the Trainer.
# NOTE(review): presumably the grid spacing along the two input coordinates (r and z) -- confirm against Trainer.
step_r = 0.05
step_z = 0.01
trainer = Trainer(step_r=step_r, step_z=step_z)

### Evaluate Dataset

In [63]:
# Evaluate the restored model on the SPX dataset. The call returns four results
# (the ``df_*`` names suggest data frames -- see Trainer.evaluate for their contents).
df_val, df_rel, df_fit, data_list = trainer.evaluate(gno, spx_gno_dataset, device=device)

### Plots

In [None]:
# Index of the dataset entry to plot.
i = 10
# The trailing semicolon keeps the notebook from echoing the call's return value.
trainer.plot_example(gno, spx_gno_dataset[i], nrows=5, ncols=2, figsize=(9, 14));