# DeepAR model 

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd 
import sys, os 
import json 
import logging 
logger = logging.getLogger()

# Put the parent directory on the import path (once) so that the local
# packages imported below can be resolved.
p = os.path.abspath(os.pardir)
if not any(entry == p for entry in sys.path):
    sys.path.append(p)
    
from src.data import open_and_transform_csv2

## Data transformation 

For GluonTS models, the data needs to be in the form of iterable objects. 

The datasets provided by GluonTS consist of three main members:
- train: iterable collection of data entries used for training. Each entry corresponds to one time series
- test: iterable collection of data entries used for inference. The test dataset is an extended version of the train dataset that contains a window at the end of each time series that was not seen during training. This window has a length equal to the recommended prediction length. 
- metadata: contains metadata of the dataset such as the frequency of the time series, a recommended prediction horizon, associated features, etc.

Moreover, GluonTS uses "field names": the attributes that identify the type of data in the train, test and metadata members. For instance, each train entry has to contain a ``target`` and a ``start`` field, but we can add features such as ``feat_static_cat``, ``feat_static_real``, etc. The test has to contain the ``start``, ``target`` and ``prediction_length`` fields for each entry. 


The fields are split into three categories: the required ones, the optional ones, and the ones that can be added by the Transformation (explained in a while).

#### Required:
- start: start date of the time series
- target: values of the time series

#### Optional:

- feat_static_cat: static (over time) categorical features, list with dimension equal to the number of features
- feat_static_real: static (over time) real features, list with dimension equal to the number of features
- feat_dynamic_cat: dynamic (over time) categorical features, array with shape equal to (number of features, target length)
- feat_dynamic_real: dynamic (over time) real features, array with shape equal to (number of features, target length)

#### Added by Transformation:
- time_feat: time related features such as the month or the day
- feat_dynamic_const: expands a constant value feature along the time axis
- feat_dynamic_age: age feature, i.e., a feature whose value is small for distant past timestamps and increases monotonically as we approach the current timestamp
- observed_values: indicator for observed values, i.e., a feature that equals 1 if the value is observed and 0 if the value is missing
- is_pad: indicator for each time step that shows if it is padded (if the length is not enough)
- forecast_start: forecast start date

Finally, the metadata contains general information about the dataset. 

In [None]:
# List every public attribute of FieldName together with the string key it
# stands for; these keys are the entry names used in the datasets below.
from gluonts.dataset.field_names import FieldName
[
    f"FieldName.{attr} = '{key}'"
    for attr, key in vars(FieldName).items()
    if not attr.startswith('_')
]

In [None]:
# Load the SPX daily data through the project helper and preview the first rows.
# NOTE(review): presumably returns a pandas DataFrame (`.head()` and column
# access are used on the result) — confirm against src.data.
spx_daily = open_and_transform_csv2(r'../data/spx_daily.xlsx')
spx_daily.head()

In [None]:
# One time series is built per (strike, maturity) pair.
strikes = [40,  60,  80,  90, 100, 110, 120]
maturities = ["6M", "1Y", "18M", "2Y", "3Y"]

# Start date, shared by every series
start = spx_daily.Dates.min()

# Expected number of observations per time series
num_steps = 3230

# Containers for the GluonTS inputs, filled in (strike, maturity) order
target = []
nb_series = 0
feat_static_cat = []
feat_static_real = []

for s in strikes:
    for m in maturities:
        df_temp = spx_daily[(spx_daily.Strike == s) & (spx_daily.Duration == m)].sort_values(by="Dates")
        if df_temp.shape[0] != num_steps:
            # Fail loudly: a plain `break` would only leave the inner loop and
            # silently produce fewer series than the later cells expect.
            raise ValueError(
                f"Series (strike={s}, maturity={m}) has {df_temp.shape[0]} "
                f"observations, expected {num_steps}"
            )
        target.append(df_temp.Change_in_implied_vol.values)
        nb_series += 1
        feat_static_real.append(s)  # strike as a real-valued static feature
        feat_static_cat.append(m)   # maturity label, kept as a raw string

# Stack into arrays: target is (number of series, num_steps), features are 1-D
target, feat_static_real, feat_static_cat = np.array(target), np.array(feat_static_real), np.array(feat_static_cat)

In [None]:
# Now define the metadata containing the parameters of the dataset
num_series = len(strikes) * len(maturities)
metadata = {
    'num_series': num_series,
    'num_steps': num_steps,
    'prediction_length': 10,
    'freq': '1D',
    # One start timestamp per series (not per time step); every series shares
    # the same start date.
    # NOTE(review): the `freq` argument of pd.Timestamp is deprecated in late
    # pandas 1.x and removed in pandas 2.0 — this assumes the older pandas
    # that this GluonTS version targets.
    'start': [pd.Timestamp(start, freq='1D') for _ in range(num_series)],
}

In [None]:
# Display the dataset metadata dictionary
metadata

In [None]:
# We have 35 TS (7 strikes x 5 maturities), each with 3230 observations.
# For each TS, the feat_static_real contains the strike, the feat_static_cat contains the maturity.
# Sanity check: print the container types and the array shapes.
print(type(target), type(feat_static_real), type(feat_static_cat))
print(target.shape, feat_static_real.shape, feat_static_cat.shape)

In [None]:
# Display the per-series strike and maturity arrays
feat_static_real, feat_static_cat

In [None]:
# Training entries: one dict per series, with the last `prediction_length`
# observations held out so that they stay unseen during training.
horizon = metadata['prediction_length']
list_of_dicts = [
    {
        FieldName.TARGET: series[:-horizon],
        FieldName.START: start,
        # Static features must be 1-D (one value per feature), so the single
        # strike is wrapped in a list instead of being passed as a scalar.
        FieldName.FEAT_STATIC_REAL: [strike],
    }
    for series, strike in zip(target, feat_static_real)
]

In [None]:
# Display the training entries built above
list_of_dicts

In [None]:
from gluonts.dataset.common import ListDataset

# Create the train dataset from the list of entry dicts, using the dataset frequency
train_ds = ListDataset(list_of_dicts, freq=metadata['freq'])
print(train_ds)

# Peek at the first entry to see which fields it carries
train_entry = next(iter(train_ds))
print(f"Keys of train_ds : {train_entry.keys()}")

In [None]:
# Creating the test dataset: same entries as for training, but with the full
# target (including the final window that is held out from training).
list_of_dicts = [
    {
        FieldName.TARGET: series,
        FieldName.START: start,
        # Static features must be 1-D (one value per feature), so the single
        # strike is wrapped in a list instead of being passed as a scalar.
        FieldName.FEAT_STATIC_REAL: [strike],
    }
    for series, strike in zip(target, feat_static_real)
]
test_ds = ListDataset(list_of_dicts, freq=metadata['freq'])

# Peek at the first entry to see which fields it carries
test_entry = next(iter(test_ds))
print(test_entry.keys())

In [None]:
# `to_pandas` is otherwise only imported in a later cell; import it here so the
# notebook runs top-to-bottom without a NameError.
from gluonts.dataset.util import to_pandas

# Convert the first training entry to a pandas object for display
to_pandas(train_entry)

In [None]:
# Convert the first test entry to a pandas object for display
to_pandas(test_entry)

In [None]:
from gluonts.dataset.util import to_pandas
import matplotlib.pyplot as plt

# Plot the first train series and the first test series on a shared time axis
test_series = to_pandas(test_entry)
train_series = to_pandas(train_entry)

fig, ax = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(20, 8))

train_series.plot(ax=ax[0])
ax[0].grid(which="both")
ax[0].legend(["train series"], loc="upper left")

test_series.plot(ax=ax[1])
# The train series is already truncated, so its last timestamp marks the end
# of the training data; everything to the right is the held-out window.
ax[1].axvline(train_series.index[-1], color='r')  # end of train dataset
ax[1].grid(which="both")
ax[1].legend(["test series", "end of train series"], loc="upper left")

plt.show()

In [None]:
# Summary: the test series should be longer than the train series by exactly
# the prediction length stored in the metadata.
print(f"Length of forecasting window in test dataset: {len(test_series) - len(train_series)}")
print(f"Recommended prediction horizon: {metadata['prediction_length']}")
print(f"Frequency of the time series: {metadata['freq']}")

# Models 

## 1) SimpleFeedForwardEstimator

In [None]:
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
try:
    from gluonts.mx.trainer import Trainer
except ImportError:  # older GluonTS releases expose the trainer here
    from gluonts.trainer import Trainer

# Feed-forward baseline configured from the dataset metadata defined earlier.
# NOTE(review): learning_rate=1e-10 with a single batch per epoch and a
# 2-unit hidden layer looks like a smoke-test configuration, and
# context_length spans the full series length — confirm before comparing
# these results against DeepAR.
estimator1 = SimpleFeedForwardEstimator(
    num_hidden_dimensions=[2],
    prediction_length=metadata["prediction_length"],
    context_length=metadata["num_steps"],
    freq=metadata["freq"],
    trainer=Trainer(ctx="cpu",
                    epochs=5,
                    learning_rate=1e-10,
                    num_batches_per_epoch=1
                   )
)


In [None]:
# Fit the feed-forward estimator on the training dataset; the result is the
# predictor used for the feed-forward evaluation.
predictor1 = estimator1.train(train_ds)

In [None]:
from gluonts.evaluation.backtest import make_evaluation_predictions

# Evaluate the feed-forward model: `predictor1` is the predictor trained from
# `estimator1` (the name `predictor` is only bound later, for DeepAR).
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,  # test dataset
    predictor=predictor1,  # feed-forward predictor
    num_samples=100,  # number of sample paths we want for evaluation
)

# Materialise both iterators so they can be indexed and reused
forecasts = list(forecast_it)
tss = list(ts_it)

# First five values of the first ground-truth series, flattened to 1-D
ts_entry = tss[0]
np.array(ts_entry[:5]).reshape(-1,)

In [None]:
# First five target values of the first test entry, for comparison with the
# series returned by the evaluation iterator.
dataset_test_entry = next(iter(test_ds))
dataset_test_entry['target'][:5]

In [None]:
# first entry of the forecast list (the forecast for the first test series)
forecast_entry = forecasts[0]
forecast_entry

In [None]:
def plot_prob_forecasts(ts_entry, forecast_entry, plot_length=50, prediction_intervals=(50.0, 90.0)):
    """Plot the tail of an observed series together with a probabilistic forecast.

    Parameters
    ----------
    ts_entry
        Observed series; it is sliced with ``[-plot_length:]`` and must expose
        ``.plot(ax=...)`` (e.g. a pandas Series or DataFrame).
    forecast_entry
        Forecast object exposing ``.plot(prediction_intervals=..., color=...)``.
    plot_length
        Number of trailing observations to draw (default 50).
    prediction_intervals
        Interval coverages, in percent, to shade and to name in the legend
        (default ``(50.0, 90.0)``).

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # Legend order: observations, median, then the intervals widest-first
    legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]

    _, ax = plt.subplots(1, 1, figsize=(20, 8))
    ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
    # No axes is passed here: the forecast is presumably drawn on matplotlib's
    # current axes, i.e. the one created just above.
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.show()

In [None]:
# Plot the first observed series against its forecast
plot_prob_forecasts(ts_entry, forecast_entry)

In [None]:
from gluonts.evaluation import Evaluator

# Score the forecasts against the ground-truth series at the 10%, 50% and 90%
# quantiles; the call returns aggregate metrics and per-series metrics.
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))
# Pretty-print the aggregate metrics
print(json.dumps(agg_metrics, indent=4))

## 2) DeepAR

In [None]:
from gluonts.model.deepar import DeepAREstimator

# DeepAR configured from the dataset metadata (the dict is named `metadata`
# in this notebook), looking back 100 steps and using the default trainer.
estimator = DeepAREstimator(
    prediction_length=metadata["prediction_length"],
    context_length=100,
    freq=metadata["freq"]
)

# Fit on the training dataset
predictor = estimator.train(train_ds)

In [None]:
# Generate DeepAR forecasts for the test dataset together with the matching
# ground-truth series.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,  # test dataset
    predictor=predictor,  # predictor
    num_samples=100,  # number of sample paths we want for evaluation
)

# Materialise both iterators so they can be indexed and reused
forecasts = list(forecast_it)
tss = list(ts_it)

In [None]:
# Take both the observed series and the forecast from the current evaluation
# run, so the plot does not depend on a `ts_entry` left over from another cell.
ts_entry = tss[0]
forecast_entry = forecasts[0]

plot_prob_forecasts(ts_entry, forecast_entry)

In [None]:
from gluonts.evaluation import Evaluator

# Score the forecasts against the ground-truth series at the 10%, 50% and 90%
# quantiles; the call returns aggregate metrics and per-series metrics.
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

In [None]:
# Pretty-print the aggregate metrics
print(json.dumps(agg_metrics, indent=4))