Notebook for visualizing and comparing models

In [15]:
from ipywidgets import *
import plotly.graph_objs as go
import chart_studio.plotly as py
import pandas as pd
from data import data_handler, data_formatter
from src.models.model_handler import *
import os
# Cufflinks wrapper on plotly
import cufflinks

# Data science imports
import numpy as np

# Pandas display: show up to 30 columns before truncating
pd.set_option("display.max_columns", 30)

# Render the value of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# Plotly/cufflinks: work in offline mode and apply the global 'pearl' theme
from plotly.offline import iplot
cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl')

# Test period (day.month.year strings passed straight to data_handler.get_data)
start_time, end_time = "01.01.2018", "31.01.2018"

# Fetch the "System Price" series for the period and give it a datetime index
data = data_formatter.combine_hour_day_month_year_to_datetime_index(
    data_handler.get_data(start_time, end_time, ["System Price"], os.getcwd())
)

# First `split` rows become the training set, the remainder the test set
split = 150
train = data[:split]
test = data[split:]

In [17]:
# Stationarity check: Augmented Dickey-Fuller test at the 5 % significance level.
# The call below yields a (p-value, bool) pair; per the pmdarima docs the bool
# says whether the series should be differenced.
from pmdarima.arima import ADFTest

adf_test = ADFTest(alpha=0.05)
adf_test.should_diff(data)

(0.01, False)

In [20]:
# Load the ARIMA validation result and plot its forecasts.
# NOTE(review): load_pickle presumably unpickles the file -- only load trusted artifacts.
filepath = os.getcwd()
# Trim the working directory to the part before the first "src";
# str.index raises ValueError when "src" does not occur in the path.
filepath = filepath[:filepath.index("src")]
# Build the path with os.path.join instead of hard-coded "\\" separators so the
# notebook also runs on Linux/macOS (the result is unchanged on Windows).
arima_tuple = load_pickle(os.path.join(
    filepath, "src", "results", "validation", "2018-01-15_2018-01-28", "arima_1.pkl"
))
# The loaded tuple is indexed positionally: [2] is used as the fitted model,
# [3] as the forecasts that get plotted.
arima_forecasts = arima_tuple[3]
arima_model = arima_tuple[2]
print(arima_model.model.summary())
arima_forecasts.iplot(
    mode='lines',
    opacity=0.8,
    size=8,
    symbol=1,
    xTitle='Date',
    yTitle='Price/€',
    title='Arima Benchmark'
)

                                     SARIMAX Results                                      
Dep. Variable:                                  y   No. Observations:                  336
Model:             SARIMAX(2, 1, 1)x(2, 0, 1, 12)   Log Likelihood                -560.094
Date:                            Tue, 16 Feb 2021   AIC                           1134.188
Time:                                    19:02:32   BIC                           1160.887
Sample:                                         0   HQIC                          1144.832
                                            - 336                                         
Covariance Type:                              opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          1.2316      0.102     12.025      0.000       1.031       1.432
ar.L2         -0.4652      0.051   

In [16]:
# Load the ETS validation result and plot its forecasts.
# NOTE(review): load_pickle presumably unpickles the file -- only load trusted artifacts.
filepath = os.getcwd()
# Trim the working directory to the part before the first "src";
# str.index raises ValueError when "src" does not occur in the path.
filepath = filepath[:filepath.index("src")]
# Build the path with os.path.join instead of hard-coded "\\" separators so the
# notebook also runs on Linux/macOS (the result is unchanged on Windows).
ets_tuple = load_pickle(os.path.join(
    filepath, "src", "results", "validation", "2018-01-15_2018-01-28", "ets_2.pkl"
))
# Element [3] of the loaded tuple is used as the forecasts to plot.
ets_forecasts = ets_tuple[3]
ets_forecasts.iplot(
    mode='lines',
    opacity=0.8,
    size=8,
    symbol=1,
    xTitle='Date',
    yTitle='Price/€',
    # Was 'Arima Benchmark' -- a copy-paste leftover that mislabeled the ETS plot.
    title='ETS Benchmark'
)


Model visualization:
- Inputs: test and forecasted data, the number of models, and the model name(s). These can be read by model_handler, which returns
tuples.
- Output: An interactive graph that contains
    - (Point) Forecasts (models), upper and lower bounds (PIs) and test data in different colors
    - Possibility for a double axis (e.g. for MAE or an exogenous variable)
    - Range selection
    - Dropdown menu to select models to view
    - Scores view that adapts to selection of items above

In [12]:
# Fetch the "System Price" test data for 01.01.2018 - 31.03.2018.
# Per the original note, get_data returns a pandas DataFrame.

test_data = data_handler.get_data("01.01.2018", "31.03.2018", ["System Price"], os.getcwd())

# Example layout for the model input: a nested list whose first element holds the
# model names and whose second element holds one forecast list per model:
#   [[Model 1, Model 2, ...], [F1, F2, ...]]
# NB! The models must have forecasted the same period as the fetched test data.
models = [['arima', 'ets', 'mlp'],[[33, 44, 55], [33, 44, 55], [44, 44, 55]]]

# NOTE(review): this immediately discards the example list assigned above --
# confirm whether the placeholder or the empty list is the intended starting point.
models = []
# Combine test DataFrame with models and the corresponding forecasts