# Time Series Forecasting

In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import dates
import seaborn as sns
import joblib
import statsmodels.api as sm
# import bamboolib as bam

from tkinter import *
import dtale as dt
import plotly.graph_objects as go

# import glob

# from sklearn.preprocessing import MinMaxScaler
from datetime import datetime, timedelta

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.api import ExponentialSmoothing
from statsmodels.tsa.stattools import adfuller

from sklearn import metrics
from sklearn.metrics import  mean_absolute_percentage_error
from sklearn.model_selection import train_test_split

pd.set_option('display.float_format', '{:,.6}'.format)
# pd.set_option('max_columns', 100)


  from pandas import Int64Index as NumericIndex


In [None]:
# Bokeh imports: notebook output helpers, figure construction, hover tool and data-source models.
from bokeh.io import output_notebook, show
from bokeh.plotting import figure, output_file, save

from bokeh.models.tools import HoverTool
from bokeh.models import Legend, ColumnDataSource, Range1d
#? Route Bokeh figures to the notebook output cells.
output_notebook()

In [2]:

#* Ignore Warnings
#? Only the statsmodels ConvergenceWarning category is silenced; other warnings are still shown.
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
warnings.simplefilter('ignore', ConvergenceWarning)

### 1.] Preparations

#### Loading & Preparing the Data

In [None]:
#? Load the System ("sys") and Energy ("enr") trade frames that were persisted with joblib.
#! joblib.load unpickles its input and can run arbitrary code -- only load files you trust.
df_sys = joblib.load("./data/GTMA Trades_df_sys")
df_enr = joblib.load("./data/GTMA Trades_df_enr")

# df_sys.head(6)

In [None]:

#? Daily resampling of the System trades, keyed on the trade end timestamp:
#?   - Volume and Cost are summed per day,
#?   - Price and Trade Hours Duration are averaged per day.
sys_by_end_time = df_sys.set_index(['End Time_D&T'])
df_sys_D1 = sys_by_end_time[['Volume', 'Cost']].resample('D').sum()
df_sys_D2 = sys_by_end_time[['Price', 'Trade Hours Duration']].resample('D').mean()

#? Left join of the daily means onto the daily sums (matched on the daily index).
df_sys_D = df_sys_D1.join(other=df_sys_D2, how='left')
df_sys_D.head(4)

In [None]:
#? Daily resampling of the Energy trades, keyed on the trade end timestamp:
#?   - Volume and Cost are summed per day,
#?   - Price and Trade Hours Duration are averaged per day.
enr_by_end_time = df_enr.set_index(['End Time_D&T'])
df_enr_D1 = enr_by_end_time[['Volume', 'Cost']].resample('D').sum()
df_enr_D2 = enr_by_end_time[['Price', 'Trade Hours Duration']].resample('D').mean()

#? Left join of the daily means onto the daily sums (matched on the daily index).
df_enr_D = df_enr_D1.join(other=df_enr_D2, how='left')
# df_enr_D.head(4)

In [None]:

#? Using DTale to remove rows from 2018 and older.
#! Manual step: the rows are removed by hand in the D-Tale browser UI, so re-running the notebook does not reproduce it.
dt.show(df_sys_D, open_browser=True, ignore_index=True)

In [None]:
#? Open the Energy daily frame in the D-Tale browser UI (presumably for the same manual row removal -- confirm).
dt.show(df_enr_D, open_browser=True, ignore_index=True)

In [None]:

#? Reloading the .csv after removing rows
#? NOTE(review): no visible cell writes this CSV; presumably it was exported by hand from D-Tale -- confirm provenance.
df_sys_D = pd.read_csv("./data/df_sysD_19-21.csv")
df_sys_D.head(4)

In [None]:
#? Reload the Energy "19-21" CSV and persist it with joblib; a later cell loads it back from this path.
df_enr_D = pd.read_csv("./data/df_enrD_19-21.csv")
joblib.dump(df_enr_D,"./data/GTMA Trades_df_enrD_19-21")

In [None]:
#? Persist the reloaded System frame with joblib; a later cell loads it back from this path.
joblib.dump(df_sys_D,"./data/GTMA Trades_df_sysD_19-21")

In [3]:
#? Load the System daily frame from its joblib file (unpickles the file -- trusted input only).
df_sysD_19to21 = joblib.load('./data/GTMA Trades_df_sysD_19-21')
# df_sysD_19to21.head(4)

In [4]:
#? Load the Energy daily frame from its joblib file (unpickles the file -- trusted input only).
df_enrD_19to21 = joblib.load('./data/GTMA Trades_df_enrD_19-21')

> FORMATTED DF

In [5]:

#? Parse the "End Time_D&T" column with the "%Y-%m-%d" format; the column is overwritten with the parsed values.
df_sysD_19to21["End Time_D&T"] = pd.to_datetime(df_sysD_19to21["End Time_D&T"], format="%Y-%m-%d")
# df_sysD_19to21.head(6)

In [None]:
#? Display the column dtypes of the System frame.
df_sysD_19to21.dtypes

In [6]:
#? Parse the "End Time_D&T" column with the "%Y-%m-%d" format; the column is overwritten with the parsed values.
df_enrD_19to21["End Time_D&T"] = pd.to_datetime(df_enrD_19to21["End Time_D&T"], format="%Y-%m-%d")

#### Auxiliary Functions

In [7]:
#? Row offsets used as the start of the training slice, keyed by a quarter label.
#? NOTE(review): the date comments assume row 0 is 2019-01-01 with exactly one row per day -- confirm.
dict_train = {
        "Q1" : 365,            #? Start of 2020
        "Q2" : 365 + 89,       #? NOTE(review): under that assumption this is 2020-03-30, neither 1 March (365 + 60) nor 1 April (365 + 91; 2020 is a leap year) -- confirm the intended date
        "Q3" : 365 + 182,      #? 2020 midyear, Start of July
        "Q4" : 365 + 182 + 92  #? Start of Oct.
        }

#? Plotly layout passed to go.Figure in go_plotter: tight margins plus fixed axis titles.
layout = go.Layout(
        margin=go.layout.Margin(
        l=20, #left margin
        r=15, #right margin
        b=15, #bottom margin
        t=35 ),#top margin
        xaxis_title="Date", yaxis_title="Cost (£)")

In [16]:

def go_plotter(df_train, df_test, df_forecast, FC, LABEL, CAT):
    """Show the train, test and forecast "Cost" series in a single Plotly figure.

    Args:
        df_train: frame with a "Cost" column; drawn as a dotted line named "Train".
        df_test: frame with a "Cost" column; drawn as a solid line named "Test".
        df_forecast: frame with a "Cost" column; drawn as a solid line with
            markers named "Forecast".
        FC: forecast horizon, rendered with ``str`` into the title.
        LABEL: training-window label appended to the title (must be a ``str``).
        CAT: trade category, rendered with ``str`` at the start of the title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    #? Note: Index of incoming dataframes are Datetime
    fig = go.Figure(layout=layout)
    fig.update_layout(title=f"{CAT!s} Trades {FC!s}-Day Forecast -- " + LABEL)

    #? Insertion order of the dict fixes the trace order: Train, Test, Forecast.
    frames_by_name = {"Train": df_train, "Test": df_test, "Forecast": df_forecast}
    for trace_name, frame in frames_by_name.items():
        is_forecast = trace_name == "Forecast"
        is_train = trace_name == "Train"
        fig.add_trace(
            go.Scatter(
                x=frame.index,
                y=frame["Cost"].values,
                name=trace_name,
                mode='lines+markers' if is_forecast else 'lines',
                line_dash='dot' if is_train else 'solid',
                hovertemplate="Date = %{x}<br>Cost = %{y}",
            )
        )
    fig.show()

In [10]:
#? Small offset added to the denominator so that actual == 0 does not divide by zero.
EPSILON = 1e-10
def maape(actual, predicted):
    """Mean Arctangent Absolute Percentage Error (MAAPE).

    Computes ``mean(arctan(|(actual - predicted) / (actual + EPSILON)|))``.

    Args:
        actual: array-like of observed values (list, ndarray, Series, ...).
        predicted: array-like of forecast values.

    Returns:
        The mean as a NumPy float, in radians (each term lies in [0, pi/2]).

    Notes:
        * Inputs are converted with ``np.asarray``, so values are paired by
          position, not by pandas index labels.
        * When both inputs hold the same number of elements but have different
          shapes (e.g. ``(n, 1)`` vs ``(n,)``), they are flattened first.
          Otherwise NumPy would broadcast them into an ``(n, n)`` matrix and
          silently average over every actual/predicted combination.
    """
    #? Note: result is NOT multiplied by 100
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    if actual.shape != predicted.shape and actual.size == predicted.size:
        actual, predicted = actual.ravel(), predicted.ravel()

    return np.mean(np.arctan(np.abs((actual - predicted) / (actual + EPSILON))))

In [None]:
def bokeh_plotter(train, test, forecast, forecast_label="ExpoSmooth Forecast -- System Trade"):
    """Show train, test and forecast "Cost" lines on one Bokeh datetime figure.

    Each input is wrapped in a ColumnDataSource and must expose the fields
    'End Time_D&T' (x axis) and 'Cost' (y axis).
    NOTE(review): presumably a DataFrame whose index is named 'End Time_D&T'
    is the intended input -- confirm against the callers.

    Args:
        train: training data; drawn as a faint grey line without a legend entry.
        test: hold-out data; drawn as a dotted orange line labelled "Test".
        forecast: forecast data; drawn with the default line style.
        forecast_label: legend text of the forecast line. The default is the
            label that used to be hard-coded.

    Returns:
        None. The figure is displayed with ``show``.
    """
    #? ds_ == "data source", as required by bokeh plots
    ds_train = ColumnDataSource(train)
    ds_test = ColumnDataSource(test)
    ds_forecast = ColumnDataSource(forecast)

    TOOLTIPS = [('Date', '@{End Time_D&T}{%F}'),
                ('Cost', '@Cost{($ 0.00 a)}')]

    hover = HoverTool(tooltips=TOOLTIPS,
                      formatters={'@{End Time_D&T}': 'datetime'},
                      mode="mouse")

    #? Establishing plot
    #? width/height replace plot_width/plot_height, which Bokeh 3 no longer accepts.
    p = figure(x_axis_type="datetime", width=1100, height=420)
    p.add_tools(hover)

    #? Plot configurations
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_alpha = 0.5
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Cost'

    #* Main plots
    p.line(x='End Time_D&T', y='Cost', source=ds_train, color="grey", line_alpha=.35)
    p.line(x='End Time_D&T', y='Cost', source=ds_test, color="orange", line_dash="dotted",
           legend_label="Test", line_width=2)
    p.line(x='End Time_D&T', y='Cost', source=ds_forecast, legend_label=forecast_label)

    #? Configs must be AFTER Main plots
    p.toolbar.autohide = True
    p.legend.click_policy = "hide"

    show(p)

### 2.] Exponential Smoothing

#### System Trades

In [26]:
#? Training-window label (a key of dict_train) and forecast horizon in days.
LABEL = "Q2"
FC = 30

#? Number of rows at the end of the frame that are excluded from training.
HOLDOUT_ROWS = 152

#? Index with [] rather than .get(): an unknown label must raise KeyError. With .get() it
#? yields None, which silently turns the slice into "train from the very first row".
train_start = dict_train[LABEL]

sys_cost = df_sysD_19to21[['End Time_D&T', 'Cost']].set_index("End Time_D&T", drop=True)

#? Positional slice: rows train_start up to (not including) the last HOLDOUT_ROWS rows.
df_train = sys_cost.iloc[train_start:-HOLDOUT_ROWS]

#? Test window: the FC calendar days right after the last training day (label slice, both ends inclusive).
test_start = df_train.index[-1] + timedelta(days=1)
test_end = df_train.index[-1] + timedelta(days=FC)
df_test = sys_cost.loc[test_start:test_end, ["Cost"]]


In [27]:

#* Daily Frequency

#? Exponential smoothing with a damped additive trend, fitted on the training window.
#? Optional arguments: seasonal_periods=30, seasonal='add'
sys_smoother = ExponentialSmoothing(
    df_train,
    trend='add',
    damped_trend=True,
    freq='D',
    initialization_method="heuristic",
)
model_fit_sysD = sys_smoother.fit(optimized=True)

#? One forecast step per test row, re-labelled with the test dates so it lines up with df_test.
n_steps = len(df_test.index)
df_forecast = pd.DataFrame(
    model_fit_sysD.forecast(n_steps).values,
    index=df_test.index,
    columns=['Cost'],
)


In [28]:
#? Plot train/test/forecast, then show the MAAPE of the forecast against the test window as the cell output.
go_plotter(df_train, df_test, df_forecast, FC, LABEL, "System")
maape(df_test.values, df_forecast.values)

1.1509091395900442

In [None]:

#? Basis for Bokeh plotting
#? Fits a damped additive-trend model with additive 30-period seasonality on "Cost" rows (365+182+1):-152,
#? forecasts 60 steps and labels them with the "End Time_D&T" values of rows -152:-92.
#? NOTE(review): df_train / df_test passed below are whatever an earlier cell left in the kernel; they are
#? not derived from the slice this model is fitted on -- confirm that this is intended.
#? NOTE(review): this cell overwrites the existing model_fit_sysD and df_forecast.
model_fit_sysD = ExponentialSmoothing(df_sysD_19to21['Cost'][(365+182+1):-152], seasonal_periods=30, trend='add', seasonal='add',
                                    damped_trend=True, initialization_method="heuristic").fit(optimized=True)

df_forecast = pd.DataFrame(model_fit_sysD.forecast(60).values, 
                            index=df_sysD_19to21["End Time_D&T"][-152:-92], columns=['Cost'])

bokeh_plotter(df_train,
            df_test, 
            df_forecast)

#### Energy Trades

In [23]:

#? Training-window label (a key of dict_train) and forecast horizon in days.
LABEL = "Q2"
FC = 45

#? Number of rows at the end of the frame that are excluded from training.
HOLDOUT_ROWS = 152

#? Index with [] rather than .get(): an unknown label must raise KeyError. With .get() it
#? yields None, which silently turns the slice into "train from the very first row".
train_start = dict_train[LABEL]

enr_cost = df_enrD_19to21[['End Time_D&T', 'Cost']].set_index("End Time_D&T", drop=True)

#? Positional slice: rows train_start up to (not including) the last HOLDOUT_ROWS rows.
df_train = enr_cost.iloc[train_start:-HOLDOUT_ROWS]

#? Test window: the FC calendar days right after the last training day (label slice, both ends inclusive).
test_start = df_train.index[-1] + timedelta(days=1)
test_end = df_train.index[-1] + timedelta(days=FC)
df_test = enr_cost.loc[test_start:test_end, ["Cost"]]

In [24]:

#? Exponential smoothing with a damped additive trend, fitted on the Energy training window.
#? Optional arguments: seasonal_periods=30, seasonal='add'
#? NOTE(review): the Energy fit is stored under the name model_fit_sysD, replacing whatever that
#? name held before -- consider a dedicated name once all users of the variable are known.
enr_smoother = ExponentialSmoothing(
    df_train,
    trend='add',
    damped_trend=True,
    freq='D',
    initialization_method="heuristic",
)
model_fit_sysD = enr_smoother.fit(optimized=True)

#? One forecast step per test row, re-labelled with the test dates so it lines up with df_test.
n_steps = len(df_test.index)
df_forecast = pd.DataFrame(
    model_fit_sysD.forecast(n_steps).values,
    index=df_test.index,
    columns=['Cost'],
)


In [25]:
#? Plot train/test/forecast, then show the MAAPE of the forecast against the test window as the cell output.
go_plotter(df_train, df_test, df_forecast, FC, LABEL, "Energy")
maape(df_test.values, df_forecast.values)

1.073410356961242

_______________________

##### From ritvikmath?

In [None]:

#? Reference O
#? Rolling forecast loop kept for reference: for every date in test_data, refit on the data up to the
#? previous day and store that fit's forecast() result at the date.
#? NOTE(review): test_data, lim_catfish_sales, SARIMAX, my_order and my_seasonal_order are not defined or
#? imported anywhere in the visible notebook, so this cell presumably fails on a fresh kernel -- confirm.
rolling_predictions = test_data.copy()
for train_end in test_data.index:
    # Training data ends the day before the date being predicted.
    train_data = lim_catfish_sales[:train_end-timedelta(days=1)]
    model = SARIMAX(train_data, order=my_order, seasonal_order=my_seasonal_order)
    model_fit = model.fit()

    # forecast() is called without a step count.
    pred = model_fit.forecast()
    rolling_predictions[train_end] = pred


### References

1.] MAAPE -- "Thus, MAAPE is scale-independent, can be interpreted intuitively as an absolute percentage error, and is simple to calculate." [https://www.sciencedirect.com/science/article/pii/S0169207016000121]

2.] Forecasting metrics -- forecasting_metrics.py, https://gist.github.com/bshishov/5dc237f59f019b26145648e2124ca1c9

3.] Ignoring warnings -- https://stackoverflow.com/questions/34444607/how-to-ignore-statsmodels-maximum-likelihood-convergence-warning