In [1]:
import pandas as pd
import numpy as np

In [2]:
from glob import glob
from pprint import pprint
import dill
import json

In [3]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.layouts import gridplot
import bokeh.palettes as bpal
from bokeh.models import CategoricalColorMapper
from bokeh.transform import factor_cmap, linear_cmap
from bokeh.models import Legend, LinearAxis, Range1d, DatetimeTickFormatter
from bokeh.io import export_png
output_notebook()

import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
import statsmodels.api as sm  
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# from pmdarima import auto_arima
from sklearn.preprocessing import StandardScaler

In [5]:
# Collect the path of every model-ready CSV; the bare name on the last line displays the list.
# Note: glob returns matches in arbitrary (filesystem-dependent) order.
feat_files = glob("../data/ready-for-model/*.csv")
feat_files

['../data/ready-for-model/20190809_QLDdf_interpolated_features.csv',
 '../data/ready-for-model/2009-18_NEMtotaldemand.csv',
 '../data/ready-for-model/20190809_SAdf_interpolated_features.csv',
 '../data/ready-for-model/20190226_SAdf_features.csv',
 '../data/ready-for-model/20190226_TASdf_features.csv',
 '../data/ready-for-model/20190809_NSWdf_interpolated_features.csv',
 '../data/ready-for-model/20190226_NSWdf_features.csv',
 '../data/ready-for-model/20190226_VICdf_features.csv',
 '../data/ready-for-model/20190809_TASdf_interpolated_features.csv',
 '../data/ready-for-model/20190226_QLDdf_features.csv',
 '../data/ready-for-model/20190809_VICdf_interpolated_features.csv']

In [6]:
# Path of the Victorian interpolated-features CSV: the first file matching the pattern.
vic_matches = glob('../data/ready-for-model/*VIC*interp*.csv')
fvic = vic_matches[0]
fvic

'../data/ready-for-model/20190809_VICdf_interpolated_features.csv'

In [7]:
# Path of the NEM total-demand CSV, which supplies the target variable.
# Deliberately binds only `ftarget`: `feat_files` must keep holding the full list of
# feature files rather than being rebound to this single path string.
ftarget = glob('../data/ready-for-model/*NEM*.csv')[0]
ftarget

'../data/ready-for-model/2009-18_NEMtotaldemand.csv'

In [8]:
# Load the demand table; the first CSV column is parsed as dates and used as the index.
dftarget = pd.read_csv(ftarget, parse_dates=[0], index_col=0)
dftarget.head(3)

Unnamed: 0_level_0,NSW1,QLD1,SA1,TAS1,VIC1,NEMtotal
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-01-01 00:30:00,7535.0,5611.54,1310.89,909.71,4799.87,20167.01
2009-01-01 01:00:00,7229.24,5457.34,1272.69,896.63,4646.21,19502.11
2009-01-01 01:30:00,6857.62,5294.12,1178.87,897.52,4950.16,19178.29


### Setting target variable as the electricity demand ('VIC1') up to (& including) year 2018

In [9]:
# Target = the 'VIC1' demand column for every year up to and including 2018.
# One .loc call selects rows and column together (no chained indexing), and the year
# bound is written the way the section heading states it.
target = dftarget.loc[dftarget.index.year <= 2018, ['VIC1']]
# Conform to a regular half-hourly grid; asfreq fills any timestamp missing from the
# index with a NaN row. '30min' is the non-deprecated spelling of the '30T' alias.
target = target.asfreq('30min')
target.head(3)

Unnamed: 0_level_0,VIC1
SETTLEMENTDATE,Unnamed: 1_level_1
2009-01-01 00:30:00,4799.87
2009-01-01 01:00:00,4646.21
2009-01-01 01:30:00,4950.16


In [10]:
# Count missing values per column after re-gridding.
target.isna().sum()

VIC1    1
dtype: int64

In [11]:
# Display the rows that contain at least one missing value.
target.loc[target.isna().any(axis="columns")]

Unnamed: 0_level_0,VIC1
SETTLEMENTDATE,Unnamed: 1_level_1
2011-05-01,


In [12]:
# Fill the missing half-hour with the next valid observation (backward fill).
# DataFrame.bfill() replaces the deprecated fillna(method='bfill'), and rebinding the
# name instead of mutating in place keeps the cell safe to re-run.
target = target.bfill()

<br>
### <font color='purple'>dfvic contains all independent-variable data, on a matching timescale</font>

In [13]:
# Load the Victorian exogenous features with a parsed datetime index and place them on
# the same half-hourly grid as the target.
# '30min' is the non-deprecated spelling of the '30T' offset alias.
dfvic = pd.read_csv(fvic, index_col=0, parse_dates=[0]).asfreq('30min')
dfvic.head(2)

Unnamed: 0_level_0,Date,Hour_of_day,Year,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2009-01-01 00:30:00,2009-01-01,0.5,2009.0,0.0,1.0,0.0,0.0,13.8,15.9,20.1,11.2,12.7,27.4,19.9,9.4
2009-01-01 01:00:00,2009-01-01,1.0,2009.0,0.0,1.0,0.0,0.0,13.8,15.9,20.1,11.2,12.7,27.4,19.9,9.4


In [14]:
# Peek at the last two rows to confirm where the feature table ends.
dfvic.iloc[-2:]

Unnamed: 0_level_0,Date,Hour_of_day,Year,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-12-31 23:00:00,2018-12-31,23.0,2018.0,0.0,1.0,0.0,1.0,21.3,21.4,31.4,15.8,14.9,36.5,25.4,17.1
2018-12-31 23:30:00,2018-12-31,23.5,2018.0,0.0,1.0,0.0,1.0,21.3,21.4,31.4,15.8,14.9,36.5,25.4,17.1


In [15]:
# Count missing values per feature column.
dfvic.isna().sum()

Date                            0
Hour_of_day                     0
Year                            0
shoulder                        0
summer                          0
winter                          0
workdayVIC                      0
MILDURA-AIRPORT_MinT_76031      0
CAPE-NELSON_MaxT_90184          0
MORWELL_MaxT_85280              0
MELBOURNE-AIRPORT_MinT_86282    0
CAPE-NELSON_MinT_90184          0
MILDURA-AIRPORT_MaxT_76031      0
MELBOURNE-AIRPORT_MaxT_86282    0
MORWELL_MinT_85280              0
dtype: int64

<br>
### <font color='purple'>dfmerged will contain both the target variable and all independent variables, 2009–2018</font>

<br>
### <font color='purple'>Creating a StandardScaled set of exogenous features</font>

In [16]:
# Remove the calendar helper columns so that only the columns to be scaled remain.
# errors='ignore' keeps the cell re-runnable: dfvic is rebound here, so on a second
# execution the columns are already gone and a plain drop would raise KeyError.
dfvic = dfvic.drop(columns=['Date', 'Year', 'Hour_of_day'], errors='ignore')
# Re-assert the half-hourly frequency ('30min' replaces the deprecated '30T' alias).
dfvic = dfvic.asfreq('30min')
dfvic.head(3)

Unnamed: 0_level_0,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2009-01-01 00:30:00,0.0,1.0,0.0,0.0,13.8,15.9,20.1,11.2,12.7,27.4,19.9,9.4
2009-01-01 01:00:00,0.0,1.0,0.0,0.0,13.8,15.9,20.1,11.2,12.7,27.4,19.9,9.4
2009-01-01 01:30:00,0.0,1.0,0.0,0.0,13.8,15.9,20.1,11.2,12.7,27.4,19.9,9.4


In [17]:
# print(dfvic.index.freq)
# print(dfvic.isnull().sum())

In [18]:
# Scaler for the exogenous features; the library defaults are spelled out explicitly.
ssv = StandardScaler(copy=True, with_mean=True, with_std=True)

In [19]:
# Standardise every feature column, then re-wrap the resulting array with the original
# index and column labels.
# NOTE(review): the scaler is fitted on the whole frame, which includes the 2018 rows
# used later as the forecast period - confirm whether fitting on training rows only
# is what is wanted.
scaled_values = ssv.fit_transform(dfvic)
vfeatures = pd.DataFrame(scaled_values, columns=dfvic.columns, index=dfvic.index)
vfeatures.head()

Unnamed: 0_level_0,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2009-01-01 00:30:00,-1.002199,1.746096,-0.580304,-1.477836,0.489082,-0.439855,-0.074605,0.277046,0.323261,0.308065,-0.107087,0.182368
2009-01-01 01:00:00,-1.002199,1.746096,-0.580304,-1.477836,0.489082,-0.439855,-0.074605,0.277046,0.323261,0.308065,-0.107087,0.182368
2009-01-01 01:30:00,-1.002199,1.746096,-0.580304,-1.477836,0.489082,-0.439855,-0.074605,0.277046,0.323261,0.308065,-0.107087,0.182368
2009-01-01 02:00:00,-1.002199,1.746096,-0.580304,-1.477836,0.489082,-0.439855,-0.074605,0.277046,0.323261,0.308065,-0.107087,0.182368
2009-01-01 02:30:00,-1.002199,1.746096,-0.580304,-1.477836,0.489082,-0.439855,-0.074605,0.277046,0.323261,0.308065,-0.107087,0.182368


In [20]:
# Left-join the scaled features onto the target by timestamp, so every target row is kept.
dfmerged = pd.merge(target, vfeatures, how='left', left_index=True, right_index=True)
# Re-assert the half-hourly frequency on the merged index
# ('30min' replaces the deprecated '30T' offset alias).
dfmerged = dfmerged.asfreq('30min')
# dfmerged.dropna(inplace=True)  # this should only get rid of last row, first half hour of 2019 with some NaN values
dfmerged.tail(3)

Unnamed: 0_level_0,VIC1,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-12-31 22:30:00,4257.85,-1.002199,1.746096,-0.580304,0.676665,1.711513,0.71865,1.664203,1.290242,1.012775,1.482731,0.716808,1.787423
2018-12-31 23:00:00,4247.55,-1.002199,1.746096,-0.580304,0.676665,1.711513,0.71865,1.664203,1.290242,1.012775,1.482731,0.716808,1.787423
2018-12-31 23:30:00,4439.96,-1.002199,1.746096,-0.580304,0.676665,1.711513,0.71865,1.664203,1.290242,1.012775,1.482731,0.716808,1.787423


In [21]:
# Confirm the merge introduced no missing values in any column.
dfmerged.isna().sum()

VIC1                            0
shoulder                        0
summer                          0
winter                          0
workdayVIC                      0
MILDURA-AIRPORT_MinT_76031      0
CAPE-NELSON_MaxT_90184          0
MORWELL_MaxT_85280              0
MELBOURNE-AIRPORT_MinT_86282    0
CAPE-NELSON_MinT_90184          0
MILDURA-AIRPORT_MaxT_76031      0
MELBOURNE-AIRPORT_MaxT_86282    0
MORWELL_MinT_85280              0
dtype: int64

### <font color='purple'><i>The following cells were only used to create graphs for a PowerPoint presentation</i></font>

In [22]:
# Tick-label format strings for each datetime resolution, shared by the Bokeh figures.
dtformats = DatetimeTickFormatter(**{
    'microseconds': ['%fus'],
    'milliseconds': ['%3Nms', '%S.%3Ns'],
    'seconds': ['%Ss'],
    'minsec': [':%M:%S'],
    'minutes': [':%M', '%Mm'],
    'hourmin': ['%H:%M'],
    'hours': ['%Hh', '%H:%M'],
    'days': ['%d/%m', '%a%d'],
    'months': ['%m/%Y', '%b %Y'],
    'years': ['%Y'],
})

In [23]:
# v = figure(plot_height = 400, plot_width = 850, x_axis_type = 'datetime')

# v.extra_y_ranges = {'temp' : Range1d(start=0, end=45)}
# v.xaxis.formatter = dtformats
# v.add_layout(LinearAxis(y_range_name='temp'), 'right')
# v.line(target.index, target.VIC1, color = 'purple')
# v.line(target.index, dfvic['MELBOURNE-AIRPORT_MaxT_86282'][dfvic.index.isin(target.index)], color = 'orange', y_range_name='temp')
# v.line(target.index, dfvic['MELBOURNE-AIRPORT_MinT_86282'][dfvic.index.isin(target.index)], color = 'aqua', y_range_name='temp')
# v.vbar(target.index - pd.to_timedelta(15, unit='m'), width=pd.to_timedelta(30, unit='m'), 
#        top=dfvic['workdayVIC'][dfvic.index.isin(target.index)]*100, fill_alpha=0.3, fill_color = 'green', 
#        line_alpha=0, y_range_name='temp')

# # show(v)

In [24]:
# from statsmodels.tsa.stattools import adfuller
# def test_stationarity(timeseries):

#     #Determing rolling statistics
#     rolmean = timeseries.rolling(window=12, center=False).mean()
#     rolstd = timeseries.rolling(window=12, center=False).std()

#     #Plot rolling statistics:
#     fig = plt.figure(figsize=(12, 8))
#     orig = plt.plot(timeseries, color='blue',label='Original')
#     mean = plt.plot(rolmean, color='red', label='Rolling Mean')
#     std = plt.plot(rolstd, color='black', label = 'Rolling Std')
#     plt.legend(loc='best')
#     plt.title('Rolling Mean & Standard Deviation')
#     plt.show()
    
#     #Perform Dickey-Fuller test:
#     print('Results of Dickey-Fuller Test:')
#     dftest = adfuller(timeseries, autolag='AIC')
#     dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
#     for key,value in list(dftest[4].items()):
#         dfoutput['Critical Value (%s)'%key] = value
#     print(dfoutput) 

In [25]:
# test_stationarity(target.VIC1)

In [26]:
# Sanity check: the merged index should still carry its half-hourly frequency.
merged_freq = dfmerged.index.freq
print(merged_freq)

<30 * Minutes>


In [90]:
# Work on an independent copy of the rows from 2017 onwards, so that adding columns
# below does not touch dfmerged.
dfroll = dfmerged.loc[dfmerged.index.year >= 2017].copy()
dfroll.head(3)

Unnamed: 0_level_0,VIC1,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-01-01 00:00:00,4341.78,-1.002199,1.746096,-0.580304,-1.477836,1.010653,0.023547,0.140823,1.24619,0.636677,0.553325,0.177531,1.47475
2017-01-01 00:30:00,4097.58,-1.002199,1.746096,-0.580304,-1.477836,1.010653,0.023547,0.140823,1.24619,0.636677,0.553325,0.177531,1.47475
2017-01-01 01:00:00,4034.74,-1.002199,1.746096,-0.580304,-1.477836,1.010653,0.023547,0.140823,1.24619,0.636677,0.553325,0.177531,1.47475


In [91]:
# Add an empty 'predicted' column at position 1, directly after the target column.
# Later cells select the exogenous features positionally (columns 2 onwards), so the
# position matters.
# - np.nan is used because the np.NaN alias no longer exists in NumPy 2.x.
# - DataFrame.insert raises ValueError when the column already exists, so on a re-run
#   the existing column is reset to NaN instead.
if "predicted" in dfroll.columns:
    dfroll["predicted"] = np.nan
else:
    dfroll.insert(1, "predicted", np.nan)
dfroll.head(3)

Unnamed: 0_level_0,VIC1,predicted,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2017-01-01 00:00:00,4341.78,,-1.002199,1.746096,-0.580304,-1.477836,1.010653,0.023547,0.140823,1.24619,0.636677,0.553325,0.177531,1.47475
2017-01-01 00:30:00,4097.58,,-1.002199,1.746096,-0.580304,-1.477836,1.010653,0.023547,0.140823,1.24619,0.636677,0.553325,0.177531,1.47475
2017-01-01 01:00:00,4034.74,,-1.002199,1.746096,-0.580304,-1.477836,1.010653,0.023547,0.140823,1.24619,0.636677,0.553325,0.177531,1.47475


In [81]:
# %%time
# X_train = dfroll.iloc[:48*30,2:]
# Y_train = dfroll.iloc[:48*30,0]
# x_test = dfroll.iloc[48*30:48*37,2:]

# model = sm.tsa.statespace.SARIMAX(endog=Y_train, exog=X_train, order=(1,1,1), seasonal_order=(1,0,0,48)).fit(disp=False)



CPU times: user 1min 52s, sys: 368 ms, total: 1min 53s
Wall time: 57.7 s


In [96]:
%%time
# Training window = calendar year 2017. Column 0 is the target (VIC1), column 1 is the
# empty 'predicted' placeholder, and columns 2 onwards are the exogenous regressors.
train_rows = dfroll.loc[dfroll.index.year == 2017]
X_train = train_rows.iloc[:, 2:]
Y_train = train_rows.iloc[:, 0]

# SARIMAX with order (1,1,1) and seasonal order (1,0,0,48); on this half-hourly index
# a seasonal period of 48 steps spans one day.
sarimax_spec = sm.tsa.statespace.SARIMAX(
    endog=Y_train,
    exog=X_train,
    order=(1, 1, 1),
    seasonal_order=(1, 0, 0, 48),
)
model = sarimax_spec.fit(disp=False)

CPU times: user 17min 12s, sys: 3.28 s, total: 17min 16s
Wall time: 8min 44s


In [82]:
# print(model.summary())

In [100]:
# with open('./pickles/smx_model_vic_2017.pickle', 'wb') as outfile:
#     dill.dump(model, outfile)
# ## is 3.6Gb ...

In [108]:
# Exogenous regressors for 2018, using the same positional column slice as training.
x_test = dfroll.loc[dfroll.index.year == 2018].iloc[:, 2:]
x_test.head(3)

Unnamed: 0_level_0,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-01 00:00:00,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 00:30:00,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 01:00:00,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905


In [109]:
# Keep only the first 48*7 rows, i.e. seven days of half-hourly observations.
x_test = x_test.head(48 * 7)
x_test.tail(3)

Unnamed: 0_level_0,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-07 22:30:00,-1.002199,1.746096,-0.580304,-1.477836,1.613718,0.634396,0.540903,1.268216,1.1068,1.792534,0.207491,1.745733
2018-01-07 23:00:00,-1.002199,1.746096,-0.580304,-1.477836,1.613718,0.634396,0.540903,1.268216,1.1068,1.792534,0.207491,1.745733
2018-01-07 23:30:00,-1.002199,1.746096,-0.580304,-1.477836,1.613718,0.634396,0.540903,1.268216,1.1068,1.792534,0.207491,1.745733


In [110]:
%%time
# Out-of-sample forecast over exactly the period covered by x_test.
# The window is given by x_test's own first and last timestamps: adding a bare integer
# to a Timestamp (index.max() + 1) is not supported by current pandas, and this form
# also guarantees the forecast horizon matches the number of exog rows supplied.
yhat = model.predict(start=x_test.index[0], end=x_test.index[-1], exog=x_test)
yhat

CPU times: user 2.18 s, sys: 633 ms, total: 2.81 s
Wall time: 1.51 s


In [111]:
# First five forecast values.
yhat.iloc[:5]

2018-01-01 00:00:00    4469.344680
2018-01-01 00:30:00    4378.367193
2018-01-01 01:00:00    4292.777278
2018-01-01 01:30:00    4239.025831
2018-01-01 02:00:00    4144.631774
Freq: 30T, dtype: float64

In [112]:
# Write the forecast into the 'predicted' column at the forecast timestamps, then show
# those rows so actual and predicted values can be compared side by side.
forecast_idx = yhat.index
dfroll.loc[forecast_idx, 'predicted'] = yhat
dfroll.loc[forecast_idx]

Unnamed: 0,VIC1,predicted,shoulder,summer,winter,workdayVIC,MILDURA-AIRPORT_MinT_76031,CAPE-NELSON_MaxT_90184,MORWELL_MaxT_85280,MELBOURNE-AIRPORT_MinT_86282,CAPE-NELSON_MinT_90184,MILDURA-AIRPORT_MaxT_76031,MELBOURNE-AIRPORT_MaxT_86282,MORWELL_MinT_85280
2018-01-01 00:00:00,4445.07,4469.344680,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 00:30:00,4251.18,4378.367193,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 01:00:00,4092.53,4292.777278,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 01:30:00,3958.95,4239.025831,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 02:00:00,3785.27,4144.631774,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 02:30:00,3673.72,4075.526184,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 03:00:00,3574.67,4051.417408,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 03:30:00,3522.81,4040.396660,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 04:00:00,3524.25,4042.200102,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905
2018-01-01 04:30:00,3534.15,4064.830445,-1.002199,1.746096,-0.580304,-1.477836,0.570578,0.655459,0.864044,0.893774,1.012775,0.863127,0.836648,1.453905


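## Conclusion to date: SARIMAX seems more trouble than it is worth — fitting a single year (2017) of half-hourly data took almost 9 minutes of wall time, and the pickled model was about 3.6 GB.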

In [113]:
# Overlay actual demand (blue) and the SARIMAX forecast (red) on a datetime axis.
p = figure(plot_height=400, plot_width=850, x_axis_type='datetime')
p.xaxis.formatter = dtformats
for series_name, line_colour in (('VIC1', 'blue'), ('predicted', 'red')):
    p.line(dfroll.index, dfroll[series_name], color=line_colour)

show(p)