# Load necessary packages

In [1]:
import pandas as pd
import numpy as np

# Named hex colour constants (RGB strings) used for plot styling below.
# Dark / neutral tones
hex_indigo = '#2D2E5F'
hex_maroon = '#8C4750'
hex_white = '#FAFAFA'
# Accent tones
hex_salmon = '#F68F83'
hex_gold = '#BC9661'
hex_blue = '#7EB5D2'

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.dates as dates

import matplotlib.font_manager as font_manager
# Refresh Matplotlib's font cache (presumably so the 'SF Mono' family selected
# below is picked up after installation -- confirm). `_rebuild` is a private
# helper that newer Matplotlib releases no longer provide, so call it only when
# it exists instead of aborting the whole setup cell with an AttributeError.
_rebuild_font_cache = getattr(font_manager, '_rebuild', None)
if callable(_rebuild_font_cache):
    _rebuild_font_cache()

# Global Matplotlib styling: typeface, text weights, figure title appearance
# and the default line-colour cycle (indigo -> salmon -> maroon).
mpl.rcParams.update({
    'font.family': 'SF Mono',
    'font.weight': 'medium',
    'axes.titleweight': 'semibold',
    'axes.labelweight': 'medium',
    'axes.prop_cycle': mpl.cycler(color=[hex_indigo, hex_salmon, hex_maroon]),
    'figure.titlesize': 'large',
    'figure.titleweight': 'semibold',
})

from termcolor import colored

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression, Ridge, ElasticNet, LassoCV, RidgeCV, ElasticNetCV
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import roc_auc_score, accuracy_score

import tensorflow as tf

In [2]:
# Load the pre-computed feature table from the notebook's working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load files from
# a trusted source.
features = pd.read_pickle("./features.pkl")

In [3]:
# Preview the first five rows of the feature table (head() defaults to n=5).
features.head()

Unnamed: 0,ID3,VOL,MCP,LOAD,LOAD_F,LOAD_FE,ID3 (-4),ID3 (-5),ID3 (-6),ID3 (-7),...,HOD 14,HOD 15,HOD 16,HOD 17,HOD 18,HOD 19,HOD 20,HOD 21,HOD 22,HOD 23
2015-01-08 01:00:00+00:00,22.953776,439.5,32.32,9008.0,8505.25,502.75,29.934792,61.666667,61.118812,61.37037,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 02:00:00+00:00,23.168355,261.5,31.1,8889.25,8222.25,667.0,29.853669,29.934792,61.666667,61.118812,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 03:00:00+00:00,21.0,420.5,30.17,8929.25,8122.25,807.0,24.012378,29.853669,29.934792,61.666667,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 04:00:00+00:00,30.0,460.6,24.54,9423.75,8323.5,1100.25,23.26981,24.012378,29.853669,29.934792,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 05:00:00+00:00,30.0,250.0,32.0,10884.5,9015.0,1869.5,22.953776,23.26981,24.012378,29.853669,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Preview the last five rows of the feature table (tail() defaults to n=5).
features.tail()

Unnamed: 0,ID3,VOL,MCP,LOAD,LOAD_F,LOAD_FE,ID3 (-4),ID3 (-5),ID3 (-6),ID3 (-7),...,HOD 14,HOD 15,HOD 16,HOD 17,HOD 18,HOD 19,HOD 20,HOD 21,HOD 22,HOD 23
2018-12-30 14:00:00+00:00,53.79074,446.6,46.19,13842.5,15329.25,1486.75,76.370821,87.755884,78.709213,52.958116,...,1,0,0,0,0,0,0,0,0,0
2018-12-30 15:00:00+00:00,59.477646,131.6,47.64,14319.25,15644.5,1325.25,63.690401,76.370821,87.755884,78.709213,...,0,1,0,0,0,0,0,0,0,0
2018-12-30 16:00:00+00:00,59.883829,310.1,55.94,15120.75,16285.75,1165.0,56.170316,63.690401,76.370821,87.755884,...,0,0,1,0,0,0,0,0,0,0
2018-12-30 17:00:00+00:00,59.471501,220.9,58.4,14728.75,15555.75,827.0,51.675229,56.170316,63.690401,76.370821,...,0,0,0,1,0,0,0,0,0,0
2018-12-30 18:00:00+00:00,58.44895,288.1,69.48,14296.5,14992.25,695.75,53.79074,51.675229,56.170316,63.690401,...,0,0,0,0,1,0,0,0,0,0


In [5]:
# Target: the 'ID3' column. Predictors: its lagged 'ID3 (-4)' and 'ID3 (-5)' columns.
y = features['ID3']
X = features.loc[:, ['ID3 (-4)', 'ID3 (-5)']]

# shuffle=False preserves row order, so the first 70% of rows form the
# training set and the remaining 30% the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False, random_state=0
)

(X_train.shape, X_test.shape)

((23691, 2), (10154, 2))

In [6]:
# Keep references to the original (unscaled) DataFrames; their index is used
# later as the x-axis when plotting. The guard makes this cell idempotent:
# after the first run X_train / X_test are NumPy arrays, and re-running must
# not overwrite the unscaled frames with already-scaled data.
if isinstance(X_train, pd.DataFrame):
    X_train_unscaled, X_test_unscaled = X_train, X_test

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train_unscaled)

X_train = scaler.transform(X_train_unscaled)
X_test = scaler.transform(X_test_unscaled)

In [7]:
from ngboost import NGBRegressor
from sklearn.metrics import mean_squared_error

# Fit an NGBoost regressor with its default settings on the scaled training
# data, then produce point predictions and the predicted distribution for
# every test row.
ngb = NGBRegressor().fit(X_train, y_train)
Y_preds = ngb.predict(X_test)
Y_dists = ngb.pred_dist(X_test)

# test Mean Squared Error (scikit-learn's argument order is y_true, y_pred)
test_MSE = mean_squared_error(y_test, Y_preds)
print('Test MSE', test_MSE)

# test Negative Log Likelihood of the observed targets under the predicted
# distributions
test_NLL = -Y_dists.logpdf(y_test).mean()
print('Test NLL', test_NLL)

[iter 0] loss=4.1456 val_loss=0.0000 scale=1.0000 norm=10.8704
[iter 100] loss=3.8468 val_loss=0.0000 scale=2.0000 norm=17.2446
[iter 200] loss=3.8178 val_loss=0.0000 scale=1.0000 norm=8.5121
[iter 300] loss=3.8080 val_loss=0.0000 scale=1.0000 norm=8.4830
[iter 400] loss=3.8046 val_loss=0.0000 scale=1.0000 norm=8.4613
Test MSE 267.7746395140094
Test NLL 4.251351006034637


In [8]:
# Interval half-width expressed in predicted standard deviations, together
# with the matching label used in plot legends and printed summaries.
# Alternatives: conf_std = 1 with conf = '68.27%', or
#               conf_std = 3 with conf = '99.73%'.
conf_std = 2
conf = '95.45%'

# Per-row location and scale parameters of the predicted distributions.
pred_mean = Y_dists.params['loc']
pred_std = Y_dists.params['scale']

# One row per test observation: predicted mean, mean -/+ conf_std * scale,
# and the observed target value.
test_uncertainty_df = pd.DataFrame({
    'mean': pred_mean,
    'lower_bound': pred_mean - conf_std * pred_std,
    'upper_bound': pred_mean + conf_std * pred_std,
    'actual': y_test.values,
})

test_uncertainty_df.head(20)

Unnamed: 0,mean,lower_bound,upper_bound,actual
0,40.812165,19.209929,62.414401,45.834303
1,41.357446,19.505841,63.209052,49.971656
2,40.153868,18.850823,61.456914,53.021902
3,40.892899,19.290663,62.495136,17.579661
4,43.523622,20.055009,66.992234,18.143989
5,46.659148,23.144694,70.173602,17.68311
6,47.927365,23.663828,72.190903,17.911371
7,26.009713,0.534195,51.48523,18.389004
8,25.576292,6.651329,44.501255,17.469411
9,24.729843,4.774883,44.684803,10.102772


In [9]:
import plotly.graph_objects as go

# Plot from a deep copy so the table built earlier is left untouched.
test_uncertainty_plot_df = test_uncertainty_df.copy(deep=True)

# Trace order matters: the lower bound uses fill='tonexty', which shades the
# region between it and the trace added immediately before it (the upper bound).
upper_trace = go.Scatter(
    name=f'{conf} Upper Confidence Bound',
    x=X_test_unscaled.index,
    y=test_uncertainty_plot_df['upper_bound'],
    mode='lines',
)

lower_trace = go.Scatter(
    name=f'{conf} Lower Confidence Bound',
    x=X_test_unscaled.index,
    y=test_uncertainty_plot_df['lower_bound'],
    mode='lines',
    fill='tonexty',
    fillcolor='rgba(255, 211, 0, 0.5)',
)

# Observed target values.
real_trace = go.Scatter(
    name='Real Values',
    x=X_test_unscaled.index,
    y=test_uncertainty_df['actual'],
    mode='lines',
)

# Predicted mean.
mean_trace = go.Scatter(
    name='Mean Values',
    x=X_test_unscaled.index,
    y=test_uncertainty_plot_df['mean'],
    mode='lines',
)

data = [upper_trace, lower_trace, mean_trace, real_trace]

fig = go.Figure(data=data)

# Figure size, titles and typeface in a single layout update.
fig.update_layout(
    title='Uncertainty Quantification for ID3 forecast',
    xaxis_title='Timestamp',
    yaxis_title='ID3 (€)',
    width=1250,
    height=500,
    font_family='SF Mono',
)

fig.show()

In [10]:
# Empirical coverage check: what share of the observed test values falls
# inside the [lower_bound, upper_bound] band computed above (its nominal
# level is given by `conf`).
bounds_df = test_uncertainty_plot_df[
    ['lower_bound', 'mean', 'actual', 'upper_bound']
].copy()

# True where lower_bound <= actual <= upper_bound (both ends inclusive).
bounds_df['contained'] = bounds_df['actual'].between(
    bounds_df['lower_bound'], bounds_df['upper_bound']
)

print(f'Proportion of points contained within {conf} confidence interval:', bounds_df['contained'].mean())

Proportion of points contained within 95.45% confidence interval: 0.9204254480992712
