In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=False)
pd.set_option('display.max_colwidth', 100)

In [None]:
from sklearn.model_selection import KFold, train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from evaluation_metrics import compute_metrics, compute_metrics_csv, mean_absolute_percentage_error, symetric_mean_absolute_percentage_error

In [None]:
from statsmodels.tsa.stattools import acf, pacf, ccf, ccovf
from statsmodels.tsa.seasonal import STL
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf 
from statsmodels.tsa.stattools import adfuller, kpss
from scipy.stats import boxcox, yeojohnson
from scipy.spatial.distance import cosine, euclidean
import rstl

In [None]:
from scipy.stats import boxcox, yeojohnson

In [None]:
from tqdm.notebook import trange, tqdm
from datetime import datetime
import itertools
from numpy.polynomial import Polynomial as P

In [None]:
import pickle

In [None]:
import ruptures as rpt
# from dtaidistance import dtw

In [None]:
from river import tree

In [None]:
df = pd.read_csv('./Data/ppnet_metar_v8_2021.csv', sep=';', index_col=0)

In [None]:
df = df[df.Year < 2021].copy()

In [None]:
# Load the temperature forecast file (semicolon-separated, decimal comma).
df_temp_forecast = pd.read_csv('./Data/yrno_temperature.csv', sep=';', index_col=0, decimal=',')
# Parse the 'dd.mm.YYYY HH:MM' index labels into timestamps in one vectorised call.
df_temp_forecast.index = pd.to_datetime(df_temp_forecast.index, format='%d.%m.%Y %H:%M')
# Keep only the second column and average rows that share the same timestamp,
# which leaves a Series with one value per distinct timestamp.
df_temp_forecast = (
    df_temp_forecast
    .iloc[:, 1]
    .groupby(df_temp_forecast.index)
    .mean()
)

In [None]:
df_el = pd.read_csv('./Data/ppnet_metar_v8_electricity.csv', sep=';', index_col=0).Load_ffill
df_el.index = pd.DatetimeIndex(df_el.index)

In [None]:
# Flag the hold-out year: 1 for rows whose Year is 2020, 0 for every other row.
df['TestSet'] = (df.Year == 2020).astype('int64')

In [None]:
df.head()

In [None]:
df.shape

## We can see that variance and median levels differ among the months

In [None]:
# fig = px.box(df, y='Consumption', color='Month', facet_row='TestSet')
# fig.update_layout(
#     height=1000
# )

In [None]:
# px.line(y=df['Consumption'], x=df.index, color=df.Year)

In [None]:
px.line(y=df['Consumption'], x=df.index, color=df.TestSet)

In [None]:
# Re-read the same CSV that `df` was loaded from, this time without the Year filter,
# and give it a DatetimeIndex.
df_gas = pd.read_csv('./Data/ppnet_metar_v8_2021.csv', sep=';', index_col=0)
df_gas.index = pd.DatetimeIndex(df_gas.index)
# Replace/add 'Month' and 'Day' with the month name and weekday name of each timestamp.
df_gas['Month'] = df_gas.index.month_name()
df_gas['Day'] = df_gas.index.day_name()

In [None]:
# NOTE(review): this rebinds `df_el`. The `Load_ffill` Series read earlier from
# 'ppnet_metar_v8_electricity.csv' is discarded and replaced by this raw electricity frame.
# Consider a distinct name if both are needed.
df_el = pd.read_csv('./Data/raw_electricity_2021.csv', sep=',', index_col=0)
df_el.index = pd.DatetimeIndex(df_el.index)
# Month name and weekday name derived from each timestamp.
df_el['Month'] = df_el.index.month_name()
df_el['Day'] = df_el.index.day_name()

In [None]:
df_el

In [None]:
df.index = pd.DatetimeIndex(df.index)

In [None]:
df['Temperature_YRNO'] = pd.Series(df_temp_forecast)

In [None]:
temp_err = (df.Temperature - df.Temperature_YRNO).dropna()
temp_err.describe()

In [None]:
df.isna().sum()

In [None]:
df.shape

In [None]:
df.describe(exclude='O').columns

In [None]:
# df['Consumption'] = np.cbrt(df['Consumption'])

In [None]:
FORECAST_HORIZON = 24

In [None]:
# Up to the end of 2014 the forecast column is overwritten with the measured temperature.
up_to_2014 = df.index <= '2014-12-31 23:00:00'
df.loc[up_to_2014, 'Temperature_YRNO'] = df.loc[up_to_2014, 'Temperature']
# Fill the remaining gaps in the forecast column by interpolation (pandas default method).
df.loc[:, 'Temperature_YRNO'] = df['Temperature_YRNO'].interpolate()

In [None]:
# Target: the Consumption column. Predictors: every other column of df (both as copies).
y = df['Consumption'].copy()
X = df.drop(columns='Consumption')

In [None]:
X.columns

In [None]:
X.isna().sum()

In [None]:
y.isna().sum()

In [None]:
X = X.loc[:, ['Year', 'Month', 'Day', 'Hour', 'Day_of_week', 'Before_holiday', 'Holiday', 'Temperature', 'Cena', 'Temperature_YRNO', 'Wind_speed', 'Humidity']]

In [None]:
X = X.rename({'Cena' : 'Price'}, axis=1)

In [None]:
def create_lagged_dataset(X, y, past_lags=24, future_temp_forecast_lags=24, horizon=24):
    """Build lagged feature and multi-step target frames for direct forecasting.

    Parameters
    ----------
    X : pandas.DataFrame
        Exogenous features. Must contain 'Year' (dropped from the output) and the
        columns 'Temperature', 'Price', 'Temperature_YRNO', 'Wind_speed', 'Humidity'.
    y : pandas.Series
        Target series; aligned to ``X`` by index label.
    past_lags : int
        Number of past steps (1..past_lags) added for every lagged column and for ``y``.
    future_temp_forecast_lags : int
        Number of future steps (1..N) of 'Temperature_YRNO' added as lead features.
    horizon : int
        Number of target columns 'H1'..'H<horizon>'. Defaults to 24, the previously
        hard-coded width.

    Returns
    -------
    (X_in, y_out) : tuple of pandas.DataFrame
        ``X_in`` holds the original features (minus 'Year'), 'LAG_<col>_<i>' columns
        (value i steps in the past), 'LEAD_Temperature_YRNO_<i>' columns (forecast
        i steps ahead) and 'LAG_Consumption_<i>' columns. ``y_out['H<h>']`` is ``y``
        h steps ahead. Rows near both ends contain NaN introduced by shifting.

    Notes
    -----
    Future forecast features used to be written under 'LAG_Temperature_YRNO_<i>',
    the same names as the past lags, so the past lags 1..N were silently overwritten.
    They now live under the distinct 'LEAD_' prefix and both sets are kept.
    """
    # Multi-step targets: column H<h> is the target observed h steps ahead.
    y_out = pd.DataFrame(
        {f'H{h}': y.shift(-h) for h in range(1, horizon + 1)},
        index=y.index,
    )

    # Collect every derived column first and attach them in a single concat; inserting
    # hundreds of columns one at a time fragments the frame and is needlessly slow.
    derived = {}

    lagged_columns = ['Temperature', 'Price', 'Temperature_YRNO', 'Wind_speed', 'Humidity']
    for c in lagged_columns:
        for i in range(1, past_lags + 1):
            derived[f'LAG_{c}_{i}'] = X[c].shift(i)

    # Forecast values for the coming steps (known in advance, hence usable as features).
    for i in range(1, future_temp_forecast_lags + 1):
        derived[f'LEAD_Temperature_YRNO_{i}'] = X['Temperature_YRNO'].shift(-i)

    # Autoregressive part: past values of the target itself.
    for i in range(1, past_lags + 1):
        derived[f'LAG_Consumption_{i}'] = y.shift(i)

    X_in = pd.concat(
        [X.drop(['Year'], axis=1), pd.DataFrame(derived, index=X.index)],
        axis=1,
    )

    return (X_in, y_out)

In [None]:
past_lags, future_temp_forecast_lags = 72, 24
X_in, y_out = create_lagged_dataset(X, y, past_lags, future_temp_forecast_lags)

In [None]:
X_in.head()

In [None]:
y_out.head()

In [None]:
# Pre-computed penalty values for change-point detection.
# Outer key: cost-model name; the value looked up as cp_params[model][<int>] is passed
# as `pen` to the detector further down.
# NOTE(review): the inner integer keys are presumably the number of change points obtained
# with that penalty — confirm against the tuning run that produced these numbers.
cp_params = {'l1': {111: 722870.3350949581, 62: 2132352.798169759, 46: 4274373.643209655},
 'l2': {144: 14563484775.012444,
  95: 59636233165.946365,
  49: 223876329036.64392,
  47: 244205309454.86548},
 'rbf': {88: 15.142857142857142, 66: 25.24489795918367, 47: 45.44897959183673}}

In [None]:
# Change-point detection on the raw consumption values with ruptures' PELT search.
min_size = 24 * 7  # minimum segment length; presumably one week of hourly samples — confirm
jump = 24  # grid step for candidate breakpoints; presumably one day of hourly samples — confirm
model = 'l2'  # cost model; also selects the penalty table in cp_params
pen = cp_params[model][47]  # penalty stored under key 47 for this cost model
algo = rpt.Pelt(model=model, min_size=min_size, jump=jump)
algo.fit(y.values)
# Breakpoints are positions into y.values, not timestamps.
my_bkps = algo.predict(pen=pen)

In [None]:
def plot_bkps(data, my_bkps, title=None, ret=False):
    """Plot a series with its change points marked.

    Draws ``data`` as a line, a dashed red vertical line at every breakpoint position
    that lies inside ``data``, and alternating blue/green background bands between
    consecutive breakpoints (pairs starting at one of the last two list entries are
    not shaded).

    Parameters
    ----------
    data : pandas.Series
        Series to plot; its index supplies the x axis.
    my_bkps : sequence of int
        Breakpoints as positions into ``data``.
    title : str, optional
        Figure title.
    ret : bool
        When true the figure is returned instead of shown.
    """
    fig = go.Figure()
    series_trace = go.Scatter(
        x=data.index,
        y=data.values,
        mode='lines',
        line_color='#333C83',
    )
    fig.add_trace(series_trace)

    n_points = len(data)
    first_unshaded = len(my_bkps) - 2
    band_colors = ('blue', 'green')  # the first band is blue, then they alternate
    bands_drawn = 0

    for pos, bkp in enumerate(my_bkps):
        if bkp >= n_points:
            # Positions at or past the end of the series cannot be mapped to a label.
            break

        bkp_label = data.index[bkp]
        fig.add_vline(x=bkp_label, line_width=3, line_dash="dash", line_color="red")

        if pos >= first_unshaded:
            continue

        fig.add_vrect(
            x0=bkp_label,
            x1=data.index[my_bkps[pos + 1]],
            line_width=0,
            fillcolor=band_colors[bands_drawn % 2],
            opacity=0.1,
        )
        bands_drawn += 1

    fig.update_layout(title=title)
    if not ret:
        fig.show()
        return None
    return fig

In [None]:
plot_bkps(y, my_bkps)

In [None]:
# Trim the first `past_lags` and the last `future_temp_forecast_lags` rows, i.e. the edge rows
# where the shifted lag/lead features built by create_lagged_dataset have no data.
# NOTE(review): the tail trim also has to cover the 24 shifted target columns; that holds here
# only because future_temp_forecast_lags is 24 — confirm if either value changes.
X_in_nona, y_out_nona = X_in.iloc[past_lags:-future_temp_forecast_lags, :], y_out.iloc[past_lags:-future_temp_forecast_lags, :]
# Keep only the rows stamped at hour 0: one forecast origin per day.
X_in_nona_midnight, y_out_nona_midnight = X_in_nona[X_in_nona.index.hour == 0], y_out_nona[y_out_nona.index.hour == 0]
# Drop the first remaining midnight row.
# NOTE(review): the reason for skipping it is not evident from this cell — confirm.
X_in_nona_midnight, y_out_nona_midnight = X_in_nona_midnight.iloc[1:, :], y_out_nona_midnight.iloc[1:, :]

In [None]:
# One independent river Hoeffding tree regressor per target column of y_out,
# i.e. a separate online model for every forecast step.
models_dict = {c: tree.HoeffdingTreeRegressor(grace_period=7, leaf_prediction='adaptive', model_selector_decay=0.2, tau=0.5) for c in y_out.columns}
# Alternative learner that was tried and abandoned because of the error noted on the line:
# models_dict = {c: tree.SGTRegressor(grace_period=20) for c in y_out.columns} # ZeroDivisionError: float division by zero

In [None]:
y_out_nona_midnight[y_out_nona_midnight.index <= '2014-12-30']

In [None]:
# Predict-then-learn evaluation: for every midnight row each per-horizon model first
# predicts its target from the feature row and only afterwards learns from the true value.
df_forecast = pd.DataFrame(columns=y_out_nona_midnight.columns, index=y_out_nona_midnight.index)
limit = y_out_nona_midnight.shape[0]
# limit = y_out_nona_midnight[y_out_nona_midnight.index <= '2014-12-30'].shape[0]
target_columns = list(y_out_nona.columns)
row_pairs = zip(X_in_nona_midnight[:limit].iterrows(), y_out_nona_midnight[:limit].iterrows())
for idx, ((_, features_row), (_, targets_row)) in enumerate(tqdm(row_pairs, total=limit)):
    # Build the feature dict once per row; it used to be rebuilt twice for every horizon model.
    features = features_row.to_dict()
    y_pred_arr = np.zeros(len(target_columns))
    for k, c in enumerate(target_columns):
        m = models_dict[c]
        y_pred_arr[k] = m.predict_one(features)
        # Look the target up by column label: an integer key on a string-labelled Series
        # is deprecated positional indexing in recent pandas.
        m.learn_one(features, targets_row[c])

    df_forecast.iloc[idx, :] = y_pred_arr

In [None]:
df_forecast.dropna()

In [None]:
# NOTE(review): this cell used to filter `y_out_selection` in terms of itself although no
# earlier cell defines it, so the notebook failed on a fresh kernel (Restart & Run All).
# It is now derived from the Consumption series `y`: the cells below use it as an hourly
# series whose index from position 72 on must have exactly as many rows as the stacked
# 24-step forecasts, which matches `y` restricted to this window. Confirm this is the
# intended source.
y_out_selection = y[(y.index >= '2013-01-02 00:00:00') & (y.index <= '2020-12-30 23:00:00')]
y_out_selection

In [None]:
df_forecast

In [None]:
# y_out_selection.index[72:24*364*2]

In [None]:
# Reshape the wide per-day forecasts (one row per origin, one column per horizon)
# into a long frame with columns y_pred / y_true.
# Keep forecast origins between 2013-01-03 and 2020-12-30.
df_forecast_res = df_forecast[(df_forecast.index <= '2020-12-30') & (df_forecast.index >= '2013-01-03')]
# Wide -> long: one row per (origin, horizon column); the two former index levels become
# the columns 'level_0' and 'level_1', the value goes to 'y_pred'.
df_forecast_res = df_forecast_res.stack().reset_index(name='y_pred')
# df_forecast_res.index = y_out_selection.index[72:24*364*2]
# Re-label the rows with the labels of y_out_selection from position 72 onwards.
# The two lengths must match exactly, otherwise pandas raises.
df_forecast_res.index = y_out_selection.index[72:]

# Move every value one row down, presumably so that the H1 forecast lines up with the
# hour after its midnight origin — verify. The first row becomes NaN and is dropped next;
# the last forecast value falls off the end.
df_forecast_res = df_forecast_res.shift()
df_forecast_res = df_forecast_res.dropna()
df_forecast_res = df_forecast_res.drop(['level_0', 'level_1'], axis=1)
# Attach the observed values; assignment aligns on the index labels.
df_forecast_res['y_true'] = y_out_selection.iloc[1:]
df_forecast_res

In [None]:
df_forecast_res.to_csv('Results/NGC_HT_Day2Day_Online_2021.csv', sep=',')

In [None]:
# df_forecast_res.to_csv('NCG_HT_Day2Day_Online_grace_period=100_model_selector_decay=0.9.csv', sep=';')

In [None]:
compute_metrics(df_forecast_res[df_forecast_res.index >= '2013-01-05 00:00:00'].dropna())

In [None]:
compute_metrics(df_forecast_res[df_forecast_res.index >= '2014-01-01 00:00:00'].dropna())

In [None]:
# Per-calendar-year error metrics of the online forecast.
metric_names = ['MAE', 'MSE', 'MAPE', 'SMAPE', 'R2', 'WAPE']
years = list(range(2014, 2021))
df_pred_all = pd.DataFrame(columns=metric_names, index=years)

for year in years:
    # Select by calendar year. The previous upper bound '<= {year}-12-31 00:00:00'
    # silently excluded hours 01:00-23:00 of every 31 December.
    yearly = df_forecast_res[df_forecast_res.index.year == year].dropna()
    df_pred_all.loc[year, :] = compute_metrics(yearly).values
df_pred_all

In [None]:
df_forecast_res_s = df_forecast_res[df_forecast_res.index >= '2013-01-05 00:00:00'].stack().reset_index().rename({'level_1': 'Type', 0: 'Value'}, axis=1)

In [None]:
df_forecast_res_s

In [None]:
# Point-wise error measures for every forecast from 2013-01-05 onwards:
# AE = absolute error, SE = squared error, SAPE = symmetric absolute percentage error.
df_err = (
    df_forecast_res[df_forecast_res.index >= '2013-01-05 00:00:00']
    .copy()
    .assign(
        AE=lambda d: (d.y_true - d.y_pred).abs(),
        SE=lambda d: (d.y_true - d.y_pred) ** 2,
        SAPE=lambda d: ((d.y_pred - d.y_true).abs() / ((d.y_true.abs() + d.y_pred.abs())/2)) * 100,
    )
)
df_err

In [None]:
def _bkp_labels(my_bkps, ref_index):
    """Translate positional breakpoints into labels of ``ref_index``, skipping out-of-range ones."""
    return [ref_index[cp] for cp in my_bkps if 0 <= cp < len(ref_index)]


def _add_bkp_lines(fig, bkp_labels):
    """Draw one dashed red vertical line on ``fig`` for every breakpoint label."""
    for label in bkp_labels:
        fig.add_vline(x=label, line_width=3, line_dash="dash", line_color="red")


def plot_pred_bkps(data, my_bkps, from_dt=None, title=None, bkps_index=None):
    """Plot forecasts against observations, with change points and error curves.

    Shows, in this order: y_true vs. y_pred with breakpoint lines and alternating
    background bands; the rolling mean of SAPE for windows of 24 and 24*7 rows; and
    the SAPE, SE and AE curves. Every figure carries the breakpoint lines.

    Parameters
    ----------
    data : pandas.DataFrame
        Must have the columns 'y_true' and 'y_pred'; its index supplies the x axis.
    my_bkps : sequence of int
        Breakpoints as row positions (see ``bkps_index``).
    from_dt : optional
        When given, only rows with index >= ``from_dt`` are plotted.
    title : str, optional
        Title of the first figure.
    bkps_index : pandas.Index, optional
        Index of the series on which the breakpoints were detected. Pass it when the
        breakpoints were computed on a series other than ``data`` (for example the full
        consumption series) so they are placed at the correct timestamps. Defaults to
        the index of ``data`` as passed in, before ``from_dt`` filtering.

    Notes
    -----
    Breakpoints are converted to index labels before ``data`` is cut at ``from_dt``.
    Applying the positions to the already-cut frame, as was done before, shifted every
    breakpoint by the number of rows removed. Background bands are now drawn between
    every pair of consecutive visible breakpoints; previously the last pair was left out.
    """
    ref_index = data.index if bkps_index is None else bkps_index
    bkp_labels = _bkp_labels(my_bkps, ref_index)

    if from_dt is not None:
        data = data[data.index >= from_dt]

    # Keep only the breakpoints that fall inside the plotted range.
    if len(data):
        first_label, last_label = data.index.min(), data.index.max()
        bkp_labels = [label for label in bkp_labels if first_label <= label <= last_label]
    else:
        bkp_labels = []

    # Point-wise errors: absolute, squared and symmetric absolute percentage error.
    df_err = data.assign(
        AE=lambda d: (d.y_true - d.y_pred).abs(),
        SE=lambda d: (d.y_true - d.y_pred) ** 2,
        SAPE=lambda d: ((d.y_pred - d.y_true).abs() / ((d.y_true.abs() + d.y_pred.abs())/2)) * 100,
    )

    # Figure 1: observations vs. forecasts.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data.index, y=data.y_true,
                            mode='lines',
                            line_color='#333C83', name='y_true'))
    fig.add_trace(go.Scatter(x=data.index, y=data.y_pred,
                            mode='lines',
                            line_color='red', name='y_pred'))
    _add_bkp_lines(fig, bkp_labels)

    band_colors = ('blue', 'green')  # first band blue, then alternating
    for band_no, (start, end) in enumerate(zip(bkp_labels, bkp_labels[1:])):
        fig.add_vrect(x0=start, x1=end, line_width=0,
                      fillcolor=band_colors[band_no % 2], opacity=0.1)

    fig.update_layout(title=title)
    fig.show()

    # Figures 2-3: smoothed SAPE over a 24-row and a 24*7-row window.
    for roll in [24, 24*7]:
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=data.index, y=df_err['SAPE'].rolling(roll).mean(),
                                mode='lines',
                                line_color='#333C83', name=f'SAPE rolling({roll})'))
        _add_bkp_lines(fig, bkp_labels)
        fig.update_layout(title=f'SAPE rolling({roll})')
        fig.show()

    # Figures 4-6: raw error curves, SAPE first.
    for x in reversed(['AE', 'SE', 'SAPE']):
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=data.index, y=df_err[x],
                                mode='lines',
                                line_color='#333C83', name=x))
        _add_bkp_lines(fig, bkp_labels)
        fig.update_layout(title=x)
        fig.show()

In [None]:
plot_pred_bkps(df_forecast_res, my_bkps, '2014-01-05 00:00:00')