In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=False)
pd.set_option('display.max_colwidth', 100)

In [None]:
from sklearn.model_selection import KFold, train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from evaluation_metrics import compute_metrics, compute_metrics_csv, mean_absolute_percentage_error, symetric_mean_absolute_percentage_error

In [None]:
from statsmodels.tsa.stattools import acf, pacf, ccf, ccovf
from statsmodels.tsa.seasonal import STL
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf 
from statsmodels.tsa.stattools import adfuller, kpss
from scipy.stats import boxcox, yeojohnson
from scipy.spatial.distance import cosine, euclidean
import rstl

In [None]:
from scipy.stats import boxcox, yeojohnson

In [None]:
from tqdm.notebook import trange, tqdm
from datetime import datetime
import itertools
from numpy.polynomial import Polynomial as P

In [None]:
import pickle

In [None]:
import ruptures as rpt
# from dtaidistance import dtw

In [None]:
from river import tree

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, GRU, Flatten
from tensorflow import random as tf_random

In [None]:
SEED = 13
tf_random.set_seed(SEED)
np.random.seed(SEED)

In [None]:
df = pd.read_csv('./Data/ppnet_metar_v8_2021.csv', sep=';', index_col=0)

In [None]:
df = df[df.Year < 2021].copy()

In [None]:
# Load the yr.no temperature forecast file (semicolon separated, decimal comma).
temp_forecast_raw = pd.read_csv('./Data/yrno_temperature.csv', sep=';', index_col=0, decimal=',')
# Parse the 'dd.mm.YYYY HH:MM' timestamps in one vectorised call instead of row by row.
temp_forecast_raw.index = pd.to_datetime(temp_forecast_raw.index, format='%d.%m.%Y %H:%M')
# Keep the second data column and average rows that share a timestamp.
# A separate name is used for the raw frame so this cell can be re-run safely.
df_temp_forecast = temp_forecast_raw.iloc[:, 1].groupby(level=0).mean()

In [None]:
df_el = pd.read_csv('./Data/ppnet_metar_v8_electricity.csv', sep=';', index_col=0).Load_ffill
df_el.index = pd.DatetimeIndex(df_el.index)

In [None]:
df.loc[:, 'TestSet'] = 0
df.loc[df.Year == 2020, 'TestSet'] = 1

In [None]:
df.head()

In [None]:
df.shape

## We can see that variance and median levels differ among the months

In [None]:
# fig = px.box(df, y='Consumption', color='Month', facet_row='TestSet')
# fig.update_layout(
#     height=1000
# )

In [None]:
# px.line(y=df['Consumption'], x=df.index, color=df.Year)

In [None]:
px.line(y=df['Consumption'], x=df.index, color=df.TestSet)

In [None]:
df_gas = pd.read_csv('./Data/ppnet_metar_v8_2021.csv', sep=';', index_col=0)
df_gas.index = pd.DatetimeIndex(df_gas.index)
df_gas['Month'] = df_gas.index.month_name()
df_gas['Day'] = df_gas.index.day_name()

In [None]:
# df_el = pd.read_csv('./Data/raw_electricity_2021.csv', sep=',', index_col=0)
# df_el.index = pd.DatetimeIndex(df_el.index)
# df_el['Month'] = df_el.index.month_name()
# df_el['Day'] = df_el.index.day_name()

In [None]:
# df_el

In [None]:
df.index = pd.DatetimeIndex(df.index)

In [None]:
df['Temperature_YRNO'] = pd.Series(df_temp_forecast)

In [None]:
temp_err = (df.Temperature - df.Temperature_YRNO).dropna()
temp_err.describe()

In [None]:
df.isna().sum()

In [None]:
df.shape

In [None]:
df.describe(exclude='O').columns

In [None]:
# df['Consumption'] = np.cbrt(df['Consumption'])

In [None]:
FORECAST_HORIZON = 24

In [None]:
df.loc[df.index <= '2014-12-31 23:00:00', 'Temperature_YRNO'] = df.loc[df.index <= '2014-12-31 23:00:00', 'Temperature']
df.loc[:, 'Temperature_YRNO'] = df.loc[:, 'Temperature_YRNO'].interpolate()

In [None]:
X, y = df.loc[:, df.columns != 'Consumption'].copy(), df.Consumption.copy()

In [None]:
X.columns

In [None]:
X.isna().sum()

In [None]:
y.isna().sum()

In [None]:
X = X.loc[:, ['Year', 'Month', 'Day', 'Hour', 'Day_of_week', 'Before_holiday', 'Holiday', 'Temperature', 'Cena', 'Temperature_YRNO', 'Wind_speed', 'Humidity']]

In [None]:
X = X.rename({'Cena' : 'Price'}, axis=1)

In [None]:
def create_lagged_dataset(X, y, past_lags=24, future_temp_forecast_lags=24, forecast_horizon=24):
    """Build a tabular dataset of lagged features and multi-step targets.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame. Must contain 'Year' (dropped from the output) and the
        columns 'Temperature', 'Price', 'Temperature_YRNO', 'Wind_speed' and
        'Humidity'.
    y : pandas.Series or single-column pandas.DataFrame
        Target series sharing X's index.
    past_lags : int
        Number of past steps added for every lagged feature and for the target.
    future_temp_forecast_lags : int
        Number of future steps of 'Temperature_YRNO' added as features.
    forecast_horizon : int
        Number of target columns 'H1'..'H<forecast_horizon>' (default 24).

    Returns
    -------
    (X_in, y_out) : tuple of pandas.DataFrame
        X_in holds the original features (minus 'Year') followed by
        'LAG_<col>_<i>' (value i steps back), 'LEAD_Temperature_YRNO_<i>'
        (value i steps ahead) and 'LAG_Consumption_<i>'.
        y_out holds 'H<k>' = y shifted k steps into the future.
        Rows at the edges contain NaN where the shift runs off the data.
    """
    # Accept a one-column frame (e.g. a scaled target) as well as a Series.
    if isinstance(y, pd.DataFrame):
        y = y.iloc[:, 0]

    y_out = pd.DataFrame(
        {f'H{step}': y.shift(-step) for step in range(1, forecast_horizon + 1)},
        index=y.index,
    )

    # Collect all derived columns first and attach them in a single concat;
    # inserting them one at a time fragments the frame.
    derived = {}

    lagged_columns = ['Temperature', 'Price', 'Temperature_YRNO', 'Wind_speed', 'Humidity']
    for c in lagged_columns:
        for i in range(1, past_lags + 1):
            derived[f'LAG_{c}_{i}'] = X[c].shift(i)

    # Future forecast values get their own 'LEAD_' prefix. They used to be named
    # 'LAG_Temperature_YRNO_<i>' too, which overwrote the past lags created above.
    for i in range(1, future_temp_forecast_lags + 1):
        derived[f'LEAD_Temperature_YRNO_{i}'] = X['Temperature_YRNO'].shift(-i)

    for i in range(1, past_lags + 1):
        derived[f'LAG_Consumption_{i}'] = y.shift(i)

    X_in = pd.concat(
        [X.drop(['Year'], axis=1), pd.DataFrame(derived, index=X.index)],
        axis=1,
    )

    return (X_in, y_out)

In [None]:
X.head()

In [None]:
# Fit both min-max scalers on the 2013 rows only, then apply them to the whole
# series. Later values that fall outside the 2013 range therefore map outside
# [0, 1] (MinMaxScaler does not clip by default).
X_scaler, y_scaler = MinMaxScaler(), MinMaxScaler()
X_scaler.fit(X.loc[X.index.year == 2013])
X_scaled = pd.DataFrame(X_scaler.transform(X), columns=X.columns, index=X.index)

# The scaler expects 2-D input, hence the reshape of the target to one column.
y_scaler.fit(y[X.index.year == 2013].values.reshape(-1, 1))
y_scaled = y_scaler.transform(y.values.reshape(-1, 1))
y_scaled = pd.DataFrame(y_scaled, columns=['Consumption'], index=y.index)

In [None]:
def create_lstm_dataset(X, y, lags=72, future_steps=24):
    """Build sequence samples whose target window starts at midnight.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame with a DatetimeIndex containing at least the columns
        listed in ``lagged_columns`` below (except 'Consumption').
    y : pandas.Series or pandas.DataFrame
        Target named 'Consumption', sharing X's index.
    lags : int
        Number of past rows fed to the model per sample.
    future_steps : int
        Number of rows to predict per sample; must satisfy
        ``1 <= future_steps <= lags`` because the future temperature forecast
        is zero-padded to ``lags`` rows.

    Returns
    -------
    X_seq : numpy.ndarray, shape (n_samples, lags, n_features + 1)
        Past feature rows (target column excluded) plus one extra channel
        holding the next ``future_steps`` values of 'Temperature_YRNO',
        right-padded with 0.0.
    y_seq : numpy.ndarray, shape (n_samples, future_steps)
        Target values of the forecast window.
    index_seq : numpy.ndarray
        Timestamps of each forecast window.

    Raises
    ------
    ValueError
        If ``future_steps`` is not in ``[1, lags]``.
    """
    if not 1 <= future_steps <= lags:
        raise ValueError(
            f'future_steps must be between 1 and lags ({lags}), got {future_steps}'
        )

    lagged_columns = ['Temperature', 'Price', 'Temperature_YRNO', 'Wind_speed', 'Humidity', 'Month', 'Day', 'Hour', 'Day_of_week', 'Before_holiday',
       'Holiday', 'Consumption']
    df_combined = pd.concat([X, y], axis=1)
    X_selected = df_combined.loc[:, lagged_columns].dropna()

    # Extract plain arrays once; slicing them is far cheaper than calling
    # .iloc on the frame for every candidate window.
    past_features = X_selected.iloc[:, :-1].values          # every column but the target
    target = X_selected.iloc[:, -1].values                  # 'Consumption'
    future_temp_source = X_selected['Temperature_YRNO'].values
    timestamps = X_selected.index
    window_start_hours = timestamps.hour
    pad_width = (0, lags - future_steps)

    X_seq, y_seq, index_seq = [], [], []
    # NOTE(review): the range stops one short, so the very last complete window
    # is never produced. Left unchanged because later cells hard-code the date
    # range that results from this count.
    for i in trange(len(X_selected) - lags - future_steps):
        start, stop = i + lags, i + lags + future_steps

        # Only windows whose first forecast hour is 00:00 are kept; check this
        # before building any arrays so rejected windows cost nothing.
        if window_start_hours[start] != 0:
            continue

        padded = np.pad(future_temp_source[start:stop], pad_width, 'constant', constant_values=0.0)
        X_seq.append(np.append(past_features[i:start], padded.reshape(-1, 1), axis=1))
        y_seq.append(target[start:stop])
        index_seq.append(timestamps[start:stop])

    X_seq = np.array(X_seq)
    y_seq = np.array(y_seq)
    index_seq = np.array(index_seq)

    return X_seq, y_seq, index_seq


In [None]:
past_lags, future_steps = 72, 24
X_in, y_out, index_seq = create_lstm_dataset(X_scaled, y_scaled, past_lags, future_steps)

In [None]:
X_in_orig, y_out_orig, _ = create_lstm_dataset(X, y, past_lags, future_steps)

In [None]:
X_in.shape

In [None]:
y_out.shape

In [None]:
index_seq.shape

In [None]:
index_seq

In [None]:
cp_params = {'l1': {111: 722870.3350949581, 62: 2132352.798169759, 46: 4274373.643209655},
 'l2': {144: 14563484775.012444,
  95: 59636233165.946365,
  49: 223876329036.64392,
  47: 244205309454.86548},
 'rbf': {88: 15.142857142857142, 66: 25.24489795918367, 47: 45.44897959183673}}

In [None]:
# Change-point detection on the raw consumption series with ruptures' PELT.
# min_size: smallest allowed segment length in samples
# (one week if the series is hourly — TODO confirm the sampling rate).
min_size = 24 * 7
# jump: only every 24th sample is considered as a candidate breakpoint.
jump = 24
# Cost function name; also selects the penalty table in cp_params.
# Note that the name `model` is rebound to the Keras network further down.
model = 'l2'
# NOTE(review): the inner keys of cp_params (here 47) look like the number of
# breakpoints each penalty produced — confirm against where the table was built.
pen = cp_params[model][47]
algo = rpt.Pelt(model=model, min_size=min_size, jump=jump)
algo.fit(y.values)
my_bkps = algo.predict(pen=pen)

In [None]:
def plot_bkps(data, my_bkps, title=None, ret=False):
    """Plot a series with its change points marked.

    Draws ``data`` as a line, a dashed red vertical line at every breakpoint
    position in ``my_bkps`` that lies inside the series, and lightly shaded
    bands (alternating blue / green) between consecutive breakpoints. The
    last two entries of ``my_bkps`` never open a band.

    Returns the figure when ``ret`` is true; otherwise shows it and returns
    None.
    """
    figure = go.Figure()
    series_trace = go.Scatter(
        x=data.index,
        y=data.values,
        mode='lines',
        line_color='#333C83',
    )
    figure.add_trace(series_trace)

    n_points = len(data)
    first_unshaded = len(my_bkps) - 2
    band_colour = 'green'

    for position, bkp in enumerate(my_bkps):
        # Positions at or beyond the end of the series end the loop.
        if bkp >= n_points:
            break

        figure.add_vline(x=data.index[bkp], line_width=3, line_dash="dash", line_color="red")

        if position >= first_unshaded:
            continue

        # Flip the band colour before every shaded segment.
        band_colour = 'green' if band_colour == 'blue' else 'blue'
        figure.add_vrect(
            x0=data.index[bkp],
            x1=data.index[my_bkps[position + 1]],
            line_width=0,
            fillcolor=band_colour,
            opacity=0.1,
        )

    figure.update_layout(title=title)
    if not ret:
        figure.show()
        return None
    return figure

In [None]:
plot_bkps(y, my_bkps)

In [None]:
X_in_nona_midnight, y_out_nona_midnight = X_in, y_out
# X_in_nona_midnight, y_out_nona_midnight = X_in_nona_midnight.iloc[1:, :], y_out_nona_midnight.iloc[1:, :]

In [None]:
X_in_nona_midnight.shape

In [None]:
ANN_SIZE = {
    'BIG': [256, 128], 
    'MEDIUM': [128, 64], 
    'SMALL': [64, 32]
}

SELECTED_SIZE = 'SMALL'

In [None]:
# GRU -> Flatten -> Dense -> Dense network that maps one input window to all
# forecast steps at once. Layer widths come from ANN_SIZE[SELECTED_SIZE].
model = Sequential()
model.add(GRU(ANN_SIZE[SELECTED_SIZE][0], input_shape=(X_in_nona_midnight.shape[1],X_in_nona_midnight.shape[2]), activation='relu', return_sequences=True))  # Adjust units as needed
model.add(Flatten())
model.add(Dense(ANN_SIZE[SELECTED_SIZE][1], activation='relu'))  # Adjust units as needed
# NOTE(review): the targets are scaled with 2013 statistics only, so later
# values can exceed 1, which a sigmoid output cannot reach — confirm intended.
model.add(Dense(y_out_nona_midnight.shape[1], activation='sigmoid'))  # Output layer: sigmoid, so every predicted step lies in (0, 1)

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [None]:
model.summary()

In [None]:
# Online ("test-then-train") evaluation: every sample is first predicted with
# the current weights and only afterwards used for one gradient step.
predicted_values_arr = []
losses = []
for i in trange(len(X_in_nona_midnight)):
    # Slicing with i:i+1 keeps the leading batch axis of size 1.
    instance_X = X_in_nona_midnight[i:i + 1]
    instance_y = y_out_nona_midnight[i:i + 1]

    # Predict before training. predict_on_batch runs a single forward pass;
    # model.predict builds a full prediction loop on every call, which is
    # very slow when invoked once per sample.
    predicted_values = model.predict_on_batch(instance_X)
    predicted_values_arr.append(predicted_values)

    # Train on the new instance
    loss, mae = model.train_on_batch(instance_X, instance_y)
    losses.append((loss, mae))

In [None]:
# Plot losses
losses = np.array(losses)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(losses)), y=losses[:, 0], mode='lines', name='Loss'))
fig.add_trace(go.Scatter(x=np.arange(len(losses)), y=losses[:, 1], mode='lines', name='MAE'))
fig.show()

In [None]:
predicted_values_arr = np.array(predicted_values_arr).reshape(-1, 24)
predicted_values_arr.shape, predicted_values_arr.shape[0] * predicted_values_arr.shape[1]

In [None]:
ps_index_range = pd.date_range(start='2013-01-05 00:00:00', end='2020-12-30 23:00:00', freq='H',)
ps_index_range

In [None]:
y_out_orig

In [None]:
y_out_orig.shape, y_out_orig.shape[0] * y_out_orig.shape[1]

In [None]:
predicted_values_arr.shape

In [None]:
ps_index_range.shape

In [None]:
# Select only midnight values from ps_index_range
ps_index_range_midnight = ps_index_range[ps_index_range.hour == 0]
ps_index_range_midnight

In [None]:
# Label every forecast row with the first timestamp of the window it was made
# for. index_seq was produced together with the samples, so it stays correct
# even if rows were dropped from the data, unlike a hard-coded date range.
forecast_origin = pd.DatetimeIndex(index_seq[:, 0])
y_pred = pd.DataFrame(
    predicted_values_arr,
    columns=[f'H{x}' for x in range(1, predicted_values_arr.shape[1] + 1)],
    index=forecast_origin,
)
# y_scaler was fitted on a single column, so undo the scaling on a one-column
# view and restore the (n_days, n_steps) shape afterwards.
y_pred_orig = y_scaler.inverse_transform(y_pred.values.reshape(-1, 1)).reshape(y_pred.shape)
df_forecast = pd.DataFrame(y_pred_orig, columns=y_pred.columns, index=y_pred.index)
df_forecast

In [None]:
y_out_selection = y[(y.index >= '2013-01-05 00:00:00') & (y.index <= '2020-12-30 23:00:00')]
y_out_selection

In [None]:
# Turn the wide (one row per day, H1..H24) forecast into an hourly long format.
forecast_in_range = df_forecast[(df_forecast.index <= '2020-12-30') & (df_forecast.index >= '2013-01-03')]
forecast_long = forecast_in_range.stack()

# Each sample's target window starts at the midnight that labels its row, so
# 'H1' belongs to the origin hour itself and 'H<k>' to k-1 hours later.
# Building the timestamps explicitly replaces the former positional index
# assignment plus shift(), which placed every prediction one hour too late.
origins = forecast_long.index.get_level_values(0)
hour_offsets = forecast_long.index.get_level_values(1).str[1:].astype(int) - 1
forecast_long.index = origins + pd.to_timedelta(hour_offsets, unit='h')

df_forecast_res = forecast_long.to_frame('y_pred')
# Aligned on the timestamp index; hours without an observation become NaN.
df_forecast_res['y_true'] = y_out_selection
df_forecast_res

In [None]:
df_forecast_res.to_csv(f'Results/NGC_GRU_{SELECTED_SIZE}_Day2Day_Online.csv', sep=',')

In [None]:
compute_metrics(df_forecast_res[df_forecast_res.index >= '2013-01-05 00:00:00'].dropna())

In [None]:
compute_metrics(df_forecast_res[df_forecast_res.index >= '2014-01-01 00:00:00'].dropna())

In [None]:
# Per-year error metrics for 2014-2020.
df_pred_all = pd.DataFrame(columns=['MAE', 'MSE', 'MAPE', 'SMAPE', 'R2', 'WAPE'], index=list(range(2014, 2021)))

for year in range(2014, 2021):
    # Select by calendar year. The former upper bound '<year>-12-31 00:00:00'
    # dropped the last 23 hours of every year.
    year_rows = df_forecast_res[df_forecast_res.index.year == year].dropna()
    df_pred_all.loc[year, :] = compute_metrics(year_rows).values
df_pred_all

In [None]:
df_forecast_res_s = df_forecast_res[df_forecast_res.index >= '2013-01-05 00:00:00'].stack().reset_index().rename({'level_1': 'Type', 0: 'Value'}, axis=1)

In [None]:
df_forecast_res_s

In [None]:
df_err = df_forecast_res[df_forecast_res.index >= '2013-01-05 00:00:00'].copy()
df_err.loc[:, 'AE'] = (df_err.y_true - df_err.y_pred).abs()
df_err.loc[:, 'SE'] = (df_err.y_true - df_err.y_pred) ** 2
df_err.loc[:, 'SAPE'] = ((df_err.y_pred - df_err.y_true).abs() / ((df_err.y_true.abs() + df_err.y_pred.abs())/2)) * 100
df_err

In [None]:
def _add_breakpoint_lines(fig, bkp_times):
    """Draw one dashed red vertical line on ``fig`` per timestamp in ``bkp_times``."""
    for ts in bkp_times:
        fig.add_vline(x=ts, line_width=3, line_dash="dash", line_color="red")


def plot_pred_bkps(data, my_bkps, from_dt=None, title=None):
    """Plot predictions against ground truth together with change points.

    Shows, in order: y_true vs. y_pred with shaded segments between
    consecutive breakpoints, the rolling mean of SAPE over 24 and 24*7 rows,
    and the raw SAPE, SE and AE series. Every figure carries the breakpoint
    lines.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with columns 'y_true' and 'y_pred'.
    my_bkps : sequence of int
        Breakpoint positions relative to ``data`` as passed in, i.e. before
        any ``from_dt`` filtering. Positions >= len(data) are ignored.
    from_dt : str or datetime-like, optional
        If given, only rows with index >= from_dt are plotted.
    title : str, optional
        Title of the first figure.
    """
    # Resolve positions to timestamps BEFORE slicing the data. Indexing the
    # sliced frame with the original positions would shift every breakpoint
    # by the number of rows removed by from_dt.
    full_index = data.index
    valid_bkps = [cp for cp in my_bkps if cp < len(full_index)]
    if from_dt:
        keep = full_index >= from_dt
        valid_bkps = [cp for cp in valid_bkps if keep[cp]]
        data = data[keep]
    bkp_times = [full_index[cp] for cp in valid_bkps]

    df_err = data.copy()
    df_err.loc[:, 'AE'] = (df_err.y_true - df_err.y_pred).abs()
    df_err.loc[:, 'SE'] = (df_err.y_true - df_err.y_pred) ** 2
    df_err.loc[:, 'SAPE'] = ((df_err.y_pred - df_err.y_true).abs() / ((df_err.y_true.abs() + df_err.y_pred.abs())/2)) * 100

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data.index, y=data.y_true,
                            mode='lines',
                            line_color='#333C83', name='y_true'))

    fig.add_trace(go.Scatter(x=data.index, y=data.y_pred,
                            mode='lines',
                            line_color='red', name='y_pred'))
    _add_breakpoint_lines(fig, bkp_times)

    # Shade every segment between two consecutive visible breakpoints,
    # alternating blue / green. The trailing end-of-series sentinel has
    # already been dropped above, so no pair needs to be skipped.
    clr_selection = 'green'
    for left, right in zip(bkp_times, bkp_times[1:]):
        clr_selection = 'blue' if clr_selection != 'blue' else 'green'
        fig.add_vrect(x0=left, x1=right, line_width=0, fillcolor=clr_selection, opacity=0.1)

    fig.update_layout(title=title)
    fig.show()

    for roll in [24, 24*7]:
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=data.index, y=df_err['SAPE'].rolling(roll).mean(),
                                mode='lines',
                                line_color='#333C83', name=f'SAPE rolling({roll})'))
        _add_breakpoint_lines(fig, bkp_times)
        fig.update_layout(title=f'SAPE rolling({roll})')
        fig.show()

    for x in reversed(['AE', 'SE', 'SAPE']):
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=data.index, y=df_err[x],
                                mode='lines',
                                line_color='#333C83', name=x))
        _add_breakpoint_lines(fig, bkp_times)
        fig.update_layout(title=x)
        fig.show()

In [None]:
plot_pred_bkps(df_forecast_res, my_bkps, '2014-01-05 00:00:00')