# VARIMA


In [None]:
# Import the libraries used throughout the notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from prophet.serialize import model_to_json, model_from_json
from prophet.utilities import regressor_coefficients
from sklearn.metrics import mean_squared_error, mean_absolute_error


: 

In [None]:
# Helper for loading the data
def load_and_prepare_data(file_path):
    """
    Load energy price data from a CSV file and return it in chronological order.

    The 'Date' column is parsed to datetime *before* sorting. Sorting the raw
    strings first (as an earlier version did) orders them lexicographically,
    which is only chronological for ISO-style dates: e.g. '1/10/2020' would
    sort ahead of '1/2/2020'.

    Parameters
    ----------
    file_path : str or path-like
        Location of a CSV file containing a 'Date' column.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with 'Date' as datetime and rows sorted by ascending
        'Date'. 'Date' stays a regular column (it is not set as the index) and
        the row labels from the CSV read are preserved.

    Raises
    ------
    KeyError
        If the file has no 'Date' column.
    """
    df = pd.read_csv(file_path)
    # Parse first so the sort below compares timestamps, not strings.
    df['Date'] = pd.to_datetime(df['Date'])
    return df.sort_values('Date')

: 

In [None]:
# Columns of the dataframe that are used as covariates (everything fed to the
# model besides the day-ahead price itself). Order matters: it determines the
# component order of the covariate TimeSeries built from this list.
future_covariates_columns = [
    'Solar_radiation (W/m2)',
    'Wind_speed (m/s)',
    'Temperature (°C)',
    'Biomass (GWh)',
    'Hard_coal (GWh)',
    'Hydro (GWh)',
    'Lignite (GWh)',
    'Natural_gas (GWh)',
    'Other (GWh)',
    'Pumped_storage_generation (GWh)',
    'Solar_energy (GWh)',
    'Wind_offshore (GWh)',
    'Wind_onshore (GWh)',
    'Net_total_export_import (GWh)',
    'BEV_vehicles',
    'Oil_price (EUR)',
    'TTF_gas_price (€/MWh)',
    'Nuclear_energy (GWh)',
]

: 

In [None]:
# Read the pre-split training and test sets with the shared loader
train_df, test_df = (
    load_and_prepare_data(csv_path)
    for csv_path in ('../../data/Final_data/train_df.csv',
                     '../../data/Final_data/test_df.csv')
)

# Stack both splits (train first, then test) into one frame covering the
# whole period; row labels of the two inputs are kept as they are.
df = pd.concat([train_df, test_df])
df['Date'] = pd.to_datetime(df['Date'])

# Bare expression: shows the combined frame as the notebook cell output
df


: 

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the day-ahead price.
# The first two entries of the returned tuple are the test statistic and
# the p-value; the remaining entries are not reported here.
price_series = df['Day_ahead_price (€/MWh)']
result = adfuller(price_series)
adf_statistic, adf_p_value = result[0], result[1]
print('ADF Statistic:', adf_statistic)
print('p-value:', adf_p_value)


: 

In [12]:
from statsmodels.tsa.stattools import kpss

# KPSS test on the day-ahead price, run with the library defaults.
# The result tuple is unpacked into statistic, p-value, number of lags used
# and the table of critical values; only the first two are printed.
kpss_result = kpss(df['Day_ahead_price (€/MWh)'])
kpss_stat, p_value, lags, crit_values = kpss_result
for label, value in (('KPSS Statistic:', kpss_stat), ('p-value:', p_value)):
    print(label, value)


KPSS Statistic: 3.5800671523012446
p-value: 0.01


look-up table. The actual p-value is smaller than the p-value returned.

  kpss_stat, p_value, lags, crit_values = kpss(df['Day_ahead_price (€/MWh)'])


In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Draw the autocorrelation and then the partial autocorrelation of the
# day-ahead price held in 'df', each in its own figure.
price_column = df['Day_ahead_price (€/MWh)']
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(price_column)
    plt.show()


: 

In [33]:
# Import necessary libraries
import pandas as pd
from darts import TimeSeries
from darts.models import VARIMA
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, mae, rmse, mse
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# ---------------------------------------------------------------------------
# VARIMA(2, 1, 2) on the day-ahead price stacked with all covariates.
#
# Pipeline: build darts TimeSeries -> scale target and covariates separately
# (scalers fitted on the training data only) -> stack them into one
# multivariate series -> fit -> forecast len(test) steps -> pull the price
# component out of the forecast -> undo the scaling -> plot and score.
# ---------------------------------------------------------------------------

# Name of the target column; also the name of the target component inside
# the stacked series, which is how it is picked back out of the forecast.
target_column = 'Day_ahead_price (€/MWh)'

# Multivariate covariate series for the training period (covariates only)
covariates_train_series = TimeSeries.from_dataframe(
    train_df, 'Date', future_covariates_columns).astype('float32')

# Target series (day-ahead price) for the training and test periods
series_train = TimeSeries.from_dataframe(train_df, 'Date', target_column).astype('float32')
series_test = TimeSeries.from_dataframe(test_df, 'Date', target_column).astype('float32')

# Separate scalers so the target can be inverse-transformed on its own
scaler_target = Scaler()
scaler_covariates = Scaler()

# Fit the scalers on the training data only, then transform it
series_train_scaled = scaler_target.fit_transform(series_train)
covariates_train_scaled = scaler_covariates.fit_transform(covariates_train_series)

# Apply the training-fitted target scaler to the test target
series_test_scaled = scaler_target.transform(series_test)

# NOTE(review): the three "future covariates" series below are prepared but
# never passed to the model in this cell. They are kept because cells outside
# this view may rely on them — confirm and remove if they are truly unused.
future_covariates_full = TimeSeries.from_dataframe(
    df, 'Date', future_covariates_columns, fill_missing_dates=True, freq="D"
).astype('float32')

# Window from 199 days before the first test date up to the last test date
future_covariates_for_prediction = future_covariates_full.slice(
    series_test.start_time() - pd.DateOffset(days=(200 - 1)), series_test.end_time()
)

future_covariates_for_prediction_scaled = scaler_covariates.transform(future_covariates_for_prediction)

# Stack target + covariates: the target is the first component, followed by
# the covariates in the order of future_covariates_columns.
combined_train_series = series_train_scaled.stack(covariates_train_scaled)

# Initialize and fit the VARIMA model on the stacked multivariate series
model = VARIMA(p=2, d=1, q=2)  # Adjust p, d, q based on your ACF/PACF analysis
model.fit(combined_train_series)

# Forecast as many steps as there are test observations
n = len(series_test_scaled)
forecast_scaled = model.predict(n=n)

# The forecast has one component per stacked input series, but scaler_target
# was fitted on the price alone. Select the price component first; passing
# the whole multivariate forecast to the target scaler (and then to the plot
# and the metrics against the univariate test series) is incorrect.
forecast_target_scaled = forecast_scaled[target_column]

# Back to the original price scale
forecast = scaler_target.inverse_transform(forecast_target_scaled)
test_series = scaler_target.inverse_transform(series_test_scaled)

# Plot the actual vs forecasted data
fig = go.Figure()

# Actual test data
fig.add_trace(go.Scatter(x=test_series.time_index,
                         y=test_series.values().squeeze(),
                         mode='lines', name='Actual', line=dict(color='blue')))

# Forecasted data (univariate, so squeeze() yields a 1-D array)
fig.add_trace(go.Scatter(x=forecast.time_index,
                         y=forecast.values().squeeze(),
                         mode='lines', name='Forecast', line=dict(color='red')))

# Layout
fig.update_layout(title='VARIMA Model - Forecast',
                  xaxis_title='Date', yaxis_title='Day Ahead Price (€/MWh)',
                  legend=dict(x=1, y=1, xanchor='right', yanchor='top', bordercolor='black', borderwidth=1),
                  template='plotly_white')

fig.show()

# Error metrics of the price forecast against the held-out test prices
print('Error Metrics on Test Set:')
print(f'Mean Absolute Percentage Error (MAPE): {mape(test_series, forecast):.2f}%')
print(f'Mean Absolute Error (MAE): {mae(test_series, forecast):.2f}')
print(f'Root Mean Squared Error (RMSE): {rmse(test_series, forecast):.2f}')
print(f'Mean Squared Error (MSE): {mse(test_series, forecast):.2f}')


  warn('Estimation of VARMA(p,q) models is not generically robust,'
  endog = np.require(endog.fillna(method='backfill'), requirements="W")


KeyboardInterrupt: 

In [13]:
# Import necessary libraries
import pandas as pd
from darts import TimeSeries
from darts.models import VARIMA
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, mae, rmse, mse
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# ---------------------------------------------------------------------------
# Simpler VARIMA(1, 1, 0) on the day-ahead price stacked with all covariates.
#
# Pipeline: build darts TimeSeries -> scale target and covariates separately
# (scalers fitted on the training data only) -> stack them into one
# multivariate series -> fit -> forecast len(test) steps -> pull the price
# component out of the forecast -> undo the scaling -> plot and score.
# ---------------------------------------------------------------------------

# Name of the target column; also the name of the target component inside
# the stacked series, which is how it is picked back out of the forecast.
target_column = 'Day_ahead_price (€/MWh)'

# Multivariate covariate series for the training period (covariates only)
covariates_train_series = TimeSeries.from_dataframe(
    train_df, 'Date', future_covariates_columns).astype('float32')

# Target series (day-ahead price) for the training and test periods
series_train = TimeSeries.from_dataframe(train_df, 'Date', target_column).astype('float32')
series_test = TimeSeries.from_dataframe(test_df, 'Date', target_column).astype('float32')

# Separate scalers so the target can be inverse-transformed on its own
scaler_target = Scaler()
scaler_covariates = Scaler()

# Fit the scalers on the training data only, then transform it
series_train_scaled = scaler_target.fit_transform(series_train)
covariates_train_scaled = scaler_covariates.fit_transform(covariates_train_series)

# Apply the training-fitted target scaler to the test target
series_test_scaled = scaler_target.transform(series_test)

# Stack target + covariates: the target is the first component, followed by
# the covariates in the order of future_covariates_columns.
combined_train_series = series_train_scaled.stack(covariates_train_scaled)

# Initialize and fit the VARIMA model on the stacked multivariate series.
# p=1, q=0 is a plain first-order VAR on the once-differenced data;
# adjust p, d, q based on your ACF/PACF analysis.
model = VARIMA(p=1, d=1, q=0)
model.fit(combined_train_series)

# Forecast as many steps as there are test observations
n = len(series_test_scaled)
forecast_scaled = model.predict(n=n)

# The forecast has one component per stacked input series, but scaler_target
# was fitted on the price alone. Select the price component first; passing
# the whole multivariate forecast to the target scaler (and then to the plot
# and the metrics against the univariate test series) is incorrect.
forecast_target_scaled = forecast_scaled[target_column]

# Back to the original price scale
forecast = scaler_target.inverse_transform(forecast_target_scaled)
test_series = scaler_target.inverse_transform(series_test_scaled)

# Plot the actual vs forecasted data
fig = go.Figure()

# Actual test data
fig.add_trace(go.Scatter(x=test_series.time_index,
                         y=test_series.values().squeeze(),
                         mode='lines', name='Actual', line=dict(color='blue')))

# Forecasted data (univariate, so squeeze() yields a 1-D array)
fig.add_trace(go.Scatter(x=forecast.time_index,
                         y=forecast.values().squeeze(),
                         mode='lines', name='Forecast', line=dict(color='red')))

# Layout
fig.update_layout(title='VARIMA Model - Forecast',
                  xaxis_title='Date', yaxis_title='Day Ahead Price (€/MWh)',
                  legend=dict(x=1, y=1, xanchor='right', yanchor='top', bordercolor='black', borderwidth=1),
                  template='plotly_white')

fig.show()

# Error metrics of the price forecast against the held-out test prices
print('Error Metrics on Test Set:')
print(f'Mean Absolute Percentage Error (MAPE): {mape(test_series, forecast):.2f}%')
print(f'Mean Absolute Error (MAE): {mae(test_series, forecast):.2f}')
print(f'Root Mean Squared Error (RMSE): {rmse(test_series, forecast):.2f}')
print(f'Mean Squared Error (MSE): {mse(test_series, forecast):.2f}')


  endog = np.require(endog.fillna(method='backfill'), requirements="W")
