In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error
from datetime import timedelta
import warnings

## 3. SARIMAX MODEL with exogenous variables

### 3.1. One-day ahead forecasting using SARIMAX model with exogenous variables for the month of APRIL:


we will use this code to check the accuracy of the SARIMAX model to compare with(ANN,SARIMA AND LOGISTIC REGRESSION) other models .  we are doing this to select the best two models among SARIMA, ANN,SARIMAX AND LOGISTIC REGRESSION.(NOTE: # we are testing our model and our model will only predict whether next day is a peak day or not. so we will only forecast one day add actual data of that day to train data to retrain)

In [2]:
# Suppress warnings
warnings.filterwarnings("ignore", message="Optimization failed to converge")

# Loading the data
file_path = 'Database_1_capped.csv'  
print("Loading data...")
data = pd.read_csv(file_path)

# Converting 'Date and Time' to datetime 
data['Date and Time'] = pd.to_datetime(data['Date and Time'], format='%d/%m/%Y %H:%M', errors='coerce')
data.dropna(subset=['Date and Time'], inplace=True)
data.set_index('Date and Time', inplace=True)
data = data.asfreq('30T')  
# Defining exogenous variables and target variable
exog_vars = ['Temperature', 'DHI', 'GHI', 'DNI', 'Relative Humidity', 'Wind Speed', 'Holiday', 'Season']
data['Log_Demand'] = np.log(data['Demand_Capped'] + 1)  # Log transformation to stabilize variance
target_series = data['Log_Demand']

# Spliting the data into training and testing sets
train_start = '2020-04-01'
train_end = '2021-03-31'
test_start = '2021-04-01'
test_end = '2021-04-30'

train_data = target_series[train_start:train_end]
test_dates = pd.date_range(start=test_start, end=test_end, freq='D')
train_exog = data[exog_vars][train_start:train_end]

# Training the SARIMA model with exogenous variables once
print("Training SARIMA model on the full training dataset...")
model = SARIMAX(
    train_data,
    exog=train_exog,
    order=(4, 1, 3),
    seasonal_order=(1, 0, 1, 24),
    enforce_stationarity=False,
    enforce_invertibility=False
)
fitted_model = model.fit(disp=False, maxiter=500, method='lbfgs')
print("Model training completed.\n")

# Initializing a DataFrame to store actual and forecasted values
comparison_df = pd.DataFrame(columns=['Date and Time', 'Actual', 'Forecast'])

# Generating rolling forecasts for the entire test period
print("Generating rolling forecasts for the test period...")
for forecast_date in test_dates:
    print(f"Forecasting for {forecast_date}...")

    # Defining the forecast range for the next day (48 intervals for one day)
    forecast_range = pd.date_range(start=forecast_date, periods=48, freq='30T')

    # Defining exogenous variables for the forecast range from the test set
    forecast_exog = data[exog_vars].loc[forecast_range]

    # Generating forecasts
    forecast = fitted_model.get_forecast(steps=48, exog=forecast_exog)
    forecast_mean = forecast.predicted_mean
    forecast_series = pd.Series(np.exp(forecast_mean) - 1, index=forecast_range)  # Reverse log transformation

    # Storing the forecast for the current day
    for forecast_time in forecast_range:
        actual = data['Demand_Capped'][forecast_time] if forecast_time in data.index else None
        forecast_value = forecast_series[forecast_time]
        new_row = pd.DataFrame({
            'Date and Time': [forecast_time],
            'Actual': [actual],
            'Forecast': [forecast_value]
        })
        comparison_df = pd.concat([comparison_df, new_row], ignore_index=True)

    # Appending the actual data from the test set for the forecasted date to the training data
    actual_data_for_day = data['Log_Demand'].loc[forecast_range]  # Get actual test data for the forecast day
    train_data = pd.concat([train_data, actual_data_for_day])

    # Appending exogenous variables for the forecasted day from the test set to the training exogenous data
    train_exog = pd.concat([train_exog, forecast_exog])

    # Retraining the model with the updated training data
    print("Retraining model with updated training data...")
    model = SARIMAX(
        train_data,
        exog=train_exog,
        order=(4, 1, 3),
        seasonal_order=(1, 0, 1, 24),
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    fitted_model = model.fit(disp=False, maxiter=500, method='lbfgs')

# Calculating MAD and MAPE for the entire dataset
comparison_df.dropna(subset=['Actual'], inplace=True)
mad = (comparison_df['Actual'] - comparison_df['Forecast']).abs().mean()
mape = ((comparison_df['Actual'] - comparison_df['Forecast']).abs() / comparison_df['Actual']).mean() * 100

# Displaying MAD and MAPE
print(f"\nMean Absolute Deviation (MAD): {mad:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Saving results to CSV
comparison_df.to_csv('sarima_rolling_forecast_with_exog.csv', index=False)

# Printing the forecasted values for the test range
print("\nDaily Forecasts for Test Range (12 AM to 11:30 PM):")
print(comparison_df.head())


Loading data...
Training SARIMA model on the full training dataset...
Model training completed.

Generating rolling forecasts for the test period...
Forecasting for 2021-04-01 00:00:00...
Retraining model with updated training data...


  comparison_df = pd.concat([comparison_df, new_row], ignore_index=True)


Forecasting for 2021-04-02 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-03 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-04 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-05 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-06 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-07 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-08 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-09 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-10 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-11 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-12 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-13 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-14 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-15 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-16 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-17 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-18 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-19 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-20 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-21 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-22 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-23 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-24 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-25 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-26 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-27 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-28 00:00:00...
Retraining model with updated training data...
Forecasting for 2021-04-29 00:00:00...
Retraining model with updated training data...




Forecasting for 2021-04-30 00:00:00...
Retraining model with updated training data...

Mean Absolute Deviation (MAD): 953.60
Mean Absolute Percentage Error (MAPE): 9.04%

Daily Forecasts for Test Range (12 AM to 11:30 PM):
        Date and Time   Actual      Forecast
0 2021-04-01 00:00:00  10326.0  10427.255900
1 2021-04-01 00:30:00  10029.0  10072.374523
2 2021-04-01 01:00:00   9732.0   9843.946914
3 2021-04-01 01:30:00   9585.5   9526.322917
4 2021-04-01 02:00:00   9439.0   9343.779783


### We got Mean Absolute Deviation (MAD): 953.60 and Mean Absolute Percentage Error (MAPE): 9.04% for SARIMAX model.
its not performing well compared to other models so we are excluding this from inclusion in hybrid model.

In [None]:
print(fitted_model.summary())