In [23]:
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
%matplotlib inline
from matplotlib.pylab import rcParams

from statsmodels.tsa.stattools import adfuller
%pip install pmdarima -q
import pmdarima as pm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [24]:
# Load the resampled dataset into a DataFrame and preview its first rows.
# NOTE(review): the path is an absolute, machine-specific Windows location;
# consider a configurable data directory so the notebook runs elsewhere.
df = pd.read_csv(filepath_or_buffer='D:\\bachelor\\dataset_resampled.csv')
print(df.head(5))

  utc_timestamp  DE_KN_residential4_grid_import  \
0    2015-10-10                        1.837840   
1    2015-10-11                        5.083840   
2    2015-10-12                        9.824345   
3    2015-10-13                       14.717635   
4    2015-10-14                       27.373166   

   DE_KN_residential4_grid_export      temp  holiday  season_Autumn  \
0                        0.000000  7.918333      0.0            1.0   
1                        3.193533  6.505729      0.0            1.0   
2                       21.286176  5.033854      0.0            1.0   
3                       39.081159  3.545833      0.0            1.0   
4                       46.060247  3.981771      0.0            1.0   

   season_Spring  season_Summer  season_Winter  day_of_week_Friday  \
0            0.0            0.0            0.0                 0.0   
1            0.0            0.0            0.0                 0.0   
2            0.0            0.0            0.0          

In [25]:
# Encode 'utc_timestamp' as a proleptic-Gregorian ordinal (an integer day
# count) so that it can be used as a numeric column further down.
#
# The dtype guard keeps this cell safe to re-run: once the column already
# holds integers, pd.to_datetime would reinterpret them as nanoseconds since
# the Unix epoch and every row would collapse to the same 1970-01-01 ordinal,
# silently destroying the time information.
if not pd.api.types.is_integer_dtype(df['utc_timestamp']):
    df['utc_timestamp'] = (
        pd.to_datetime(df['utc_timestamp'])   # no-op if already datetime64
        .apply(lambda ts: ts.toordinal())     # Timestamp -> integer day number
    )

In [26]:
# Target series to forecast: the residential grid-import column.
endog = df['DE_KN_residential4_grid_import']

# Regressors: every other column except the target itself and the
# grid-export series.
# NOTE(review): the fitted summary lists all four season_* dummies, and the
# day_of_week_* columns presumably form a full set as well; full dummy sets
# are linearly dependent - confirm whether one level of each should be dropped.
exog = df.drop(
    columns=['DE_KN_residential4_grid_import', 'DE_KN_residential4_grid_export']
)

In [27]:
# Specify an ARIMA(1, 1, 1) model for the target series with the exogenous
# regressors attached; the whole series is used (no hold-out here).
model = ARIMA(endog=endog, exog=exog, order=(1, 1, 1))

# Estimate the parameters and keep the results object for later inspection.
model_fit = model.fit()



In [28]:
# Stepwise search for a seasonal ARIMA specification with exogenous regressors.
#
# The regressors are passed as `X`. The legacy `exogenous=` keyword was
# deprecated in pmdarima 1.8 and removed in 2.0; on 2.x it is no longer a
# recognised parameter of auto_arima, so the search can end up running
# without the regressors and without any visible error (warnings are
# suppressed below).
#
# NOTE(review): m=12 sets a 12-step seasonal cycle. The data preview shows one
# row per day, for which a 12-step period is unusual - confirm the intended
# seasonality (e.g. weekly would be m=7).
SARIMAX_model = pm.auto_arima(endog, X=exog,
                              start_p=1, start_q=1,
                              test='adf',            # ADF test chooses d (d=None)
                              max_p=3, max_q=3, m=12,
                              start_P=0, seasonal=True,
                              d=None, D=1, trace=False,
                              error_action='ignore',  # skip candidate orders that fail to fit
                              suppress_warnings=True,
                              stepwise=True)

In [29]:
def sarimax_forecast(train, test, order, seasonal_order, exog_train, exog_test):
    """Fit a SARIMAX model on the training data and predict the test span.

    Parameters
    ----------
    train : sequence
        Endogenous observations used for fitting.
    test : sequence
        Hold-out observations; only its length is used, to decide how many
        steps beyond the training data are predicted.
    order : tuple
        Passed to SARIMAX as ``order``.
    seasonal_order : tuple
        Passed to SARIMAX as ``seasonal_order``.
    exog_train
        Exogenous regressors supplied to the model at fit time.
    exog_test
        Exogenous regressors supplied to ``predict`` for the predicted steps.

    Returns
    -------
    The object returned by the fitted results' ``predict`` for positions
    ``len(train)`` through ``len(train) + len(test) - 1`` (inclusive).
    """
    # Prediction window: the len(test) positions right after the training data.
    first_step = len(train)
    last_step = first_step + len(test) - 1

    fitted = SARIMAX(
        train,
        order=order,
        seasonal_order=seasonal_order,
        exog=exog_train,
    ).fit(disp=False)  # disp=False silences optimiser output

    return fitted.predict(start=first_step, end=last_step, exog=exog_test)


In [30]:
# Chronological 80/20 split: the first 80% of the rows are used for fitting,
# the remaining rows are held out for evaluation.
train_size = int(0.8 * len(df))
train = df['DE_KN_residential4_grid_import'].iloc[:train_size]
test = df['DE_KN_residential4_grid_import'].iloc[train_size:]
exog_train = exog.iloc[:train_size]
exog_test = exog.iloc[train_size:]

# Non-seasonal (p, d, q) and seasonal (P, D, Q, s) orders for the SARIMAX fit.
# NOTE(review): these are hard-coded rather than taken from the auto_arima
# search above - confirm that is intentional.
order, seasonal_order = (1, 1, 1), (1, 1, 1, 12)

# Fit on the training window and predict the held-out window.
predictions = sarimax_forecast(
    train=train,
    test=test,
    order=order,
    seasonal_order=seasonal_order,
    exog_train=exog_train,
    exog_test=exog_test,
)



In [31]:
# Display the estimation summary of the ARIMA(1, 1, 1) fit.
#
# `model_fit` is the results object produced when the ARIMA model was fitted
# earlier in the notebook; nothing has changed `model` since, so fitting it a
# second time here would only repeat the same maximum-likelihood estimation.
# `summary()` is a method of that results object (not of the ARIMA class).
model_fit.summary()



0,1,2,3
Dep. Variable:,DE_KN_residential4_grid_import,No. Observations:,849.0
Model:,"ARIMA(1, 1, 1)",Log Likelihood,-2126.204
Date:,"Fri, 16 Feb 2024",AIC,4286.407
Time:,14:50:03,BIC,4367.036
Sample:,0,HQIC,4317.296
,- 849,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
utc_timestamp,-0.0130,2.577,-0.005,0.996,-5.063,5.037
temp,0.0764,0.040,1.919,0.055,-0.002,0.154
holiday,1.993e-15,1.08e-14,0.185,0.853,-1.91e-14,2.31e-14
season_Autumn,0.4609,1.292,0.357,0.721,-2.072,2.994
season_Spring,0.7059,1.712,0.412,0.680,-2.649,4.061
season_Summer,-0.1787,2.299,-0.078,0.938,-4.684,4.327
season_Winter,-0.9882,1.221,-0.809,0.418,-3.382,1.405
day_of_week_Friday,-0.5264,0.197,-2.672,0.008,-0.913,-0.140
day_of_week_Monday,0.4609,0.202,2.279,0.023,0.065,0.857

0,1,2,3
Ljung-Box (L1) (Q):,3.53,Jarque-Bera (JB):,810.25
Prob(Q):,0.06,Prob(JB):,0.0
Heteroskedasticity (H):,0.94,Skew:,0.31
Prob(H) (two-sided):,0.61,Kurtosis:,7.75
