In [1]:
from gdc.data_access import *
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import r2_score, mean_squared_error
from arch import arch_model

In [2]:
## OLS baseline: price ~ cons + temp
# NOTE(review): `CB` and `df_merged_real` are not defined in this notebook;
# presumably they are supplied by the star import of gdc.data_access -- confirm.
# The formula placeholders are filled from CB's attribute namespace.
# The recorded Durbin-Watson statistic (~0.15) is far below 2, so the
# nonrobust standard errors in the summary should be read with caution.

model = smf.ols(
    '{price} ~ {cons} + {temp}'.format_map(vars(CB)),
    df_merged_real,
).fit()
model.summary()

0,1,2,3
Dep. Variable:,Price_EUR_MWh,R-squared:,0.251
Model:,OLS,Adj. R-squared:,0.251
Method:,Least Squares,F-statistic:,1471.0
Date:,"Mon, 10 Nov 2025",Prob (F-statistic):,0.0
Time:,21:47:50,Log-Likelihood:,-44610.0
No. Observations:,8760,AIC:,89230.0
Df Residuals:,8757,BIC:,89250.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,102.4348,22.141,4.627,0.000,59.033,145.836
Consommation,0.0010,2.51e-05,40.828,0.000,0.001,0.001
Temp,-0.3748,0.072,-5.218,0.000,-0.516,-0.234

0,1,2,3
Omnibus:,381.737,Durbin-Watson:,0.146
Prob(Omnibus):,0.0,Jarque-Bera (JB):,441.773
Skew:,-0.506,Prob(JB):,1.18e-96
Kurtosis:,3.431,Cond. No.,5340000.0


In [3]:
## OLS with an extra squared-temperature term: price ~ cons + temp + Dtemp^2
# The formula placeholders are filled from CB's attribute namespace.
# The fitted result stays bound to `model` because the VIF cell that follows
# reads its design matrix from that name.

model = smf.ols(
    '{price} ~ {cons} + {temp} + {dtemp2}'.format_map(vars(CB)),
    df_merged_real,
).fit()
model.summary()

0,1,2,3
Dep. Variable:,Price_EUR_MWh,R-squared:,0.268
Model:,OLS,Adj. R-squared:,0.268
Method:,Least Squares,F-statistic:,1071.0
Date:,"Mon, 10 Nov 2025",Prob (F-statistic):,0.0
Time:,21:47:50,Log-Likelihood:,-44509.0
No. Observations:,8760,AIC:,89030.0
Df Residuals:,8756,BIC:,89060.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,302.2978,25.980,11.636,0.000,251.372,353.224
Consommation,0.0008,2.81e-05,29.812,0.000,0.001,0.001
Temp,-1.0262,0.084,-12.159,0.000,-1.192,-0.861
DTemp_2,0.1052,0.007,14.282,0.000,0.091,0.120

0,1,2,3
Omnibus:,382.372,Durbin-Watson:,0.145
Prob(Omnibus):,0.0,Jarque-Bera (JB):,440.036
Skew:,-0.511,Prob(JB):,2.8e-96
Kurtosis:,3.4,Cond. No.,6340000.0


In [4]:
# Variance inflation factor for every column of the design matrix of the most
# recently fitted OLS model, labelled with the regressor names.
# The Intercept row appears because the constant is a column of `exog`.
X = model.model.exog
vif = pd.Series({
    name: variance_inflation_factor(X, col)
    for col, name in enumerate(model.model.exog_names)
})
print(vif)


Intercept       3897.220078
Consommation       1.939110
Temp               2.139064
DTemp_2            1.449049
dtype: float64


In [5]:
# Regression of price on consumption with AR(2) errors:
# two autoregressive lags, no differencing, no moving-average terms.
X = df_merged_real[[CB.cons]]
model = SARIMAX(endog=df_merged_real[CB.price], exog=X, order=(2, 0, 0))
res = model.fit()
print(res.summary())

# In-sample fit quality: compare the observations stored on the fitted model
# with the model's fitted values.
y, y_hat = res.model.endog, res.fittedvalues
r2 = r2_score(y_true=y, y_pred=y_hat)
print("Pseudo R²:", r2)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


                               SARIMAX Results                                
Dep. Variable:          Price_EUR_MWh   No. Observations:                 8760
Model:               SARIMAX(2, 0, 0)   Log Likelihood              -34675.375
Date:                Mon, 10 Nov 2025   AIC                          69358.750
Time:                        21:47:52   BIC                          69387.061
Sample:                    01-01-2023   HQIC                         69368.396
                         - 12-31-2023                                         
Covariance Type:                  opg                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
Consommation     0.0009   1.22e-05     72.055      0.000       0.001       0.001
ar.L1            1.3993      0.007    199.140      0.000       1.386       1.413
ar.L2           -0.5084      0.008    -66.86

In [6]:
# Calendar features read from the frame's datetime index and stored as
# unordered categoricals. The full category range is declared explicitly so
# the set of levels does not depend on which values occur in the sample.
df_merged_real["hour"] = pd.Categorical(
    df_merged_real.index.hour, categories=range(24), ordered=False)
df_merged_real["dow"] = pd.Categorical(
    df_merged_real.index.dayofweek, categories=range(7), ordered=False)  # 0 = Monday
df_merged_real["month"] = pd.Categorical(
    df_merged_real.index.month, categories=range(1, 13), ordered=False)

# One-hot encode the three factors; the first level of each is dropped so the
# dummies are not collinear with a constant term.
D = pd.get_dummies(
    df_merged_real[["hour", "dow", "month"]],
    prefix=["h", "d", "m"],
    drop_first=True,
    dtype=float,
)

In [None]:
# Target series and regressors: consumption plus the hour / day-of-week /
# month dummies built in the previous cell.
y = df_merged_real[CB.price]
X = pd.concat([df_merged_real[[CB.cons]], D], axis="columns")

# AR(2) errors with one seasonal AR lag at period 24, plus an explicit
# constant column. The stationarity and invertibility constraints are
# switched off for estimation.
model = SARIMAX(
    endog=y,
    exog=sm.add_constant(X),
    order=(2, 0, 0),
    seasonal_order=(1, 0, 0, 24),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
res = model.fit(disp=False)
print(res.summary())

# In-sample fit quality of the fitted values against the observed prices.
y_hat = res.fittedvalues
r2 = r2_score(y_true=y, y_pred=y_hat)
print("Pseudo R²:", r2)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [None]:
# Residuals of the SARIMAX fit above, with any missing values removed.
eps = res.resid.dropna()

# Zero-mean GARCH(1,1) on those residuals, using Student-t innovations
# (chosen for robustness to price spikes).
garch = arch_model(eps, mean='Zero', vol='GARCH', p=1, q=1, dist='t')
garch_res = garch.fit(disp='off')
print(garch_res.summary())

# Variance-stationarity check: pull the ARCH and GARCH coefficients by label,
# falling back to NaN if a label is absent, and report their sum.
alpha1 = garch_res.params.get('alpha[1]', np.nan)
beta1 = garch_res.params.get('beta[1]', np.nan)
print(f"alpha+beta = {alpha1 + beta1:.3f}  (should be < 1 for stationary variance)")

In [None]:
# Display the ARCH (alpha[1]) and GARCH (beta[1]) coefficients extracted from
# the fitted GARCH(1,1) model. The previous cell binds them to `alpha1` and
# `beta1`; the bare name `beta` is never defined and raised NameError.
alpha1, beta1