In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import zscore
from sklearn.preprocessing import StandardScaler
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.seasonal import seasonal_decompose
import seaborn as sns  

In [None]:
# Load the raw data set; the file is semicolon-delimited.
path = r"Nox/data/current.csv"
df = pd.read_csv(
    path,
    sep=';',
)

In [None]:
# List the column labels of the freshly loaded frame.
df.columns

In [None]:
# Pairwise correlations between the numeric columns of the frame.
num_df = df.select_dtypes(include='number')
corr = num_df.corr()

# Annotated heatmap of the correlation matrix, drawn on an explicit axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    square=True,
    ax=ax,
)
ax.set_title("Correlation Heatmap of Numeric Columns", fontsize=14)
fig.tight_layout()
plt.show()

In [None]:
# Columns passed to the ARIMA model as exogenous regressors.
exog_vars = [
    'ACE',
    'System imbalance',
    'Marginal incremental price',
    'Marginal decremental price',
]

Set correct index and convert to datetime

In [None]:
# Move the 'Datetime' column into the index, then parse the index labels as timestamps.
df = df.set_index('Datetime')
df = df.set_axis(pd.to_datetime(df.index), axis='index')

In [None]:
# Quick visual check of every numeric column over time, one panel per column.
# The original `df.plot` (no parentheses) only echoed the bound method and drew nothing.
# Separate panels are used because the columns need not share a common scale.
df.select_dtypes(include='number').plot(subplots=True, figsize=(12, 10))
plt.tight_layout()
plt.show()

In [None]:
# Keep the target series plus the four regressors; copy so later column
# assignments act on an independent frame.
keep_cols = [
    'Imbalance Price',
    'ACE',
    'System imbalance',
    'Marginal incremental price',
    'Marginal decremental price',
]
df = df.loc[:, keep_cols].copy()

In [None]:
# Summary statistics for the selected columns.
df.describe()

In [None]:
# Peek at the first rows to confirm the datetime index and the selected columns.
df.head()

In [None]:
# Count missing values per column.
df.isna().sum()

No missing values

Only the target column `Imbalance Price` and the four exogenous variables are kept (the index is already set to `Datetime`).

Let's fit a timeseries model.

Preprocessing - there are no missing values, so let's detect outliers

In [None]:
# Absolute z-score of the target series, stored as a helper column for outlier filtering.
price_z = zscore(df['Imbalance Price'])
df['z_scores'] = np.absolute(price_z)

In [None]:
# Display the absolute z-scores computed for 'Imbalance Price'.
df['z_scores'] 

Z_scores interpretation <br>

0 = Exactly average <br>
±1 = Within 1 std (about 68 percent of the data, if normally distributed) <br>
±2 = Within 2 std (about 95 percent of the data) <br>
±3 = Within 3 std (about 99.7 percent of the data) <br>

In [None]:
# Histogram of the absolute z-scores (matplotlib's default binning).
z_abs = df['z_scores']
plt.hist(z_abs)

Filtering for z scores under 3 and removing outliers

In [None]:
# Keep only rows whose absolute z-score is strictly below 3.
# NOTE(review): dropping rows leaves gaps in the datetime index — confirm the model tolerates that.
within_three_std = df['z_scores'].lt(3)
df = df.loc[within_three_std].copy()

In [None]:
# The helper column has served its purpose; remove it so it is not treated as data.
df = df.drop(columns='z_scores')

Let's try some Smoothing, Differencing, Scaling and Normalization

Moving average

Differencing

In [None]:
# First-order difference of every column; the first row has no predecessor
# and becomes NaN, so rows containing NaN are dropped.
differenced_series = df.diff(periods=1).dropna(axis=0, how='any')

Scaling

In [None]:
# Standardise the target series to zero mean and unit variance.
# The scaler is fitted on the price column alone. Flattening the whole frame with
# `df.values.reshape(-1, 1)` pooled the price and every exogenous column into one
# feature, so all of them were scaled with a single shared mean and std.
scaler = StandardScaler()
scaled_series = scaler.fit_transform(df[['Imbalance Price']])  # array of shape (n_rows, 1)

Let's plot: <br>

A line plot for standard viz <br>
A seasonal plot for seasonal patterns <br>
An autocorrelation plot to check correlation of series with past values <br>

In [None]:
# Diagnostic plots for the target series: line plot, ACF and PACF.
# The original cell only created an empty 15x18 figure, so the plots that the
# surrounding notes interpret were never drawn.
# NOTE(review): no seasonal panel is drawn because a seasonal decomposition needs a
# period, and none is established in this notebook — TODO confirm and add.
fig, axes = plt.subplots(3, 1, figsize=(15, 18))
price = df['Imbalance Price']

axes[0].plot(price.index, price)
axes[0].set_title('Imbalance Price over time')
axes[0].set_xlabel('Time')
axes[0].set_ylabel('Imbalance Price')

# Library-default lag counts are used so the calls stay valid for any series length.
plot_acf(price, ax=axes[1])
plot_pacf(price, ax=axes[2])

fig.tight_layout()
plt.show()

In [None]:
# Column labels remaining after the helper 'z_scores' column was dropped.
df.columns

ACF plots correlation of time series with itself at different lags. <br>

A gradual decline suggests a long term dependency in data <br>

Each observation is highly dependent on its recent past values <br>

Non-stationarity -> mean and variance likely change over time (due to trend or seasonality) <br>

Need to difference and account for seasonal/trend component

PACF plots correlation between observations at two time points, removing influence of intermediary observations. <br>

Sharp drop after lag 1 suggests one autoregressive term could capture most of the dependency <br>

Insignificant correlations after a few lags support a short memory once the first lag is accounted for <br>

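Suggested model: ARIMA(1,1,0), i.e. one autoregressive term on the once-differenced series. Note that the model fitted below uses order (2, 0, 0) with exogenous variables — TODO: reconcile the two.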

In [None]:
# Re-check missing values per column right before fitting the model.
df.isna().sum()

In [None]:
# ARIMA order: p autoregressive lags, d differences, q moving-average lags.
p, d, q = 2, 0, 0

# Target series is 'Imbalance Price'; the columns in `exog_vars` enter as exogenous regressors.
model = ARIMA(
    endog=df['Imbalance Price'],
    exog=df[exog_vars],
    order=(p, d, q),
)
model_fit = model.fit()

In [None]:
# Predictions for positions 1 .. len(df)-1 of the data the model was fitted on
# (the first observation is skipped).
last_pos = len(df) - 1
pred = model_fit.predict(start=1, end=last_pos)

In [None]:
# Actual values aligned with `pred`, which starts at the second observation.
y_true = df['Imbalance Price'].iloc[1:]

# Root mean squared error and mean absolute error of the predictions.
mse = mean_squared_error(y_true, pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true, pred)

In [None]:
# Report both error metrics, one per line, rounded to two decimals.
print(f"RMSE: {rmse:.2f}", f"MAE:  {mae:.2f}", sep="\n")

In [None]:
# Overlay the model's predictions on the observed series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_true.index, y_true, label='Actual', linewidth=2)
ax.plot(y_true.index, pred, label='Predicted', linestyle='--', linewidth=2)

ax.set_title('Actual vs Predicted Imbalance Price')
ax.set_xlabel('Time')
ax.set_ylabel('Imbalance Price')
ax.legend()
ax.grid(True, linestyle='--', alpha=0.6)

fig.tight_layout()
plt.show()