In [1]:
!pip install pmdarima



In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import statsmodels.tsa.stattools as sma
import pmdarima
import datetime
import matplotlib
import pytest

from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt, exp

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX

import matplotlib as mpl
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

sns.set_style('whitegrid')

import sys
sys.path.append('..')
from src.data.dataset_generator import DatasetGenerator
from src.data.data_transformer import DataTransformer

# 1. Reading in the Data from the API

Download the Data (Montel Data & Entsoe & Weather Data)

In [3]:
# Build a dataset generator for the 'all' source selection and request the
# data from 2016-01-01 up to 'latest'.
# NOTE(review): the meaning of the third argument ('ignored') is defined by
# DatasetGenerator.get_dataset, which is not visible here - confirm.
# NOTE(review): the recorded output of this cell ends with `KeyError: 'Time'`,
# so the download currently does not complete on a fresh run; the cause lies
# outside this notebook cell and should be checked in src/data.
dg = DatasetGenerator(['all'])
dataset_latest = dg.get_dataset('2016-01-01', 'latest', 'ignored')

Downloading montel data.
Repaired missing montel data from dates: ['2021-03-28']
Downloading entsoe data.
Repaired missing entsoe data from dates: ['2021-08-12' '2021-08-13']
Downloading weather data.
Data coverage 100%


KeyError: 'Time'

# 2. Data Preprocessing

Set the timestamp column as the index of the DataFrame and display the length and time range of the downloaded dataset.

In [None]:
# Extract length and time range of the downloaded dataset
X = dataset_latest.Time

# Count the rows directly: the largest index label is not a row count
# (for a 0-based index it is one less than the number of datapoints).
datapoints = len(X)
date_min = X.min()
date_max = X.max()

print('Number of datapoints in the Dataset: {}'.format(datapoints))
print('Minimum date from data set: {}'.format(date_min))
print('Maximum date from data set: {}'.format(date_max))

# Change the index of the DataFrame (new index: Time)
data_hourly = dataset_latest.set_index('Time')
# Replace the index with a gap-free hourly range spanning the same period.
# pandas raises a ValueError here if the number of rows does not match the
# number of hours between date_min and date_max, which exposes missing or
# duplicated timestamps early.
data_hourly.index = pd.date_range(date_min, date_max, freq='H')

## 2.1 Display Data Characteristics

In [None]:
# Display the hourly DataFrame as the cell output (bare last expression).
data_hourly

In [None]:
# Summary statistics for the columns of the hourly DataFrame.
data_hourly.describe()

## 2.3 Extract each Feature from the Dataframe

Each feature of the DataFrame is extracted as a pandas Series.

In [None]:
# Pull every feature column out of the hourly frame as its own Series.
# Bracket access is used so that the column names are explicit strings.
data_spot = data_hourly['SPOTPrice']
data_temp = data_hourly['temp']
data_dwpt = data_hourly['dwpt']
data_rhum = data_hourly['rhum']
data_prcp = data_hourly['prcp']
data_wspd = data_hourly['wspd']
data_pres = data_hourly['pres']
data_tsun = data_hourly['tsun']

## 2.4 Create Spotprice Datasets with different length (Final Week excluded)

To test univariate spot-price forecasting, the whole dataset is subdivided into datasets of the following lengths \
(the final week is excluded and kept for model evaluation; this split will be revised later to improve the quality of the evaluation):
* previous 5 and 1 year/-s
* previous 6, 3 and 1 month/-s
* previous 3, 2 and 1 week/-s
* previous 5, 4, 3, 2 and 1 day/-s

In [None]:
# Window sizes are expressed in hours because data_spot is an hourly series.
_DAY_HOURS = 24
_WEEK_HOURS = 7 * _DAY_HOURS
# Hold-out length: the final week plus one extra hour.
_TEST_HOURS = _WEEK_HOURS + 1


def _training_window(nominal_hours):
    """Return the part of ``data_spot`` that directly precedes the hold-out.

    The slice starts ``nominal_hours + _WEEK_HOURS`` rows before the end of
    the series and stops ``_TEST_HOURS`` rows before the end, so every window
    is positioned relative to the END of the data and stays valid when the
    dataset grows or its start date changes.

    NOTE(review): because the hold-out is 169 rows while the start offset only
    accounts for 168, each window holds ``nominal_hours - 1`` rows (e.g. the
    "1 day" window has 23 rows). This matches the original slices; confirm
    whether the one-hour difference is intended.
    """
    return data_spot.iloc[-(nominal_hours + _WEEK_HOURS):-_TEST_HOURS]


# Testing set for Walk Forward Evaluation (+1h and +24h)
df_test = data_spot.iloc[-_TEST_HOURS:]
# Training and Validation Sets for Model Training and Hyperparameter Tuning
df_all = data_spot.iloc[:-_TEST_HOURS]
# The 5-year and 1-year windows previously used absolute start positions
# (38568 and 73632), which only fit one specific download; for a dataset
# starting 2016-01-01 the 1-year window was empty. They are now relative to
# the end of the series like every other window (365-day years).
df_5a = _training_window(5 * 365 * _DAY_HOURS)
df_1a = _training_window(365 * _DAY_HOURS)
df_6m = _training_window(181 * _DAY_HOURS)
df_3m = _training_window(89 * _DAY_HOURS)
df_1m = _training_window(31 * _DAY_HOURS)
df_3w = _training_window(21 * _DAY_HOURS)
df_2w = _training_window(14 * _DAY_HOURS)
df_1w = _training_window(7 * _DAY_HOURS)
df_5d = _training_window(5 * _DAY_HOURS)
df_4d = _training_window(4 * _DAY_HOURS)
df_3d = _training_window(3 * _DAY_HOURS)
df_2d = _training_window(2 * _DAY_HOURS)
df_1d = _training_window(1 * _DAY_HOURS)

## 2.5 Create Mean Spotprice Datasets with different Means (Daily, Weekly, Monthly)

In [None]:
# Mean spot price per calendar day, week and month, computed in one pass over
# the three resampling rules.
data_daily, data_weekly, data_monthly = (
    data_spot.resample(rule).mean() for rule in ('1D', '1W', '1M')
)

# 3. Data Visualization

## 3.1 Seasonal Decomposition

In [None]:
# Figure defaults for this and all following plots.
mpl.rcParams.update({
    'figure.figsize': (9, 7),
    'lines.linewidth': 0.8,
})

# Decompose the one-month training window using a 24-sample (daily) period.
decomposition = seasonal_decompose(df_1m, period=24)
fig = decomposition.plot()
plt.show()

## 3.2 ACF & PACF

In [None]:
# Autocorrelation (top panel) and partial autocorrelation (bottom panel) of
# the full spot-price series for the first 200 lags.
fig, ax = plt.subplots(nrows=2, ncols=1)
correlation_plots = (sm.graphics.tsa.plot_acf, sm.graphics.tsa.plot_pacf)
for plot_fn, axis in zip(correlation_plots, ax):
    fig = plot_fn(data_spot, lags=200, ax=axis)
plt.show()

## 3.3 Plot Data

### 3.3.1 Plot Mean Data

In [None]:
# Daily mean spot price over the whole download.
ax = data_daily.plot(linewidth=0.2)
ax.set(xlabel='Year', ylabel='Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Weekly mean spot price over the whole download.
ax = data_weekly.plot(linewidth=0.5)
ax.set(xlabel='Year', ylabel='Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Monthly mean spot price over the whole download.
ax = data_monthly.plot(linewidth=0.8)
ax.set(xlabel='Year', ylabel='Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

### 3.3.2 Plot Hourly Data

In [None]:
# The data of the last week is used as the test set for the Statistical Models
ax = df_test.plot(linewidth=0.8)
# The hold-out spans about one week, so the x axis shows dates, not years.
ax.set_xlabel('Date')
ax.set_ylabel('Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Hourly spot price for the complete dataset
ax = data_spot.plot(linewidth=0.5)
ax.set(xlabel='Year', ylabel='Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Hourly spot price for the 5-year training window
ax = df_5a.plot(linewidth=0.5)
ax.set(xlabel='Year', ylabel='Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Previous 6 months (the series plotted here is df_6m, not the 1-year window)
ax = df_6m.plot(linewidth=0.5)
# A half-year window has month/day ticks, so label the axis as dates.
ax.set_xlabel('Date')
ax.set_ylabel('Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Previous one month
ax = df_1m.plot(linewidth=0.5)
# A one-month window has day ticks, so label the axis as dates, not years.
ax.set_xlabel('Date')
ax.set_ylabel('Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Previous 2 weeks
ax = df_2w.plot(linewidth=0.5)
# A two-week window has day ticks, so label the axis as dates, not years.
ax.set_xlabel('Date')
ax.set_ylabel('Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Previous week
ax = df_1w.plot(linewidth=0.5)
# A one-week window has day ticks, so label the axis as dates, not years.
ax.set_xlabel('Date')
ax.set_ylabel('Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Previous 5 days
ax = df_5d.plot(linewidth=0.5)
# A five-day window has day/hour ticks, so label the axis as dates, not years.
ax.set_xlabel('Date')
ax.set_ylabel('Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

In [None]:
# Previous day
ax = df_1d.plot(linewidth=0.5)
# A single-day window has hour ticks, so label the axis as time, not years.
ax.set_xlabel('Time')
ax.set_ylabel('Electricity Price [€/MWh]')
ax.set_title('EEX Day-Ahead Market')

# 4. Tests for Stationarity, Seasonality and other Characteristics for TSF

## 4.1 Augmented Dickey-Fuller (ADF) test

In [None]:
# Augmented Dickey-Fuller test on the untransformed spot-price series.
nonst_test = sma.adfuller(data_spot)

# The first four entries of the result are the summary figures; entry 4 is a
# mapping from significance level to critical value.
summary_labels = ['t-statistic', 'p-value', 'lags-used', 'no-of-observations']
adf_report = dict(zip(summary_labels, nonst_test[:4]))
adf_report.update(
    ('Critical Value (%s)' % level, critical_value)
    for level, critical_value in nonst_test[4].items()
)
output = pd.Series(adf_report)

print(output)

In [None]:
# Log-transform the spot price. Zero or negative prices have no finite real
# logarithm (np.log yields -inf / NaN), and -inf would survive dropna() and
# break the ADF test below, so non-positive values are masked to NaN first.
# For strictly positive prices the result is unchanged.
log_data = np.log(data_spot.where(data_spot > 0))
# 12-sample moving average of the log series (NaN until the window is full
# and wherever the window contains a masked value).
ma_data = log_data.rolling(window=12).mean()
# Detrend by subtracting the moving average and drop the undefined rows
# (returns a new Series instead of mutating in place).
log_minus_ma_data = (log_data - ma_data).dropna()

ax = log_minus_ma_data.plot(linewidth=0.8)
ax.set_xlabel('Year')
ax.set_ylabel('Normalized Electricity Price')
ax.set_title('EEX Day-Ahead Market')

# Augmented Dickey-Fuller test on the detrended log series.
st_test = sma.adfuller(log_minus_ma_data)

output = pd.Series(st_test[0:4], index=['t-statistic', 'p-value', 'lags-used', 'no-of-observations'])
for key, value in st_test[4].items():
    output['Critical Value (%s)' % key] = value

print(output)

## 4.2 PP test

In [None]:
# Phillips-Perron test object from pmdarima, created with its default settings.
# `test` is reused by the next cell.
test = pmdarima.arima.PPTest()
# Run the test on the untransformed spot price; the result is shown as the
# cell output.
test.should_diff(data_spot)

In [None]:
# Same PP test object as in the previous cell, applied to the log series with
# its moving average removed.
test.should_diff(log_minus_ma_data)

## 4.3 CH Test (Test for seasonal differences)

In [None]:
# Canova-Hansen test with a 24-sample seasonal period on the spot price;
# prints the estimated seasonal differencing term.
ch_test = pmdarima.arima.CHTest(m=24)
results = ch_test.estimate_seasonal_differencing_term(data_spot)
print(results)

## 4.4 OCSB Test (Test for Seasonal differencing D)

In [None]:
# OCSB test for the seasonal differencing term D on the spot price.
# The seasonal period is 24 samples (one day of hourly data), matching the
# period used for the seasonal decomposition and the CH test in this
# notebook; the previous value of 12 described a half-day cycle.
ocsb_test = pmdarima.arima.OCSBTest(m=24)
results = ocsb_test.estimate_seasonal_differencing_term(data_spot)
print(results)