In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

from functools import lru_cache

from synthetic import interval, Consumption, HourlySeries, Dummy, exogenous_variables, Areas, Country, exog_list

# Route pandas' `.plot()` calls through plotly; direct `plt.*` calls below still use matplotlib.
pd.options.plotting.backend = "plotly"

# Setup

In [None]:
# Sample window and price area shared by the rest of the notebook
start, end = pd.Timestamp("201711010000"), pd.Timestamp("202012312300")
# end = pd.Timestamp("202307042300")

AREA = Areas.DE2
INTERVAL = interval(start=start, end=end)

# Default series / dummy helpers built on the sample interval
DEFAULT_SERIES = HourlySeries(INTERVAL)
DEFAULT_DUMMY = Dummy(INTERVAL)

# Exogenous variables for the selected price area over the same window
EXOG_DF = exogenous_variables(start=start, end=end, price_area=AREA)

In [None]:
# Country objects for the DE2 and ES areas, both built from the shared DEFAULT_SERIES.
# `remove_holidays=False` — presumably keeps holiday observations in the data; verify in `synthetic`.
GERMANY = Country(Areas.DE2, DEFAULT_SERIES, remove_holidays=False)
SPAIN = Country(Areas.ES, DEFAULT_SERIES, remove_holidays=False)

# Summary statistics

In [None]:
def print_share_of_non_positive_values(series):
    """Print the share of values in ``series`` that are zero or negative.

    Parameters
    ----------
    series : pandas.Series
        Numeric series to inspect. Missing values never compare as ``<= 0``,
        so they count towards the total but not towards the non-positive part.

    Notes
    -----
    An empty series has no defined share; a placeholder line is printed
    instead of raising ``ZeroDivisionError``.
    """
    total_values = len(series)
    if total_values == 0:
        print("Share of non-positive values: n/a (empty series)")
        return
    # Summing the boolean mask counts the non-positive observations.
    share = (series <= 0).sum() / total_values
    print(f"Share of non-positive values: {share:.2%}")

# Report how often prices are zero or negative for each country
print_share_of_non_positive_values(GERMANY.price)
print_share_of_non_positive_values(SPAIN.price)

In [None]:
# Side-by-side frame of German consumption, price and wind generation
df = pd.concat([GERMANY.consumption, GERMANY.price, GERMANY.wind_generation], axis=1)

# One row per variable: describe() output plus skewness and kurtosis,
# with the main statistics relabelled for presentation.
summary_stats = (
    df.describe()
    .transpose()
    .assign(Skewness=df.skew(), Kurtosis=df.kurt())
    .rename(
        columns={
            'mean': 'Mean',
            '50%': 'Median',
            'std': 'Standard Deviation',
            'min': 'Minimum',
            'max': 'Maximum',
        }
    )
)

print(summary_stats)

# Magnitude interpretation

In [None]:
# Descriptive statistics of German wind generation
print(GERMANY.wind_generation.describe())


In [None]:
# Standard deviation, skewness and mean of German wind generation.
# Note: these three names are reassigned further down for the regression residuals.
std_deviation = GERMANY.wind_generation.std()
skewness = GERMANY.wind_generation.skew()
mean_value = GERMANY.wind_generation.mean()

print("Standard Deviation:", std_deviation)
print("Skewness:", skewness)
print("Mean:", mean_value)

In [None]:
# Histogram of German wind generation, drawn with matplotlib's pyplot interface
# (bins='auto' lets the bin edges be chosen automatically from the data).
plt.hist(GERMANY.wind_generation, bins='auto', color='#0504aa', alpha=0.7, rwidth=0.85)
plt.title('Histogram of Wind generation')
plt.xlabel('MWh')
plt.ylabel('Frequency')
# Horizontal grid lines only
plt.grid(axis='y', alpha=0.75)

plt.show()

In [None]:
def filter_series_on_peak(series:pd.Series, start_hour: int = 8, end_hour: int = 20) -> pd.Series:
    """Return the observations of ``series`` that fall in the on-peak hours.

    Parameters
    ----------
    series : pd.Series
        Series indexed by a ``pd.DatetimeIndex``.
    start_hour : int, default 8
        First hour of day (inclusive) of the on-peak window.
    end_hour : int, default 20
        Hour of day (exclusive) at which the on-peak window ends.

    Returns
    -------
    pd.Series
        Rows whose index hour ``h`` satisfies ``start_hour <= h < end_hour``.

    Raises
    ------
    AssertionError
        If the index is not a ``pd.DatetimeIndex``.
    """
    assert isinstance(series.index, pd.DatetimeIndex)
    hours = series.index.hour
    return series.loc[(hours >= start_hour) & (hours < end_hour)]

def filter_dataframe_on_peak(df:pd.DataFrame, start_hour: int = 8, end_hour: int = 20) -> pd.DataFrame:
    """Return the rows of ``df`` that fall in the on-peak hours.

    Parameters
    ----------
    df : pd.DataFrame
        Frame indexed by a ``pd.DatetimeIndex``.
    start_hour : int, default 8
        First hour of day (inclusive) of the on-peak window.
    end_hour : int, default 20
        Hour of day (exclusive) at which the on-peak window ends.

    Returns
    -------
    pd.DataFrame
        Rows whose index hour ``h`` satisfies ``start_hour <= h < end_hour``.

    Raises
    ------
    AssertionError
        If the index is not a ``pd.DatetimeIndex``.
    """
    assert isinstance(df.index, pd.DatetimeIndex)
    hours = df.index.hour
    return df.loc[(hours >= start_hour) & (hours < end_hour)]

In [None]:
wind = GERMANY.wind_generation
price = GERMANY.price
demand = GERMANY.consumption

exog_list_vars = exog_list(
    series=DEFAULT_SERIES, dummy=DEFAULT_DUMMY, price_area=AREA
)

# Lags 1..50 of wind generation. `shift` already returns a new Series, so no
# defensive copy of the source series is needed.
wind_lags_list = [
    wind.shift(i).rename(f"wind_generation_t-{i}") for i in range(1, 51)
]

# Regressors for the wind regressions: the exogenous variables plus wind's own
# lags, keeping only rows whose index is contained in INTERVAL.
civ_wind_exog = pd.concat(exog_list_vars+wind_lags_list, axis=1)
civ_wind_exog = civ_wind_exog[civ_wind_exog.index.isin(INTERVAL)]

# On-peak subsets of the regressors and of each series
on_peak_civ_wind_exog = filter_dataframe_on_peak(civ_wind_exog)
on_peak_wind = filter_series_on_peak(wind)
on_peak_price = filter_series_on_peak(price)
on_peak_demand = filter_series_on_peak(demand)

# Residuals of wind regressed on the regressors above (on-peak and full sample);
# missing='drop' discards rows with missing values, e.g. those created by the lags.
on_peak_residuals = sm.OLS(endog=on_peak_wind, exog=on_peak_civ_wind_exog, missing='drop').fit().resid
residuals = sm.OLS(endog=wind, exog=civ_wind_exog, missing='drop').fit().resid

In [None]:
# Wrap the full-sample residuals in a Series and print their descriptive statistics
res_series = pd.Series(residuals)

print(res_series.describe())


In [None]:
# Standard deviation, skewness and mean of the residuals.
# This overwrites the std_deviation / skewness / mean_value computed earlier for raw wind generation.
std_deviation = res_series.std()
skewness = res_series.skew()
mean_value = res_series.mean()

print("Standard Deviation:", std_deviation)
print("Skewness:", skewness)
print("Mean:", mean_value)

In [None]:
# Histogram of the residuals, drawn with matplotlib's pyplot interface
# (bins='auto' lets the bin edges be chosen automatically from the data).
plt.hist(res_series, bins='auto', color='#0504aa', alpha=0.7, rwidth=0.85)
plt.title('Histogram of Residuals')
plt.xlabel('Value')
plt.ylabel('Frequency')
# Horizontal grid lines only
plt.grid(axis='y', alpha=0.75)

plt.show()

# First stage

In [None]:
# First-stage regressors: contemporaneous wind, its lags and the exogenous
# variables, limited to rows whose index is contained in INTERVAL.
first_stage_exog = pd.concat([wind, *wind_lags_list, *exog_list_vars], axis=1).loc[
    lambda frame: frame.index.isin(INTERVAL)
]

# Regress price on those regressors, dropping rows with missing values
first_stage = sm.OLS(endog=price, exog=first_stage_exog, missing='drop').fit()

In [None]:
# First-stage coefficient on contemporaneous wind generation, rounded to 6 decimals
print(round(first_stage.params['wind_generation'], 6))

In [None]:
# Standard deviation of the first-stage residuals (shown as the cell output)
pd.Series(first_stage.resid).std()

In [None]:
# Keep the (unrounded) first-stage wind coefficient under a descriptive name
wind_first_stage_estimate = first_stage.params['wind_generation']

In [None]:
# Print the unrounded first-stage wind coefficient
print("wind_first_stage_estimate", wind_first_stage_estimate)

In [None]:
# Standard deviation of German consumption (shown as the cell output)
GERMANY.consumption.std()

# ACF/PACF

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from plot_tools import save_figure, HEIGHT, REGULAR_WIDTH

def plot_correlation(axs, series: pd.Series, lags: int, pacf_limit=168):
    """Draw the ACF and PACF of ``series`` on a pair of axes.

    ``axs`` unpacks into (ACF axis, PACF axis). The ACF is drawn for ``lags``
    lags; the PACF for ``min(lags, pacf_limit)`` lags using the "ywm" method.
    """
    acf_axis, pacf_axis = axs
    pacf_lags = min(lags, pacf_limit)

    plot_acf(
        series,
        lags=lags,
        ax=acf_axis,
        markersize=3,
        bartlett_confint=False,
        vlines_kwargs={'linewidth':0.6},
    )
    plot_pacf(
        series,
        lags=pacf_lags,
        ax=pacf_axis,
        method="ywm",
        markersize=3,
        vlines_kwargs={'linewidth':0.6},
    )

def plot_acf_pacf(country: Country, height:float, width:float, lags: int = 72):
    """Plot ACF and PACF of residual wind generation and residual load.

    Both series of ``country`` are regressed (OLS) on the module-level
    ``EXOG_DF`` and the residuals are plotted: ACF in the top row, PACF in the
    bottom row, wind in the left column and load in the right column. The
    figure is handed to ``save_figure`` under the name "acf_pacf_plot" and
    then shown.

    Parameters
    ----------
    country : Country
        Object providing ``wind_generation`` and ``consumption``.
    height, width : float
        Figure size, passed to matplotlib as ``figsize=(width, height)``.
    lags : int, default 72
        Number of lags to plot; also sets the x-axis range and the tick
        positions (every 12 lags).

    Notes
    -----
    * The style is activated with ``plt.style.use`` and therefore stays active
      for matplotlib figures created afterwards.
    * NOTE(review): ``EXOG_DF`` is built for ``AREA`` — confirm these are the
      intended regressors when ``country`` is a different area.
    """
    # Activate the style BEFORE the figure exists: most style settings are read
    # when the figure and axes are created, so applying the style afterwards
    # leaves the figure unstyled on the first run and styled only on re-runs.
    plt.style.use("seaborn-v0_8-whitegrid")

    fig, axss = plt.subplots(2, 2, sharex=True, sharey='row', figsize=(width, height))

    resid_wind = sm.OLS(country.wind_generation, exog=EXOG_DF).fit().resid
    resid_demand = sm.OLS(country.consumption, exog=EXOG_DF).fit().resid

    # Column 0: wind residuals, column 1: load residuals (ACF on top, PACF below)
    plot_correlation(axss.transpose()[0], resid_wind, lags)
    plot_correlation(axss.transpose()[1], resid_demand, lags)

    axss[0,0].set_title('Residual wind electricity generation')
    axss[0,1].set_title('Residual electricity load')
    # Clear the bottom-row titles so only the column headers remain
    axss[1,0].set_title('')
    axss[1,1].set_title('')
    axss[0,0].set_ylabel('obs. autocorrelation', size='large')
    axss[1,0].set_ylabel('obs. partial autocorrelation', size='large')
    axss[1,0].set_xlabel('lags')
    axss[1,1].set_xlabel('lags')

    # sharex=True propagates the limits and ticks to every panel
    axss[0][0].set_xlim(-1, lags + 1)
    axss[0][0].set_xticks(list(range(0, lags + 1, 12)))

    # Lay out once the final ticks and labels are in place
    fig.tight_layout()

    save_figure(fig, "acf_pacf_plot")

    plt.show()
    

# Draw, save and show the ACF/PACF figure for Germany at 1.5 × HEIGHT
plot_acf_pacf(country=GERMANY, height=HEIGHT*1.5, width=REGULAR_WIDTH)


In [None]:
# plot_acf_pacf(country=SPAIN, height=HEIGHT*1.5, width=REGULAR_WIDTH)


# Wind effect on prices

In [None]:
wind = pd.Series(GERMANY.wind_generation)

# Lags 1..26 of wind generation, one column per lag
winds = pd.concat(
    [wind.shift(lag).rename(f"wind-t-{lag}") for lag in range(1, 27)],
    axis=1,
)

# Lagged wind next to the exogenous variables
exog = pd.concat([winds, EXOG_DF], axis=1)

In [None]:
# Price regressed on contemporaneous wind plus `exog`, with HAC standard errors
# (maxlags=168); rows with missing values are dropped. The summary is the cell output.
(
    sm.OLS(
        endog=GERMANY.price,
        exog=pd.concat([wind, exog], axis=1),
        missing='drop',
    )
    .fit(cov_type='HAC', cov_kwds={'maxlags':168})
    .summary()
)

In [None]:
# First differences (one period) of German price and wind generation, with explicit names
price_diff = GERMANY.price.diff(1).rename("price_diff(1)")

wind_diff = GERMANY.wind_generation.diff(1).rename("wind_generation_diff(1)")

In [None]:
# Prepend the differenced wind series to the regressors. Any column already
# carrying its name is dropped first, so re-running this cell does not stack
# duplicate copies of the same regressor.
exog = pd.concat(
    [wind_diff, exog.drop(columns=wind_diff.name, errors="ignore")], axis=1
)

In [None]:
# OLS model of the price change on `exog`, dropping rows with missing values.
# Only the model object is built here; `.fit()` is not called in this cell.
reg = sm.OLS(endog=price_diff, exog=exog, missing='drop')

In [None]:
# Mean absolute first difference of wind generation (shown as the cell output)
wind_diff.abs().mean()