In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.stats.stattools import durbin_watson
from linearmodels.iv import IV2SLS

from model import ModelData, IVModel

from functools import lru_cache

from synthetic import PriceArea, interval, Consumption, HourlySeries, Dummy, exogenous_variables, Areas, ar_model_analysis, ar_model_fit, Country, exog_list, IndEquilibrium, IndividualDemand, Supply, get_wind_series, WindOptions, RandomCached

pd.options.plotting.backend = "plotly"

In [None]:
from simulations import Simulation, StructuralModel
import numpy as np
from arch.unitroot import ADF


# Simulate market equilibria for five evenly spaced values of x in [-250, 250]
# and print an ADF unit-root test of each cleared demand series.
# NOTE(review): 8760 is presumably the number of hourly periods in one year — confirm in `simulations`.
sim = Simulation(8760)
X = np.linspace(-250, 250, 5)

for i, x in enumerate(X):
    # x is forwarded as the second argument of get_demand; its meaning is defined in `simulations`.
    demand = sim.get_demand(StructuralModel.Model_III, x)
    supply = sim.get_supply(None, 26)
    eq = sim.get_equilibrium(demand, supply)

    # Optional per-iteration plot (disabled); `i` is only used by this block.
    # plt.figure(i)  # Create a new figure for each plot
    # eq.clearing.demand.plot()
    # plt.title(f'Plot for x = {x}')
    # plt.xlabel('Time')
    # plt.ylabel('Demand')
    # plt.show()

    # Unit-root test on the equilibrium demand series.
    adf = ADF(eq.clearing.demand)
    print(adf.summary().as_text())



In [None]:
# Dump the DE2 weather series for 2015-01-01 00:00 .. 2023-12-31 23:00 to CSV,
# dropping missing observations first.
series = HourlySeries(
    interval("201501010000", "202312312300")
)

# Temperature
temp = series.temperature(Areas.DE2).dropna()
print(temp.head(), temp.tail())
temp.to_csv("temperature.csv")

# Cooling degree days
cdd = series.cdd(Areas.DE2).dropna()
print(cdd.head(), cdd.tail())
print(len(cdd))
cdd.to_csv("cdd.csv")

# Heating degree days
hdd = series.hdd(Areas.DE2).dropna()
print(hdd.head(), hdd.tail())
print(len(hdd))
hdd.to_csv("hdd.csv")

# Wind speed (written without a preview)
wind = series.wind_speed(Areas.DE2).dropna()
wind.to_csv("wind_speed.csv")

In [None]:
from statsmodels.stats import proportion
import statsmodels.stats.weightstats as ws




In [None]:
# Scratch check: 11 evenly spaced values from 1 to 21.
print(np.linspace(1, 21, 11))

In [None]:
# Scratch check: confidence interval for 50 successes in 50 trials.
# NOTE(review): alpha=0.5 asks for a 50% interval — confirm this is intended rather than alpha=0.05.
print(proportion.proportion_confint(50, 50, alpha=0.5))


In [None]:
# Scratch check: t-based confidence interval for the mean of a five-point sample.
# NOTE(review): alpha=0.5 asks for a 50% interval — confirm this is intended rather than alpha=0.05.
print(ws.DescrStatsW([3, 12, 4, 5, 12]).tconfint_mean(alpha=0.5))

In [None]:
# Get consumption parameters
# Analysis window: 2017-11-01 00:00 .. 2020-12-31 23:00, price area DE2.
start = pd.Timestamp("201711010000")
end = pd.Timestamp("202012312300")

consumption_analysis_interval = interval(start=start, end=end)

consumption = Consumption(start=start, end=end, price_area=Areas.DE2)

# Dummy and HourlySeries helpers built over the same interval.
dummy = Dummy(consumption_analysis_interval)
consumption_analyisis_series = HourlySeries(consumption_analysis_interval)

# Full control set. `exog` is re-bound by later cells, whereas EXOG_DF is built
# from the identical call and is not modified elsewhere in this notebook.
exog = exogenous_variables(
    start=start, end=end, price_area=consumption.price_area
)

EXOG_DF = exogenous_variables(
    start=start, end=end, price_area=consumption.price_area
)

# Reduced control set (minimal=True).
min_exog = exogenous_variables(
    start=start, end=end, price_area=consumption.price_area, minimal=True
)

In [None]:
# Country containers built on the shared analysis series; remove_holidays is set for Germany only.
GERMANY = Country(Areas.DE2, consumption_analyisis_series, remove_holidays=True)
SPAIN = Country(Areas.ES, consumption_analyisis_series, remove_holidays=False)

In [None]:
# Mean consumption with remove_holidays=True (compare with the next cell).
GERMANY = Country(Areas.DE2, consumption_analyisis_series, remove_holidays=True)

print(GERMANY.consumption.mean())

In [None]:
# Mean consumption with remove_holidays=False.
# NOTE(review): this re-binds GERMANY, so every later cell uses the remove_holidays=False
# version when the notebook is run top to bottom — confirm that is intended.
GERMANY = Country(Areas.DE2, consumption_analyisis_series, remove_holidays=False)

print(GERMANY.consumption.mean())

In [None]:
# Average consumption and wind generation by hour of day.
df = pd.concat([GERMANY.consumption, GERMANY.wind_generation], axis=1)

# A column shares the frame's index, so it is read from the 'consumption' column as before.
consumption_index = df['consumption'].index
df['hour'] = consumption_index.hour

hourly_groups = df.groupby('hour')
daily_profile = hourly_groups.mean()

In [None]:
# Mean consumption by hour of day (drawn with the plotly backend selected in the first cell).
daily_profile['consumption'].plot()

In [None]:
# Compare the dispersion of hourly prices with that of daily-average prices.
# Resample to daily average prices
hourly_prices = GERMANY.price
daily_prices = hourly_prices.resample('D').mean()

# Calculate variances
hourly_variance = hourly_prices.var()
daily_variance = daily_prices.var()

# Compare variances
print("Hourly Variance:", hourly_variance)
print("Daily Variance:", daily_variance)

# Calculate standard deviations
hourly_std_deviation = hourly_prices.std()
daily_std_deviation = daily_prices.std()

# Compare standard deviations
print("Hourly Standard Deviation:", hourly_std_deviation)
print("Daily Standard Deviation:", daily_std_deviation)

# Hourly load dispersion, for reference.
print("Hourly demand SD:", GERMANY.consumption.std())

Intra-day optimization

In [None]:
def _daily_block_mean(hourly, first_minute, last_minute, label):
    """Daily mean of `hourly` restricted to the clock window [first_minute, last_minute], named `label`."""
    in_window = hourly.between_time(first_minute, last_minute)
    return in_window.resample('D').mean().rename(label)


# Four six-hour blocks per day: dawn, morning, afternoon, night.
dawn_series_demand = _daily_block_mean(GERMANY.consumption, '00:00', '05:59', "demand_dawn")
morning_series_demand = _daily_block_mean(GERMANY.consumption, '06:00', '11:59', "demand_morning")
afternoon_series_demand = _daily_block_mean(GERMANY.consumption, '12:00', '17:59', "demand_afternoon")
night_series_demand = _daily_block_mean(GERMANY.consumption, '18:00', '23:59', "demand_night")

dawn_series_price = _daily_block_mean(GERMANY.price, '00:00', '05:59', "price_dawn")
morning_series_price = _daily_block_mean(GERMANY.price, '06:00', '11:59', "price_morning")
afternoon_series_price = _daily_block_mean(GERMANY.price, '12:00', '17:59', "price_afternoon")
night_series_price = _daily_block_mean(GERMANY.price, '18:00', '23:59', "price_night")

dawn_series_wind = _daily_block_mean(GERMANY.wind_generation, '00:00', '05:59', "wind_dawn")
morning_series_wind = _daily_block_mean(GERMANY.wind_generation, '06:00', '11:59', "wind_morning")
afternoon_series_wind = _daily_block_mean(GERMANY.wind_generation, '12:00', '17:59', "wind_afternoon")
night_series_wind = _daily_block_mean(GERMANY.wind_generation, '18:00', '23:59', "wind_night")

In [None]:
# Assemble the daily IV data set: morning demand explained by the four block prices,
# instrumented by the four block wind series, with three daily lags of each wind block as controls.
# NOTE(review): daily_df is not used anywhere else in this notebook.
daily_df = exog.resample('D').mean()

wind_controls = []

# Note: the loop variable re-binds the notebook-level name `wind`.
for wind in dawn_series_wind, morning_series_wind, afternoon_series_wind, night_series_wind:
    for i in range(1, 4):
        assert isinstance(wind.name, str)
        name = wind.name+f"_t-{i}"
        shifted_series = wind.copy().shift(i).rename(name)
        wind_controls.append(shifted_series)
for ser in wind_controls:
    print(ser.name)

# NOTE(review): the controls use Areas.DE1 while GERMANY is built on Areas.DE2 — confirm this is intended.
control_list = [dummy.constant(), dummy.school_holiday(), dummy.public_holiday(Areas.DE1), dummy.week_53(), consumption_analyisis_series.hdd(Areas.DE1), consumption_analyisis_series.cdd(Areas.DE1), consumption_analyisis_series.gas_price(Areas.DE1)] + dummy.month() + dummy.weekday()

# Bring the hourly controls to daily frequency by averaging.
control_list = [ser.resample('D').mean() for ser in control_list]

# `exognous` is the keyword as spelled in the call to ModelData.construct.
data = ModelData.construct(
        dependent=morning_series_demand,
        exognous=control_list + wind_controls,
        endogenous=[dawn_series_price, morning_series_price, afternoon_series_price, night_series_price],
        instruments=[dawn_series_wind, morning_series_wind, afternoon_series_wind, night_series_wind],
        scaling=False,
    )


In [None]:
# Fit the intra-day IV model with the 'kernel' covariance option.
# Requires `data` to be the ModelData object built in the previous cell.
iv_model = IVModel(data=data, cov_type='kernel')
fitted = iv_model.fitted_model

In [None]:
# Display the fitted model's summary.
fitted.summary

Eigenvalues

In [None]:
# Scratch check: print 2**0 .. 2**9.
for i in range(10):
    print((2**i))

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Toy example: cross-correlations between two instruments and two endogenous variables.
# The series are hand-made literals, so this cell does not depend on the market data.
instrument_var1 = pd.Series([1, 2, 3, 4, 5, 6])
instrument_var2 = pd.Series([4, 8, 16, 32, 64, 128])
endogenous_var1 = pd.Series([2, 4, 8, 16, 32, 64])
endogenous_var2 = pd.Series([5, 4, 3, 1, 2, 0])

# Correlation of each instrument with each endogenous variable
corr_instrument_1 = pd.Series([instrument_var1.corr(endogenous_var1), instrument_var1.corr(endogenous_var2)])
corr_instrument_2 = pd.Series([instrument_var2.corr(endogenous_var1), instrument_var2.corr(endogenous_var2)])

# 2x2 cross-correlation matrix: one column per instrument, one row per endogenous variable
corr_dataframe = pd.concat([corr_instrument_1, corr_instrument_2], axis=1)

# Compute the eigenvalues. The matrix is not symmetric, so they can be complex.
eigenvalues = np.linalg.eigvals(corr_dataframe)

print(corr_dataframe)

print(eigenvalues)


# Plot the real parts of the eigenvalues: bar heights must be real numbers, and
# np.real leaves an already-real result unchanged.
plt.bar(range(1, len(eigenvalues) + 1), np.real(eigenvalues))
plt.title('Eigenvalues of Instrument-Endogenous Correlation Matrix')
plt.xlabel('Eigenvalue Index')
plt.ylabel('Eigenvalue')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Cross-correlations between the instruments (wind, lagged wind) and the
# endogenous variables (price, lagged demand) in the observed German data.
instrument_var1 = GERMANY.wind_generation.copy()
instrument_var2 = GERMANY.wind_generation.copy().shift(1).rename("Wind_{t-1}")
endogenous_var1 = GERMANY.price.copy()
endogenous_var2 = GERMANY.consumption.copy().shift(1).rename("Demand_{t-1}")

# Combine series into a DataFrame. A dedicated name is used so that `data`
# (the ModelData object the intra-day IV model is fitted on) is not overwritten.
instrument_endogenous_df = pd.DataFrame({
    'Instrument_Var1': instrument_var1,
    'Instrument_Var2': instrument_var2,
    'Endogenous_Var1': endogenous_var1,
    'Endogenous_Var2': endogenous_var2
})

# Calculate the correlation matrix
corr_matrix = instrument_endogenous_df.corr()

# Keep the instrument rows and the endogenous columns (the 2x2 cross block)
corr_instrument_endogenous = corr_matrix.iloc[:2, 2:]

# Calculate the eigenvalues. The cross block is not symmetric, so they can be complex.
eigenvalues = np.linalg.eigvals(corr_instrument_endogenous)

# Plot the real parts of the eigenvalues: bar heights must be real numbers, and
# np.real leaves an already-real result unchanged.
plt.bar(range(1, len(eigenvalues) + 1), np.real(eigenvalues))
plt.title('Eigenvalues of Instrument-Endogenous Correlation Matrix')
plt.xlabel('Eigenvalue Index')
plt.ylabel('Eigenvalue')
plt.show()


Summary statistics

In [None]:
# Summary-statistics table (mean, spread, extremes, skewness, kurtosis) for
# consumption, price and wind generation.
df = pd.concat([GERMANY.consumption, GERMANY.price, GERMANY.wind_generation], axis=1)

# One row per variable
summary_stats = df.describe().transpose()

# Add skewness and kurtosis to the summary statistics
summary_stats['Skewness'] = df.skew()
summary_stats['Kurtosis'] = df.kurt()

# Rename columns for clarity and drop the statistics that are not reported
summary_stats = summary_stats.rename(
    columns={
        'mean': 'Mean',
        '50%': 'Median',
        'std': 'STD',
        'min': 'Minimum',
        'max': 'Maximum'
    }
).drop(columns=["count", "25%", "75%"])

# Divide the level statistics of rows 0 and 2 (consumption and wind generation,
# per the concat order above) by 1000; the price row is left as is.
# A single positional assignment is used because chained assignment
# (frame[col].iloc[i] = ...) may write to a temporary copy and leave the table unchanged.
level_columns = ['Mean', 'Median', 'STD', 'Minimum', 'Maximum']
level_positions = summary_stats.columns.get_indexer(level_columns)
summary_stats.iloc[[0, 2], level_positions] = (
    summary_stats.iloc[[0, 2], level_positions].to_numpy() / 1000
)

# Skewness is reported with two decimals, every other column with one.
summary_stats = summary_stats.round(
    {column: 2 if column == "Skewness" else 1 for column in summary_stats.columns}
)

# Print the summary statistics table
print(summary_stats)

Magnitude interpretation

In [None]:
# Descriptive statistics of observed wind generation.
print(GERMANY.wind_generation.describe())


In [None]:
# Standard deviation, skewness and mean of observed wind generation.
# These three names are re-bound later for the on-peak wind residuals.
std_deviation = GERMANY.wind_generation.std()
skewness = GERMANY.wind_generation.skew()
mean_value = GERMANY.wind_generation.mean()

print("Standard Deviation:", std_deviation)
print("Skewness:", skewness)
print("Mean:", mean_value)

In [None]:
# Histogram of observed wind generation with automatically chosen bins.
plt.hist(GERMANY.wind_generation, bins='auto', color='#0504aa', alpha=0.7, rwidth=0.85)
plt.title('Histogram of Wind generation')
plt.xlabel('MWh')
plt.ylabel('Frequency')
plt.grid(axis='y', alpha=0.75)

plt.show()

In [None]:
def filter_series_on_peak(series:pd.Series) -> pd.Series:
    """Keep the observations whose timestamp hour is in the on-peak window.

    The window is 08:00 (inclusive) to 20:00 (exclusive), i.e. hours 8..19.
    `series` must be indexed by a pd.DatetimeIndex; otherwise an
    AssertionError is raised.
    """
    assert isinstance(series.index, pd.DatetimeIndex)
    hour_of_day = series.index.hour
    is_on_peak = (hour_of_day >= 8) & (hour_of_day < 20)
    return series.loc[is_on_peak]

def filter_dataframe_on_peak(df:pd.DataFrame) -> pd.DataFrame:
    """Keep the rows whose timestamp hour is in the on-peak window.

    The window is 08:00 (inclusive) to 20:00 (exclusive), i.e. hours 8..19.
    `df` must be indexed by a pd.DatetimeIndex; otherwise an AssertionError
    is raised.
    """
    assert isinstance(df.index, pd.DatetimeIndex)
    hour_of_day = df.index.hour
    is_on_peak = (hour_of_day >= 8) & (hour_of_day < 20)
    return df.loc[is_on_peak]

In [None]:
# Residualise wind generation on the control series plus 50 hourly wind lags,
# once on on-peak hours only and once on all hours.
exog_list_vars = exog_list(
    series=consumption_analyisis_series, dummy=dummy, price_area=consumption.price_area
)

# Lags 1..50 of wind generation, each named wind_generation_t-<lag>.
wind_lags_list = [GERMANY.wind_generation.copy().shift(i).rename(f"wind_generation_t-{i}") for i in range(1, 51)]

# `+` builds a new list, so the list returned by exog_list is not modified.
civ_wind_exog = pd.concat(exog_list_vars+wind_lags_list, axis=1)

wind = GERMANY.wind_generation
price = GERMANY.price
demand = GERMANY.consumption

# On-peak subsets (hours 8..19, see filter_series_on_peak / filter_dataframe_on_peak).
on_peak_civ_wind_exog = filter_dataframe_on_peak(civ_wind_exog)
on_peak_wind = filter_series_on_peak(wind)
on_peak_price = filter_series_on_peak(price)
on_peak_demand = filter_series_on_peak(demand)


# missing='drop' discards the rows made incomplete by the lags; hasconst=True tells
# statsmodels that the regressors already contain a constant.
on_peak_residuals = sm.OLS(endog=on_peak_wind, exog=on_peak_civ_wind_exog, missing='drop', hasconst=True).fit().resid
all_residuals = pd.Series(sm.OLS(endog=wind, exog=civ_wind_exog, missing='drop', hasconst=True).fit().resid)

In [None]:
# Regress the wind residual on its own one-step lead (shift(-1)), without a constant.
forward_results = sm.OLS(endog = all_residuals, exog=all_residuals.shift(-1), missing='drop', hasconst=False).fit()

In [None]:
# Display the regression table of the fit above.
forward_results.summary()

In [None]:
# Same lead regression with a constant column added (re-binds forward_results).
forward_results = sm.OLS(endog = all_residuals, exog=pd.concat([Dummy(all_residuals.index).constant(), all_residuals.shift(-1)], axis = 1), missing='drop', hasconst=True).fit()


In [None]:
# Display the regression table of the fit with a constant.
forward_results.summary()


In [None]:
# Descriptive statistics of the on-peak wind residuals.
res_series = pd.Series(on_peak_residuals)

print(res_series.describe())


In [None]:
# Standard deviation, skewness and mean of the on-peak wind residuals.
# This re-binds the names first used for raw wind generation.
std_deviation = res_series.std()
skewness = res_series.skew()
mean_value = res_series.mean()

print("Standard Deviation:", std_deviation)
print("Skewness:", skewness)
print("Mean:", mean_value)

In [None]:
# Histogram of the on-peak wind residuals with automatically chosen bins.
plt.hist(res_series, bins='auto', color='#0504aa', alpha=0.7, rwidth=0.85)
plt.title('Histogram of Residuals')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.grid(axis='y', alpha=0.75)

plt.show()

In [None]:
# First stage on on-peak hours: price on contemporaneous wind, the 50 wind lags and the controls.
first_stage = sm.OLS(on_peak_price, exog=filter_dataframe_on_peak(pd.concat([wind]+wind_lags_list+exog_list_vars, axis=1)), missing='drop').fit()

In [None]:
# First-stage coefficient on contemporaneous wind generation, rounded to six decimals.
print(round(first_stage.params['wind_generation'], 6))

In [None]:
# Hard-coded estimates per country and the ingredients of the magnitude calculations below.
# NOTE(review): these numbers are not computed in this notebook — presumably copied from
# earlier model runs; confirm their source. Other cells use 230 and 233 for Germany rather than 310.
ES_civ_wind_estimate = -115.96418771147728
DE_civ_wind_estimate = -310
FR_civ_wind_estimate = -1520.687990039587

# Estimate used by the cells below.
civ_wind_estimate = DE_civ_wind_estimate

# NOTE(review): reg_iv_estimate is not used anywhere else in this notebook.
reg_iv_estimate = 0
resid_wind_sd = res_series.std()
wind_sd = GERMANY.wind_generation.std()
wind_first_stage_estimate = first_stage.params['wind_generation']
price_sd = GERMANY.price.std()

In [None]:
# Residual-wind SD x first-stage coefficient x German estimate (uses DE_civ_wind_estimate directly).
print(resid_wind_sd*wind_first_stage_estimate*DE_civ_wind_estimate)

In [None]:
# Standard deviation of observed consumption.
GERMANY.consumption.std()

In [None]:
# A fixed product expressed as a percentage of the consumption standard deviation.
# NOTE(review): 18 and 223 are unexplained literals — presumably a price change and a
# price-response coefficient; confirm and name them.
18*223/GERMANY.consumption.std()*100

In [None]:
# Responsive capacity from a one-SD residual-wind shock, as a percentage of on-peak demand dispersion.
# NOTE(review): the printed label says "average on-peak demand" but the denominator is
# on_peak_demand.std(), not .mean() — confirm which of the two is intended.
print("The share of price responsive capacity at average on-peak demand: ", round(civ_wind_estimate*resid_wind_sd*wind_first_stage_estimate*100/on_peak_demand.std(), 4), '%')

In [None]:
# Responsive capacity from a one-SD residual-wind shock, as a percentage of maximum demand.
# The price move caused by the shock is resid_wind_sd * wind_first_stage_estimate, so the
# first-stage coefficient is part of the product, as in the other responsive-capacity cells.
print("The share of price responsive capacity at maximum demand: ", round(civ_wind_estimate*resid_wind_sd*wind_first_stage_estimate*100/GERMANY.consumption.max(), 4), '%')

In [None]:
# Estimate multiplied by one standard deviation of the observed price.
print("Responsive capacity using 1 SD of observed price: ", civ_wind_estimate*price_sd)

In [None]:
# Estimate multiplied by two standard deviations of the observed price.
print("Responsive capacity using 2 SD of observed price: ", civ_wind_estimate*(price_sd*2))

In [None]:
# Estimate x one SD of residual wind x first-stage coefficient.
print("Responsive capacity using 1 SD of residual wind: ", civ_wind_estimate*resid_wind_sd*wind_first_stage_estimate)

In [None]:
# Estimate x two SDs of residual wind x first-stage coefficient.
print("Responsive capacity using 2 SD of residual wind: ", civ_wind_estimate*resid_wind_sd*2*wind_first_stage_estimate)

ACF/PACF

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

def plot_correlation(axs, series: pd.Series, lags: int, pacf_limit=168):
    """Draw the ACF and PACF of `series` on a pair of axes.

    axs        -- two axes, unpacked as (ACF axis, PACF axis)
    lags       -- number of lags shown in the ACF
    pacf_limit -- upper bound on the number of lags shown in the PACF
    """
    acf_axis, pacf_axis = axs
    # The PACF is drawn with at most pacf_limit lags.
    pacf_lags = min(lags, pacf_limit)

    plot_acf(
        series,
        lags=lags,
        ax=acf_axis,
        markersize=3,
        bartlett_confint=False,
        vlines_kwargs={'linewidth':0.6},
    )
    plot_pacf(
        series,
        lags=pacf_lags,
        ax=pacf_axis,
        method="ywm",
        markersize=3,
        vlines_kwargs={'linewidth':0.6},
    )

def plot_acf_pacf(series: pd.Series, height:float, width:float, name:str):
    """Show ACF/PACF (168 lags) of `series` next to those of its OLS residuals.

    The left column uses the raw series ("uncontrolled"). The right column uses
    the residuals of an OLS regression of the series on the frame returned by
    exogenous_variables for the series' own time span and GERMANY.area
    ("controlled").

    series -- series indexed by a pd.DatetimeIndex (AssertionError otherwise)
    height -- figure height passed to figsize
    width  -- figure width passed to figsize
    name   -- label used in the two column titles
    """
    # Validate before any figure is created so a bad input leaves no empty figure behind.
    assert isinstance(series.index, pd.DatetimeIndex), "Series does not have a pd.DatetimeIndex"

    fig, axss = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(width,height))

    # axss.transpose()[k] is column k of the grid: (top axis, bottom axis).
    plot_correlation(axss.transpose()[0], series, 168)
    residuals = sm.OLS(series, exogenous_variables(start=series.index.min(), end=series.index.max(), price_area=GERMANY.area)).fit().resid
    plot_correlation(axss.transpose()[1], residuals, 168)

    axss[0,0].set_title(f'uncontrolled {name}')
    axss[0,1].set_title(f'controlled {name}')
    # Clear the titles of the bottom row.
    axss[1,0].set_title('')
    axss[1,1].set_title('')
    axss[0,0].set_ylabel('autocorrelation', size='large')
    axss[1,0].set_ylabel('partial autocorrelation', size='large')
    axss[1,0].set_xlabel('lags')
    axss[1,1].set_xlabel('lags')

    fig.tight_layout()
    plt.show()
    

# plot_acf_pacf(consumption(), height=6, width=8, name="demand")


# There is an initial burn-in because of the cumulative effect of AR processes (therefore iloc)
# plot_correlation(equilibria['AR1 & elast'].demand, 168)

In [None]:
# ACF/PACF (72 lags) of the residuals of wind generation and of load after
# regressing each on the control set `exog`.
# The style sheet is selected before the figure is created: style settings are
# read when figures and axes are built, so selecting it afterwards would not
# affect this figure.
plt.style.use("seaborn-v0_8-whitegrid")

fig, axss = plt.subplots(2, 2, sharex=True, sharey='row', figsize=(8, 6))

resid_wind = sm.OLS(GERMANY.wind_generation, exog=exog).fit().resid
resid_demand = sm.OLS(GERMANY.consumption, exog=exog).fit().resid

# Left column: wind residuals; right column: load residuals.
plot_correlation(axss.transpose()[0], resid_wind, 72)
plot_correlation(axss.transpose()[1], resid_demand, 72)

axss[0,0].set_title('Residual wind electricity generation')
axss[0,1].set_title('Residual electricity load')
# Clear the titles of the bottom row.
axss[1,0].set_title('')
axss[1,1].set_title('')
axss[0,0].set_ylabel('autocorrelation', size='large')
axss[1,0].set_ylabel('partial autocorrelation', size='large')
axss[1,0].set_xlabel('lags')
axss[1,1].set_xlabel('lags')

# for ax_list in axss:
#     for ax in ax_list:
#         ax.yaxis.grid(visible=True)
#         ax.xaxis.grid(visible=True)

fig.tight_layout()

# There is an initial burn-in because of the cumulative effect of AR processes (therefore iloc)
# plot_correlation(equilibria['AR1 & elast'].demand, 168)

plt.show()

Lead CIV Wind

In [None]:
@lru_cache
def exog_list(series: HourlySeries, dummy: Dummy, price_area:PriceArea, minimal:bool=False, constant:bool=True) -> list[pd.Series]:
    '''Build the list of exogenous control series for `price_area`.

    !!!! Since this function is cached, always copy the return before modifying it e.g. before appending other columns !!!!

    This definition replaces the `exog_list` imported from `synthetic` for all
    cells executed after it.

    series     -- source of the hourly series; its `timestamps` define the span
    dummy      -- source of the dummy/indicator series
    price_area -- area the area-specific series are requested for
    minimal    -- if True, return the reduced set: month and hour dummies,
                  solar generation, HDD and CDD
    constant   -- if True (default), the returned list starts with
                  dummy.constant(); honoured by both the full and the minimal set

    Returns a list of pd.Series. All arguments must be hashable because the
    result is memoised with lru_cache.
    '''
    # Last hour served from the DE1 solar series and first hour served from the DE2 one.
    last_de1_hour = pd.Timestamp("201809302300", tz="Europe/Berlin")
    first_de2_hour = pd.Timestamp("201810010000", tz="Europe/Berlin")

    is_german_area = price_area == Areas.DE1 or price_area == Areas.DE2
    spans_cutover = is_german_area and (
        series.timestamps.min() < last_de1_hour
        and first_de2_hour < series.timestamps.max()
    )

    if spans_cutover:
        # The span straddles the cutover: stitch DE1 data (before) to DE2 data (after).
        series1 = HourlySeries(
            interval(
                start=series.timestamps.min(),
                end=last_de1_hour,
            )
        )
        series2 = HourlySeries(
            interval(
                start=first_de2_hour,
                end=series.timestamps.max(),
            )
        )

        # NOTE(review): the stitched series is divided by 1000 while the single-area
        # branch below is not — confirm that both branches end up in the same unit.
        solar_generation = pd.concat(
            [
                series1.solar_generation(price_area=Areas.DE1).div(1000),
                series2.solar_generation(price_area=Areas.DE2).div(1000),
            ]
        ).rename("generation.DE.Solar")
    else:
        solar_generation = series.solar_generation(price_area)

    # Optional leading constant, shared by the minimal and the full set.
    base = [dummy.constant()] if constant else []

    if minimal:
        return (
            base
            + dummy.month()
            + dummy.hour()
            + [solar_generation,
               series.hdd(price_area),
               series.cdd(price_area)]
        )

    return ( base
        + [
            series.gas_price(price_area),
            series.coal_price(),
            series.eua_price(),
            series.sunlight(price_area),
            solar_generation,
            series.hdd(price_area),
            series.cdd(price_area),
        ]
        + dummy.weekhour()
        + dummy.month()
        + dummy.year()

        + [dummy.public_holiday(price_area)]
        + [dummy.school_holiday()]
        + [dummy.week_53()]

    )


In [None]:
# Scratch check: range(1) yields only i=0, so the list holds a single unshifted copy.
# Note: this re-binds the notebook-level name `x`.
x = pd.Series([1, 2, 3, 4, 5])
print([x.shift(i) for i in range(1)])

In [None]:
# 2SLS of consumption on price, instrumented by contemporaneous wind generation,
# controlling for 26 hourly *leads* of wind (shift(-i)) plus the control set.
# Lead columns are named wind_t+1 .. wind_t+26.
iv2sls = IV2SLS(dependent=GERMANY.consumption,
                exog=pd.concat(
                    [dummy.constant()]
                    + [GERMANY.wind_generation.copy().shift(-i).rename(f"wind_t+{i}") for i in range(1, 27)]
                    + exog_list(series=consumption_analyisis_series, dummy=dummy, price_area=GERMANY.area, constant=False),
                    axis=1),
                endog=GERMANY.price,
                instruments=GERMANY.wind_generation).fit(cov_type="kernel")
iv2sls.summary

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Two side-by-side heatmaps over a 15x15 grid of (alpha, beta) values in [0, 0.999],
# sharing one colour scale, each with a dashed red contour at level 5.
# The plotted surfaces are placeholder formulas (replace with your actual calculation).
alpha_values = np.linspace(0, 0.999, num=15)  # Adjust the number of points as needed
beta_values = np.linspace(0, 0.999, num=15)   # Adjust the number of points as needed
alpha_grid, beta_grid = np.meshgrid(alpha_values, beta_values)

E_values = (alpha_grid*10)*(beta_grid*10)
F_values = (alpha_grid**2*80)+(beta_grid**3*2)

# Common colour limits: cover both surfaces and at least the range 0..100.
vmin = min(np.min(E_values), np.min(F_values), 0)
vmax = max(np.max(E_values), np.max(F_values), 100)

fig, axs = plt.subplots(1, 2, sharex=False,  sharey=True, figsize=(16, 6))


def _draw_error_panel(ax, values, xlabel, title):
    """Draw one heatmap with its colour bar, labels and the level-5 contour; return the image."""
    image = ax.imshow(values, cmap='viridis', origin='lower', extent=(0, 1, 0, 1), vmin=vmin, vmax=vmax)
    fig.colorbar(image, ax=ax, label='Absolute percentage error')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Autocorrelation of wind ($\\alpha^W$)')
    ax.set_title(title)
    ax.contour(alpha_grid, beta_grid, values, levels=[5], colors='red', linestyles='dashed')
    return image


im = _draw_error_panel(
    axs[0], E_values,
    'Autocorrelation of demand ($\\alpha^D$)',
    'Naive IV in Model I',
)
im2 = _draw_error_panel(
    axs[1], F_values,
    'Autocorrelation of non-price-responsive demand ($\\alpha^\\mu$)',
    'CIV-D in Model II',
)

plt.show()

print(vmax)

Correlation

In [None]:
def correlation_plot(demand, price, title:str):
    """Scatter `price` against `demand` with a least-squares line and show the figure.

    demand -- x values
    price  -- y values, same length as `demand`
    title  -- figure title
    """
    # Degree-1 polynomial fit: coefficients come back highest power first.
    slope, intercept = np.polyfit(demand, price, 1)
    fitted_prices = [intercept + slope * observed if False else (slope * observed) + intercept for observed in demand]

    plt.scatter(demand, price, label='Data Points', color='b')
    plt.plot(demand, fitted_prices, label='Regression Line', color='r')

    plt.xlabel('Demand [GW]')
    plt.ylabel('Price [€/MWh]')
    plt.title(title)

    # Plain tick labels: no scientific notation, no offset.
    plt.ticklabel_format(style='plain', useOffset=False)

    # Pad the axes around the data range.
    lowest_price, highest_price = min(price), max(price)
    plt.ylim(lowest_price-10, highest_price+10)
    lowest_demand, highest_demand = min(demand), max(demand)
    plt.xlim(lowest_demand-1, highest_demand+1)

    plt.legend()
    plt.show()

In [None]:
# Price against load, all hours.
correlation_plot(GERMANY.consumption, GERMANY.price, "Observed values")

In [None]:
# Price against load, on-peak hours only. The window matches filter_series_on_peak
# (08:00 inclusive to 20:00 exclusive), so it does not overlap the off-peak plot.
on_peak_hours = (GERMANY.consumption.index.hour >= 8) & (GERMANY.consumption.index.hour < 20)
correlation_plot(GERMANY.consumption[on_peak_hours], GERMANY.price[on_peak_hours], "Observed values (on-peak hours)")

In [None]:
# Inspect the price series.
print(GERMANY.price)

In [None]:
# Price against load, off-peak hours only: the complement of the on-peak window
# used by filter_series_on_peak (hours 20..23 and 0..7).
off_peak_hours = (GERMANY.consumption.index.hour >= 20) | (GERMANY.consumption.index.hour < 8)
correlation_plot(GERMANY.consumption[off_peak_hours], GERMANY.price[off_peak_hours], "Observed values (off-peak hours)")

In [None]:
# Price residuals against load residuals after regressing both on the control set `exog`.
# The result depends on the current contents of `exog`, which later cells re-bind.
resid_consumption = sm.OLS(GERMANY.consumption, exog, missing='drop').fit().resid
resid_price = sm.OLS(GERMANY.price, exog, missing='drop').fit().resid

correlation_plot(resid_consumption, resid_price, "Residuals (full controlling set)")

In [None]:
# Same plot with the reduced control set `min_exog` (re-binds resid_consumption / resid_price).
resid_consumption = sm.OLS(GERMANY.consumption, min_exog, missing='drop').fit().resid
resid_price = sm.OLS(GERMANY.price, min_exog, missing='drop').fit().resid

correlation_plot(resid_consumption, resid_price, "Residuals (minimal controlling set)")

Wind effect on prices

In [None]:
# Controls for the price regressions below: 26 hourly wind lags plus the base control set.
wind = pd.Series(GERMANY.wind_generation)

# Lags 1..26 of wind generation, named wind-t-<lag>.
winds = pd.concat([wind.shift(i).rename(f"wind-t-{i}") for i in range(1, 27)], axis=1)

# Start from EXOG_DF (the base control frame, built by the same call as the initial
# `exog`) instead of from `exog` itself, so that re-running this cell does not stack
# a second copy of the lag columns onto the frame.
exog = pd.concat([winds, EXOG_DF], axis=1)

In [None]:
# Price on contemporaneous wind plus the current `exog` frame, with HAC standard errors (168 lags).
sm.OLS(endog=GERMANY.price, exog=pd.concat([wind, exog], axis=1), missing='drop').fit(cov_type='HAC', cov_kwds={'maxlags':168}).summary()

In [None]:
# One-period first differences of price and wind generation.
price_diff = GERMANY.price.diff(1).rename("price_diff(1)")

wind_diff = GERMANY.wind_generation.diff(1).rename("wind_generation_diff(1)")

In [None]:
# Prepend the differenced wind series to the control frame. Any column already carrying
# that name is dropped first, so re-running this cell does not add duplicate columns.
exog = pd.concat([wind_diff, exog.drop(columns=[wind_diff.name], errors="ignore")], axis=1)

In [None]:
# Model of the price difference on the current `exog` frame (not fitted here).
reg = sm.OLS(endog=price_diff, exog=exog, missing='drop')

In [None]:
# Mean absolute hour-to-hour change in wind generation.
wind_diff.abs().mean()

In [None]:
# reg.fit(cov_type='HAC', cov_kwds={'maxlags':168}).summary()

# Weak instrument analysis from Felix

In [None]:
# Residualise wind, demand and price on the controls.
# The differenced wind column is excluded: together with the first wind lag it
# reproduces contemporaneous wind exactly (wind_t = wind_{t-1} + diff_t), which would
# make the wind residual numerically zero and useless as an instrument.
residual_controls = exog.drop(columns=["wind_generation_diff(1)"], errors="ignore")

W_c = pd.Series(sm.OLS(GERMANY.wind_generation, residual_controls, missing='drop').fit().resid).rename("residual_wind")
D_c = pd.Series(sm.OLS(GERMANY.consumption, residual_controls, missing='drop').fit().resid).rename("residual_demand")
P_c = pd.Series(sm.OLS(GERMANY.price, residual_controls, missing='drop').fit().resid).rename("residual_price")
# Constant restricted to the rows kept in the demand regression.
constant = dummy.constant()[D_c.index]

# Control series without a constant, for the regressions that add `constant` explicitly.
exog_list_return = exog_list(series=consumption_analyisis_series, dummy=dummy, price_area=GERMANY.area, constant=False)

In [None]:
# Number of control series returned by exog_list.
len(exog_list_return)

2SLS, uncontrolled

In [None]:
# 2SLS of consumption on price, instrumented by wind generation; a constant is the only control.
iv2sls = IV2SLS(dependent=GERMANY.consumption,
                exog=dummy.constant(),
                endog=GERMANY.price, 
                instruments=GERMANY.wind_generation).fit(cov_type="kernel")
iv2sls.summary

2SLS, residuals

In [None]:
# 2SLS on the residualised series: residual demand on residual price, instrumented by residual wind.
iv2sls = IV2SLS(dependent=D_c,
                exog=None,
                endog=P_c, 
                instruments=W_c).fit(cov_type="kernel")

iv2sls.summary

Demand on Wind, uncontrolled

In [None]:
# Consumption on a constant and wind generation. No endogenous regressors or instruments
# are given, so only exogenous regressors enter.
iv2sls = IV2SLS(dependent=GERMANY.consumption,
                exog=pd.concat([dummy.constant(), GERMANY.wind_generation], axis=1),
                endog=None, 
                instruments=None).fit(cov_type="kernel")
iv2sls.summary

Demand on Wind, residuals

In [None]:
# Residual demand on residual wind, with no constant and no instruments.
iv2sls = IV2SLS(dependent=D_c,
                exog=pd.concat([W_c], axis=1),
                endog=None, 
                instruments=None).fit(cov_type="kernel")
iv2sls.summary

p{t} ~ controls, wind{t}, constant

In [None]:
# Price on constant, contemporaneous wind and the controls; kernel covariance.
iv2sls = IV2SLS(dependent=GERMANY.price,
                exog=pd.concat([constant, GERMANY.wind_generation]+exog_list_return, axis=1),
                endog=None, 
                instruments=None).fit(cov_type="kernel")
iv2sls.summary

In [None]:
# Same regression as the previous cell with the "unadjusted" covariance, for comparison.
iv2sls = IV2SLS(dependent=GERMANY.price,
                exog=pd.concat([constant, GERMANY.wind_generation]+exog_list_return, axis=1),
                endog=None, 
                instruments=None).fit(cov_type="unadjusted")
iv2sls.summary

p{t} ~ controls, wind{t-1}, wind{t}, constant

In [None]:
# Price on constant, contemporaneous wind, one wind lag and the controls; kernel covariance.
iv2sls = IV2SLS(dependent=GERMANY.price,
                exog=pd.concat([constant, GERMANY.wind_generation, GERMANY.wind_generation.shift(1).rename("wind_t-1")]+exog_list_return, axis=1),
                endog=None, 
                instruments=None).fit(cov_type="kernel")
iv2sls.summary

In [None]:
# Same regression as the previous cell with the "unadjusted" covariance, for comparison.
iv2sls = IV2SLS(dependent=GERMANY.price,
                exog=pd.concat([constant, GERMANY.wind_generation, GERMANY.wind_generation.shift(1).rename("wind_t-1")]+exog_list_return, axis=1),
                endog=None, 
                instruments=None).fit(cov_type="unadjusted")
iv2sls.summary

p{t} ~ controls, wind{t}, wind{t-1}, ..., wind{t-26}, constant

In [None]:
# Price on constant, contemporaneous wind, wind lags 1..26 and the controls; kernel covariance.
iv2sls = IV2SLS(dependent=GERMANY.price,
                exog=pd.concat([constant, GERMANY.wind_generation] + [GERMANY.wind_generation.shift(i).rename(f"wind_t-{i}") for i in range(1, 27)]+exog_list_return, axis=1),
                endog=None, 
                instruments=None).fit(cov_type="kernel")
iv2sls.summary

In [None]:
# Same regression as the previous cell with the "unadjusted" covariance, for comparison.
iv2sls = IV2SLS(dependent=GERMANY.price,
                exog=pd.concat([constant, GERMANY.wind_generation] + [GERMANY.wind_generation.shift(i).rename(f"wind_t-{i}") for i in range(1, 27)]+exog_list_return, axis=1),
                endog=None, 
                instruments=None).fit(cov_type="unadjusted")
iv2sls.summary

# AR analysis from Felix 

In [None]:
# Consumption minus 230 x price.
# NOTE(review): other cells use 233 (and -310) as the German coefficient — confirm which value is current.
correted_consumption = GERMANY.consumption - GERMANY.price * 230

In [None]:
# Inspect the price-corrected consumption series.
print(correted_consumption)

In [None]:
# Percentage reduction in variance from raw to price-corrected consumption.
print((np.var(GERMANY.consumption)-np.var(correted_consumption))/np.var(GERMANY.consumption)*100)

In [None]:
# Percentage of consumption variance removed by adding x * price to consumption,
# for 1000 candidate coefficients x between -500 and 0.
XX = np.linspace(-500, 0, 1000)

# The variance of raw consumption does not depend on x, so it is computed once.
consumption_variance = np.var(GERMANY.consumption)

YY = [(consumption_variance - np.var(GERMANY.consumption + GERMANY.price * x)) / consumption_variance * 100 for x in XX]

# Plotting the data
plt.plot(XX, YY, label='Explained variance')
plt.axvline(x=-233, color='red', linestyle='--', label='CIV-Wind current estimate')
# Grey line: the coefficient at which the curve peaks (first maximum).
plt.axvline(x=XX[int(np.argmax(YY))], color='grey', linestyle='--', label='Maximum explained variance')

plt.xlabel('Coefficient')
plt.ylabel('Explained variance')
plt.title('Explained variance as a function of price response coefficient')
plt.legend()
plt.show()

In [None]:
# Fit an AR model with 2 lags and no exogenous regressors to the corrected consumption,
# then print the lag parameters and error SD extracted by ar_model_analysis.
results = ar_model_fit(series=correted_consumption, lags=2, exog=None)
params = ar_model_analysis(results)

print(params.lags_dict)
print(params.error_sd)

In [None]:
# Residuals of the AR fit above.
results.resid.plot()

In [None]:
# Durbin-Watson statistic of the AR residuals.
durbin_watson(results.resid)

In [None]:
# Rolling mean of the AR residuals over 30*24 = 720 observations.
pd.Series(results.resid).rolling(30*24).mean().plot()

In [None]:
# Residualise the corrected consumption on the controls, then fit an AR(2) to the residuals.
# `exog` contains lagged columns whose first rows are NaN, so incomplete rows are dropped;
# without missing='drop' statsmodels rejects regressors containing NaN.
residuals = sm.OLS(correted_consumption, exog, missing='drop').fit().resid

# Pass only the control rows that survived the drop, so series and regressors cover the same hours.
results = ar_model_fit(series=residuals, lags=2, exog=exog.loc[residuals.index])
params = ar_model_analysis(results)

print(params.lags_dict)
print(params.error_sd)

In [None]:
# Residuals of the AR fit on the controlled series.
results.resid.plot()

In [None]:
# Rolling mean of those residuals over 30*24 = 720 observations.
pd.Series(results.resid).rolling(30*24).mean().plot()

In [None]:
# Durbin-Watson statistic of those residuals.
durbin_watson(results.resid)

Variance explained

In [None]:
# Residualise consumption and price on the controls plus 26 hourly wind lags.
EXOG_DF = exogenous_variables(
    start=start, end=end, price_area=consumption.price_area
)

# Each lag gets its own column name (wind_generation_t-<lag>); left unnamed, all
# 26 columns would share the name of the source series.
exog_df = pd.concat(
    exog_list_return
    + [GERMANY.wind_generation.copy().shift(i).rename(f"wind_generation_t-{i}") for i in range(1, 27)],
    axis=1,
)

resid_cons = sm.OLS(GERMANY.consumption, exog=exog_df, missing='drop').fit().resid
resid_price = sm.OLS(GERMANY.price, exog=exog_df, missing='drop').fit().resid


In [None]:
# Residual consumption minus 233 x residual price.
# NOTE(review): the level version of this correction uses 230 — confirm which coefficient is intended.
corrected_consumption_resid = resid_cons - resid_price*233

In [None]:
# Percentage reduction in residual-consumption variance after the price correction.
print((np.var(resid_cons)-np.var(corrected_consumption_resid))/np.var(resid_cons)*100)

# Simulation for weak instruments

In [None]:
# Simulated equilibrium for the weak-instrument experiment: DE2 supply driven by a synthetic
# AR wind series (synthetic_nlags=26) and an individual-demand model with one lag term.
# The numeric settings are passed straight to the `synthetic` classes; their exact
# meaning is defined there.
# Note: this re-binds `eq`, which the simulation loop near the top also uses.
eq = IndEquilibrium(
    supply=Supply(
        Areas.DE2,
        wind=get_wind_series(wind_type=WindOptions.SYNTHETIC_AR_N,
                             series=consumption_analyisis_series,
                             price_area=Areas.DE2,
                             synthetic_nlags=26,
                             use_default_series_for_wind=False),
        error=RandomCached.normal(0, 1)
    ),
    # base_constant is the mean of the series returned by calling the Consumption object.
    demand=IndividualDemand(base_constant=consumption().mean(),
                            lags={-1:0.897},
                            elasticity=-20,
                            cross_price_elasticity=50,
                            error=RandomCached.normal(0, 1000),
                            average_supply_price=60),
    times=consumption_analysis_interval
)

2SLS, uncontrolled

In [None]:
# 2SLS on the simulated market: cleared demand on cleared price, instrumented by simulated wind.
# Constant and instrument are restricted to the index of the cleared demand series.
iv2sls = IV2SLS(dependent=eq.clearing.demand,
                exog=dummy.constant()[eq.clearing.demand.index],
                endog=eq.clearing.price, 
                instruments=eq.supply.wind[eq.clearing.demand.index]).fit(cov_type="kernel")
iv2sls.summary

p{t} ~ controls, wind{t}, constant

In [None]:
from model import ModelData, treat_nan

In [None]:
# Simulated price on a constant and simulated wind (no other controls are included here).
# NOTE(review): this re-binds the notebook-level `exog`, so earlier cells that read `exog`
# give different results if re-run afterwards.
dep, exog, endog, inst = treat_nan([eq.clearing.price, 
                                    pd.concat([constant, eq.supply.wind], axis=1), 
                                    None,
                                    None])

iv2sls = IV2SLS(dependent=dep,
                exog=exog,
                endog=endog, 
                instruments=inst).fit(cov_type="kernel")
iv2sls.summary

p{t} ~ controls, wind{t-1}, wind{t}, constant

In [None]:
# Simulated price on a constant, simulated wind and its first lag; "unadjusted" covariance.
# NOTE(review): this re-binds the notebook-level `exog` (see the previous cell).
dep, exog, endog, inst = treat_nan([eq.clearing.price, 
                                    pd.concat([constant, eq.supply.wind, eq.supply.wind.shift(1).rename(f"{eq.supply.wind.name}_t-{1}")], axis=1), 
                                    None,
                                    None])

iv2sls = IV2SLS(dependent=dep,
                exog=exog,
                endog=endog, 
                instruments=inst).fit(cov_type="unadjusted")
iv2sls.summary

p{t} ~ controls, wind{t}, wind{t-1}, ..., wind{t-26}, constant

In [None]:
# Simulated price on a constant, simulated wind and wind lags 1..26; "unadjusted" covariance.
# NOTE(review): this re-binds the notebook-level `exog` (see the earlier simulation cells).
dep, exog, endog, inst = treat_nan([eq.clearing.price, 
                                    pd.concat([constant, eq.supply.wind]+[eq.supply.wind.shift(i).rename(f"{eq.supply.wind.name}_t-{i}") for i in range(1, 27)], axis=1), 
                                    None,
                                    None])

iv2sls = IV2SLS(dependent=dep,
                exog=exog,
                endog=endog, 
                instruments=inst).fit(cov_type="unadjusted")
iv2sls.summary