In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import sklearn
from math import sqrt

### Import data 

In [7]:
from help_functions import get_energy_data

energyconsumption = get_energy_data.get_data(
    'hourly_ec_20201107_20231107.csv')

# add weekdays, months, hours seperately
# monday = 0, sunday = 6
energyconsumption['weekday'] = energyconsumption.index.weekday
energyconsumption['hour'] = energyconsumption.index.hour
energyconsumption['month'] = energyconsumption.index.month


# create winter/cold dummy variable
energyconsumption['winter'] = energyconsumption['month'].apply(
    lambda x: 1 if x in [10, 11, 12, 1, 2, 3] else 0)

# Define mapping of hours to timeframes (based on graph) and create dummy variable
time_mapping = {
    'low_consumption_time': list(range(7)),  # differs a lot weekend/weekday
    'high_consumption_time': list(range(7, 20)),
    'transition_time': [6, 20, 21, 22, 23]}

for timeframe, hours in time_mapping.items():
    energyconsumption[timeframe] = energyconsumption['hour'].apply(
        lambda x: 1 if x in hours else 0)

# create weekend day dummy variable
energyconsumption['weekend_day'] = energyconsumption['weekday'].apply(
    lambda x: 1 if x in [5,6] else 0)

# drop unneccesary columns
energyconsumption = energyconsumption.drop(columns=['date', 'beginning', 'weekday', 'hour', 'month'])

energyconsumption

Unnamed: 0_level_0,energy_consumption,winter,low_consumption_time,high_consumption_time,transition_time,weekend_day
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-11-07 00:00:00,46.97475,1,1,0,0,1
2020-11-07 01:00:00,45.35550,1,1,0,0,1
2020-11-07 02:00:00,44.66450,1,1,0,0,1
2020-11-07 03:00:00,44.76300,1,1,0,0,1
2020-11-07 04:00:00,44.81150,1,1,0,0,1
...,...,...,...,...,...,...
2023-11-07 06:00:00,59.43725,1,1,0,1,0
2023-11-07 07:00:00,63.89000,1,0,1,0,0
2023-11-07 08:00:00,66.64725,1,0,1,0,0
2023-11-07 09:00:00,68.20925,1,0,1,0,0


### Fit model 

In [33]:
y_ec = energyconsumption['energy_consumption']
X_ec = energyconsumption.drop(
    columns=['energy_consumption', 'low_consumption_time']) #low consumption time as reference time --> drop

# add constant for the intercept term
X_ec = sm.add_constant(X_ec)

X_ec
# fit seasonal linear regression model
model = sm.OLS(y_ec, X_ec).fit()
model.summary()

0,1,2,3
Dep. Variable:,energy_consumption,R-squared:,0.715
Model:,OLS,Adj. R-squared:,0.715
Method:,Least Squares,F-statistic:,16470.0
Date:,"Wed, 08 Nov 2023",Prob (F-statistic):,0.0
Time:,16:43:59,Log-Likelihood:,-80466.0
No. Observations:,26291,AIC:,160900.0
Df Residuals:,26286,BIC:,161000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,45.2099,0.074,611.208,0.000,45.065,45.355
winter,5.8969,0.064,92.576,0.000,5.772,6.022
high_consumption_time,14.9759,0.077,194.505,0.000,14.825,15.127
transition_time,7.9468,0.094,84.116,0.000,7.762,8.132
weekend_day,-9.6217,0.070,-136.610,0.000,-9.760,-9.484

0,1,2,3
Omnibus:,835.742,Durbin-Watson:,0.266
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1030.674
Skew:,-0.382,Prob(JB):,1.56e-224
Kurtosis:,3.597,Cond. No.,5.14


### Calculate forecasts 

In [23]:
# create new dataframe containing future date_times and indepentent variables

last_ts = energyconsumption.index[-1]
horizon = pd.date_range(start=last_ts + pd.DateOffset(
    hours=1), periods=200, freq='H')

energy_forecast = pd.DataFrame({'date_time': horizon})
energy_forecast.set_index('date_time', inplace=True)

energy_forecast['weekday'] = energy_forecast.index.weekday
energy_forecast['hour'] = energy_forecast.index.hour
energy_forecast['month'] = energy_forecast.index.month

# create winter/cold dummy variable
energy_forecast['winter'] = energy_forecast['month'].apply(
    lambda x: 1 if x in [10, 11, 12, 1, 2, 3] else 0)

# time mapping already initialized
for timeframe, hours in time_mapping.items():
    energy_forecast[timeframe] = energy_forecast['hour'].apply(
        lambda x: 1 if x in hours else 0)

# create weekend day dummy variable
energy_forecast['weekend_day'] = energy_forecast['weekday'].apply(
    lambda x: 1 if x in [5,6] else 0)

energy_forecast

# Point forecasts
X_fc = energy_forecast.drop(columns=['weekday', 'hour', 'month','low_consumption_time'])
X_fc = sm.add_constant(X_fc, has_constant='add')
X_fc

# Make predictions
predictions_ec = model.predict(X_fc)
predictions_ec.head(20)

q0.025,q0.25,q0.5,q0.75,q0.975

date_time
2023-11-07 11:00:00    66.082657
2023-11-07 12:00:00    66.082657
2023-11-07 13:00:00    66.082657
2023-11-07 14:00:00    66.082657
2023-11-07 15:00:00    66.082657
2023-11-07 16:00:00    66.082657
2023-11-07 17:00:00    66.082657
2023-11-07 18:00:00    66.082657
2023-11-07 19:00:00    66.082657
2023-11-07 20:00:00    59.053548
2023-11-07 21:00:00    59.053548
2023-11-07 22:00:00    59.053548
2023-11-07 23:00:00    59.053548
2023-11-08 00:00:00    51.106751
2023-11-08 01:00:00    51.106751
2023-11-08 02:00:00    51.106751
2023-11-08 03:00:00    51.106751
2023-11-08 04:00:00    51.106751
2023-11-08 05:00:00    51.106751
2023-11-08 06:00:00    59.053548
dtype: float64

***Erkenntnisse bis jetzt:***
* Eine Referenz-Dummy-Variable muss immer ausgelassen werden 
* Viele verschiedene Kombinationen ausprobiert (alle Stunden miteinbezogen,alle Monate miteinbezogen,..)
* Bestehendes Modell eignet sich gut zum Schätzen laut R^2--> Saisonales Lineares Regressionsmodell 

Nächster Schritt: Quantilregression zur Berechnung der Quantile 


In [46]:
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

model_qr = sm.QuantReg(y_ec, X_ec)

for q in quantiles:
    
    model_temp = model_qr.fit(q=q)

    # Calculate forecasts for X_fc using the fitted model for the current quantile
    forecast_temp = model_temp.predict(X_fc)

    # Add the forecasts to the energy_forecast DataFrame with a label like 'forecast025'
    energy_forecast[f'forecast{q}'] = forecast_temp

Unnamed: 0_level_0,weekday,hour,month,winter,low_consumption_time,high_consumption_time,transition_time,weekend_day,forecast025,forecast0.025,forecast0.25,forecast0.5,forecast0.75,forecast0.975
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-11-07 11:00:00,1,11,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-07 12:00:00,1,12,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-07 13:00:00,1,13,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-07 14:00:00,1,14,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-07 15:00:00,1,15,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-15 14:00:00,2,14,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-15 15:00:00,2,15,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-15 16:00:00,2,16,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-15 17:00:00,2,17,11,1,0,1,0,0,51.965751,51.965751,62.826253,66.557,70.110998,75.507698


### Extract required forecasts 

In [49]:
# Define the specific date and time combinations
selected_dates = ['2023-11-10 12:00:00', '2023-11-10 16:00:00', '2023-11-10 20:00:00',
                  '2023-11-11 12:00:00', '2023-11-11 16:00:00', '2023-11-11 20:00:00']

forecasting_results = energy_forecast.loc[selected_dates,
                                          'forecast025':'forecast0.975']

forecasting_results

Unnamed: 0_level_0,forecast025,forecast0.025,forecast0.25,forecast0.5,forecast0.75,forecast0.975
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-11-10 12:00:00,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-10 16:00:00,51.965751,51.965751,62.826253,66.557,70.110998,75.507698
2023-11-10 20:00:00,47.158251,47.158251,55.523751,59.529001,63.065508,68.723749
2023-11-11 12:00:00,43.996251,43.996251,52.709002,56.941501,60.883957,65.976003
2023-11-11 16:00:00,43.996251,43.996251,52.709002,56.941501,60.883957,65.976003
2023-11-11 20:00:00,39.188751,39.188751,45.4065,49.913502,53.838467,59.192053


### Create submission table
forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
2022-10-19,DAX,1 day,-3.5,-1.5,0,1.5,3.5
2022-10-19,DAX,2 day,-4,-2,0,2,4
2022-10-19,DAX,5 day,-4,-2,0,2,4
2022-10-19,DAX,6 day,-4,-2,0,2,4
2022-10-19,DAX,7 day,-4,-2,0,2,4
2022-10-19,energy,36 hour,NA,NA,NA,NA,NA
2022-10-19,energy,40 hour,NA,NA,NA,NA,NA
2022-10-19,energy,44 hour,NA,NA,NA,NA,NA
2022-10-19,energy,60 hour,NA,NA,NA,NA,NA
2022-10-19,energy,64 hour,NA,NA,NA,NA,NA
2022-10-19,energy,68 hour,NA,NA,NA,NA,NA
2022-10-19,infections,0 week,1000,1100,1200,1300,1400
2022-10-19,infections,1 week,1000,1100,1200,1300,1400
2022-10-19,infections,2 week,1000,1100,1200,1300,1400
2022-10-19,infections,3 week,1000,1100,1200,1300,1400
2022-10-19,infections,4 week,1000,1100,1200,1300,1400

In [58]:


columns = ['forecast_date', 'target', 'horizon', 'q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975']

data = np.full((16, len(columns)), np.nan)
submission_table = pd.DataFrame(data, columns=columns)

submission_table


Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,,,,,,,,
1,,,,,,,,
2,,,,,,,,
3,,,,,,,,
4,,,,,,,,
5,,,,,,,,
6,,,,,,,,
7,,,,,,,,
8,,,,,,,,
9,,,,,,,,


In [79]:
from datetime import datetime, date, timedelta

date_str = datetime.today().strftime('%Y%m%d')
hours = ['36 hour', '40 hour', '44 hour', '60 hour', '64 hour', '68 hour']

df_sub_ec = pd.DataFrame({
    "forecast_date": date_str,
    "target": "energy",
    "horizon": hours,
    "q0.025": forecasting_results.iloc[:, 0],
    "q0.25": forecasting_results.iloc[:, 1],
    "q0.5": forecasting_results.iloc[:, 2],
    "q0.75": forecasting_results.iloc[:, 3],
    "q0.975": forecasting_results.iloc[:, 4]}).reset_index()

df_sub_ec.drop(columns={'date_time'})

Unnamed: 0,date_time,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-11-10 12:00:00,20231108,energy,36 hour,51.965751,51.965751,62.826253,66.557,70.110998
1,2023-11-10 16:00:00,20231108,energy,40 hour,51.965751,51.965751,62.826253,66.557,70.110998
2,2023-11-10 20:00:00,20231108,energy,44 hour,47.158251,47.158251,55.523751,59.529001,63.065508
3,2023-11-11 12:00:00,20231108,energy,60 hour,43.996251,43.996251,52.709002,56.941501,60.883957
4,2023-11-11 16:00:00,20231108,energy,64 hour,43.996251,43.996251,52.709002,56.941501,60.883957
5,2023-11-11 20:00:00,20231108,energy,68 hour,39.188751,39.188751,45.4065,49.913502,53.838467


In [80]:
days = ['1 day', '2 day', '3 day', '4 day', '5 day', '6 day']
df_sub_dax = pd.DataFrame({
    "forecast_date": date_str,
    "target": "DAX",
    "horizon": days,
    "q0.025": np.nan,
    "q0.25": np.nan,
    "q0.5": np.nan,
    "q0.75": np.nan,
    "q0.975": np.nan})

weeks = ['0 week', '1 week', '2 week', '3 week', '4 week']
df_sub_inf = pd.DataFrame({
    "forecast_date": date_str,
    "target": "infections",
    "horizon": weeks,
    "q0.025": np.nan,
    "q0.25": np.nan,
    "q0.5": np.nan,
    "q0.75": np.nan,
    "q0.975": np.nan})

submission_frame = pd.merge(df_sub_dax, df_sub_ec, how='outer')
submission_frame = pd.merge(submission_frame, df_sub_inf, how='outer')
submission_frame.drop(columns='date_time')

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,20231108,DAX,1 day,,,,,
1,20231108,DAX,2 day,,,,,
2,20231108,DAX,3 day,,,,,
3,20231108,DAX,4 day,,,,,
4,20231108,DAX,5 day,,,,,
5,20231108,DAX,6 day,,,,,
6,20231108,energy,36 hour,51.965751,51.965751,62.826253,66.557,70.110998
7,20231108,energy,40 hour,51.965751,51.965751,62.826253,66.557,70.110998
8,20231108,energy,44 hour,47.158251,47.158251,55.523751,59.529001,63.065508
9,20231108,energy,60 hour,43.996251,43.996251,52.709002,56.941501,60.883957


In [86]:
# check 

from check_submissions import check_df

ModuleNotFoundError: No module named 'check_submissions'

In [81]:
submission_frame.to_csv('C:/Users/Maria/Documents/Studium/Pyhton Projekte/PTSFC/submission_files'+date_str, index=False)

In [71]:
# Define the range of rows (5-10) that you want to fill
fill_range = range(5, 11)

# Update the rows in submission_table with the corresponding rows from df_sub
df_sub.columns = submission_table.columns  # Match column names
submission_table.loc[fill_range] = df_sub.values

# Print the updated submission_table
print(submission_table)

ValueError: Length mismatch: Expected axis has 9 elements, new values have 8 elements

In [39]:
# List of quantiles
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

# Loop through the quantiles and fit quantile regression models
quantile_results = {}
for q in quantiles:
    model_q = sm.QuantReg(y_ec, X_ec)
    model_result = model_q.fit(q=q)
    quantile_results[f'q{q}'] = model_result

# Print the summary of each quantile regression model
for q, result in quantile_results.items():
    print(f"Quantile {q}:")
    print(result.summary())

Quantile q0.025:
                         QuantReg Regression Results                          
Dep. Variable:     energy_consumption   Pseudo R-squared:               0.2817
Model:                       QuantReg   Bandwidth:                      0.8689
Method:                 Least Squares   Sparsity:                        94.23
Date:                Wed, 08 Nov 2023   No. Observations:                26291
Time:                        16:47:57   Df Residuals:                    26286
                                        Df Model:                            4
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    37.3073      0.218    171.268      0.000      36.880      37.734
winter                    3.4563      0.182     19.007      0.000       3.100       3.813
high_consumption_time    11.2023      0.220     50.891      0.000     

In [30]:
model.params

const                    45.209899
winter                    5.896852
high_consumption_time    14.975906
transition_time           7.946796
weekend_day              -9.621686
dtype: float64

In [29]:
model.std
#quantiles = np.arange(0.025, 0.25, 0.5, 0.75, 0.975)

AttributeError: 'OLSResults' object has no attribute 'std'