# Data analysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from eval_scripts import data_analysis
from eval_scripts import model_eval

### Hierarchical structure

In [None]:
# Create the sunburst view of the hierarchical structure. The call takes no
# arguments, so data selection and styling live in eval_scripts.data_analysis.
# NOTE(review): this cell is saved without an execution count (In [None]);
# re-run the notebook top to bottom so the stored state matches a fresh run.
data_analysis.create_hierarchical_sunburst()

### Electricity demand autocorrelation

In [2]:
# Plot electricity-demand autocorrelation over aggregate size. No arguments are
# passed, so the data and plot settings are whatever data_analysis defaults to.
data_analysis.plot_autocorrelation_over_aggregate_size()

### Weather forecast accuracy

In [3]:
# Weather-forecast accuracy table: one row per weather variable, one column per
# forecast horizon (1-day to 4-day). With to_LaTeX=True the cell output also
# contains the same table as a LaTeX tabular.
# NOTE(review): the meaning of the "value (value)" cell format is not visible
# here - presumably an error metric with its spread; confirm in data_analysis.
weather_forecast_accuracy = data_analysis.create_weather_forecast_df(to_LaTeX=True)
display(weather_forecast_accuracy)

\begin{tabular}{lrrrr}
\toprule
{} &        1-day &        2-day &        3-day &        4-day \\
\midrule
Temperature &  0.09 (0.04) &  0.10 (0.03) &  0.12 (0.04) &  0.13 (0.04) \\
Dew point   &  0.13 (0.04) &  0.14 (0.04) &  0.17 (0.06) &  0.18 (0.06) \\
Wind speed  &  0.22 (0.07) &  0.24 (0.10) &  0.30 (0.10) &  0.33 (0.12) \\
\bottomrule
\end{tabular}



Unnamed: 0,1-day,2-day,3-day,4-day
Temperature,0.09 (0.04),0.10 (0.03),0.12 (0.04),0.13 (0.04)
Dew point,0.13 (0.04),0.14 (0.04),0.17 (0.06),0.18 (0.06)
Wind speed,0.22 (0.07),0.24 (0.10),0.30 (0.10),0.33 (0.12)


In [4]:
# display(data_analysis.create_weather_forecast_df(to_LaTeX=True, winter_period=True))

### Multivariate weather regression

In [5]:
# OLS of L0 / 'Agg' energy demand with weather_OLS's default regressors.
# Element [0] of the returned value is the regression summary that gets printed.
full_model_result = data_analysis.weather_OLS('L0', 'Agg')
print(full_model_result[0])

                            OLS Regression Results                            
Dep. Variable:     Energy demand (L0)   R-squared:                       0.890
Model:                            OLS   Adj. R-squared:                  0.889
Method:                 Least Squares   F-statistic:                     727.9
Date:                Thu, 02 Dec 2021   Prob (F-statistic):          9.43e-171
Time:                        14:48:30   Log-Likelihood:                -1758.3
No. Observations:                 364   AIC:                             3527.
Df Residuals:                     359   BIC:                             3546.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Constant      549.8159      1.600    343.716      

In [6]:
# print(data_analysis.weather_OLS('L0', 'Agg', forecast=True)[0])

In [7]:
# Same regression as the default call, but with the seasonality term switched
# off; element [0] of the returned value is the printed regression summary.
weather_only_result = data_analysis.weather_OLS('L0', 'Agg', include_seasonality=False)
print(weather_only_result[0])

                            OLS Regression Results                            
Dep. Variable:     Energy demand (L0)   R-squared:                       0.854
Model:                            OLS   Adj. R-squared:                  0.853
Method:                 Least Squares   F-statistic:                     702.5
Date:                Thu, 02 Dec 2021   Prob (F-statistic):          4.69e-150
Time:                        14:48:32   Log-Likelihood:                -1810.0
No. Observations:                 364   AIC:                             3628.
Df Residuals:                     360   BIC:                             3644.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Constant      549.8159      1.842    298.564      

In [8]:
# print(data_analysis.weather_OLS('L0', 'Agg', include_seasonality=False, forecast=True)[0])

In [9]:
# Compare temperature-only against temperature + dew point, first without and
# then with the seasonality term. Element [1] of weather_OLS's return value is
# printed to three decimals.
# NOTE(review): [1] is presumably a goodness-of-fit score (R^2 or adjusted
# R^2) - confirm against data_analysis.weather_OLS.
comparison_groups = (
    ('Temperature vs. Temperature + Dew Point', False),
    ('Temperature + Seasonality vs. Temperature + Dew Point + Seasonality', True),
)

for heading, with_seasonality in comparison_groups:
    print(heading)
    # Fresh list literals per iteration, in the same order as the original calls.
    for variable_set in (['temperature'], ['temperature', 'dew_point']):
        fit_score = data_analysis.weather_OLS(
            'L0',
            'Agg',
            weather_variables=variable_set,
            include_seasonality=with_seasonality,
        )[1]
        print(f"{fit_score:.3f}")
    print()

Temperature vs. Temperature + Dew Point
0.802
0.847

Temperature + Seasonality vs. Temperature + Dew Point + Seasonality
0.859
0.888



In [10]:
# Disabled: forecast-based counterpart of the comparison in the previous cell.
# print('Temperature forecast vs. Temperature forecast + Dew Point forecast')
# print(f"{data_analysis.weather_OLS('L0', 'Agg', weather_variables=['temperature'], include_seasonality=False, forecast=True)[1]:.3f}")
# print(f"{data_analysis.weather_OLS('L0', 'Agg', weather_variables=['temperature', 'dew_point'], include_seasonality=False, forecast=True)[1]:.3f}")
# print()
# print('Temperature forecast + Seasonality vs. Temperature forecast + Dew Point forecast + Seasonality')
# print(f"{data_analysis.weather_OLS('L0', 'Agg', weather_variables=['temperature'], include_seasonality=True, forecast=True)[1]:.3f}")
# print(f"{data_analysis.weather_OLS('L0', 'Agg', weather_variables=['temperature', 'dew_point'], include_seasonality=True, forecast=True)[1]:.3f}")
# print()

In [11]:
# Regression with an empty weather-variable list, i.e. no weather regressors
# are requested; element [0] of the returned value is the printed summary.
no_weather_result = data_analysis.weather_OLS('L0', 'Agg', weather_variables=[])
print(no_weather_result[0])

                            OLS Regression Results                            
Dep. Variable:     Energy demand (L0)   R-squared:                       0.800
Model:                            OLS   Adj. R-squared:                  0.799
Method:                 Least Squares   F-statistic:                     1447.
Date:                Thu, 02 Dec 2021   Prob (F-statistic):          1.54e-128
Time:                        14:48:47   Log-Likelihood:                -1867.5
No. Observations:                 364   AIC:                             3739.
Df Residuals:                     362   BIC:                             3747.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Constant      549.8159      2.151    255.657      

In [12]:
# print(data_analysis.auto_OLS('L0', 'Agg')[0])

### Correlation of weather variables and electricity demand

In [14]:
# Correlation plot for temperature against the 'L0' / 'Agg' demand series.
# NOTE(review): 'L0' and 'Agg' are presumably hierarchy level and aggregate
# identifiers - confirm against data_analysis.plot_correlation.
data_analysis.plot_correlation('temperature', 'L0', 'Agg')
# Disabled variant of the same plot with forecast=True:
# data_analysis.plot_correlation('temperature', 'L0', 'Agg', forecast=True)

In [15]:
# Correlation plot for wind speed against the 'L0' / 'Agg' demand series.
data_analysis.plot_correlation('wind_speed', 'L0', 'Agg')
# Disabled variant of the same plot with forecast=True:
# data_analysis.plot_correlation('wind_speed', 'L0', 'Agg', forecast=True)

In [16]:
# Correlation plot for dew point against the 'L0' / 'Agg' demand series.
data_analysis.plot_correlation('dew_point', 'L0', 'Agg')
# Disabled variant of the same plot with forecast=True:
# data_analysis.plot_correlation('dew_point', 'L0', 'Agg', forecast=True)

In [17]:
# Weather/demand correlation broken down per half hour for 'L0' / 'Agg'.
# Which weather variables are included is decided inside data_analysis.
data_analysis.plot_weather_correlation_per_half_hour('L0', 'Agg')
# Disabled variant of the same plot with forecast=True:
# data_analysis.plot_weather_correlation_per_half_hour('L0', 'Agg', forecast=True)

In [18]:
# Weather/demand correlation as a function of aggregate size. No arguments are
# passed, so the data and plot settings are whatever data_analysis defaults to.
data_analysis.plot_weather_correlation_over_aggregate_size()
# Disabled variant of the same plot with forecast=True:
# data_analysis.plot_weather_correlation_over_aggregate_size(forecast=True)

### Temperature seasonality

In [19]:
# Plot the temperature seasonality; the call also prints an "R^2 = ..." line
# to the cell output.
# NOTE(review): which fit the reported R^2 belongs to is not visible here -
# confirm in data_analysis.plot_temperature_seasonality.
data_analysis.plot_temperature_seasonality()
# Disabled variant of the same plot with forecast=True:
# data_analysis.plot_temperature_seasonality(forecast=True)

R^2 = 0.748
