# Exploratory analysis of combined dataset

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [8]:
# Show up to 50 columns when rendering DataFrames (pandas default is 20)
pd.set_option('display.max_columns', 50)

In [4]:
# Data directories are siblings of the current working directory:
# both sit directly under the parent of wherever the notebook is run from.
cwd_path = Path.cwd()
data_path = cwd_path.parent / 'data'
data_push_path = cwd_path.parent / 'data_to_push'

#### Read in data 

In [5]:
# Load the pickled frame from the data_to_push directory and print its
# column/dtype/non-null summary.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df = pd.read_pickle(data_push_path.joinpath('df_main_smard_era5_final.pkl'))
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 48168 entries, 2018-01-01 00:00:00+00:00 to 2023-06-30 23:00:00+00:00
Data columns (total 33 columns):
 #   Column                                          Non-Null Count  Dtype              
---  ------                                          --------------  -----              
 0   hour                                            48168 non-null  int32              
 1   day_of_week                                     48168 non-null  int32              
 2   day_of_month                                    48168 non-null  int32              
 3   month_number                                    48168 non-null  int32              
 4   year                                            48168 non-null  int32              
 5   meteorological_season                           48168 non-null  object             
 6   turbines_in_operation                           48168 non-null  int64              
 7   total_nominal_capacity_operational_tur

In [6]:
# Count missing values per column (sum the boolean NA mask down the rows)
df.isna().sum(axis=0)

hour                                                 0
day_of_week                                          0
day_of_month                                         0
month_number                                         0
year                                                 0
meteorological_season                                0
turbines_in_operation                                0
total_nominal_capacity_operational_turbines_mw       0
datetime_cet                                         0
actual_generated_smard_mwh                           0
day_ahead_price_eur_mwh                           6550
total_nominal_capacity_smard_mw                      0
forecasted_generation_smard_mwh                     24
total_net_load_smard_mwh                             0
residual_load_smard_mwh                              0
mean_wind_speed_10m                                  0
mean_wind_speed_100m                                 0
wind_direction_angle_10m                             0
wind_direc

In [9]:
# Preview 10 randomly chosen rows. The fixed seed makes the preview
# reproducible: without it every Restart & Run All shows different rows.
df.sample(n=10, random_state=42)

Unnamed: 0_level_0,hour,day_of_week,day_of_month,month_number,year,meteorological_season,turbines_in_operation,total_nominal_capacity_operational_turbines_mw,datetime_cet,actual_generated_smard_mwh,day_ahead_price_eur_mwh,total_nominal_capacity_smard_mw,forecasted_generation_smard_mwh,total_net_load_smard_mwh,residual_load_smard_mwh,mean_wind_speed_10m,mean_wind_speed_100m,wind_direction_angle_10m,wind_direction_angle_100m,mean_sea_level_pressure_mb,wind_gusts_10m,temp_2m_celsius,wind_direction_intercardinal_10m,wind_direction_intercardinal_100m,weighted_temp_2m_celsius,weighted_wind_gusts_10m,weighted_mean_sea_level_pressure_mb,weighted_mean_wind_speed_10m,weighted_mean_wind_speed_100m,weighted_mean_wind_direction_angle_10m,weighted_mean_wind_direction_angle_100m,weighted_wind_direction_intercardinal_10m,weighted_wind_direction_intercardinal_100m
datetime_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
2019-09-01 11:00:00+00:00,11,6,1,9,2019,autumn,28332,52502.721817,2019-09-01 13:00:00+02:00,8900.5,30.23,52792.0,7693.5,49168.5,20061.5,3.617245,4.900414,285.856293,286.265717,1012.734063,9.16702,22.123041,WNW,WNW,21.539017,9.316106,1012.409757,3.832643,5.227208,289.356242,290.068237,WNW,WNW
2018-03-13 04:00:00+00:00,4,1,13,3,2018,spring,27619,50207.660277,2018-03-13 05:00:00+01:00,18308.0,,51633.0,17452.75,56514.75,36476.0,4.763721,8.463181,237.146149,240.304749,998.085156,9.495407,6.864465,WSW,WSW,6.927823,8.646663,996.056484,4.43266,7.903172,243.264784,243.818451,WSW,WSW
2020-12-22 12:00:00+00:00,12,1,22,12,2020,winter,28782,54230.140577,2020-12-22 13:00:00+01:00,31632.25,34.11,53184.0,26366.25,64434.75,24746.25,6.41503,10.551105,253.00885,255.094452,1012.290547,13.243778,11.531213,WSW,WSW,11.533518,12.920934,1010.14057,6.453695,10.510547,259.877278,261.874151,W,W
2018-08-04 17:00:00+00:00,17,5,4,8,2018,summer,28017,51465.382777,2018-08-04 19:00:00+02:00,7622.75,,51633.0,8212.25,52720.5,41170.75,3.278516,4.623217,269.008423,266.735596,1016.803984,6.425434,28.724634,W,W,27.919939,7.343608,1017.182508,3.813612,5.332658,299.756802,299.602105,WNW,WNW
2023-06-26 20:00:00+00:00,20,0,26,6,2023,summer,29412,59256.325137,2023-06-26 22:00:00+02:00,14650.0,129.37,57590.0,16628.0,51719.75,32477.0,4.275035,7.239828,303.3703,306.050415,1016.858438,8.239814,19.119867,WNW,NW,18.282425,8.487698,1016.641374,4.475383,7.478352,300.97427,303.416959,WNW,WNW
2019-12-26 02:00:00+00:00,2,3,26,12,2019,winter,28486,53028.803667,2019-12-26 03:00:00+01:00,7149.25,25.58,52792.0,7643.0,37668.75,28673.75,2.67301,4.994273,257.324341,268.145966,1024.668594,5.113494,3.556482,WSW,W,4.128747,5.430711,1023.99843,2.910379,5.516359,266.28573,276.580678,W,W
2020-04-22 16:00:00+00:00,16,2,22,4,2020,spring,28573,53415.143367,2020-04-22 18:00:00+02:00,18805.75,18.05,53184.0,19015.5,58201.75,27820.25,5.209209,7.705105,74.374466,75.853302,1020.56875,10.764341,17.947321,ENE,ENE,17.580117,10.640238,1022.310299,5.232407,7.671119,75.773675,77.265985,ENE,ENE
2021-03-07 00:00:00+00:00,0,6,7,3,2021,spring,28779,54540.969077,2021-03-07 01:00:00+01:00,11123.5,41.75,54499.0,10768.5,47556.0,31456.75,2.70394,5.042383,179.917297,188.626099,1028.370156,4.905696,-0.510779,S,S,0.575752,5.773412,1028.069257,3.203478,5.9016,219.063963,224.905921,SW,SW
2023-01-25 21:00:00+00:00,21,2,25,1,2023,winter,29257,58009.809497,2023-01-25 22:00:00+01:00,1545.5,163.61,57590.0,3869.25,56271.0,52175.75,1.775975,2.711616,196.23761,201.880173,1026.304531,3.322715,-1.432074,SSW,SSW,-1.417256,3.976094,1026.165761,2.165388,3.353924,219.752287,225.05151,SW,SW
2020-06-15 17:00:00+00:00,17,0,15,6,2020,summer,28617,53550.314167,2020-06-15 19:00:00+02:00,3021.75,43.52,53184.0,3309.25,57525.5,51607.0,2.356881,3.69256,214.299255,214.678894,1016.013438,4.742115,19.347345,SW,SW,20.55267,4.673149,1015.672684,2.406078,3.603022,188.694773,187.911518,S,S
