## Imports

In [9]:
import pathlib

import pandas as pd
import plotly.express as px

## Pandas display and plotting options

In [10]:
# Use plotly as the backend for DataFrame.plot / Series.plot calls.
pd.set_option("plotting.backend", "plotly")
# Show floats with three decimal places whenever a frame or series is displayed.
pd.set_option("display.float_format", "{:.3f}".format)

## Constants definition

In [3]:
# Forecast-weather CSV, given relative to the directory the notebook is run from.
WEATHER_PATH: pathlib.Path = pathlib.Path(
    "../data/predict-energy-behavior-of-prosumers/forecast_weather.csv"
)

## Data load

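> **Note:** `origin_date`, `forecast_date` and `euros_per_mwh` below describe day-ahead electricity price data and are not columns of `forecast_weather.csv` (compare with the output of `weather.columns`). The weather table instead contains `origin_datetime`, `forecast_datetime`, `hours_ahead` and the meteorological measurements.

- **origin_date** - The date when the day-ahead prices became available.
- **forecast_date** - Represents the start of the 1-hour period when the price is valid.
- **euros_per_mwh** - The price of electricity on the day-ahead markets in euros per megawatt hour.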
- **data_block_id** - All rows sharing the same `data_block_id` will be available at the same forecast time. This is a function of what information is available when forecasts are actually made, at 11 AM each morning. For example, if the forecast weather `data_block_id` for predictions made on October 31st is 100, then the historic weather `data_block_id` for October 31st will be 101, as the historic weather data is only actually available the next day.

In [4]:
# Read the raw forecast CSV with pandas' default parsing (no dtype or date hints),
# so the following cells show the columns exactly as pandas infers them.
weather: pd.DataFrame = pd.read_csv(filepath_or_buffer=WEATHER_PATH)
# Preview the first ten rows.
weather.head(n=10)

Unnamed: 0,latitude,longitude,origin_datetime,hours_ahead,temperature,dewpoint,cloudcover_high,cloudcover_low,cloudcover_mid,cloudcover_total,10_metre_u_wind_component,10_metre_v_wind_component,data_block_id,forecast_datetime,direct_solar_radiation,surface_solar_radiation_downwards,snowfall,total_precipitation
0,57.6,21.7,2021-09-01 02:00:00,1,15.655786,11.553613,0.904816,0.019714,0.0,0.905899,-0.411328,-9.106137,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
1,57.6,22.2,2021-09-01 02:00:00,1,13.003931,10.689844,0.886322,0.004456,0.0,0.886658,0.206347,-5.355405,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
2,57.6,22.7,2021-09-01 02:00:00,1,14.206567,11.671777,0.729034,0.005615,0.0,0.730499,1.451587,-7.417905,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
3,57.6,23.2,2021-09-01 02:00:00,1,14.844507,12.264917,0.336304,0.074341,0.000626,0.385468,1.090869,-9.163999,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
4,57.6,23.7,2021-09-01 02:00:00,1,15.293848,12.458887,0.102875,0.088074,1.5e-05,0.17659,1.268481,-8.975766,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
5,57.6,24.2,2021-09-01 02:00:00,1,15.448999,12.878687,0.0,0.232635,0.053558,0.237198,1.114184,-7.702085,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
6,57.6,24.7,2021-09-01 02:00:00,1,10.723535,10.032739,0.095764,0.222443,0.034836,0.302612,1.374927,-2.276303,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
7,57.6,25.2,2021-09-01 02:00:00,1,10.364893,8.832788,0.469421,0.105103,0.055603,0.528717,1.847339,-2.719419,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
8,57.6,25.7,2021-09-01 02:00:00,1,9.783228,8.720483,0.370117,0.091003,0.035339,0.430389,1.459521,-2.114682,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0
9,57.6,26.2,2021-09-01 02:00:00,1,9.965112,9.094995,0.004456,0.247803,0.01503,0.252289,1.481128,-2.359311,1,2021-09-01 03:00:00,0.0,0.0,0.0,0.0


In [5]:
# Display the column labels of the loaded frame.
weather.columns

Index(['latitude', 'longitude', 'origin_datetime', 'hours_ahead',
       'temperature', 'dewpoint', 'cloudcover_high', 'cloudcover_low',
       'cloudcover_mid', 'cloudcover_total', '10_metre_u_wind_component',
       '10_metre_v_wind_component', 'data_block_id', 'forecast_datetime',
       'direct_solar_radiation', 'surface_solar_radiation_downwards',
       'snowfall', 'total_precipitation'],
      dtype='object')

In [6]:
# Display the dtype pandas assigned to each column when reading the CSV.
weather.dtypes

latitude                             float64
longitude                            float64
origin_datetime                       object
hours_ahead                            int64
temperature                          float64
dewpoint                             float64
cloudcover_high                      float64
cloudcover_low                       float64
cloudcover_mid                       float64
cloudcover_total                     float64
10_metre_u_wind_component            float64
10_metre_v_wind_component            float64
data_block_id                          int64
forecast_datetime                     object
direct_solar_radiation               float64
surface_solar_radiation_downwards    float64
snowfall                             float64
total_precipitation                  float64
dtype: object

## EDA

### Check DataFrame info

In [7]:
# Print the full per-column summary of the frame.
weather.info(
    show_counts=True,     # include the non-null count for every column
    memory_usage="deep",  # introspect object columns for the real memory footprint
    verbose=True,         # list every column instead of a shortened summary
)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3424512 entries, 0 to 3424511
Data columns (total 18 columns):
 #   Column                             Non-Null Count    Dtype  
---  ------                             --------------    -----  
 0   latitude                           3424512 non-null  float64
 1   longitude                          3424512 non-null  float64
 2   origin_datetime                    3424512 non-null  object 
 3   hours_ahead                        3424512 non-null  int64  
 4   temperature                        3424512 non-null  float64
 5   dewpoint                           3424512 non-null  float64
 6   cloudcover_high                    3424512 non-null  float64
 7   cloudcover_low                     3424512 non-null  float64
 8   cloudcover_mid                     3424512 non-null  float64
 9   cloudcover_total                   3424512 non-null  float64
 10  10_metre_u_wind_component          3424512 non-null  float64
 11  10_metre_v_wind_componen

In [11]:
# Summary statistics for the numeric columns; the quartiles listed here are
# the same ones describe() uses by default.
weather.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,latitude,longitude,hours_ahead,temperature,dewpoint,cloudcover_high,cloudcover_low,cloudcover_mid,cloudcover_total,10_metre_u_wind_component,10_metre_v_wind_component,data_block_id,direct_solar_radiation,surface_solar_radiation_downwards,snowfall,total_precipitation
count,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424512.0,3424510.0,3424512.0,3424512.0
mean,58.65,24.95,24.5,5.744,2.412,0.395,0.435,0.359,0.682,1.255,0.725,319.0,151.188,110.764,0.0,0.0
std,0.687,2.016,13.853,7.844,7.121,0.444,0.439,0.42,0.401,3.995,4.224,183.886,256.507,187.444,0.0,0.0
min,57.6,21.7,1.0,-27.499,-29.684,0.0,0.0,0.0,0.0,-17.577,-22.116,1.0,-0.773,-0.326,-0.0,-0.0
25%,58.125,23.2,12.75,0.261,-2.364,0.0,0.0,0.0,0.265,-1.467,-1.978,160.0,0.0,0.0,0.0,0.0
50%,58.65,24.95,24.5,4.873,1.836,0.089,0.231,0.101,0.977,1.469,0.943,319.0,0.0,0.604,0.0,0.0
75%,59.175,26.7,36.25,11.146,7.302,0.978,0.999,0.9,1.0,3.814,3.508,478.0,212.845,144.172,0.0,0.0
max,59.7,28.2,48.0,31.811,23.681,1.0,1.0,1.0,1.0,22.573,19.314,637.0,954.422,848.714,0.005,0.017


## Conclusion

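1. Very simple: the only preparation needed is converting the columns — `origin_datetime` and `forecast_datetime` are loaded as `object` rather than as datetimes.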