## KNMI get data examples


#### External import libraries

In [1]:
import datetime

#### Internal import libraries

In [2]:
import knmi_update_metadata

import knmi_meteo_ingest
import knmi_meteo_transform

In [3]:
# Optional: uncomment line below for more info on script content
# help(knmi_meteo_ingest)
# help(knmi_update_metadata)

#### Optional: update all metadata files
Running the cell below requires a stable internet connection and an available KNMI script-based data retrieval service. The same holds for all subsequent data retrieval steps.

Running this cell is <u>optional</u>, since the metadata is not prone to change (significantly) over time.

#### Improvement suggestions (for production environments)
Since no version control is used for the data in the files, this update simply <strong>overwrites</strong> the existing metadata files in the <code>metadata</code> folder, discarding the old content.

In a production environment, it is recommended to use e.g. tables instead of JSON files and to make use of one of the SCD (Slowly Changing Dimension) types instead. For more information on the SCD types to use in that case, see for example: https://en.wikipedia.org/wiki/Slowly_changing_dimension.

Furthermore, KNMI offers more stable (production-ready) data products through the use of API keys, available on request by contacting KNMI (site in Dutch): https://www.knmi.nl/over-het-knmi/contact/contactformulier

In [4]:
# Update hourly, daily parameter and meteo station metadata files.
# Requires an internet connection and overwrites the existing files in the
# `metadata` folder (the old content is discarded).
knmi_update_metadata.knmi_update_all_metadata()

Updating daily parameter metadata...
Success.
Updating hourly parameter metadata...
Success.
Updating meteo station metadata...
Success.


### Data retrieval examples

#### Example 1: Retrieve all daily data from all KNMI meteo stations for a full year
In the cells below we will retrieve data for all daily parameters for all KNMI automatic meteo stations for a full year. 

We do so by setting <code>start_date</code> and <code>end_date</code>, while passing <code>None</code> for <code>meteo_stns_list</code> and <code>meteo_params_list</code> and leaving <code>mode</code> (default value: <code>'day'</code>) unset.

In [3]:
# Retrieval window for the request; both boundary dates are inclusive
start_date = datetime.date(year=2023, month=1, day=1)
end_date = datetime.date(year=2023, month=12, day=31)

# Optional: uncomment the line below to move end_date back by one day,
# i.e. to treat the value set above as an exclusive upper bound
# end_date -= datetime.timedelta(days=1)

In [4]:
# Fetch the daily dataset from the KNMI web script service.
# Stations and parameters are passed as None; `mode` is not passed at all.
df_day = knmi_meteo_ingest.knmi_meteo_to_df(
    meteo_stns_list=None,
    meteo_params_list=None,
    start_date=start_date,
    end_date=end_date,
)

In [5]:
# Show the result (the display is truncated to the first and last rows)
df_day

Unnamed: 0,STN,YYYYMMDD,DDVEC,FHVEC,FG,FHX,FHXH,FHN,FHNH,FXX,...,VVNH,VVX,VVXH,NG,UG,UX,UXH,UN,UNH,EV24
0,209,20230101,215.0,92.0,96.0,140.0,1.0,30.0,22.0,200.0,...,,,,,,,,,,
1,209,20230102,266.0,61.0,69.0,80.0,2.0,50.0,1.0,100.0,...,,,,,,,,,,
2,209,20230103,196.0,83.0,88.0,150.0,22.0,30.0,3.0,200.0,...,,,,,,,,,,
3,209,20230104,233.0,137.0,141.0,170.0,2.0,120.0,18.0,220.0,...,,,,,,,,,,
4,209,20230105,245.0,82.0,90.0,120.0,2.0,60.0,13.0,160.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17150,391,20231227,176.0,27.0,31.0,50.0,18.0,10.0,1.0,110.0,...,,,,,82.0,95.0,1.0,53.0,21.0,2.0
17151,391,20231228,225.0,48.0,48.0,60.0,11.0,40.0,1.0,130.0,...,,,,,72.0,79.0,4.0,67.0,17.0,3.0
17152,391,20231229,236.0,49.0,50.0,60.0,1.0,40.0,13.0,150.0,...,,,,,81.0,92.0,13.0,72.0,1.0,1.0
17153,391,20231230,213.0,30.0,35.0,50.0,1.0,30.0,6.0,90.0,...,,,,,80.0,87.0,8.0,73.0,14.0,3.0


In [6]:
# Keep only the rows for which an EV24 value is present
df_day[df_day['EV24'].notna()]

Unnamed: 0,STN,YYYYMMDD,DDVEC,FHVEC,FG,FHX,FHXH,FHN,FHNH,FXX,...,VVNH,VVX,VVXH,NG,UG,UX,UXH,UN,UNH,EV24
31,215,20230101,213.0,70.0,74.0,120.0,4.0,30.0,21.0,200.0,...,21.0,80.0,3.0,8.0,80.0,96.0,21.0,67.0,2.0,3.0
32,215,20230102,245.0,34.0,38.0,60.0,1.0,10.0,24.0,120.0,...,4.0,70.0,9.0,6.0,90.0,96.0,5.0,82.0,12.0,2.0
33,215,20230103,187.0,53.0,55.0,100.0,21.0,10.0,1.0,160.0,...,4.0,70.0,13.0,7.0,90.0,99.0,4.0,81.0,21.0,2.0
34,215,20230104,233.0,105.0,107.0,120.0,5.0,90.0,15.0,210.0,...,8.0,70.0,6.0,8.0,86.0,93.0,1.0,78.0,20.0,2.0
35,215,20230105,242.0,62.0,67.0,90.0,2.0,40.0,16.0,150.0,...,16.0,70.0,22.0,8.0,87.0,95.0,16.0,81.0,12.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1452,391,20230127,25.0,34.0,35.0,50.0,4.0,20.0,1.0,100.0,...,,,,,80.0,91.0,6.0,74.0,15.0,3.0
1453,391,20230128,34.0,10.0,15.0,30.0,10.0,0.0,16.0,50.0,...,,,,,78.0,95.0,24.0,68.0,5.0,3.0
1454,391,20230129,221.0,26.0,26.0,50.0,24.0,10.0,1.0,90.0,...,,,,,84.0,95.0,2.0,75.0,19.0,1.0
1455,391,20230130,264.0,36.0,38.0,50.0,1.0,20.0,19.0,100.0,...,,,,,81.0,94.0,5.0,64.0,14.0,4.0


In [7]:
# Load the KNMI meteo station overview (station code, coordinates,
# altitude and name, as shown in the output of the next cell)
df_stns = knmi_meteo_ingest.knmi_load_meteo_stations()

In [8]:
# Show the station table
df_stns

Unnamed: 0,STN,LON(east),LAT(north),ALT(m),NAME
0,209,4.518,52.465,0.0,IJmond
1,210,4.43,52.171,-0.2,Valkenburg Zh
2,215,4.437,52.141,-1.1,Voorschoten
3,225,4.555,52.463,4.4,IJmuiden
4,235,4.781,52.928,1.2,De Kooy
5,240,4.79,52.318,-3.3,Schiphol
6,242,4.921,53.241,10.8,Vlieland
7,248,5.174,52.634,0.8,Wijdenes
8,249,4.979,52.644,-2.4,Berkhout
9,251,5.346,53.392,0.7,Hoorn Terschelling


In [6]:
# Transform the raw KNMI values. Judging by the outputs in this notebook,
# columns get descriptive names and values are rescaled (e.g. FHVEC 92.0
# becomes vect_avg_wind_speed 9.2); see knmi_meteo_transform for the
# exact rules.
df_day_cleaned = knmi_meteo_transform.transform_param_values(df_day)

In [7]:
# Show the transformed result
df_day_cleaned

Unnamed: 0,station_code,date,vect_avg_wind_dir,vect_avg_wind_speed,day_avg_wind_speed,max_hour_avg_wind_speed,hour_slot_max_avg_wind_speed,min_hour_avg_wind_speed,hour_slot_min_avg_wind_speed,max_gust_speed,...,hour_slot_min_visibility,max_visibility_cat,hour_slot_min_visibility.1,cloudiness_in_eights_cat,day_avg_humidity,max_humidity,hour_slot_max_humidity,min_humidity,hour_slot_min_humidity,evap_ref
0,209,20230101,215.0,9.2,9.6,14.0,1.0,3.0,22.0,20.0,...,,,,,,,,,,
1,209,20230102,266.0,6.1,6.9,8.0,2.0,5.0,1.0,10.0,...,,,,,,,,,,
2,209,20230103,196.0,8.3,8.8,15.0,22.0,3.0,3.0,20.0,...,,,,,,,,,,
3,209,20230104,233.0,13.7,14.1,17.0,2.0,12.0,18.0,22.0,...,,,,,,,,,,
4,209,20230105,245.0,8.2,9.0,12.0,2.0,6.0,13.0,16.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17150,391,20231227,176.0,2.7,3.1,5.0,18.0,1.0,1.0,11.0,...,,,,,0.82,0.95,1.0,0.53,21.0,0.2
17151,391,20231228,225.0,4.8,4.8,6.0,11.0,4.0,1.0,13.0,...,,,,,0.72,0.79,4.0,0.67,17.0,0.3
17152,391,20231229,236.0,4.9,5.0,6.0,1.0,4.0,13.0,15.0,...,,,,,0.81,0.92,13.0,0.72,1.0,0.1
17153,391,20231230,213.0,3.0,3.5,5.0,1.0,3.0,6.0,9.0,...,,,,,0.80,0.87,8.0,0.73,14.0,0.3


In [8]:
# List the transformed column names.
# NOTE(review): 'hour_slot_min_visibility' appears twice in the output below
# (right after both 'min_visibility_cat' and 'max_visibility_cat'); the second
# occurrence presumably should be 'hour_slot_max_visibility' - verify the
# column mapping in knmi_meteo_transform.
df_day_cleaned.columns

Index(['station_code', 'date', 'vect_avg_wind_dir', 'vect_avg_wind_speed',
       'day_avg_wind_speed', 'max_hour_avg_wind_speed',
       'hour_slot_max_avg_wind_speed', 'min_hour_avg_wind_speed',
       'hour_slot_min_avg_wind_speed', 'max_gust_speed',
       'hour_slot_max_gust_speed', 'day_avg_temp', 'min_temp',
       'hour_slot_min_temp', 'max_temp', 'hour_slot_max_temp', 'min_temp_10cm',
       'six_hour_slot_min_temp_10cm', 'sunshine_hours',
       'sunshine_day_fraction', 'global_irradiation', 'rain_hours', 'rain_sum',
       'max_rain_hour_sum', 'hour_slot_max_rain_hour_sum', 'day_avg_pressure',
       'max_hour_pressure', 'hour_slot_max_pressure', 'min_hour_pressure',
       'hour_slot_min_pressure', 'min_visibility_cat',
       'hour_slot_min_visibility', 'max_visibility_cat',
       'hour_slot_min_visibility', 'cloudiness_in_eights_cat',
       'day_avg_humidity', 'max_humidity', 'hour_slot_max_humidity',
       'min_humidity', 'hour_slot_min_humidity', 'evap_ref'],
      d