In [1]:
from wetterdienst import Wetterdienst
from wetterdienst.provider.dwd.observation import DwdObservationRequest
import geopandas
import numpy as np
import pandas as pd

In [2]:
# Handle for the DWD observation network.
# NOTE(review): DWD is not referenced again in the visible cells - confirm it is still needed.
DWD = Wetterdienst(provider="dwd", network="observation")

# Request definition: daily climate summary for the "recent" period.
request_kwargs = {
    "parameter": ["climate_summary"],
    "resolution": "daily",
    "period": "recent",
}
request = DwdObservationRequest(**request_kwargs)

In [3]:
# Resolve the station selection once and reuse the result for both the
# station metadata table and the observation values, instead of evaluating
# request.all() twice.
all_stations = request.all()

# Station metadata, one row per station.
stations = all_stations.df

# Observation values for every selected station.
data = all_stations.values.all().df

In [4]:
# Turn the longitude/latitude columns into point geometries.
# NOTE(review): no CRS is attached to the geometries here - confirm whether one should be set.
station_points = geopandas.points_from_xy(
    x=stations["longitude"], y=stations["latitude"]
)

# Wrap the station table in a GeoDataFrame, index it by "id" (the renamed
# "station_id" column) and drop the raw coordinate columns, which are now
# carried by the geometry column.
# Re-running this cell on its own fails because the coordinate columns are
# already gone from `stations` after the first run.
stations = (
    geopandas.GeoDataFrame(stations, geometry=station_points)
    .rename(columns={"station_id": "id"})
    .set_index("id")
    .drop(columns=["longitude", "latitude"])
)
stations


Unnamed: 0_level_0,from_date,to_date,height,name,state,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00011,1980-09-01 00:00:00+00:00,2020-09-30 00:00:00+00:00,680.0,Donaueschingen (Landeplatz),Baden-Württemberg,POINT (8.52050 47.97370)
00044,1969-01-01 00:00:00+00:00,2022-07-05 00:00:00+00:00,44.0,Großenkneten,Niedersachsen,POINT (8.23700 52.93360)
00071,1986-11-01 00:00:00+00:00,2019-12-31 00:00:00+00:00,759.0,Albstadt-Badkap,Baden-Württemberg,POINT (8.97840 48.21560)
00073,1959-03-01 00:00:00+00:00,2022-07-05 00:00:00+00:00,340.0,Aldersbach-Kriestorf,Bayern,POINT (13.05060 48.61590)
00078,1961-01-01 00:00:00+00:00,2022-07-05 00:00:00+00:00,65.0,Alfhausen,Niedersachsen,POINT (7.91260 52.48530)
...,...,...,...,...,...,...
15911,2021-06-01 00:00:00+00:00,2022-07-05 00:00:00+00:00,278.0,Großpostwitz-Denkwitz,Sachsen,POINT (14.43430 51.13480)
15976,2020-04-02 00:00:00+00:00,2022-07-05 00:00:00+00:00,216.0,Ober-Olm/Bellem,Rheinland-Pfalz,POINT (8.21070 49.95290)
15978,2020-04-02 00:00:00+00:00,2022-07-05 00:00:00+00:00,316.0,Sembach,Rheinland-Pfalz,POINT (7.86180 49.50440)
19171,2020-09-01 00:00:00+00:00,2022-07-05 00:00:00+00:00,13.0,Hasenkrug-Hardebek,Schleswig-Holstein,POINT (9.85530 54.00380)


In [5]:
# Export the station GeoDataFrame as a GeoJSON asset.
stations.to_file(
    filename="src/assets/stations.geo.json",
    driver="GeoJSON",
)


In [14]:
# Number of rows in the long-format observation table.
data.shape[0]

4351228

In [16]:
# Show the long-format observation table: one row per station, parameter and
# date, with the measured value and its quality flag.
data

Unnamed: 0,station_id,dataset,parameter,date,value,quality
0,00011,climate_summary,wind_gust_max,2020-02-13 00:00:00+00:00,18.00,10.0
1,00011,climate_summary,wind_gust_max,2020-02-14 00:00:00+00:00,10.70,10.0
2,00011,climate_summary,wind_gust_max,2020-02-15 00:00:00+00:00,9.60,10.0
3,00011,climate_summary,wind_gust_max,2020-02-16 00:00:00+00:00,14.60,10.0
4,00011,climate_summary,wind_gust_max,2020-02-17 00:00:00+00:00,16.50,10.0
...,...,...,...,...,...,...
4351223,19172,climate_summary,temperature_air_min_005,2022-07-01 00:00:00+00:00,284.85,1.0
4351224,19172,climate_summary,temperature_air_min_005,2022-07-02 00:00:00+00:00,283.85,1.0
4351225,19172,climate_summary,temperature_air_min_005,2022-07-03 00:00:00+00:00,283.95,1.0
4351226,19172,climate_summary,temperature_air_min_005,2022-07-04 00:00:00+00:00,282.65,1.0


In [20]:
# Lookup table from numeric precipitation_form codes to text labels.
# Codes 1 and 6 intentionally share the label "rain".
precipitation_form = dict(
    [
        (0, "no precipitation"),
        (1, "rain"),
        (4, "unknown"),
        (6, "rain"),
        (7, "snow"),
        (8, "rain and snow"),
        (9, "error or missing"),
    ]
)

In [21]:
# Reshape the long observation table to wide form: one row per
# (station_id, date) pair and one column per parameter, filled from "value".
df = data.pivot(
    index=["station_id", "date"],
    columns="parameter",
    values="value",
)

In [30]:
# Translate the numeric precipitation_form codes into their text labels.
#
# First make sure every non-missing code has a label. A plain dict lookup
# would raise KeyError for an unknown code, whereas Series.map would silently
# turn it into NaN, so the check keeps the failure loud. It also rejects
# non-integer codes instead of truncating them to the next lower code, and it
# trips if this cell is re-run after the column already holds labels.
unmapped_codes = set(df["precipitation_form"].dropna().unique()) - set(precipitation_form)
if unmapped_codes:
    raise KeyError(
        f"precipitation_form codes without a label: {sorted(unmapped_codes, key=str)}"
    )

# Series.map performs the dictionary lookup in one vectorised pass and leaves
# missing values as NaN. Bracket assignment is the unambiguous way to
# overwrite an existing column.
df["precipitation_form"] = (
    df["precipitation_form"].map(precipitation_form).astype("category")
)


In [42]:
# Shift every temperature column by -273.15 (Kelvin to degrees Celsius).
# Running this cell more than once subtracts the offset again.
temperature_columns = [
    name for name in df.columns if name.startswith("temperature")
]
for column in temperature_columns:
    df[column] = df[column].sub(273.15)

In [43]:
# Show the wide table after the precipitation_form labels and the
# temperature offset have been applied.
df

Unnamed: 0_level_0,parameter,cloud_cover_total,humidity,precipitation_form,precipitation_height,pressure_air_site,pressure_vapor,snow_depth,sunshine_duration,temperature_air_max_200,temperature_air_mean_200,temperature_air_min_005,temperature_air_min_200,wind_gust_max,wind_speed
station_id,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
00011,2020-02-13 00:00:00+00:00,,,,,,,,,,,,,18.0,4.4
00011,2020-02-14 00:00:00+00:00,,,,,,,,,,,,,10.7,3.0
00011,2020-02-15 00:00:00+00:00,,,,,,,,,,,,,9.6,2.8
00011,2020-02-16 00:00:00+00:00,,,,,,,,,,,,,14.6,3.7
00011,2020-02-17 00:00:00+00:00,,,,,,,,,,,,,16.5,4.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19172,2022-07-01 00:00:00+00:00,,86.71,unknown,1.5,,1640.0,,,20.5,16.5,11.7,13.4,,
19172,2022-07-02 00:00:00+00:00,,68.08,no precipitation,0.0,,1360.0,,,22.8,18.0,10.7,12.4,,
19172,2022-07-03 00:00:00+00:00,,73.88,unknown,5.5,,1530.0,,,25.2,18.5,10.8,12.7,,
19172,2022-07-04 00:00:00+00:00,,76.33,no precipitation,0.0,,1370.0,,,20.5,16.0,9.5,11.3,,


In [50]:
# Write the wide table to the CSV asset, with compact float formatting and
# dates reduced to YYYY-MM-DD.
df.to_csv(
    path_or_buf="src/assets/data.csv",
    date_format="%Y-%m-%d",
    float_format="%g",
)