Stündliche Wind- und Niederschlagswerte (historisch und aktuell) der DWD-Station Hannover (Stations-ID 02014), heruntergeladen von https://www.dwd.de/DE/leistungen/klimadatendeutschland/klarchivstunden.html. Verwendete Archive:

* stundenwerte_FF_02014_akt.zip (Wind, Hannover, aktuell)
* stundenwerte_FF_02014_hist.zip (Wind, Hannover, historisch)
* stundenwerte_RR_02014_akt.zip (Niederschlag, Hannover, aktuell)
* stundenwerte_RR_02014_hist.zip (Niederschlag, Hannover, historisch)

In [25]:
import os
from dotenv import load_dotenv
from zipfile import ZipFile 
import pandas as pd

load_dotenv()

# Directory holding the downloaded DWD archives; taken from the DWD_DATA
# environment variable (optionally provided through a .env file).
data_dir = os.environ['DWD_DATA']
existing_files = os.listdir(data_dir)
Hannover_code = '02014'

# raw_dataframes[measure][section] -> DataFrame parsed from the single
# "produkt_*" member of stundenwerte_<measure>_<station>_<section>.zip
raw_dataframes = {}
for measure in ['FF', 'RR']:
    raw_dataframes[measure] = {}
    for section in ['akt', 'hist']:
        archive_name = f'stundenwerte_{measure}_{Hannover_code}_{section}.zip'
        assert archive_name in existing_files, f'{archive_name} not found in {data_dir}'
        # The handle must not be called `zip`: at notebook-global scope that
        # would replace the builtin zip() for every cell executed afterwards.
        with ZipFile(os.path.join(data_dir, archive_name)) as archive:
            product_names = [name for name in archive.namelist() if name.startswith('produkt_')]
            assert len(product_names) == 1, (
                f'expected exactly one produkt_* member in {archive_name}, found {product_names}'
            )
            with archive.open(product_names[0]) as product_file:
                raw_dataframes[measure][section] = pd.read_csv(product_file, sep=';')

raw_dataframes['FF']['hist'].head()  # wind historic data sample

Unnamed: 0,STATIONS_ID,MESS_DATUM,QN_3,F,D,eor
0,2014,1950010100,5,2.9,-999,eor
1,2014,1950010101,5,2.4,-999,eor
2,2014,1950010102,5,2.3,-999,eor
3,2014,1950010103,5,2.5,-999,eor
4,2014,1950010104,5,2.3,-999,eor


In [36]:

def get_wind_data(timeframe_start=2021050100, timeframe_end=2022083123,
                  hour_of_day=13, max_strength=6, source=None):
    """Return the wind rows for one hour of the day within a time window.

    Parameters
    ----------
    timeframe_start, timeframe_end : int
        Inclusive bounds compared against ``MESS_DATUM``. The values are
        integers whose last two digits are the hour (e.g. ``2021050113``).
    hour_of_day : int
        Only rows with ``MESS_DATUM % 100 == hour_of_day`` are kept.
    max_strength : float
        Exclusive upper bound for ``strength``; stronger wind is dropped.
    source : pandas.DataFrame, optional
        Raw wind frame with the columns ``STATIONS_ID``, ``MESS_DATUM``,
        ``QN_3``, ``F``, ``D`` and ``eor``. Defaults to the notebook-global
        ``raw_dataframes['FF']['hist']``.

    Returns
    -------
    pandas.DataFrame
        Columns ``MESS_DATUM``, ``strength`` (from ``F``) and ``direction``
        (from ``D``); the original row index is preserved. ``source`` itself
        is not modified.
    """
    if source is None:
        source = raw_dataframes['FF']['hist']

    # The CSV header pads some names with blanks ('   F', '   D'). Strip the
    # padding first so the rename below cannot silently miss a column when
    # the padding differs.
    wind = (
        source
        .rename(columns=lambda name: name.strip() if isinstance(name, str) else name)
        .drop(columns=['STATIONS_ID', 'QN_3', 'eor'])
        .rename(columns={'F': 'strength', 'D': 'direction'})
    )

    timestamp = wind['MESS_DATUM']
    keep = (
        timestamp.between(timeframe_start, timeframe_end)  # inclusive on both ends
        & (timestamp % 100 == hour_of_day)
        # Negative values are rejected (the sample output shows -999 entries);
        # wind at or above max_strength is considered too strong.
        & (wind['strength'] >= 0)
        & (wind['strength'] < max_strength)
        & (wind['direction'] >= 0)
    )
    return wind[keep]

def get_rain_data(timeframe_start=2021050100, timeframe_end=2022083123,
                  hour_of_day=13, source=None):
    """Return the precipitation rows for one hour of the day within a time window.

    Remaining data columns, as described by the dataset notes:
      * R1     - hourly precipitation height in mm
      * RS_IND - precipitation indicator yes/no (numeric code)
      * WRTR   - hourly precipitation form (numeric code)

    Parameters
    ----------
    timeframe_start, timeframe_end : int
        Inclusive bounds compared against ``MESS_DATUM``. The values are
        integers whose last two digits are the hour (e.g. ``2021050113``).
    hour_of_day : int
        Only rows with ``MESS_DATUM % 100 == hour_of_day`` are kept.
    source : pandas.DataFrame, optional
        Raw precipitation frame containing at least ``STATIONS_ID``,
        ``MESS_DATUM``, ``QN_8`` and ``eor``. Defaults to the notebook-global
        ``raw_dataframes['RR']['hist']``.

    Returns
    -------
    pandas.DataFrame
        ``source`` without ``STATIONS_ID``, ``QN_8`` and ``eor``, reduced to
        the matching rows. Column names and the row index are left exactly
        as they are in ``source``.
    """
    if source is None:
        source = raw_dataframes['RR']['hist']

    rain = source.drop(columns=['STATIONS_ID', 'QN_8', 'eor'])

    timestamp = rain['MESS_DATUM']
    keep = (
        timestamp.between(timeframe_start, timeframe_end)  # inclusive on both ends
        & (timestamp % 100 == hour_of_day)
    )
    return rain[keep]

# Build the filtered wind and rain samples (wind first, then rain).
df_wind, df_rain = get_wind_data(), get_rain_data()

# Attach the rain columns to the wind rows via their shared timestamp.
# validate="1:1" lets pandas raise if a timestamp is duplicated on either side.
df_wind.join(
    df_rain.set_index("MESS_DATUM"),
    on="MESS_DATUM",
    validate="1:1",
).head()



Unnamed: 0,MESS_DATUM,strength,direction,R1,RS_IND,WRTR
624821,2021050113,4.6,270,0.0,0,0
624941,2021050613,4.4,260,0.0,1,6
624989,2021050813,4.4,200,0.0,0,0
625061,2021051113,3.6,20,0.0,0,0
625109,2021051313,4.7,310,0.0,0,0
