In [2]:
#setup:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

from wetterdienst.provider.dwd.observation import DwdObservationRequest, DwdObservationDataset, DwdObservationPeriod, DwdObservationResolution
# from wetterdienst import Wetterdienst, Resolution, Period, Parameter
from wetterdienst.provider.dwd.mosmix import(
    DwdMosmixRequest,
    DwdForecastDate,
    DwdMosmixType,
    )

from pprint import pprint

In [3]:
def output_section(title, data):  # pragma: no cover
    print("-" * len(title))
    print(title)
    print("-" * len(title))
    print(data)
    print()

In [4]:
from wetterdienst import Settings
Settings.tidy = True
Settings.humanize =True
Settings.si_units = True
# API = Wetterdienst(provider="dwd", network="observation")

sample_request = DwdMosmixRequest(
        parameter=["DD", "ww"],
        start_issue=DwdForecastDate.LATEST,  # automatically set if left empty
        mosmix_type=DwdMosmixType.LARGE,
    )


# print(dir(sample_request))
pprint(sample_request.discover())

def mosmix_request(params, station_ids):
    """Retrieve stations_result of mosmix forecast."""
    request = DwdMosmixRequest(
        parameter=[params],
        start_issue=DwdForecastDate.LATEST,  # automatically set if left empty
        mosmix_type=DwdMosmixType.LARGE,
    )
    
    stations = request.filter_by_station_id(station_ids)
    # stations=request
    return stations#["station_id"]


{'large': {'cloud_base_convective': {'origin': 'm', 'si': 'm'},
           'cloud_cover_above_7_km': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_below_1000_ft': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_below_500_ft': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_below_7_km': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_between_2_to_7_km': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_effective': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_total': {'origin': 'pct', 'si': 'pct'},
           'error_absolute_pressure_air_site': {'origin': 'Pa', 'si': 'Pa'},
           'error_absolute_temperature_air_mean_200': {'origin': 'K',
                                                       'si': 'K'},
           'error_absolute_temperature_dew_point_mean_200': {'origin': 'K',
                                                             'si': 'K'},
           'error_absolute_wind_direction': {'origin': 'deg', 'si': 'deg'},
        

In [5]:
selected_stations=pd.read_pickle("../data/Stations_filtered_ids/selected_stations.pkl")
print(selected_stations.head())
selected_mosmix_ids=selected_stations.mosmix_id.tolist()
selected_historical_ids=selected_stations.Stations_id.tolist()
print(selected_mosmix_ids)
print(selected_historical_ids)

   Stations_id  von_datum  bis_datum  Stationshoehe  geoBreite  geoLaenge  \
0          183   19730101   20220909             42    54.6791    13.4344   
1          701   19490101   20220915              7    53.5332     8.5761   
2          856   19910101   20220915            551    47.8843    12.5404   
3          953   19550101   20220915            481    49.7619     7.0542   
4          963   19940103   20220915             38    52.5881     8.3424   

  Stationsname              Bundesland mosmix_id  
0       Arkona  Mecklenburg-Vorpommern     10091  
1  Bremerhaven                  Bremen     10129  
2     Chieming                  Bayern     10982  
3   Deuselbach         Rheinland-Pfalz     10615  
4     Diepholz           Niedersachsen     10321  
['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731']
[183, 701, 856, 953, 963, 1443, 1605, 1694, 2115, 3231, 4104, 4177]


In [6]:
start_date="2021-01-01 00:00:00+00:00"
end_date="2021-12-31 23:59:00+00:00"
parameters = ["wind_speed", "sunshine_duration", "pressure_air_site_reduced", "temperature_air_mean_200", "cloud_cover_total"]
mosmix_ids=[str(i).zfill(5) for i in selected_mosmix_ids]
historical_ids = [str(i).zfill(5) for i in selected_historical_ids]
print(mosmix_ids)
print(historical_ids)
# date=pd.date_range(start_date, end_date, freq="H")

['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731']
['00183', '00701', '00856', '00953', '00963', '01443', '01605', '01694', '02115', '03231', '04104', '04177']


header = pd.MultiIndex.from_product([station_ids, parameters], names=['station_id', 'parameter'])
total_df=pd.DataFrame(columns=header)

total_df.loc[:,("date", "date")]=date
total_df.loc[:,("date", "timestamp")]=date.astype('int64')//10**9

for parameter in parameters:
    # station = dwd_request(parameter, start_date, end_date, station_ids)
    for id in station_ids:
        print("Receiving: ", parameter, " from station ", id)
        station = dwd_request(parameter, start_date, end_date, id)
        df = station.values.all().df
        # print(df.value.head())
        # total_df.loc[:, (id, parameter)]=df.value.copy()
        data_df[id]=df.value.copy()

#total_df.head(30)

In [16]:
def get_weatherdata(parameter, mosmix_ids, historical_ids):
    data_df=pd.DataFrame(columns = historical_ids)
    print("Receiving: ", parameter, " from stations: \n", mosmix_ids, "\n", historical_ids)
    for n, id in enumerate(mosmix_ids):
        stations = mosmix_request(parameter, id)
        response = stations.values.all()
        df=response.df
        # print(df)
        # print(df.tail())
        data_df[historical_ids[n]]=df.value
    data_df["date"]=df.date
    data_df = data_df.set_index("date")
    return data_df


In [17]:
for parameter in parameters:
    data_df = get_weatherdata(parameter, mosmix_ids, historical_ids)
    data_df
    data_df.fillna(0)
    path="../data/weather_forecasts/"+parameter+".pkl"
    print(path)
    data_df.to_pickle(path)


Receiving:  wind_speed  from stations: 
 ['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731'] 
 ['00183', '00701', '00856', '00953', '00963', '01443', '01605', '01694', '02115', '03231', '04104', '04177']
../data/weather_forecasts/wind_speed.pkl
Receiving:  sunshine_duration  from stations: 
 ['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731'] 
 ['00183', '00701', '00856', '00953', '00963', '01443', '01605', '01694', '02115', '03231', '04104', '04177']
../data/weather_forecasts/sunshine_duration.pkl
Receiving:  pressure_air_site_reduced  from stations: 
 ['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731'] 
 ['00183', '00701', '00856', '00953', '00963', '01443', '01605', '01694', '02115', '03231', '04104', '04177']
../data/weather_forecasts/pressure_air_site_reduced.pkl
Receiving:  temperature_air_mean_200  from station

In [18]:
parameter=parameters[2]
print(parameter)
path="../data/weather_forecasts/"+parameter+".pkl"
new_df=pd.read_pickle(path)
new_df.describe()
new_df

pressure_air_site_reduced


Unnamed: 0_level_0,00183,00701,00856,00953,00963,01443,01605,01694,02115,03231,04104,04177
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-10-03 10:00:00+00:00,101920.0,102540.0,102730.0,102840.0,102660.0,102730.0,102350.0,102160.0,102480.0,102700.0,102650.0,102850.0
2022-10-03 11:00:00+00:00,101930.0,102540.0,102690.0,102820.0,102660.0,102700.0,102350.0,102160.0,102490.0,102680.0,102640.0,102830.0
2022-10-03 12:00:00+00:00,101940.0,102540.0,102640.0,102760.0,102650.0,102650.0,102340.0,102160.0,102490.0,102650.0,102620.0,102780.0
2022-10-03 13:00:00+00:00,101940.0,102530.0,102590.0,102710.0,102630.0,102600.0,102320.0,102170.0,102480.0,102630.0,102580.0,102730.0
2022-10-03 14:00:00+00:00,101930.0,102510.0,102560.0,102680.0,102590.0,102570.0,102300.0,102160.0,102470.0,102600.0,102560.0,102690.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-13 12:00:00+00:00,101760.0,101590.0,101890.0,101780.0,101690.0,101820.0,101870.0,101600.0,101570.0,101920.0,101760.0,101720.0
2022-10-13 13:00:00+00:00,101740.0,101570.0,101850.0,101750.0,101670.0,101780.0,101820.0,101590.0,101560.0,101890.0,101730.0,101690.0
2022-10-13 14:00:00+00:00,101730.0,101560.0,101830.0,101740.0,101660.0,101760.0,101810.0,101590.0,101530.0,101870.0,101710.0,101660.0
2022-10-13 15:00:00+00:00,101720.0,101560.0,101830.0,101740.0,101650.0,101750.0,101810.0,101590.0,101530.0,101870.0,101710.0,101650.0


In [10]:
# new_df=new_df.fillna(0)
# new_df.to_pickle(path)