In [109]:
#setup:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

from wetterdienst.provider.dwd.observation import DwdObservationRequest, DwdObservationDataset, DwdObservationPeriod, DwdObservationResolution
# from wetterdienst import Wetterdienst, Resolution, Period, Parameter
from wetterdienst.provider.dwd.mosmix import(
    DwdMosmixRequest,
    DwdForecastDate,
    DwdMosmixType,
    )

from pprint import pprint

In [110]:
def output_section(title, data):  # pragma: no cover
    """Print ``data`` under a heading framed by two dashed rules.

    The rules are exactly as wide as ``title``; a blank line is printed
    after ``data`` to separate consecutive sections.
    """
    rule = "-" * len(title)
    for heading_line in (rule, title, rule):
        print(heading_line)
    print(data)
    print()

In [120]:
from wetterdienst import Settings
# Set wetterdienst's global Settings flags before any request object is built.
# NOTE(review): the precise effect of each flag is defined by the wetterdienst
# library, not by this notebook — confirm against the installed version.
Settings.tidy = True
Settings.humanize =True
Settings.si_units = True
# API = Wetterdienst(provider="dwd", network="observation")

# Sample request for the MOSMIX "large" product; in the visible cells it is
# only used to list the available parameters via discover() below.
sample_request = DwdMosmixRequest(
        parameter=["DD", "ww"],
        start_issue=DwdForecastDate.LATEST,  # automatically set if left empty
        mosmix_type=DwdMosmixType.LARGE,
    )


# print(dir(sample_request))
# The captured output shows discover() returning a dict keyed by 'large' that
# maps each parameter name to its 'origin' and 'si' unit strings.
pprint(sample_request.discover())

def mosmix_request(params, station_ids):
    """Build a latest-issue MOSMIX "large" request and filter it by station.

    Args:
        params: A single parameter name (str) or an iterable of parameter
            names. A bare string is wrapped into a one-element list; any
            other iterable is passed on as a flat list (the original code
            always wrapped the argument, turning a list into a nested list).
        station_ids: Station id(s) handed unchanged to
            ``DwdMosmixRequest.filter_by_station_id``.

    Returns:
        Whatever ``filter_by_station_id`` returns; callers in this notebook
        read the forecast from it via ``.values.all().df``.
    """
    # Strings are iterable too, so they must be special-cased before list().
    parameter_list = [params] if isinstance(params, str) else list(params)

    request = DwdMosmixRequest(
        parameter=parameter_list,
        start_issue=DwdForecastDate.LATEST,  # automatically set if left empty
        mosmix_type=DwdMosmixType.LARGE,
    )
    return request.filter_by_station_id(station_ids)


{'large': {'cloud_base_convective': {'origin': 'm', 'si': 'm'},
           'cloud_cover_above_7_km': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_below_1000_ft': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_below_500_ft': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_below_7_km': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_between_2_to_7_km': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_effective': {'origin': 'pct', 'si': 'pct'},
           'cloud_cover_total': {'origin': 'pct', 'si': 'pct'},
           'error_absolute_pressure_air_site': {'origin': 'Pa', 'si': 'Pa'},
           'error_absolute_temperature_air_mean_200': {'origin': 'K',
                                                       'si': 'K'},
           'error_absolute_temperature_dew_point_mean_200': {'origin': 'K',
                                                             'si': 'K'},
           'error_absolute_wind_direction': {'origin': 'deg', 'si': 'deg'},
        

In [122]:
# Load the pre-filtered station table. NOTE: unpickling executes code embedded
# in the file, so this path must only ever point at a trusted, locally
# produced pickle.
selected_stations = pd.read_pickle("../data/Stations_filtered_ids/selected_stations.pkl")
print(selected_stations.head())

# Pull both id columns out as plain Python lists for the request loops below.
selected_mosmix_ids = selected_stations["mosmix_id"].tolist()
selected_historical_ids = selected_stations["Stations_id"].tolist()
print(selected_mosmix_ids)
print(selected_historical_ids)

   Stations_id  von_datum  bis_datum  Stationshoehe  geoBreite  geoLaenge  \
0          183   19730101   20220909             42    54.6791    13.4344   
1          701   19490101   20220915              7    53.5332     8.5761   
2          856   19910101   20220915            551    47.8843    12.5404   
3          953   19550101   20220915            481    49.7619     7.0542   
4          963   19940103   20220915             38    52.5881     8.3424   

  Stationsname              Bundesland mosmix_id  
0       Arkona  Mecklenburg-Vorpommern     10091  
1  Bremerhaven                  Bremen     10129  
2     Chieming                  Bayern     10982  
3   Deuselbach         Rheinland-Pfalz     10615  
4     Diepholz           Niedersachsen     10321  
['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731']
[183, 701, 856, 953, 963, 1443, 1605, 1694, 2115, 3231, 4104, 4177]


In [123]:
# Date range as strings carrying an explicit +00:00 offset.
start_date = "2021-01-01 00:00:00+00:00"
end_date = "2021-12-31 23:59:00+00:00"

# Weather parameters to download, one pickle file per entry later on.
parameters = [
    "wind_speed",
    "sunshine_duration",
    "pressure_air_site_reduced",
    "temperature_air_mean_200",
    "cloud_cover_total",
]

# Normalise every station id to a string left-padded with zeros to width 5.
mosmix_ids = [str(station_id).zfill(5) for station_id in selected_mosmix_ids]
historical_ids = [str(station_id).zfill(5) for station_id in selected_historical_ids]
print(mosmix_ids)
print(historical_ids)
# date=pd.date_range(start_date, end_date, freq="H")

['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731']
['00183', '00701', '00856', '00953', '00963', '01443', '01605', '01694', '02115', '03231', '04104', '04177']


# NOTE(review): this cell has no execution counter and relies on names that are
# not defined anywhere in the visible notebook (`station_ids`, `date`,
# `dwd_request`, `data_df`). It will fail on a fresh kernel unless those are
# defined elsewhere — confirm whether the cell is still needed or should go.

# Column layout: one (station_id, parameter) pair per column.
header = pd.MultiIndex.from_product([station_ids, parameters], names=['station_id', 'parameter'])
total_df=pd.DataFrame(columns=header)

# Extra ("date", ...) columns: the raw dates and their value as whole seconds
# (int64 representation floor-divided by 10**9).
total_df.loc[:,("date", "date")]=date
total_df.loc[:,("date", "timestamp")]=date.astype('int64')//10**9

# One request per (parameter, station) pair.
for parameter in parameters:
    # station = dwd_request(parameter, start_date, end_date, station_ids)
    for id in station_ids:
        print("Receiving: ", parameter, " from station ", id)
        station = dwd_request(parameter, start_date, end_date, id)
        df = station.values.all().df
        # print(df.value.head())
        # total_df.loc[:, (id, parameter)]=df.value.copy()
        # NOTE(review): the result is stored in `data_df` keyed by station id
        # only, so `total_df` is never filled and each parameter overwrites the
        # previous one's column — the commented line above looks like the
        # intended target; confirm.
        data_df[id]=df.value.copy()

#total_df.head(30)

In [129]:
def get_weatherdata(parameter, mosmix_ids, historical_ids):
    """Download one MOSMIX parameter for several stations into one frame.

    Each station is requested separately through ``mosmix_request``; the
    ``value`` column of its result becomes a column of the returned frame,
    labelled with the historical id at the same position in
    ``historical_ids``.

    Args:
        parameter: Parameter name forwarded to ``mosmix_request``.
        mosmix_ids: MOSMIX station ids to query, in order.
        historical_ids: Column labels, paired positionally with
            ``mosmix_ids``. Surplus labels are kept as empty columns.

    Returns:
        pandas.DataFrame with one column per entry of ``historical_ids``,
        a ``date`` column taken from the last station's result and a
        ``timestamp`` column holding ``date`` as int64 floor-divided by
        10**9.

    Raises:
        ValueError: If there are fewer ``historical_ids`` than
            ``mosmix_ids``. The original code hit an IndexError for this
            case only after requests had already been sent.
    """
    if len(historical_ids) < len(mosmix_ids):
        raise ValueError(
            "historical_ids must provide a label for every entry of mosmix_ids"
        )

    data_df = pd.DataFrame(columns=historical_ids)
    print("Receiving: ", parameter, " from stations: \n", mosmix_ids, "\n", historical_ids)

    forecast_df = None
    # zip pairs ids positionally and avoids shadowing the builtin `id`.
    for mosmix_id, historical_id in zip(mosmix_ids, historical_ids):
        stations = mosmix_request(parameter, mosmix_id)
        forecast_df = stations.values.all().df
        data_df[historical_id] = forecast_df.value

    if forecast_df is None:
        # No station requested: keep the documented columns instead of
        # failing with a NameError on the never-assigned result frame.
        data_df["date"] = pd.Series(dtype="datetime64[ns, UTC]")
    else:
        # As before, dates come from the last station's result.
        data_df["date"] = forecast_df.date
    data_df["timestamp"] = data_df["date"].astype("int64") // 10**9
    return data_df


In [137]:
# Download every parameter for all selected stations and store one pickle per
# parameter under ../data/weather_forecasts/.
for parameter in parameters:
    data_df = get_weatherdata(parameter, mosmix_ids, historical_ids)
    # fillna returns a new object, so its result has to be assigned; the
    # original call discarded it and saved the NaNs unchanged. Only the station
    # value columns are filled, so the date column is never replaced by 0.
    data_df[historical_ids] = data_df[historical_ids].fillna(0)
    path = "../data/weather_forecasts/" + parameter + ".pkl"
    print(path)
    data_df.to_pickle(path)


Receiving:  wind_speed  from stations: 
 ['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731'] 
 ['00183', '00701', '00856', '00953', '00963', '01443', '01605', '01694', '02115', '03231', '04104', '04177']
../data/weather_forecasts/wind_speed.pkl
Receiving:  sunshine_duration  from stations: 
 ['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731'] 
 ['00183', '00701', '00856', '00953', '00963', '01443', '01605', '01694', '02115', '03231', '04104', '04177']
../data/weather_forecasts/sunshine_duration.pkl
Receiving:  pressure_air_site_reduced  from stations: 
 ['10091', '10129', '10982', '10615', '10321', '10803', '10365', '10168', '10015', '10548', '10776', '10731'] 
 ['00183', '00701', '00856', '00953', '00963', '01443', '01605', '01694', '02115', '03231', '04104', '04177']
../data/weather_forecasts/pressure_air_site_reduced.pkl
Receiving:  temperature_air_mean_200  from station

In [136]:
# Reload one saved forecast file (third entry of `parameters`) and show its
# summary statistics as a quick sanity check.
parameter = parameters[2]
print(parameter)
path = f"../data/weather_forecasts/{parameter}.pkl"
new_df = pd.read_pickle(path)
new_df.describe()

pressure_air_site_reduced


Unnamed: 0,00183,00701,00856,00953,00963,01443,01605,01694,02115,03231,04104,04177,timestamp
count,247.0,247.0,247.0,247.0,247.0,247.0,247.0,247.0,247.0,247.0,247.0,247.0,247.0
mean,100922.186235,100875.82996,101321.376518,101158.097166,100922.550607,101274.777328,101049.230769,100930.97166,100815.1417,101177.206478,101278.340081,101201.902834,1664262000.0
std,727.791568,703.935525,535.088064,541.249704,694.772271,455.716905,695.664138,721.119181,720.255629,615.904954,582.689375,524.277302,257209.0
min,99870.0,99660.0,100220.0,100030.0,99660.0,100260.0,99860.0,99740.0,99590.0,100010.0,100110.0,100050.0,1663819000.0
25%,100330.0,100400.0,101050.0,100930.0,100470.0,101135.0,100595.0,100435.0,100270.0,100845.0,100950.0,100955.0,1664041000.0
50%,100830.0,100800.0,101350.0,101220.0,100910.0,101310.0,101070.0,100900.0,100770.0,101250.0,101330.0,101250.0,1664262000.0
75%,101360.0,101330.0,101580.0,101330.0,101340.0,101440.0,101405.0,101315.0,101315.0,101440.0,101580.0,101420.0,1664483000.0
max,102560.0,102480.0,102580.0,102490.0,102500.0,102430.0,102610.0,102570.0,102420.0,102650.0,102600.0,102450.0,1664705000.0


In [156]:
# new_df=new_df.fillna(0)
# new_df.to_pickle(path)