# Introduction

Example script for downloading station data.

https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=DD&parameters=DDX&parameters=DDX_FLAG&parameters=DD_FLAG&parameters=FF&parameters=FFAM&parameters=FFAM_FLAG&parameters=FFX&parameters=FFX_FLAG&parameters=FF_FLAG&parameters=GSX&parameters=GSX_FLAG&parameters=HSR&parameters=HSR_FLAG&parameters=HSX&parameters=HSX_FLAG&parameters=P&parameters=P0&parameters=P0_FLAG&parameters=P_FLAG&parameters=QFLAG&parameters=RF&parameters=RF_FLAG&parameters=RR&parameters=RRM&parameters=RRM_FLAG&parameters=RR_FLAG&parameters=SH&parameters=SH_FLAG&parameters=SO&parameters=SO_FLAG&parameters=TB1&parameters=TB1_FLAG&parameters=TB2&parameters=TB2_FLAG&parameters=TB3&parameters=TB3_FLAG&parameters=TL&parameters=TLMAX&parameters=TLMAX_FLAG&parameters=TLMIN&parameters=TLMIN_FLAG&parameters=TL_FLAG&parameters=TP&parameters=TP_FLAG&parameters=TS&parameters=TSMAX&parameters=TSMAX_FLAG&parameters=TSMIN&parameters=TSMIN_FLAG&parameters=TS_FLAG&parameters=ZEITX&parameters=ZEITX_FLAG&start=1992-05-20T00%3A00%3A00&end=2022-05-21T23%3A59%3A59&station_ids=905&output_format=csv&filename=ZEHNMIN+Datensatz_19920520_20220521

**TODOs:**

- create some kind of overview dict or table for the parameters
- use station IDs (download the station table and use it for the selection, or just tell users to look up the IDs themselves?)

# Setup

## Modules

In [1]:
import datetime as dt
from pathlib import Path

import ipywidgets as widgets
import numpy as np
import pandas as pd
from IPython.display import display

from ZAMGdatahub import data_download, metadata, query, utils

## Global variables

In [2]:
# Output directory for the downloads: "<current working directory>/test_downloads".
# Change the base path below to store the files somewhere else.
ODIR = Path(".").resolve().joinpath("test_downloads")

# exist_ok=True makes the cell safe to re-run and removes the gap between
# checking for the directory and creating it.
ODIR.mkdir(parents=True, exist_ok=True)

# Parameters of interest: precipitation, air temperature and ground temperature.
params = ["RR", "TL", "TB1", "TB2", "TB3"]

# For testing, the selection is narrowed to soil temperature only; this
# assignment replaces the list above.
params = ["TB1"]

# Lookup table read from CSV: maps the file's first column (used as index) to
# its "var_id" column. The keys are offered as dropdown options further down,
# and the values are passed on as parameter IDs.
var_convert = pd.read_csv("data/ZEHNMIN_convert.csv", index_col=0)["var_id"].to_dict()

# presumably the maximum number of months per request -- not used in the
# cells visible here; TODO confirm
maxMonths = 12

# behaviour switches handed to the download step
overwrite, overwriteMerge = False, False
verbose = True

# requested period, given as date strings in `datetimeformat`
datetimeformat = "%Y-%m-%d"
start = "1992-08-01"  # inclusive
end = format(dt.datetime.now(), datetimeformat)  # today's date

In [3]:
# Parameter metadata table, indexed by its first column; the last line shows
# the rows of the currently selected parameters.
parameter_meta = pd.read_csv(Path("data") / "ZEHNMIN_Parameter-Metadaten.csv", index_col=0)
parameter_meta.loc[params]

Unnamed: 0,Kurzbeschreibung,Beschreibung,Einheit
TB1,Erdbodentemperatur in 10cm Tiefe,"Erdbodentemperatur in 10cm Tiefe, Basiswert zu...",°C


In [4]:
# dataset type: select the STATION_10min member of the package's DatasetType
# enumeration and print it for reference
dataset = query.DatasetType.STATION_10min
print(dataset)

DatasetType.STATION_10min


In [5]:
def getParameterWithFlags(params, parameter_meta):
    """Expand parameter IDs with their quality-flag columns.

    For every requested parameter ``X`` the result contains ``X`` and
    ``X_FLAG`` (each only if present in ``parameter_meta.index``), followed by
    the general ``"QFLAG"`` column exactly once.

    Matching is exact. Prefix matching would also select unrelated parameters
    that merely start with the same letters, e.g. ``"P"`` would pull in
    ``"P0"`` and ``"P0_FLAG"``.

    Parameters
    ----------
    params : str or iterable of str
        Parameter ID(s) to expand. A single string is treated as one ID.
    parameter_meta : pandas.DataFrame
        Metadata table whose index holds the available parameter IDs.

    Returns
    -------
    list of str
        Parameter IDs without duplicates, in request order, ending with
        ``"QFLAG"`` unless it was already selected.
    """
    # a bare string would otherwise be iterated character by character
    if isinstance(params, str):
        params = [params]

    available = set(parameter_meta.index)
    params_extra = []
    for key in params:
        for par in (key, f"{key}_FLAG"):
            if par in available and par not in params_extra:
                params_extra.append(par)

    # QFLAG is always requested, but never twice
    if "QFLAG" not in params_extra:
        params_extra.append("QFLAG")
    return params_extra


# expand the selected parameters with their flag columns and show the
# matching metadata rows
params_extra = getParameterWithFlags(params, parameter_meta)
parameter_meta.loc[params_extra]

Unnamed: 0,Kurzbeschreibung,Beschreibung,Einheit
TB1,Erdbodentemperatur in 10cm Tiefe,"Erdbodentemperatur in 10cm Tiefe, Basiswert zu...",°C
TB1_FLAG,Qualitätsflag der Erdbodentemperatur in 10 cm,Qualitätsflag für die Erdbodentemperatur in 10...,code
QFLAG,Qualitätsflag,"Qualitätsflag={<3: Rohdaten, 3: geprüfte Daten}",Code


In [6]:
# Dropdown offering the keys of var_convert; the values of var_convert are the
# parameter IDs used to index the metadata table.
dropdown_variable = widgets.Dropdown(options=list(var_convert.keys()))
# Output area that holds the metadata table of the current selection.
variable = widgets.Output()


def dropdown_variable_eventhandler(change):
    """Show the metadata rows for the newly selected dropdown entry.

    ``change`` is the change notification passed by ``observe``; its ``new``
    attribute holds the newly selected option.
    """
    sel = getParameterWithFlags([var_convert[change.new]], parameter_meta)
    variable.clear_output()
    # display() must run inside the Output widget's context, otherwise the
    # table is not captured by `variable`
    with variable:
        display(parameter_meta.loc[sel])


dropdown_variable.observe(dropdown_variable_eventhandler, names="value")

# show the output area together with the dropdown; an Output widget that is
# never displayed cannot show anything
display(dropdown_variable, variable)

Dropdown(options=('wind direction', 'wind speed', 'global radiation', 'diffuse radiation', 'air pressure', 're…

In [7]:
# currently selected dropdown option (depends on the interactive state of the widget)
dropdown_variable.value

'wind direction'

## Make query

In [8]:
# gridbox labelled "oetztal"
gridbox_oetz = query.LatLonBox(
    label="oetztal",
    lat_min=46.6,
    lat_max=47.3,
    lon_min=10.5,
    lon_max=11.4,
)
print(gridbox_oetz, end="\n\n")  # box description followed by a blank line

# gridbox for all Tirol
gridbox_tirol = query.LatLonBox(
    label="tirol",
    lat_min=46.77,
    lat_max=47.71,
    lon_min=9.53,
    lon_max=13.51,
)
print(gridbox_tirol, end="\n\n")

LOCATION: oetztal
COORDINATES: latitude: 46.6 ... 47.3 ; longitude 10.5 ... 11.4

LOCATION: tirol
COORDINATES: latitude: 46.77 ... 47.71 ; longitude 9.53 ... 13.51



In [9]:
# station metadata table (one row per station) read from the data folder
stations_meta = pd.read_csv(Path("data") / "ZEHNMIN_Stations-Metadaten.csv")
# find stations within gridbox
def stationsInGridbox(stations_meta, gridbox: query.LatLonBox):
    """Return the rows of ``stations_meta`` that lie strictly inside ``gridbox``.

    Longitude is read from the column "Länge [°E]" and latitude from
    "Breite [°N]". Both are compared against the ``lon_min``/``lon_max`` and
    ``lat_min``/``lat_max`` attributes of ``gridbox`` with strict
    inequalities, so stations exactly on the border are excluded.
    """
    lon = stations_meta["Länge [°E]"].values
    lat = stations_meta["Breite [°N]"].values

    # one boolean mask per axis
    inside_lon = (lon < gridbox.lon_max) & (lon > gridbox.lon_min)
    inside_lat = (lat < gridbox.lat_max) & (lat > gridbox.lat_min)

    # a station is kept only if both coordinates are in range
    return stations_meta[inside_lat & inside_lon]

In [10]:
# stations inside the Oetztal gridbox, plus the per-station columns that are
# handed to the query (ids, names, start dates)
stations_oetz = stationsInGridbox(stations_meta, gridbox_oetz)
station_ids = stations_oetz["id"].values
station_names = stations_oetz["Stationsname"].tolist()
station_starts = stations_oetz["Startdatum"].tolist()
stations_oetz

Unnamed: 0,id,Synopstationsnummer,Stationsname,Länge [°E],Breite [°N],Höhe [m],Startdatum,Enddatum,Bundesland,Sonnenschein,Globalstrahlung
101,11803,11320,INNSBRUCK-UNIV.,11.384167,47.259998,578.0,2009-07-14,2100-12-31,TIR,ja,ja
102,11804,11120,INNSBRUCK-FLUGPLATZ,11.356667,47.259998,578.0,1992-11-24,2100-12-31,TIR,ja,ja
116,14631,11117,UMHAUSEN,10.928889,47.139168,1035.0,2003-06-01,2100-12-31,TIR,nein,ja
138,17301,11127,OBERGURGL,11.024445,46.866943,1941.0,1999-01-04,2100-12-31,TIR,ja,ja
139,17315,11316,PITZTALER GLETSCHER,10.879167,46.926945,2863.9,1993-12-09,2100-12-31,TIR,ja,ja
140,17320,11318,BRUNNENKOGEL,10.861667,46.912777,3437.0,2002-01-28,2100-12-31,TIR,nein,nein
231,14701,11324,NEUSTIFT/MILDERS,11.291945,47.102779,1007.0,2004-11-04,2100-12-31,TIR,nein,ja
232,14622,11317,ST.LEONHARD/PITZTAL,10.865556,47.027222,1454.4,2007-11-27,2100-12-31,TIR,nein,ja
233,14603,11309,HAIMING,10.889444,47.259724,659.0,2007-08-20,2100-12-31,TIR,ja,ja
234,14513,11115,IMST,10.742222,47.236946,773.0,2007-08-20,2100-12-31,TIR,ja,ja


In [11]:
# bundle dataset type, parameters and the per-station lists (ids, names,
# start dates) into one query object
myQuery = query.stationQuery(
    dataset, params, station_ids, station_names, station_starts
)

# NOTE(review): the recorded output of this cell lists 11 stations (incl.
# LANDECK) while the station table displayed before has 10 rows -- looks like
# output from an earlier run; re-run to confirm
print(myQuery)

stationQuery for download of STATION_10min. 
params: ['TB1', 'QFLAG']
dataset: DatasetType.STATION_10min
station_ids: ['11803', '11804', '14631', '17301', '17315', '17320', '14701', '14622', '14603', '14513', '14403']
station_names: ['INNSBRUCK-UNIV.', 'INNSBRUCK-FLUGPLATZ', 'UMHAUSEN', 'OBERGURGL', 'PITZTALER GLETSCHER', 'BRUNNENKOGEL', 'NEUSTIFT/MILDERS', 'ST.LEONHARD/PITZTAL', 'HAIMING', 'IMST', 'LANDECK']
station_starts: ['2009-07-14', '1992-11-24', '2003-06-01', '1999-01-04', '1993-12-09', '2002-01-28', '2004-11-04', '2007-11-27', '2007-08-20', '2007-08-20', '1993-12-23']
output_format: csv
location_label: station-selection
output_filename_head: station-10min


## Save/Load query to/from file

In [12]:
# write the query to a text file; the recorded run reports
# "./STATION_10min_query_station-selection.txt" as the target
myQuery.saveQuery()

Query saved to "./STATION_10min_query_station-selection.txt"


In [13]:
# read the saved query back from file and print it to check the round trip
# NOTE(review): the recorded output shows 'QFLAG' twice in params after
# loading -- verify saveQuery/loadQuery in the package
print(query.loadQuery("./STATION_10min_query_station-selection.txt"))

stationQuery for download of STATION_10min. 
params: ['TB1', 'QFLAG', 'QFLAG']
dataset: DatasetType.STATION_10min
station_ids: ['11803', '11804', '14631', '17301', '17315', '17320', '14701', '14622', '14603', '14513', '14403']
station_names: ['INNSBRUCK-UNIV.', 'INNSBRUCK-FLUGPLATZ', 'UMHAUSEN', 'OBERGURGL', 'PITZTALER GLETSCHER', 'BRUNNENKOGEL', 'NEUSTIFT/MILDERS', 'ST.LEONHARD/PITZTAL', 'HAIMING', 'IMST', 'LANDECK']
station_starts: ['2009-07-14', '1992-11-24', '2003-06-01', '1999-01-04', '1993-12-09', '2002-01-28', '2004-11-04', '2007-11-27', '2007-08-20', '2007-08-20', '1993-12-23']
output_format: csv
location_label: station-selection
output_filename_head: station-10min


# Download

In [14]:
# preview the request URLs for the query; the recorded output shows one URL
# per station
data_download.makeURL(myQuery, start, end)

['https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&parameters=QFLAG&start=2009-07-14&end=2022-05-25&station_ids=11803&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&parameters=QFLAG&start=1992-11-24&end=2022-05-25&station_ids=11804&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&parameters=QFLAG&start=2003-06-01&end=2022-05-25&station_ids=14631&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&parameters=QFLAG&start=1999-01-04&end=2022-05-25&station_ids=17301&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&parameters=QFLAG&start=1993-12-09&end=2022-05-25&station_ids=17315&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/stati

In [None]:
# download the data for the query into ODIR
# NOTE(review): in the recorded parallel run one request over the full period
# failed with "HTTP Error 504: Gateway Time-out" and the package suggested
# requesting smaller time periods -- consider shorter periods or
# parallel=False; TODO confirm which options downloadData offers
data_download.downloadData(
    myQuery, start, end, ODIR, overwrite=overwrite, verbose=verbose, parallel=True
)

Starting download of 11803_INNSBRUCK-UNIV._station-10min_TB1,QFLAG_20090714-20220525.csv
11803_INNSBRUCK-UNIV._station-10min_TB1,QFLAG_20090714-20220525.csv was downloaded.
Starting download of 11804_INNSBRUCK-FLUGPLATZ_station-10min_TB1,QFLAG_19921124-20220525.csv
HTTP Error 504: Gateway Time-out
Trying again...


Process ForkPoolWorker-1:
Process ForkPoolWorker-3:
Process ForkPoolWorker-7:
Process ForkPoolWorker-10:
Process ForkPoolWorker-9:
Process ForkPoolWorker-4:
Process ForkPoolWorker-6:
Process ForkPoolWorker-8:
Process ForkPoolWorker-12:
Process ForkPoolWorker-5:
Process ForkPoolWorker-11:


Failed to download 11804_INNSBRUCK-FLUGPLATZ_station-10min_TB1,QFLAG_19921124-20220525.csv
Try requesting less data, e.g. fewer parameters or smaller time periods.

Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()





  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
Process ForkPoolWorker-2:
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
Traceback (most recent call last):
  File "/home/skalevag/anaconda3/lib/python3.8/multiprocessing/proc