# Introduction

Example notebook for downloading ZAMG 10-minute station data (dataset `klima-v1-10min`) via the ZAMG Data Hub API.

**Website only allows for 6 months to 1 year's worth of data to be downloaded at a time, depending on the size of the lat-lon box.**

https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=DD&parameters=DDX&parameters=DDX_FLAG&parameters=DD_FLAG&parameters=FF&parameters=FFAM&parameters=FFAM_FLAG&parameters=FFX&parameters=FFX_FLAG&parameters=FF_FLAG&parameters=GSX&parameters=GSX_FLAG&parameters=HSR&parameters=HSR_FLAG&parameters=HSX&parameters=HSX_FLAG&parameters=P&parameters=P0&parameters=P0_FLAG&parameters=P_FLAG&parameters=QFLAG&parameters=RF&parameters=RF_FLAG&parameters=RR&parameters=RRM&parameters=RRM_FLAG&parameters=RR_FLAG&parameters=SH&parameters=SH_FLAG&parameters=SO&parameters=SO_FLAG&parameters=TB1&parameters=TB1_FLAG&parameters=TB2&parameters=TB2_FLAG&parameters=TB3&parameters=TB3_FLAG&parameters=TL&parameters=TLMAX&parameters=TLMAX_FLAG&parameters=TLMIN&parameters=TLMIN_FLAG&parameters=TL_FLAG&parameters=TP&parameters=TP_FLAG&parameters=TS&parameters=TSMAX&parameters=TSMAX_FLAG&parameters=TSMIN&parameters=TSMIN_FLAG&parameters=TS_FLAG&parameters=ZEITX&parameters=ZEITX_FLAG&start=1992-05-20T00%3A00%3A00&end=2022-05-21T23%3A59%3A59&station_ids=905&output_format=csv&filename=ZEHNMIN+Datensatz_19920520_20220521

**TODOs:**

- edit query class init?
- create some kind of overview dict or table for the parameters
- use station IDs (download the station table and use it for the selection, or just tell users to look up the IDs themselves?)

# Setup

## Modules

In [1]:
import datetime as dt
from pathlib import Path

import ipywidgets as widgets
import numpy as np
import pandas as pd
from IPython.display import display

from ZAMGdatahub import data_download, query, utils

## Global variables

In [2]:
# Output directory for the downloaded station files.
# NOTE: absolute, machine-specific path -- adjust before running elsewhere.
ODIR = "/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/"
ODIR = Path(ODIR)

# Create the directory together with any missing parents. exist_ok=True makes
# the call a no-op when the directory is already there, so no separate
# is_dir() check (and no gap between checking and creating) is needed.
ODIR.mkdir(parents=True, exist_ok=True)

# Parameters of interest: precipitation, air temperature and ground temperature
params = [
    "RR",
    "TL",
    "TB1",
    "TB2",
    "TB3",
]

# Override for testing: only soil temperature (replaces the list above)
params = ["TB1"]

maxMonths = 12

# Options handed to the download step further down
overwrite = False
overwriteMerge = False
verbose = True

# Time range of the request, written in `datetimeformat`
datetimeformat = "%Y-%m-%d %H:%M"
start = "1992-08-01 00:00"  # inclusive
end = dt.datetime.now().strftime(datetimeformat)  # current local time

In [3]:
# Dataset type to query: the 10-minute station dataset
dataset = query.DatasetType.STATION_10min
print(dataset)

DatasetType.STATION_10min


In [4]:
# Mapping from an English description to the parameter "head", i.e. the
# leading characters that the parameter names of one variable start with.
var_convert = {
    "wind direction": "DD",
    "wind speed": "FF",
    "global radiation": "GS",
    "diffuse radiation": "HS",
    "air pressure": "P",
    "relative humidity": "RF",
    "precipitation": "RR",
    "snow depth": "SH",
    "sunshine duration": "SO",
    "soil temperature": "TB",
    "air temperature 2m": "TL",
    "dew point": "TP",
    "air temperature 5cm": "TS",
    "timestamp of maximum wind speed": "ZEIT",
    # quality flag parameter which should always be included
    "quality flag": "QFLAG",
}

# Parallel lists derived from the mapping (same order as the dict entries)
param_heads_eng = list(var_convert.keys())
param_heads = list(var_convert.values())
var_convert

{'wind direction': 'DD',
 'wind speed': 'FF',
 'global radiation': 'GS',
 'diffuse radiation': 'HS',
 'air pressure': 'P',
 'relative humidity': 'RF',
 'precipitation': 'RR',
 'snow depth': 'SH',
 'sunshine duration': 'SO',
 'soil temperature': 'TB',
 'air temperature 2m': 'TL',
 'dew point': 'TP',
 'air temperature 5cm': 'TS',
 'timestamp of maximum wind speed': 'ZEIT',
 'quality flag': 'QFLAG'}

In [5]:
# Station and parameter metadata tables, read from CSV files given by relative
# paths (i.e. resolved against the current working directory).
stations_meta = pd.read_csv("ZEHNMIN Stations-Metadaten.csv")
# The first CSV column (the parameter name) becomes the index.
parameter_meta = pd.read_csv("ZEHNMIN Parameter-Metadaten.csv", index_col=0)
# Show the metadata rows of the selected parameters
parameter_meta.loc[params]

Unnamed: 0,Kurzbeschreibung,Beschreibung,Einheit
TB1,Erdbodentemperatur in 10cm Tiefe,"Erdbodentemperatur in 10cm Tiefe, Basiswert zu...",°C


In [6]:
def getParameterWithFlags(params, parameter_meta):
    """Collect all parameter names from the metadata index that start with one of ``params``.

    For every entry of ``params`` (in the given order) all labels of
    ``parameter_meta.index`` whose leading characters equal that entry are
    gathered, e.g. ``"TB1"`` yields ``["TB1", "TB1_FLAG"]``. A label that
    matches several entries appears once per match.

    Parameters
    ----------
    params : iterable of str
        Parameter names or name prefixes to look for.
    parameter_meta : object with an ``index`` of parameter names
        Metadata table whose index holds the available parameter names.

    Returns
    -------
    list
        The matching index labels.
    """
    return [
        name
        for prefix in params
        for name in parameter_meta.index
        if name[: len(prefix)] == prefix
    ]


# Expand the selected parameters with every matching name from the metadata
# index (e.g. the corresponding flag parameters) and show their metadata.
params_extra = getParameterWithFlags(params, parameter_meta)
parameter_meta.loc[params_extra]

Unnamed: 0,Kurzbeschreibung,Beschreibung,Einheit
TB1,Erdbodentemperatur in 10cm Tiefe,"Erdbodentemperatur in 10cm Tiefe, Basiswert zu...",°C
TB1_FLAG,Qualitätsflag der Erdbodentemperatur in 10 cm,Qualitätsflag für die Erdbodentemperatur in 10...,code


In [7]:
# Dropdown with the English parameter descriptions, plus an Output widget that
# holds the metadata table of the current selection.
dropdown_variable = widgets.Dropdown(options=param_heads_eng)
variable = widgets.Output()


def dropdown_variable_eventhandler(change):
    """Show the metadata of all parameters belonging to the newly selected variable.

    ``change`` is the change notification delivered by ``observe``;
    ``change.new`` holds the newly selected English description, which is
    translated to its parameter head via ``var_convert``.
    """
    sel = getParameterWithFlags([var_convert[change.new]], parameter_meta)
    variable.clear_output()
    # Render inside the Output widget: a bare display() call in the callback
    # is not captured by `variable`, so the widget would always stay empty.
    with variable:
        display(parameter_meta.loc[sel])


dropdown_variable.observe(dropdown_variable_eventhandler, names="value")

# The Output widget must be displayed too, otherwise its content never shows.
display(dropdown_variable, variable)

Dropdown(options=('wind direction', 'wind speed', 'global radiation', 'diffuse radiation', 'air pressure', 're…

In [8]:
# Currently selected entry of the dropdown
dropdown_variable.value

'wind direction'

## Make query

In [9]:
# gridbox for the Oetztal area
gridbox_oetz = query.LatLonBox(
    label="oetztal", lat_min=46.6, lat_max=47.3, lon_min=10.5, lon_max=11.4
)
print(gridbox_oetz)
print()

# gridbox for all Tirol
gridbox_tirol = query.LatLonBox(
    label="tirol", lat_min=46.77, lat_max=47.71, lon_min=9.53, lon_max=13.51
)
print(gridbox_tirol)
print()

LOCATION: oetztal
COORDINATES: latitude: 46.6 ... 47.3 ; longitude 10.5 ... 11.4

LOCATION: tirol
COORDINATES: latitude: 46.77 ... 47.71 ; longitude 9.53 ... 13.51



In [10]:
# find stations within gridbox
def stationsInGridbox(stations_meta, gridbox: query.LatLonBox):
    """Return the rows of ``stations_meta`` that lie strictly inside ``gridbox``.

    A station is kept when its longitude (column "Länge [°E]") is between
    ``gridbox.lon_min`` and ``gridbox.lon_max`` and its latitude (column
    "Breite [°N]") is between ``gridbox.lat_min`` and ``gridbox.lat_max``.
    All comparisons are strict, so stations exactly on the box edge are
    excluded.
    """
    lon = stations_meta["Länge [°E]"].values
    lat = stations_meta["Breite [°N]"].values

    # one boolean mask per coordinate axis
    inside_lon = (lon < gridbox.lon_max) & (lon > gridbox.lon_min)
    inside_lat = (lat < gridbox.lat_max) & (lat > gridbox.lat_min)

    # a station must satisfy both masks
    return stations_meta[inside_lat & inside_lon]

In [11]:
# Stations inside the Oetztal gridbox and the values of their "id" column
stations_oetz = stationsInGridbox(stations_meta, gridbox_oetz)
station_ids = stations_oetz.id.values
stations_oetz

Unnamed: 0,id,Synopstationsnummer,Stationsname,Länge [°E],Breite [°N],Höhe [m],Startdatum,Enddatum,Bundesland,Sonnenschein,Globalstrahlung
101,11803,11320,INNSBRUCK-UNIV.,11.384167,47.259998,578.0,2009-07-14,2100-12-31,TIR,ja,ja
102,11804,11120,INNSBRUCK-FLUGPLATZ,11.356667,47.259998,578.0,1992-11-24,2100-12-31,TIR,ja,ja
116,14631,11117,UMHAUSEN,10.928889,47.139168,1035.0,2003-06-01,2100-12-31,TIR,nein,ja
138,17301,11127,OBERGURGL,11.024445,46.866943,1941.0,1999-01-04,2100-12-31,TIR,ja,ja
139,17315,11316,PITZTALER GLETSCHER,10.879167,46.926945,2863.9,1993-12-09,2100-12-31,TIR,ja,ja
140,17320,11318,BRUNNENKOGEL,10.861667,46.912777,3437.0,2002-01-28,2100-12-31,TIR,nein,nein
231,14701,11324,NEUSTIFT/MILDERS,11.291945,47.102779,1007.0,2004-11-04,2100-12-31,TIR,nein,ja
232,14622,11317,ST.LEONHARD/PITZTAL,10.865556,47.027222,1454.4,2007-11-27,2100-12-31,TIR,nein,ja
233,14603,11309,HAIMING,10.889444,47.259724,659.0,2007-08-20,2100-12-31,TIR,ja,ja
234,14513,11115,IMST,10.742222,47.236946,773.0,2007-08-20,2100-12-31,TIR,ja,ja


In [12]:
# Build the station query from the dataset type, the parameters and the station ids.
# NOTE(review): the recorded output of this cell lists 11 ids as strings, while the
# station table shown above has 10 rows -- the saved output may come from an
# earlier run; re-run the notebook top to bottom to confirm.
myQuery = query.stationQuery(dataset, params, station_ids)

print(myQuery)

stationQuery for download of STATION_10min. 
params: ['TB1']
dataset: DatasetType.STATION_10min
station_ids: ['11803', '11804', '14631', '17301', '17315', '17320', '14701', '14622', '14603', '14513', '14403']
output_format: csv
location_label: station-selection
output_filename_head: station-10min


## Save/Load query to/from file

In [13]:
# Save the query to a text file (the recorded output reports the file path)
myQuery.saveQuery()

Query saved to "./STATION_10min_query_station-selection.txt"


In [14]:
# Load the saved query from its file again and print it
print(query.loadQuery("./STATION_10min_query_station-selection.txt"))

stationQuery for download of STATION_10min. 
params: ['TB1']
dataset: DatasetType.STATION_10min
station_ids: ['11803', '11804', '14631', '17301', '17315', '17320', '14701', '14622', '14603', '14513', '14403']
output_format: csv
location_label: station-selection
output_filename_head: station-10min


# Download

In [15]:
# Build the request URLs for the query and time range
# (the recorded output shows one URL per station id)
data_download.makeURL(myQuery, start, end)

['https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&start=1992-08-01T00:00&end=2022-05-24T20:36&station_ids=11803&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&start=1992-08-01T00:00&end=2022-05-24T20:36&station_ids=11804&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&start=1992-08-01T00:00&end=2022-05-24T20:36&station_ids=14631&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&start=1992-08-01T00:00&end=2022-05-24T20:36&station_ids=17301&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10min?parameters=TB1&start=1992-08-01T00:00&end=2022-05-24T20:36&station_ids=17315&output_format=csv&filename=dummy',
 'https://dataset.api.hub.zamg.ac.at/v1/station/historical/klima-v1-10

In [16]:
# Download the data for the query and time range; ODIR is passed as the
# output directory (presumably where the CSV files are written -- confirm
# against data_download.downloadData).
data_download.downloadData(
    myQuery, start, end, ODIR, overwrite=overwrite, verbose=verbose
)

11803_station-10min_TB1_199208010000-202205242036.csv was downloaded.


KeyboardInterrupt: 