# Introduction

Example notebook for downloading 10-minute station data with the `ZAMGdatahub` package.

# Setup

## Modules

In [1]:
import datetime as dt
from pathlib import Path

import ipywidgets as widgets
import numpy as np
import pandas as pd
from IPython.display import display

from ZAMGdatahub import data_download, metadata, query, utils

## Global variables

In [2]:
# Output directory for all downloaded files and the saved metadata tables.
# NOTE: this is a machine-specific absolute path; adjust it before running the
# notebook on another machine (a relative alternative is kept below).
ODIR = "/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/"
ODIR = Path(ODIR)
# ODIR = "."
# ODIR = Path(ODIR).resolve()
# ODIR = ODIR.joinpath("test_downloads")

# Create the directory together with any missing parents. exist_ok=True makes
# the cell safe to re-run and avoids a separate check-then-create step.
ODIR.mkdir(parents=True, exist_ok=True)

# Short names of the parameters to request. They are treated as prefixes later
# on (see getParameterWithFlags), so related entries such as the *_FLAG
# columns are picked up as well.
params = [
    "RR",
    "SH",
    "TL",
    "TS",
    "TB1",
    "TB2",
    "TB3",
    "GS",
    "HS",
]

# Directory (relative to the notebook) holding the bundled metadata tables.
DATA_DIR = Path("data")

# Mapping from the first CSV column (used as index) to the "var_id" column.
var_convert = pd.read_csv(DATA_DIR.joinpath("ZEHNMIN_convert.csv"), index_col=0)[
    "var_id"
].to_dict()
# Parameter metadata, indexed by the first CSV column.
parameter_meta = pd.read_csv(
    DATA_DIR.joinpath("ZEHNMIN_Parameter-Metadaten.csv"), index_col=0
)
# Station metadata (default integer index).
stations_meta = pd.read_csv(DATA_DIR.joinpath("ZEHNMIN_Stations-Metadaten.csv"))

# Requested time range, formatted as strings.
datetimeformat = "%Y-%m-%d %H:%M"
start = dt.datetime(1992, 8, 1).strftime(datetimeformat)  # inclusive
end = dt.datetime.now().strftime(datetimeformat)  # time at which the cell is run

# Options forwarded to data_download.downloadData further down.
overwrite = False
verbose = True
parallelProcess = True

### Test widget for selecting variables

In [3]:
# Dropdown offering the keys of var_convert as selectable entries.
dropdown_variable = widgets.Dropdown(options=list(var_convert.keys()))
# Output area that receives the metadata table of the selected entry.
variable = widgets.Output()


def dropdown_variable_eventhandler(change):
    """Show the parameter metadata belonging to the newly selected entry.

    Parameters
    ----------
    change
        Change notification passed by ``observe``; only ``change.new`` (the
        newly selected dropdown value) is read.

    Notes
    -----
    ``getParameterWithFlags`` is defined in the "Functions" section further
    down. The name is only resolved when the callback fires, so that cell
    must have been run before the dropdown is used.
    """
    variable.clear_output()
    # Display inside the Output widget's context so the table is captured by
    # the widget; output produced directly in a callback is not guaranteed to
    # show up below the cell.
    with variable:
        sel = getParameterWithFlags([var_convert[change.new]], parameter_meta)
        display(parameter_meta.loc[sel])


dropdown_variable.observe(dropdown_variable_eventhandler, names="value")

# Show the dropdown together with the output area it writes to.
display(dropdown_variable, variable)

Dropdown(options=('wind direction', 'wind speed', 'global radiation', 'diffuse radiation', 'air pressure', 're…

In [4]:
# Inspect the entry currently selected in the dropdown.
dropdown_variable.value

'wind direction'

# Functions

In [5]:
def getParameterWithFlags(params, parameter_meta):
    """Expand parameter short names to all metadata labels sharing the prefix.

    Every label in ``parameter_meta.index`` that starts with one of the given
    keys is selected, so a key such as ``"RR"`` also selects ``"RRM"``,
    ``"RRM_FLAG"`` and ``"RR_FLAG"`` when those labels exist.

    Parameters
    ----------
    params : iterable of str
        Parameter short names used as prefixes.
    parameter_meta : object with an ``index`` attribute
        Typically the parameter metadata DataFrame; its index labels must be
        strings.

    Returns
    -------
    list of str
        Matching labels, grouped by key in the order of ``params`` and in
        index order within each key. A label matched by several keys (e.g.
        ``"RRM"`` for the keys ``"RR"`` and ``"RRM"``) is listed only once, at
        its first match, so it cannot produce duplicate rows or duplicate
        request parameters downstream.
    """
    # Materialise once; the index is scanned again for every key.
    index_labels = list(parameter_meta.index)
    params_extra = []
    seen = set()
    for key in params:
        for par in index_labels:
            if par.startswith(key) and par not in seen:
                seen.add(par)
                params_extra.append(par)

    return params_extra


def stationsInGridbox(stations_meta, gridbox: query.LatLonBox):
    """Return the rows of ``stations_meta`` lying strictly inside ``gridbox``.

    Longitude is read from the "Länge [°E]" column and latitude from the
    "Breite [°N]" column. A station located exactly on a box edge is not
    selected because all four comparisons are strict.
    """
    lon = stations_meta["Länge [°E]"].values
    lat = stations_meta["Breite [°N]"].values

    # one boolean mask per coordinate, then require both to hold
    in_lat_range = (lat < gridbox.lat_max) & (lat > gridbox.lat_min)
    in_lon_range = (lon < gridbox.lon_max) & (lon > gridbox.lon_min)
    inside = in_lat_range & in_lon_range

    return stations_meta[inside]

# Make query

## Dataset

In [6]:
# dataset type
dataset = query.DatasetType.STATION_10min
print(dataset)

DatasetType.STATION_10min


## Select parameters

In [7]:
# Expand the configured short names to every metadata entry sharing the prefix
# (this includes the *_FLAG entries), then show the metadata of the selection.
params_extra = getParameterWithFlags(params, parameter_meta)
parameter_meta.loc[params_extra]

Unnamed: 0,Kurzbeschreibung,Beschreibung,Einheit
RR,Niederschlag,"10 Minuten Summe des Niederschlags, Summe der ...",mm
RRM,Niederschlagsmelder,"10 Minuten Summe des Regenmelders, Summe der B...",min
RRM_FLAG,Qualitätsflag der 10 Minuten Summe des Regenme...,Qualitätsflag für die 10 Minuten Summe des Reg...,code
RR_FLAG,Qualitätsflag der 10 Minuten Summe des Nieders...,Qualitätsflag für die 10 Minuten Summe des Nie...,code
SH,Gesamtschneehöhe aus Schneepegelmessung,"Gesamtschneehöhe aus Schneepegelmessung, arith...",cm
SH_FLAG,Qualitätsflag der Gesamtschneehöhe,Qualitätsflag für die Gesamtschneehöhe - Quali...,code
TL,Lufttemperatur in 2m,"Lufttemperatur in 2m Höhe, Basiswert zur Minute10",°C
TLMAX,Lufttemperaturmaximum in 2m,"Maximum der Lufttemperatur in 2m Höhe, Maximum...",°C
TLMAX_FLAG,Qualitätsflag des Lufttemperaturmaximums in 2m,Qualitätsflag für das Maximum der Lufttemperat...,code
TLMIN,Lufttemperaturminimum in 2m,"Minimum der Lufttemperatur in 2m Höhe, Minimum...",°C


## Selecting stations

In [8]:
# gridbox labelled "oetztal"; used below to select the stations to download
gridbox_oetz = query.LatLonBox(
    label="oetztal", lat_min=46.6, lat_max=47.3, lon_min=10.5, lon_max=11.4
)
print(gridbox_oetz)
print()

# gridbox for all Tirol
# (defined for reference; not used by the cells visible in this notebook)
gridbox_tirol = query.LatLonBox(
    label="tirol", lat_min=46.77, lat_max=47.71, lon_min=9.53, lon_max=13.51
)
print(gridbox_tirol)
print()

LOCATION: oetztal
COORDINATES: latitude: 46.6 ... 47.3 ; longitude 10.5 ... 11.4

LOCATION: tirol
COORDINATES: latitude: 46.77 ... 47.71 ; longitude 9.53 ... 13.51



In [9]:
# find stations within gridbox
stations_oetz = stationsInGridbox(stations_meta, gridbox_oetz)
station_ids = stations_oetz.id.values
station_names = stations_oetz.Stationsname.to_list()
station_starts = stations_oetz.Startdatum.to_list()
stations_oetz

Unnamed: 0,id,Synopstationsnummer,Stationsname,Länge [°E],Breite [°N],Höhe [m],Startdatum,Enddatum,Bundesland,Sonnenschein,Globalstrahlung
101,11803,11320,INNSBRUCK-UNIV.,11.384167,47.259998,578.0,2009-07-14,2100-12-31,TIR,ja,ja
102,11804,11120,INNSBRUCK-FLUGPLATZ,11.356667,47.259998,578.0,1992-11-24,2100-12-31,TIR,ja,ja
116,14631,11117,UMHAUSEN,10.928889,47.139168,1035.0,2003-06-01,2100-12-31,TIR,nein,ja
138,17301,11127,OBERGURGL,11.024445,46.866943,1941.0,1999-01-04,2100-12-31,TIR,ja,ja
139,17315,11316,PITZTALER GLETSCHER,10.879167,46.926945,2863.9,1993-12-09,2100-12-31,TIR,ja,ja
140,17320,11318,BRUNNENKOGEL,10.861667,46.912777,3437.0,2002-01-28,2100-12-31,TIR,nein,nein
231,14701,11324,NEUSTIFT/MILDERS,11.291945,47.102779,1007.0,2004-11-04,2100-12-31,TIR,nein,ja
232,14622,11317,ST.LEONHARD/PITZTAL,10.865556,47.027222,1454.4,2007-11-27,2100-12-31,TIR,nein,ja
233,14603,11309,HAIMING,10.889444,47.259724,659.0,2007-08-20,2100-12-31,TIR,ja,ja
234,14513,11115,IMST,10.742222,47.236946,773.0,2007-08-20,2100-12-31,TIR,ja,ja


## Make the query

In [10]:
# Bundle dataset type, expanded parameter list and station selection into one
# query object; it is reused for saving, the consistency check and the download.
myQuery = query.StationQuery(
    dataset, params_extra, station_ids, station_names, station_starts
)

# Print a summary of the query contents.
print(myQuery)

stationQuery for download of STATION_10min. 
params: ['RR', 'RRM', 'RRM_FLAG', 'RR_FLAG', 'SH', 'SH_FLAG', 'TL', 'TLMAX', 'TLMAX_FLAG', 'TLMIN', 'TLMIN_FLAG', 'TL_FLAG', 'TS', 'TSMAX', 'TSMAX_FLAG', 'TSMIN', 'TSMIN_FLAG', 'TS_FLAG', 'TB1', 'TB1_FLAG', 'TB2', 'TB2_FLAG', 'TB3', 'TB3_FLAG', 'GSX', 'GSX_FLAG', 'HSR', 'HSR_FLAG', 'HSX', 'HSX_FLAG', 'QFLAG']
dataset: DatasetType.STATION_10min
station_ids: ['11803', '11804', '14631', '17301', '17315', '17320', '14701', '14622', '14603', '14513', '14403']
station_names: ['INNSBRUCK-UNIV.', 'INNSBRUCK-FLUGPLATZ', 'UMHAUSEN', 'OBERGURGL', 'PITZTALER GLETSCHER', 'BRUNNENKOGEL', 'NEUSTIFT/MILDERS', 'ST.LEONHARD/PITZTAL', 'HAIMING', 'IMST', 'LANDECK']
station_longnames: ['11803_INNSBRUCK-UNIV.', '11804_INNSBRUCK-FLUGPLATZ', '14631_UMHAUSEN', '17301_OBERGURGL', '17315_PITZTALER-GLETSCHER', '17320_BRUNNENKOGEL', '14701_NEUSTIFT-MILDERS', '14622_ST.LEONHARD-PITZTAL', '14603_HAIMING', '14513_IMST', '14403_LANDECK']
station_starts: ['2009-07-14', '1992

## Save/Load query to/from file

In [11]:
# Save the query to a file; the recorded output reports the path that was written.
myQuery.saveQuery()

Query saved to "./STATION_10min_query_station-selection.txt"


In [12]:
# Load the query back from the file reported by the save step above.
# NOTE(review): in the recorded output the re-loaded query lists 'QFLAG' twice,
# while the original query lists it once. Presumably it is appended again on
# loading; verify in ZAMGdatahub.query before downloading with a loaded query.
importedQuery = query.loadQuery("./STATION_10min_query_station-selection.txt")
print(importedQuery)

stationQuery for download of STATION_10min. 
params: ['RR', 'RRM', 'RRM_FLAG', 'RR_FLAG', 'SH', 'SH_FLAG', 'TL', 'TLMAX', 'TLMAX_FLAG', 'TLMIN', 'TLMIN_FLAG', 'TL_FLAG', 'TS', 'TSMAX', 'TSMAX_FLAG', 'TSMIN', 'TSMIN_FLAG', 'TS_FLAG', 'TB1', 'TB1_FLAG', 'TB2', 'TB2_FLAG', 'TB3', 'TB3_FLAG', 'GSX', 'GSX_FLAG', 'HSR', 'HSR_FLAG', 'HSX', 'HSX_FLAG', 'QFLAG', 'QFLAG']
dataset: DatasetType.STATION_10min
station_ids: ['11803', '11804', '14631', '17301', '17315', '17320', '14701', '14622', '14603', '14513', '14403']
station_names: ['INNSBRUCK-UNIV.', 'INNSBRUCK-FLUGPLATZ', 'UMHAUSEN', 'OBERGURGL', 'PITZTALER GLETSCHER', 'BRUNNENKOGEL', 'NEUSTIFT/MILDERS', 'ST.LEONHARD/PITZTAL', 'HAIMING', 'IMST', 'LANDECK']
station_longnames: ['11803_INNSBRUCK-UNIV.', '11804_INNSBRUCK-FLUGPLATZ', '14631_UMHAUSEN', '17301_OBERGURGL', '17315_PITZTALER-GLETSCHER', '17320_BRUNNENKOGEL', '14701_NEUSTIFT-MILDERS', '14622_ST.LEONHARD-PITZTAL', '14603_HAIMING', '14513_IMST', '14403_LANDECK']
station_starts: ['2009-07-1

# Download

In [13]:
# save the metadata on selected parameters
parameter_meta.loc[params_extra].to_csv(
    ODIR.joinpath("ZAMG_station-10min_parameter_metadata.csv")
)
# save the station selection
stations_oetz.to_csv(ODIR.joinpath("ZAMG_station-10min_stations_metadata.csv"))

In [14]:
# Sanity check before downloading: the number of generated filenames must
# equal the number of generated request URLs. If the counts differ, the
# filenames and the file contents are not guaranteed to match.
n_filenames = len(utils.makeStationFilenames(start, end, myQuery))
n_urls = len(data_download.makeURL(myQuery, start, end))
if n_filenames != n_urls:
    raise RuntimeError(
        "Something went wrong when splitting station data requests into slices. "
        "No guarantee that filenames and file contents match! "
        f"({n_filenames} filenames vs. {n_urls} URLs)"
    )

In [None]:
# Run the download for the query and time range into ODIR, using the options
# set in the global-variables cell.
# Judging by the recorded output, files that already exist are reported as
# "has already been downloaded", and with parallelProcess=True the log lines
# of the parallel workers are interleaved.
data_download.downloadData(
    myQuery,
    start,
    end,
    ODIR,
    overwrite=overwrite,
    verbose=verbose,
    parallelProcess=parallelProcess,
)

Parallelising with 5 cores.
11803_INNSBRUCK-UNIV._station-10min_2009.csv11803_INNSBRUCK-UNIV._station-10min_2021.csv14631_UMHAUSEN_station-10min_2006.csv11804_INNSBRUCK-FLUGPLATZ_station-10min_2002.csv11804_INNSBRUCK-FLUGPLATZ_station-10min_2014.csv     has already been downloaded:has already been downloaded:has already been downloaded:has already been downloaded:has already been downloaded:     /home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/11804_INNSBRUCK-FLUGPLATZ/11804_INNSBRUCK-FLUGPLATZ_station-10min_2002.csv/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/14631_UMHAUSEN/14631_UMHAUSEN_station-10min_2006.csv/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/11803_INNSBRUCK-UNIV./11803_INNSBRUCK-UNIV._station-10min_2009.csv/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/11803_INNSBRUCK-UNIV./11803_INNSBRUCK-UNIV._station-10min_2021.csv/home/skalevag/Documents/NRC_P8