# Introduction

Example script for downloading hourly station data.

# Setup

## Modules

In [1]:
import datetime as dt
from pathlib import Path

import ipywidgets as widgets
import numpy as np
import pandas as pd
from IPython.display import display

from ZAMGdatahub import data_download, metadata, query, utils

## Global variables

In [2]:
# set output directory
ODIR = "/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/"
ODIR = Path(ODIR)
# ODIR = "."
# ODIR = Path(ODIR).resolve()
# ODIR = ODIR.joinpath("test_downloads")

if not ODIR.is_dir():
    ODIR.mkdir(parents=True)

# variable
params = ["RSX", "GSW", "RSX", "SCH", "TT"]


# var_convert = pd.read_csv("data/ST", index_col=0)["var_id"].to_dict()
parameter_meta = pd.read_csv(
    Path("data").joinpath("STD_Parameter-Metadaten.csv"), index_col=0
)
stations_meta = pd.read_csv(Path("data").joinpath("STD_Stations-Metadaten.csv"))

datetimeformat = "%Y-%m-%d %H:%M"
start = dt.datetime(1880, 4, 1).strftime(datetimeformat)
end = dt.datetime.now().strftime(datetimeformat)

overwrite = False
verbose = True
parallelProcess = True

In [3]:
param_ids = [i for i in parameter_meta.index if len(i) < 4]
parameter_meta.loc[param_ids]

Unnamed: 0,Kurzbeschreibung,Beschreibung,Einheit
D2X,Windrichtung in Sektoren,Windrichtung in Sektoren: 32-teilig,
D6X,Windrichtung,Windrichtung in ° - vektorielles Mittel aus 10...,°
FFX,Relative Feuchte,Relative Luftfeuchte - letzter 10 Minutenwert ...,%
GSR,Globalstrahlung ungeeicht,Globalstrahlung ungeeicht,mV
GSW,Globalstrahlung,Globalstrahlung - Mittelwert aus 10 Minuten-We...,W/m²
GSX,Globalstrahlung,Globalstrahlung - Mittelwert aus 10 Minuten-We...,J/cm²
HSR,Diffusstrahlung ungeeicht,Diffusstrahlung ungeeicht - Mittelwert aus 10 ...,mV
HSX,Diffusstrahlung,Diffusstrahlung - Mittelwert aus 10 Minuten-We...,J/cm² pro Stunde
LT2,Lufttemperatur in 5 cm,Lufttemperatur in 5 cm über dem Boden - minima...,°C
PPX,Luftdruck Stationsniveau,Luftdruck Stationsniveau - letzter 10 Minutenw...,mbar


# Functions

In [4]:
def getParameterWithFlags(params, parameter_meta):
    params_extra = []
    for key in params:
        params_extra = params_extra + [
            par for par in parameter_meta.index if par[: len(key)] == key
        ]

    return params_extra


def stationsInGridbox(stations_meta, gridbox: query.LatLonBox):
    # find stations in right longitude range
    lon_mask = np.logical_and(
        stations_meta["Länge [°E]"].values < gridbox.lon_max,
        stations_meta["Länge [°E]"].values > gridbox.lon_min,
    )
    # find stations in right latitude range
    lat_mask = np.logical_and(
        stations_meta["Breite [°N]"].values < gridbox.lat_max,
        stations_meta["Breite [°N]"].values > gridbox.lat_min,
    )
    # combine the masks
    mask = np.logical_and(lat_mask, lon_mask)
    return stations_meta[mask]

# Make query

## Dataset

In [5]:
# dataset type
dataset = query.DatasetType.STATION_1h
print(dataset)

DatasetType.STATION_1h


## Select parameters

In [6]:
params_extra = getParameterWithFlags(params, parameter_meta)
parameter_meta.loc[params_extra]

Unnamed: 0,Kurzbeschreibung,Beschreibung,Einheit
RSX,Niederschlag,Niederschlag - 60 Minuten-Summe ((hh-1):00:01 ...,mm
RSX_qflag,Qualitätsqflag für RSX,Qualitätsqflag für RSX - Qualitätsflag={0-2: u...,
RSX_typ,Qualitätstyp für RSX,Qualitätstyp für RSX - Qualitätsflag={0-2: nic...,
GSW,Globalstrahlung,Globalstrahlung - Mittelwert aus 10 Minuten-We...,W/m²
GSW_qflag,Qualitätsqflag für GSW,Qualitätsqflag für GSW - Qualitätsflag={0-2: u...,
GSW_typ,Qualitätstyp für GSW,Qualitätstyp für GSW - Qualitätsflag={0-2: nic...,
RSX,Niederschlag,Niederschlag - 60 Minuten-Summe ((hh-1):00:01 ...,mm
RSX_qflag,Qualitätsqflag für RSX,Qualitätsqflag für RSX - Qualitätsflag={0-2: u...,
RSX_typ,Qualitätstyp für RSX,Qualitätstyp für RSX - Qualitätsflag={0-2: nic...,
SCH,Schneehöhe,"Schneehöhe - aus Schneepegel, letzter 10 Minu...",cm


## Selecting stations

In [7]:
gridbox_oetz = query.LatLonBox(
    label="oetztal", lat_min=46.6, lat_max=47.3, lon_min=10.5, lon_max=11.4
)
print(gridbox_oetz)
print()

# gridbox for all Tirol
gridbox_tirol = query.LatLonBox(
    label="tirol", lat_min=46.77, lat_max=47.71, lon_min=9.53, lon_max=13.51
)
print(gridbox_tirol)
print()

LOCATION: oetztal
COORDINATES: latitude: 46.6 ... 47.3 ; longitude 10.5 ... 11.4

LOCATION: tirol
COORDINATES: latitude: 46.77 ... 47.71 ; longitude 9.53 ... 13.51



In [8]:
# find stations within gridbox
stations_oetz = stationsInGridbox(stations_meta, gridbox_oetz)
station_ids = stations_oetz.id.values
station_names = stations_oetz.Stationsname.to_list()
station_starts = stations_oetz.Startdatum.to_list()
stations_oetz

Unnamed: 0,id,Synopstationsnummer,Stationsname,Länge [°E],Breite [°N],Höhe [m],Startdatum,Enddatum,Bundesland,Sonnenschein,Globalstrahlung
190,11800,11120.0,INNSBRUCK-FLUGPLATZ,11.355278,47.258888,579.0,1971-01-01,1989-12-31,TIR,nein,ja
191,11801,,INNSBRUCK-UNIV.,11.385,47.260555,577.0,1921-12-01,2008-04-30,TIR,ja,ja
192,11803,11320.0,INNSBRUCK-UNIV.,11.384167,47.259998,578.0,1986-05-01,2100-12-31,TIR,ja,ja
193,11804,11120.0,INNSBRUCK-FLUGPLATZ,11.356667,47.259998,578.0,1992-07-01,2100-12-31,TIR,ja,ja
218,14400,11112.0,LANDECK,10.566667,47.133335,818.0,1978-01-01,1981-12-31,TIR,nein,nein
219,14500,,HOCHSERFAUS,10.6,47.033333,1815.0,1961-01-01,1976-12-31,TIR,ja,ja
220,14525,,FAGGEN,10.683333,47.083332,900.0,1961-01-01,1969-03-31,TIR,ja,nein
221,14631,11117.0,UMHAUSEN,10.928889,47.139168,1035.0,2003-07-01,2100-12-31,TIR,nein,ja
264,17300,11127.0,OBERGURGL,11.027223,46.8675,1938.0,1953-05-01,1998-12-31,TIR,ja,nein
265,17301,11127.0,OBERGURGL,11.024445,46.866943,1941.0,1999-01-01,2100-12-31,TIR,ja,ja


## Make the query

In [9]:
myQuery = query.StationQuery(
    dataset, params_extra, station_ids, station_names, station_starts
)

print(myQuery)

stationQuery for download of STATION_1h. 
params: ['RSX', 'RSX_qflag', 'RSX_typ', 'GSW', 'GSW_qflag', 'GSW_typ', 'RSX', 'RSX_qflag', 'RSX_typ', 'SCH', 'SCH_qflag', 'SCH_typ', 'TT0', 'TT0_qflag', 'TT0_typ', 'TT2', 'TT2_qflag', 'TT2_typ', 'TT3', 'TT3_qflag', 'TT3_typ', 'TT4', 'TT4_qflag', 'TT4_typ', 'TT5', 'TT5_qflag', 'TT5_typ', 'TTX', 'TTX_qflag', 'TTX_typ']
dataset: DatasetType.STATION_1h
station_ids: ['11800', '11801', '11803', '11804', '14400', '14500', '14525', '14631', '17300', '17301', '17305', '17315', '17320', '14701', '14630', '14622', '14620', '14603', '14520', '14513', '14510', '14403', '14401']
station_names: ['INNSBRUCK-FLUGPLATZ', 'INNSBRUCK-UNIV.', 'INNSBRUCK-UNIV.', 'INNSBRUCK-FLUGPLATZ', 'LANDECK', 'HOCHSERFAUS', 'FAGGEN', 'UMHAUSEN', 'OBERGURGL', 'OBERGURGL', 'VENT', 'PITZTALER GLETSCHER', 'BRUNNENKOGEL', 'NEUSTIFT/MILDERS', 'UMHAUSEN', 'ST.LEONHARD/PITZTAL', 'PIOESMES', 'HAIMING', 'PRUTZ', 'IMST', 'IMST', 'LANDECK', 'LANDECK']
station_longnames: ['11800_INNSBRUCK-FLU

## Save/Load query to/from file

In [10]:
myQuery.saveQuery()

Query saved to "./STATION_1h_query_station-selection.txt"


In [11]:
importedQuery = query.loadQuery("./STATION_1h_query_station-selection.txt")
print(importedQuery)

stationQuery for download of STATION_1h. 
params: ['RSX', 'RSX_qflag', 'RSX_typ', 'GSW', 'GSW_qflag', 'GSW_typ', 'RSX', 'RSX_qflag', 'RSX_typ', 'SCH', 'SCH_qflag', 'SCH_typ', 'TT0', 'TT0_qflag', 'TT0_typ', 'TT2', 'TT2_qflag', 'TT2_typ', 'TT3', 'TT3_qflag', 'TT3_typ', 'TT4', 'TT4_qflag', 'TT4_typ', 'TT5', 'TT5_qflag', 'TT5_typ', 'TTX', 'TTX_qflag', 'TTX_typ']
dataset: DatasetType.STATION_1h
station_ids: ['11800', '11801', '11803', '11804', '14400', '14500', '14525', '14631', '17300', '17301', '17305', '17315', '17320', '14701', '14630', '14622', '14620', '14603', '14520', '14513', '14510', '14403', '14401']
station_names: ['INNSBRUCK-FLUGPLATZ', 'INNSBRUCK-UNIV.', 'INNSBRUCK-UNIV.', 'INNSBRUCK-FLUGPLATZ', 'LANDECK', 'HOCHSERFAUS', 'FAGGEN', 'UMHAUSEN', 'OBERGURGL', 'OBERGURGL', 'VENT', 'PITZTALER GLETSCHER', 'BRUNNENKOGEL', 'NEUSTIFT/MILDERS', 'UMHAUSEN', 'ST.LEONHARD/PITZTAL', 'PIOESMES', 'HAIMING', 'PRUTZ', 'IMST', 'IMST', 'LANDECK', 'LANDECK']
station_longnames: ['11800_INNSBRUCK-FLU

# Download

In [12]:
# save the metadata on selected parameters
parameter_meta.loc[params_extra].to_csv(
    ODIR.joinpath("ZAMG_station-hourly_parameter_metadata.csv")
)
# save the station selection
stations_oetz.to_csv(ODIR.joinpath("ZAMG_station-hourly_stations_metadata.csv"))

In [13]:
if not len(utils.makeStationFilenames(start, end, myQuery)) == len(
    data_download.makeURL(myQuery, start, end)
):
    raise Exception(
        "Something went wrong when splitting station data requests into slices. No garantee that filenames and file contents match!"
    )

In [None]:
data_download.downloadData(
    myQuery,
    start,
    end,
    ODIR,
    overwrite=overwrite,
    verbose=verbose,
    parallelProcess=parallelProcess,
)

Parallelising with 5 cores.
11800_INNSBRUCK-FLUGPLATZ_station-hourly_2020.csv11801_INNSBRUCK-UNIV._station-hourly_1967.csv11800_INNSBRUCK-FLUGPLATZ_station-hourly_1971.csv11804_INNSBRUCK-FLUGPLATZ_station-hourly_1997.csv11801_INNSBRUCK-UNIV._station-hourly_2016.csv     has already been downloaded:has already been downloaded:has already been downloaded:has already been downloaded:has already been downloaded:     /home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/11800_INNSBRUCK-FLUGPLATZ/11800_INNSBRUCK-FLUGPLATZ_station-hourly_1971.csv/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/11800_INNSBRUCK-FLUGPLATZ/11800_INNSBRUCK-FLUGPLATZ_station-hourly_2020.csv/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/11801_INNSBRUCK-UNIV./11801_INNSBRUCK-UNIV._station-hourly_1967.csv/home/skalevag/Documents/NRC_P8_water_energy_and_sediment/data/meteo_stations/11804_INNSBRUCK-FLUGPLATZ/11804_INNSBRUCK-FLUGPLATZ_stat