# Introduction

Create a script for downloading INCA data.

**Website only allows for 2-3 months worth of data to be downloaded at a time, depending on the size of the lat-lon box.**

TODO:
* merge netcdf files by year
* create option for downloading timeseries from lat lon location

In [1]:
import requests
import urllib
from pathlib import Path
import datetime
from dateutil.relativedelta import relativedelta
from cdo import *
cdo = Cdo()

In [2]:
ODIR = "/home/skalevag/Documents/NRC P8 water energy and sediment/data/air_temp/grids/INCA"
#ODIR = "/home/skalevag/Documents/NRC P8 water energy and sediment/data/precip/grids/INCA"
#ODIR = "/home/skalevag/Documents/NRC P8 water energy and sediment/data/radiation/grids/INCA"
ODIR = Path(ODIR)

if not ODIR.is_dir():
    ODIR.mkdir(parents=True)

In [3]:
datetimeformat = "%Y-%m-%d %H:%M"

# variable
params = "T2M" # air temperature 2m above ground
#params = "RR" # 1-hour precipitation sum
#params = "GL" # global radiation 

# gridbox for Ötztal area
gridboxlabel = "oetztal"
lat_min = 46.6
lat_max = 47.3
lon_min = 10.5
lon_max = 11.4

In [4]:
query = {"params":params,
         "gridboxlabel":gridboxlabel,
         "lat_min":lat_min,
         "lat_max":lat_max,
         "lon_min":lon_min,
         "lon_max":lon_max,
         "output_format":"netcdf",
         "file_extention":"nc",
         "output_filename_head":"incal-hourly"}

Example of timeseries query:
https://forms.hub.zamg.ac.at/v1/timeseries/d512d5b5-4e9f-4954-98b9-806acbf754f6/historical?anonymous=true&parameters=GL,P0,RH2M,RR,T2M,TD2M,UU,VV&start=2011-03-17%2000:00&end=2021-12-01%2009:00&lon=12.9&lat=50&output_format=csv

In [5]:
def makeFilename(start,end,**query):
    """
    Make INCA filename from a ZAMG data hub query.
    """
    output_filename_head = query.get('output_filename_head',"incal-hourly")
    params = query.get("params","data")
    gridboxlabel = query.get("gridboxlabel","latlonbox")
    file_extention = query.get("file_extention","nc")
    
    # compact the datetime notation
    s = start.replace("-","").replace(" ","").replace(":","")
    e = end.replace("-","").replace(" ","").replace(":","")
    timeslice = f"{s}-{e}"
    # make filename
    filename = "_".join([output_filename_head,params,gridboxlabel,timeslice])+f".{file_extention}"
    return filename

def makeURL(start,end,**query):
    """
    Makes a URL string for requesting INCA_L dataset from ZAMG data hub (https://data.hub.zamg.ac.at).
    
    Default parameters requests 2-meter air temperature in a lat-lon box of the Ötztal Alps.
    
    Parameters
    ----------
    start : str
    end : str
    params : str
        default: "T2M"
    lat_min : str ; float
        default: 46.6
    lat_max : str ; float
        default: 47.3
    lon_min : str ; float
        default: 10.5
    lon_max : str ; float
        default: 11.4
    output_format : str
        default: "netcdf"
    
    Returns
    -------
    url : str
    """
    # unpack
    lat_min = query.get("lat_min",46.6)
    lat_max = query.get("lat_max",47.3)
    lon_min = query.get("lon_min",10.5)
    lon_max = query.get("lon_max",11.4)
    params = query.get("params","TD2M")
    output_format = query.get('output_format',"netcdf")
    
    # make start- and endtime strings
    sd = start.replace(" ","%20")
    ed = end.replace(" ","%20")

    # make gridbox string
    bbox = f"{lat_min},{lon_min},{lat_max},{lon_max}"

    url = "https://forms.hub.zamg.ac.at/v1/grid/d512d5b5-4e9f-4954-98b9-806acbf754f6/" + \
            f"historical?anonymous=true&parameters={params}&start={sd}&end={ed}&bbox={bbox}&output_format={output_format}"

    return url

def makeTimeSlices(year,firstMonth = 1,lastMonth=12,maxMonths=2,datetimeformat = "%Y-%m-%d %H:%M"):
    """
    Makes a list of time slices (start and end times) for a specified year.
    
    Parameters
    ----------
    year : int
    firstMonth : int
        default: 1
    lastMonth : int
        default: 12
    maxMonth : int
        default: 2
    datetimeformat : str
        default: '%Y-%m-%d %H:%M'
    """
    # set the last day of the year
    lastDOY = datetime.datetime(year+1,1,1,0,0)
    # set start month
    month = firstMonth
    
    # make list of time slices
    slices = []
    while month<lastMonth+1:
        dtStart = datetime.datetime(year,month,1,0,0)
        dtEnd = dtStart + relativedelta(months=+maxMonths)
        # check that the end datetime does not exceed the last day of the year
        if dtEnd >= lastDOY:
            dtEnd = lastDOY
        
        # convert to string
        start = dtStart.strftime(datetimeformat)
        end = dtEnd.strftime(datetimeformat)

        slices.append((start,end))

        month = month + maxMonths
    
    return slices

def downloadData(start,end,ODIR,overwrite=False,**query):
    """
    Requests and downloads data from ZAMG data hub, and saves the file in a specifed directory.
    """
    # unpack
    lat_min = query.get("lat_min",46.6)
    lat_max = query.get("lat_max",47.3)
    lon_min = query.get("lon_min",10.5)
    lon_max = query.get("lon_max",11.4)
    
    # make filename
    filename = makeFilename(start,end,**query)
    outfile = ODIR.joinpath(filename)
    
    # check whether file already exists
    if overwrite or not outfile.is_file():
        url = makeURL(start,end,**query)
        r = requests.get(url)
        if str(r) == "<Response [400]>":
            raise requests.HTTPError(f"{r}: The data slice that you requested is too big!")
        ff,html = urllib.request.urlretrieve(url, outfile)
        print(filename, "was downloaded.")
    else:
        print(filename, "has already been downloaded:",outfile)
        
    return outfile

In [6]:
# get all in a year
for year in range(2011,2022):

    slices = makeTimeSlices(year)

    for start,end in slices:
        downloadData(start,end,ODIR,**query)

incal-hourly_T2M_oetztal_201101010000-201103010000.nc has already been downloaded: /home/skalevag/Documents/NRC P8 water energy and sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201101010000-201103010000.nc
incal-hourly_T2M_oetztal_201103010000-201105010000.nc has already been downloaded: /home/skalevag/Documents/NRC P8 water energy and sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201103010000-201105010000.nc
incal-hourly_T2M_oetztal_201105010000-201107010000.nc has already been downloaded: /home/skalevag/Documents/NRC P8 water energy and sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201105010000-201107010000.nc
incal-hourly_T2M_oetztal_201107010000-201109010000.nc has already been downloaded: /home/skalevag/Documents/NRC P8 water energy and sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201107010000-201109010000.nc
incal-hourly_T2M_oetztal_201109010000-201111010000.nc has already been downloaded: /home/skalevag/Documents/NRC P8 water

In [32]:
#TODO: merge all files from same year
# use cdo?
search = "_2019"

infiles = list(ODIR.glob(f"*{search}*.nc"))
outfile = infiles[0].name.split(search)[0] + search + "." + query["file_extention"]
outfile = ODIR.joinpath(outfile)

infiles = [str(file).replace(" ","\ ") for file in infiles]
outfile = str(outfile).replace(" ","\ ")
cdo.mergetime(input=infiles,output=outfile)

Error in calling operator mergetime with:
>>> cdo -O -mergetime /home/skalevag/Documents/NRC\ P8\ water\ energy\ and\ sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201903010000-201905010000.nc /home/skalevag/Documents/NRC\ P8\ water\ energy\ and\ sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201905010000-201907010000.nc /home/skalevag/Documents/NRC\ P8\ water\ energy\ and\ sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201907010000-201909010000.nc /home/skalevag/Documents/NRC\ P8\ water\ energy\ and\ sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201901010000-201903010000.nc /home/skalevag/Documents/NRC\ P8\ water\ energy\ and\ sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201909010000-201911010000.nc /home/skalevag/Documents/NRC\ P8\ water\ energy\ and\ sediment/data/air_temp/grids/INCA/incal-hourly_T2M_oetztal_201911010000-202001010000.nc /home/skalevag/Documents/NRC\ P8\ water\ energy\ and\ sediment/data/air_temp/grids/I

CDOException: (returncode:1) 
cdo (Abort): missing output file
