This notebook must be run with a kernel in which the Climate Data Store (CDS) API Python package, `cdsapi`, is installed.

Start the Jupyter session on a development node and, on the launch page, enter a conda environment that has `cdsapi` installed.

Either create your own conda environment with `cdsapi` installed, or use the existing one at `/work/hpc/users/kerrie/UN_FAO/cdsapi`.

In [1]:
import calendar
import os

import cdsapi
import dask
import numpy as np

In [2]:
# Directory where the downloaded archives are written.
data_dir = '/work/hpc/users/kerrie/UN_FAO/data/agERA5/'

In [3]:
# Build the pieces of the CDS API requests for all the data we want.

# year to download (kept as a string because it goes straight into the request)
year='1980'

# list of zero-padded string months: '01' ... '12'
months=[str(m).zfill(2) for m in range(1,13)]

# number of days in each month of `year`; calendar.monthrange accounts for
# leap years (1980 is one, so February has 29 days rather than 28)
mlen=[calendar.monthrange(int(year),m)[1] for m in range(1,13)]

# list of lists of zero-padded string days, one inner list per month
days=[[str(d).zfill(2) for d in range(1,n+1)] for n in mlen]

# request keys shared by every API call
base_dict={'year':year,'format':'tgz'}

# The next three lists are parallel: entry i of each describes one download.
#   cds_variables  - value sent as the request's 'variable'
#   cds_statistics - value sent as the request's 'statistic'; '' means the
#                    key is left out of the request for that variable
#   outvars        - short name used as the prefix of the output file name
cds_variables=['2m_temperature','precipitation_flux','vapour_pressure','10m_wind_speed','solar_radiation_flux','2m_temperature']
cds_statistics=['night_time_minimum','','24_hour_mean','24_hour_mean','','day_time_maximum']
outvars=['tmin','prcp','vapr','wdsp','srad','tmax']
                

In [4]:
# This is the API call: it downloads one month of daily data for one variable.
# It is wrapped in dask.delayed so the downloads can be run in parallel.

@dask.delayed
def get_1mo_daily(month,vardict,data_dir,varout):
    """Download one month of one variable from 'sis-agrometeorological-indicators'.

    Parameters
    ----------
    month : str
        Month label used in the output file name (e.g. '01').
    vardict : dict
        Request dictionary handed unchanged to ``cdsapi.Client.retrieve``.
        Its 'year' entry, when present, is also used in the output file name.
    data_dir : str
        Directory the archive is written to (with or without a trailing slash).
    varout : str
        Prefix of the output file name (e.g. 'tmin').

    Returns
    -------
    None
        The archive is written to
        ``<data_dir>/<varout>_AgERA5_daily_<year>-<month>.tar.gz``.
    """
    # Label the file with the year that is actually being requested instead of
    # a hard-coded one; fall back to '1980' if the request carries no year.
    year=vardict.get('year','1980')
    if isinstance(year,(list,tuple)):
        year='-'.join(str(y) for y in year)
    filename='{}_AgERA5_daily_{}-{}.tar.gz'.format(varout,year,month)

    # os.path.join gives the same path whether or not data_dir ends with '/'
    target=os.path.join(data_dir,filename)

    c = cdsapi.Client()
    c.retrieve('sis-agrometeorological-indicators',vardict,target)

In [5]:
# Queue one delayed download per (variable, month) pair.
tasklist=[]
for var,stat,varout in zip(cds_variables,cds_statistics,outvars):
    for month,month_days in zip(months,days):
        # Build a fresh request dictionary for each month of each variable.
        # Copying base_dict (rather than aliasing it) keeps base_dict unchanged
        # and stops a 'statistic' set for one variable from lingering in the
        # request of a later variable that has no statistic.
        vardict=dict(base_dict)
        vardict['variable']=var
        if stat:
            vardict['statistic']=stat
        vardict['month']=month
        vardict['day']=month_days

        # call the delayed download (nothing is fetched until it is computed)
        tasklist.append(get_1mo_daily(month,vardict,data_dir,varout))


In [6]:
# 6 variables x 12 monthly requests each, so 6*12=72 tasks are expected
n_tasks=len(tasklist)
print(n_tasks)

# peek at the first task (a dask Delayed object)
tasklist[0]

72


Delayed('get_1mo_daily-b0118332-3c66-4a86-9ce7-001a3722107f')

In [11]:
# Run all the queued downloads. The trailing semicolon suppresses the echoed
# tuple of return values (one None per task), which carries no information.
dask.compute(*tasklist);

2023-05-09 15:21:33,081 INFO Welcome to the CDS
2023-05-09 15:21:33,081 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/sis-agrometeorological-indicators
2023-05-09 15:21:33,248 INFO Request is queued
2023-05-09 15:21:33,538 INFO Welcome to the CDS
2023-05-09 15:21:33,538 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/sis-agrometeorological-indicators
2023-05-09 15:21:33,570 INFO Welcome to the CDS
2023-05-09 15:21:33,571 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/sis-agrometeorological-indicators
2023-05-09 15:21:33,578 INFO Welcome to the CDS
2023-05-09 15:21:33,579 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/sis-agrometeorological-indicators
2023-05-09 15:21:33,580 INFO Welcome to the CDS
2023-05-09 15:21:33,581 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/sis-agrometeorological-indicators
2023-05-09 15:21:33,586 INFO Welcome to the CDS


(None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None)