### For all missing ERA5 hourly files, send download requests to https://cds.climate.copernicus.eu/

In [None]:
# Root folder of the project. Later cells build output paths by plain string
# concatenation (proj_dir + "input_data_ERA5/" + ...), so keep the trailing "/".
proj_dir="/path/to/main_project_folder/" # edit this line

import cdsapi
import os
import itertools
import json
import multiprocessing as mp
import sys
# Put the project folder on the import path before importing project_utils below.
sys.path.append(proj_dir)
from project_utils import parameters as param
from project_utils import load_region
from project_utils import prepare_inputs
import importlib
# Reload the project modules so that re-running this cell re-executes their
# current source instead of reusing the copies already cached in sys.modules.
importlib.reload(param)
importlib.reload(load_region)
importlib.reload(prepare_inputs)

In [None]:
# Regions to process; each name is handed to load_region.load_region_constants
# and is also used as a folder / file-name component for the outputs.
region_str_tpl = (
    'spain',
    'balkans',
    'france',
    'germany',
    'russia',
    'midwest',
    'south_plains',
    'southeast',
    'north_china',
    'south_china',
    'central_argentina',
    'south_argentina',
    'southwest_africa',
    'southeast_africa',
    'west_australia',
    'east_australia',
)

# CDS variable names and, position for position, the short names used in
# output paths. The two lists must stay the same length and order.
variables = [
    "volumetric_soil_water_layer_1",
    "2m_temperature",
    "geopotential",
]
var_abbrev = [
    "swvl1",
    "t2m",
    "z",
]

######### Define Dataset and Product Type ##########
# Dataset used for the land variables (swvl1, t2m).
era5_land_dataset = "reanalysis-era5-land"

# Dataset, product type and pressure level used for geopotential (z).
era5_z_dataset = "reanalysis-era5-pressure-levels"
era5_z_product_type = "reanalysis"
era5_z_pressure_level = '500'

print(region_str_tpl)

In [None]:
###### Define Main Download Function to be iterated ######
def era5_land_download(
    dataset,
    variables,
    years,
    months,
    days,
    times = '00:00',
    format = "netcdf",
    latmin = 0,
    latmax = 180,
    lonmin = -180,
    lonmax = 180,
    outname = 'download.nc'
):
    """Submit one hourly ERA5-Land request to the CDS and return its request id.

    The request always covers all 24 hours of the day on a 0.25 x 0.25 grid
    and carries no "product_type" entry.

    Parameters
    ----------
    dataset : CDS dataset name passed to ``Client.retrieve``.
    variables, years, months, days : forwarded unchanged as the request's
        "variable", "year", "month" and "day" entries.
    times : accepted for interface compatibility; NOT used -- the hour list
        is fixed to "00:00" ... "23:00".
    format : forwarded as the request's "format" entry.
    latmin, latmax, lonmin, lonmax : bounding box, sent as
        ``[latmax, lonmin, latmin, lonmax]`` in the "area" entry.
    outname : accepted for interface compatibility; NOT used -- nothing is
        downloaded here.

    Returns
    -------
    The ``'request_id'`` entry of the reply returned by the CDS client.
    """
    # Hourly time stamps "00:00", "01:00", ..., "23:00".
    all_hours = ["%02d:00" % hour for hour in range(24)]

    request = {
        "variable": variables,
        "year": years,
        "month": months,
        "day": days,
        "time": all_hours,
        "format": format,
        "area": [latmax, lonmin, latmin, lonmax],
        "grid": ["0.25", "0.25"],
    }

    # wait_until_complete=False: only queue the request; the result is
    # fetched elsewhere using the returned id.
    client = cdsapi.Client(wait_until_complete=False, delete=False)
    result = client.retrieve(dataset, request)

    return result.reply['request_id']

def era5_z_download(
    dataset,
    product_type,
    pressure_level,
    variables,
    years,
    months,
    days,
    times = '00:00',
    format = "netcdf",
    latmin = 0,
    latmax = 180,
    lonmin = -180,
    lonmax = 180,
    outname = 'download.nc'
):
    """Submit one hourly pressure-level request to the CDS and return its request id.

    The request always covers all 24 hours of the day. Unlike
    ``era5_land_download`` it sends "product_type" and "pressure_level"
    and does not send a "grid" entry.

    Parameters
    ----------
    dataset : CDS dataset name passed to ``Client.retrieve``.
    product_type, pressure_level : forwarded unchanged as the request's
        "product_type" and "pressure_level" entries.
    variables, years, months, days : forwarded unchanged as the request's
        "variable", "year", "month" and "day" entries.
    times : accepted for interface compatibility; NOT used -- the hour list
        is fixed to "00:00" ... "23:00".
    format : forwarded as the request's "format" entry.
    latmin, latmax, lonmin, lonmax : bounding box, sent as
        ``[latmax, lonmin, latmin, lonmax]`` in the "area" entry.
    outname : accepted for interface compatibility; NOT used -- nothing is
        downloaded here.

    Returns
    -------
    The ``'request_id'`` entry of the reply returned by the CDS client.
    """
    # Hourly time stamps "00:00", "01:00", ..., "23:00".
    all_hours = ["%02d:00" % hour for hour in range(24)]

    request = {
        "product_type": product_type,
        "pressure_level": pressure_level,
        "variable": variables,
        "year": years,
        "month": months,
        "day": days,
        "time": all_hours,
        "format": format,
        "area": [latmax, lonmin, latmin, lonmax],
    }

    # wait_until_complete=False: only queue the request; the result is
    # fetched elsewhere using the returned id.
    client = cdsapi.Client(wait_until_complete=False, delete=False)
    result = client.retrieve(dataset, request)

    return result.reply['request_id']

In [None]:
# For every region and variable, queue a CDS request for each yearly file that
# is not on disk yet, and record {target file path: request id} in
# "<region>_<abbrev>_missing.json" inside the variable's output folder.
for region_str in region_str_tpl:
    print(region_str)

    (hem, region_input_lat_bbox, region_input_lon_bbox, region_box_x, region_box_y,
     region_lat, region_lon, region_lon_EW, region_t62_lats, region_t62_lons
     ) = load_region.load_region_constants(region_str)

    # The latitude slice's start is used as the maximum and its stop as the
    # minimum -- presumably the slice runs north-to-south; confirm in load_region.
    input_latmin = region_input_lat_bbox.stop
    input_latmax = region_input_lat_bbox.start

    if isinstance(region_input_lon_bbox, slice):
        input_lonmin = region_input_lon_bbox.start
        input_lonmax = region_input_lon_bbox.stop
    else:
        # Not a single slice: treated as a pair of slices, taking the west edge
        # from the first and the east edge from the second (presumably a region
        # straddling 0 deg longitude -- confirm in load_region).
        input_lonmin = region_input_lon_bbox[0].start
        input_lonmax = region_input_lon_bbox[1].stop

    print('lat:', input_latmin, input_latmax)
    print('lon:', input_lonmin, input_lonmax)

    # Shift longitudes above 180 down by 360 (0..360 -> -180..180 convention).
    if input_lonmin > 180:
        input_lonmin = input_lonmin-360

    if input_lonmax > 180:
        input_lonmax = input_lonmax-360

    print('lat:', input_latmin, input_latmax)
    print('lon:', input_lonmin, input_lonmax)

    ######### Define Time/Space Grid ###########
    # Plain assignments: this code runs at module (cell) level, where the
    # names are already global, so no `global` declarations are needed.
    latmin = input_latmin
    latmax = input_latmax
    lonmin = input_lonmin
    lonmax = input_lonmax

    yrs = list(range(1979,2022))
    mons = list(range(1,13))
    # Days 1..31 are requested for every month.
    total_days = list(range(1,32))

    ######### Run Iterating Download Function ###########

    for i,var in enumerate(variables):
        request_id_dict = {}

        curr_var = var
        curr_var_abbrev = var_abbrev[i]

        # Decide once which download function applies to this variable.
        is_z = curr_var_abbrev == "z"
        is_land = curr_var_abbrev in ("t2m", "swvl1")

        if is_z:
            print('Z')
        elif is_land:
            print('LAND')
        else:
            print('VAR ABBREV ERROR')

        print(curr_var, curr_var_abbrev)

        path_out = proj_dir+"input_data_ERA5/"+region_str+"/hourly/"+curr_var_abbrev+"/"

        # Create the output folder up front. Without it, writing the JSON
        # below fails on a fresh region/variable *after* the requests have
        # been submitted, and their ids would be lost.
        os.makedirs(path_out, exist_ok=True)

        try:
            for yr in yrs:
                f_out = '_'.join([region_str,curr_var_abbrev,'hourly',str(yr)+'.nc'])
                target = path_out+f_out

                if os.path.exists(target):
                    continue

                print(target+" does NOT exist... requesting now")
                if is_z:
                    request_id_dict[target] = era5_z_download(dataset = era5_z_dataset,
                                                              product_type = era5_z_product_type,
                                                              pressure_level = era5_z_pressure_level,
                                                              variables = curr_var,
                                                              years = yr,
                                                              months = mons,
                                                              days = total_days,
                                                              latmin = latmin,
                                                              latmax = latmax,
                                                              lonmin = lonmin,
                                                              lonmax = lonmax)
                elif is_land:
                    request_id_dict[target] = era5_land_download(dataset = era5_land_dataset,
                                                                 variables = curr_var,
                                                                 years = yr,
                                                                 months = mons,
                                                                 days = total_days,
                                                                 latmin = latmin,
                                                                 latmax = latmax,
                                                                 lonmin = lonmin,
                                                                 lonmax = lonmax)
                else:
                    print('VAR ABBREV ERROR')
        finally:
            # Always persist the ids collected so far, even if a request
            # raised part-way through the years; the exception still propagates.
            with open(path_out+region_str+'_'+curr_var_abbrev+'_missing.json', 'w') as fp:
                json.dump(request_id_dict, fp)