# Download ERA5 data from the Copernicus Climate Data Store (CDS)

In [1]:
import os
import xarray as xr

import cdsapi

In [2]:
# Root directory under which all ERA5 downloads are stored.
dest = "/mnt/CORDEX_CMIP6_tmp/aux_data/era5/"
# Dataset name passed to the CDS client for the monthly-mean requests.
dataset = "reanalysis-era5-single-levels-monthly-means"
# Years to request, as strings: 1979 through 2020 inclusive.
YEARS = [str(year) for year in range(1979, 2021)]

In [3]:
# Output short name -> CDS variable name for the monthly-mean downloads.
variables = dict(
    tas="2m_temperature",
    pr="total_precipitation",
    msl="mean_sea_level_pressure",
)

In [None]:
# Download ERA5 monthly means: one NetCDF file per variable and year,
# written to {dest}mon/<var>/<var>_ERA5_mon_<year>.nc.
client = cdsapi.Client()  # a single client is reused for every request
months = [f"{month:02d}" for month in range(1, 13)]  # "01" ... "12"

for var, cds_name in variables.items():
    # The target directory only depends on the variable, so create it once.
    filedir = f"{dest}mon/{var}/"
    os.makedirs(filedir, exist_ok=True)

    for year in YEARS:
        filename = f"{var}_ERA5_mon_{year}.nc"
        request = {
            "product_type": ["monthly_averaged_reanalysis"],
            "variable": [cds_name],
            "year": [year],
            "month": months,
            "time": ["00:00"],
            "data_format": "netcdf",
            "download_format": "unarchived",
            # Bounding box; the CDS API expects [North, West, South, East].
            "area": [75, -35, 20, 50],
        }

        client.retrieve(dataset, request).download(f"{filedir}{filename}")

## Download fixed variables (land-sea mask and orography)

In [6]:
# Dataset name passed to the CDS client for the fixed-field requests.
dataset = "reanalysis-era5-single-levels"
# CDS variable name -> output short name.
# Note: keys and values are the other way round compared with the
# monthly-mean mapping defined earlier in this notebook.
variables = {"land_sea_mask": "sftlf", "geopotential": "orog"}

In [None]:
# Download the fixed fields, one NetCDF file per variable, written to
# {dest}fx/<output name>_ERA5_fx.nc. A single time step
# (1940-01-01 00:00) is requested for each field.
# NOTE(review): unlike the monthly request, no "area" is given here;
# presumably the full grid is wanted for the fixed fields -- confirm.
filedir = f"{dest}fx/"
os.makedirs(filedir, exist_ok=True)
client = cdsapi.Client()  # a single client is reused for every request

for var, var_name in variables.items():
    filename = f"{var_name}_ERA5_fx.nc"
    request = {
        "product_type": ["reanalysis"],
        "variable": [var],
        "year": ["1940"],
        "month": ["01"],
        "day": ["01"],
        "time": ["00:00"],
        "data_format": "netcdf",
        "download_format": "unarchived",
    }

    client.retrieve(dataset, request).download(f"{filedir}{filename}")

In [3]:
def traverseDir(root):
    """Yield the path of every ``.nc`` file found under *root*.

    The tree below *root* is walked recursively with ``os.walk``. Each
    yielded path is the containing directory joined with the file name.
    The suffix comparison is case-sensitive.
    """
    for folder, _subdirs, names in os.walk(root):
        yield from (
            os.path.join(folder, name)
            for name in names
            if name.endswith(".nc")
        )

In [4]:
# Directory holding the fixed-field files, and the list of all .nc files
# currently found below it.
filedir = "/mnt/CORDEX_CMIP6_tmp/aux_data/era5/fx/"
files = [*traverseDir(filedir)]

In [6]:
# rename variables
# Post-process the fixed-field files: rename the single data variable to
# the short name taken from the file name ("orog" or "sftlf"), convert
# geopotential to surface height, drop the time axis, and write the result
# to {filedir}<var>_ERA5_fx_fixed.nc.
# NOTE(review): sftlf is written with the values as downloaded; confirm
# whether downstream code expects a fraction or a percentage.

# Gravitational acceleration (m s-2) that the ERA5 documentation specifies
# for converting geopotential to geopotential height.
GRAVITY = 9.80665
FIXED_SUFFIX = "_fixed.nc"

for file in files:
    basename = os.path.basename(file)
    # Outputs of a previous run live in the same directory; processing them
    # again would divide orog by g a second time and overwrite an open file.
    if basename.endswith(FIXED_SUFFIX):
        continue
    var = basename.split("_")[0]
    if var not in ("orog", "sftlf"):
        continue

    # The context manager closes the source file once the output is written.
    with xr.open_dataset(file) as source:
        names = list(source.data_vars)
        if len(names) != 1:
            print("There is more than one data variable; you need to specify which one to rename.")
            # Nothing was renamed, so there is nothing sensible to write.
            continue

        ds = source.rename({names[0]: var})
        if var == "orog":
            ds[var] = ds[var] / GRAVITY  # geopotential to elev in m

        # Only index along time axes that are real dimensions; a scalar
        # time coordinate cannot be passed to isel().
        time_dims = [tt for tt in ("time", "valid_time") if tt in ds.dims]
        if time_dims:
            ds = ds[var].isel({tt: 0 for tt in time_dims}).squeeze().to_dataset()

        filename = f"{var}_ERA5_fx_fixed.nc"
        ds.to_netcdf(f"{filedir}{filename}", mode="w", engine="netcdf4")