# Download CERRA-Land data from the CDS

In [1]:
import os

import cdsapi
import numpy as np
import xarray as xr

In [2]:
# Root directory under which all CERRA-Land files are stored.
dest = "/mnt/CORDEX_CMIP6_tmp/aux_data/cerra-land/"
# Identifier of the CDS dataset the requests are sent to.
dataset = "reanalysis-cerra-land"
# Years to request, as strings: "1984" through "2020" inclusive.
YEARS = [str(year) for year in range(1984, 2021)]
# Short variable name (used in directory and file names) and temporal
# aggregation (used in the directory name).
variable = "pr"
temp_agg = "day"

In [None]:
# Download one netCDF file per year of total precipitation (06:00 analysis).
# Everything that does not depend on the year is built once, before the loop:
# the output directory, the month/day selections and the API client.
filedir = f"{dest}{temp_agg}/{variable}/"
os.makedirs(filedir, exist_ok=True)
months = [f"{month:02d}" for month in range(1, 13)]
# Days 01-31 are requested for every month, exactly as in the explicit list
# this replaces (presumably the CDS skips dates that do not exist).
days = [f"{day:02d}" for day in range(1, 32)]
client = cdsapi.Client()

for year in YEARS:
    filename = f"{variable}_CERRA-Land_day_{year}.nc"
    request = {
        "variable": ["total_precipitation"],
        "level_type": ["surface"],
        "product_type": ["analysis"],
        "year": [year],
        "month": months,
        "day": days,
        "time": ["06:00"],
        "data_format": "netcdf",
        "download_format": "unarchived",
    }
    client.retrieve(dataset, request).download(f"{filedir}{filename}")

2025-05-26 10:45:32,197 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2025-05-26 10:45:32,361 INFO Request ID is 732c1429-6b24-4cb6-85b2-321de8f31747
2025-05-26 10:45:32,444 INFO status has been updated to accepted
2025-05-26 10:45:46,221 INFO status has been updated to running


### Resample to monthly temporal aggregation

In [8]:
def traverseDir(root):
    """Yield the path of every file ending in ``.nc`` found below *root*.

    The tree is walked recursively with ``os.walk``; each yielded path is the
    containing directory joined with the file name.
    """
    for folder, _subdirs, names in os.walk(root):
        yield from (
            os.path.join(folder, name) for name in names if name.endswith(".nc")
        )

In [11]:
# Directory built from the same pieces as the daily download target.
filedir = f"{dest}{temp_agg}/{variable}/"
# Built-in ``sorted`` gives the same lexicographic order as ``np.sort`` but
# keeps the paths as ordinary ``str`` objects in a list instead of a
# fixed-width NumPy string array.
files = sorted(traverseDir(filedir))

In [12]:
# Monthly files are written to the "mon" directory. The target does not
# depend on the input file, so it is created once before the loop.
filedir = f"{dest}mon/{variable}/"
os.makedirs(filedir, exist_ok=True)

for file in files:
    name = file.split("/")[-1]
    print(name)
    # The context manager closes the input file as soon as the monthly file
    # has been written, so handles do not pile up across the years.
    with xr.open_dataset(file) as ds:
        # "ME" is the month-end frequency alias: average all values that fall
        # in each calendar month along ``valid_time``.
        monthly = ds.resample(valid_time="ME").mean()
        monthly.to_netcdf(
            f"{filedir}{name.replace('_day_', '_mon_')}",
            encoding={"tp": {"zlib": True, "complevel": 1}},
        )

pr_CERRA-Land_day_2020.nc


## Download fixed variables (land-sea mask and orography)

In [3]:
import sys

In [4]:
# CDS variable name -> short name used as the prefix of the output file.
variables = dict(land_sea_mask="sftlf", orography="orog")
# Identifier of the CDS dataset the requests are sent to.
dataset = "reanalysis-cerra-land"

In [5]:
# Root directory of the CERRA-Land auxiliary data (same path as `dest`).
filedir = "/mnt/CORDEX_CMIP6_tmp/aux_data/cerra-land/"

In [None]:
# Download the time-invariant fields, one file per variable. A single
# timestamp (1984-09-01 00:00) is requested for each.
# The target directory and the API client are the same for every variable,
# so they are set up once before the loop.
filedir = f"{dest}fx/"
os.makedirs(filedir, exist_ok=True)
client = cdsapi.Client()

for var, var_name in variables.items():
    filename = f"{var_name}_CERRA-Land_fx.nc"

    request = {
        "variable": [var],
        "level_type": ["surface"],
        "product_type": ["analysis"],
        "year": ["1984"],
        "month": ["09"],
        "day": ["01"],
        "time": ["00:00"],
        "data_format": "netcdf",
        # Same setting as the yearly precipitation request: ask for the bare
        # netCDF file, since the result is saved and later opened as ".nc".
        "download_format": "unarchived",
    }

    client.retrieve(dataset, request).download(f"{filedir}{filename}")

In [10]:
# Collect every netCDF file found below the fixed-field directory.
filedir = "/mnt/CORDEX_CMIP6_tmp/aux_data/cerra-land/fx/"
files = [*traverseDir(filedir)]

In [11]:
# rename variables
# Each fixed-field file whose name starts with "orog" or "sftlf" gets its data
# variable renamed to that short name, is reduced to the first step of any
# time axis, and is written to `filedir` as "<var>_CERRA-Land_fx_fixed.nc".
for file in files:
    basename = os.path.basename(file)
    # Outputs of an earlier run also match "*.nc"; processing them again would
    # mean overwriting a file that is currently open for reading.
    if basename.endswith("_fx_fixed.nc"):
        continue
    var = basename.split("_")[0]
    if var not in ("orog", "sftlf"):
        continue

    # The context manager closes the input file once the output is written.
    with xr.open_dataset(file) as source:
        data_vars = list(source.data_vars)
        ds = source
        if len(data_vars) == 1:
            ds = source.rename({data_vars[0]: var})
        else:
            print("There is more than one data variable; you need to specify which one to rename.")
            # Without a variable already named `var` there is nothing to
            # select below, so skip the file instead of failing on a KeyError.
            if var not in data_vars:
                continue

        # Test the dimensions rather than the variables: `isel` only accepts
        # dimensions, and a scalar time coordinate would make it raise.
        for tt in ("time", "valid_time"):
            if tt in ds.dims:
                ds = ds[var].isel({tt: 0}).squeeze().to_dataset()

        filename = f"{var}_CERRA-Land_fx_fixed.nc"
        ds.to_netcdf(f"{filedir}{filename}", mode="w", engine="netcdf4")