In [2]:
import os
from pathlib import Path
import cdsapi
import xarray as xr

# -------------------
# CONFIG (edit here)
# -------------------
# Years to process. range() excludes its end value, so this covers 2010
# through 2024 inclusive; change the bounds to select a different period.
YEARS = list(range(2010, 2025))
if not YEARS:
    raise ValueError("YEARS must contain at least one year")
# Tajikistan bbox [North, West, South, East] for CDS
AREA_TJK = [41.1, 67.3, 36.5, 75.2]
OUT_DIR = Path("../data")            # where the final NetCDF will go
# The file name is derived from YEARS so it cannot drift from the configured
# period (for the default YEARS this is "heat_tajikistan_2010_2024.nc").
FINAL_NC = OUT_DIR / f"heat_tajikistan_{min(YEARS)}_{max(YEARS)}.nc"
TMP_DIR = Path("./_tmp_era5_tjk")    # temp download/processing folder

# Create both folders up front; exist_ok makes re-running the cell harmless.
OUT_DIR.mkdir(parents=True, exist_ok=True)
TMP_DIR.mkdir(parents=True, exist_ok=True)

# -------------------
# 1) DOWNLOAD PER YEAR
# -------------------
# One CDS request per year. A year whose hourly file is already in TMP_DIR is
# skipped, so the cell can be re-run without downloading everything again.
c = cdsapi.Client()
hourly_files = []

for y in YEARS:
    hourly_nc = TMP_DIR / f"era5_t2m_hourly_tjk_{y}.nc"
    if hourly_nc.exists():
        print(f"[skip] already exists: {hourly_nc}")
        hourly_files.append(hourly_nc)
        continue
    print(f"[download] ERA5 hourly t2m for {y}")
    # Download to a ".part" file and rename only after retrieve() returns.
    # Writing straight to hourly_nc would let an interrupted download leave a
    # truncated file that the exists() check above treats as complete.
    partial_nc = hourly_nc.with_name(hourly_nc.name + ".part")
    if partial_nc.exists():
        partial_nc.unlink()  # leftover from a previously interrupted run
    c.retrieve(
        "reanalysis-era5-single-levels",
        {
            "product_type": "reanalysis",
            "variable": "2m_temperature",
            "year": str(y),
            # Every month, day 01-31 and hour 00-23 is requested.
            # NOTE(review): this relies on CDS ignoring day numbers that do
            # not exist in a given month (e.g. 31 February) - confirm.
            "month": [f"{m:02d}" for m in range(1, 13)],
            "day": [f"{d:02d}" for d in range(1, 32)],
            "time": [f"{h:02d}:00" for h in range(24)],
            "area": AREA_TJK,          # [N, W, S, E]
            "format": "netcdf",
        },
        str(partial_nc),
    )
    partial_nc.replace(hourly_nc)
    hourly_files.append(hourly_nc)

# -------------------
# 2) PROCESS PER YEAR -> DAILY TMAX (°C)
# -------------------
# Each yearly hourly file is reduced to one daily-maximum file in TMP_DIR.
# Years whose daily file already exists are skipped.
# NOTE(review): days are delimited by the file's own time coordinate. If that
# coordinate is UTC (as is usual for ERA5), the maximum is per UTC day rather
# than per local day - confirm this is what the downstream pipeline expects.
daily_files = []
for hourly_nc in sorted(hourly_files):
    y = hourly_nc.stem.split("_")[-1]  # year = last "_"-separated token of the stem
    daily_nc = TMP_DIR / f"era5_t2m_dailyTmax_tjk_{y}.nc"
    if daily_nc.exists():
        print(f"[skip] already exists: {daily_nc}")
        daily_files.append(daily_nc)
        continue

    print(f"[process] {hourly_nc.name} -> daily Tmax (°C)")
    # Write to a ".part" file first so an interrupted write cannot leave a
    # truncated daily file that the exists() check above would accept.
    partial_nc = daily_nc.with_name(daily_nc.name + ".part")
    # The context manager closes the hourly file even if processing fails.
    with xr.open_dataset(hourly_nc) as ds_hour:
        t2m_kelvin = ds_hour["t2m"]
        # The time axis may be called "time" or "valid_time" depending on the
        # file; detect it before chunking so the chunk size applies to the
        # axis that actually exists.
        time_dim = next(
            (dim for dim in ("time", "valid_time") if dim in t2m_kelvin.dims),
            None,
        )
        if time_dim is None:
            raise ValueError(
                f"{hourly_nc.name}: no 'time' or 'valid_time' dimension in t2m "
                f"(found {t2m_kelvin.dims})"
            )
        da = t2m_kelvin.chunk({time_dim: 240}) - 273.15  # dask-backed, Kelvin -> °C
        da.name = "t2m"
        da.attrs["units"] = "degC"
        # daily maximum over the 24 hours
        da_daily_tmax = da.resample({time_dim: "1D"}).max(skipna=True)
        da_daily_tmax.to_dataset(name="t2m").to_netcdf(partial_nc)
    partial_nc.replace(daily_nc)
    daily_files.append(daily_nc)

# -------------------
# 3) CONCAT ALL YEARS -> SINGLE DAILY NETCDF
# -------------------
print("[concat] assembling all yearly daily files -> final NetCDF")
# combine="by_coords" orders the yearly files by their coordinate values.
# The context manager closes every opened file even if the write below fails,
# so a failed run does not leave file handles open in the kernel.
with xr.open_mfdataset(
    [str(f) for f in sorted(daily_files)],
    combine="by_coords",
    parallel=True,
) as ds_all:
    # Attributes are set here explicitly before the final file is written.
    ds_all["t2m"].attrs["long_name"] = "Daily maximum 2m air temperature"
    ds_all["t2m"].attrs["units"] = "degC"

    # write final file
    ds_all.to_netcdf(FINAL_NC)

print(f"✅ Done. Final file ready for your pipeline:\n{FINAL_NC.resolve()}")


[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2010.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2011.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2012.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2013.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2014.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2015.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2016.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2017.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2018.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2019.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2020.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2021.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2022.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2023.nc
[skip] already exists: _tmp_era5_tjk/era5_t2m_hourly_tjk_2024.nc
[skip] already exists: _t

In [1]:
# -------------------
#  Optional: cap native thread pools at one thread each
#  (use when the kernel runs out of resources)
# -------------------
# NOTE(review): these variables are typically read when the numerical
# libraries are first loaded, so this cell presumably has to run before the
# import cell to take effect - confirm.
import os

os.environ.update(
    dict.fromkeys(
        (
            "OMP_NUM_THREADS",
            "OPENBLAS_NUM_THREADS",
            "MKL_NUM_THREADS",
            "VECLIB_MAXIMUM_THREADS",
            "NUMEXPR_NUM_THREADS",
        ),
        "1",
    )
)