In [1]:
import cdsapi
import xarray as xr
import pandas as pd
import numpy as np
from pathlib import Path
import time

In [2]:
# Shared CDS API client for the whole notebook; `safe_retrieve` calls
# `c.retrieve(...)` on this module-level object.
# NOTE(review): presumably picks up credentials from the user's cdsapi
# configuration — confirm before running on a new machine.
c = cdsapi.Client()

In [3]:
# Per-configuration download settings. Every dict below is keyed by the same
# configuration names: 'full', 'small', 'slgt_small', 'slgt_full'.

# Latitude bounds as slice(start, stop); start is the larger latitude.
lat_dict = dict(
    full=slice(50, 25),
    small=slice(45, 30),
    slgt_small=slice(50, 25),
    slgt_full=slice(50, 25),
)

# Longitude bounds as slice(start, stop), written as 360 minus a degree value.
lon_dict = dict(
    full=slice(360 - 125, 360 - 66),
    small=slice(360 - 105, 360 - 85),
    slgt_small=slice(360 - 125, 360 - 66),
    slgt_full=slice(360 - 125, 360 - 66),
)

# Pressure levels: the same values for every configuration, but each key gets
# its own list object so editing one entry never affects another.
levels_dict = {
    name: [925, 850, 700, 500, 300]
    for name in lat_dict
}

# Step between requested hours of the day (1 = every hour, 6 = every sixth hour).
time_thin_dict = dict(full=1, small=6, slgt_small=6, slgt_full=1)

# Spatial thinning factor; not referenced by the cells visible in this notebook section.
space_thin_dict = dict(full=1, small=4, slgt_small=4, slgt_full=1)

# Risk categories (matched against MAX_CAT) that select the days to download.
# The 'slgt_*' configurations additionally include SLGT and ENH.
risk_level_dict = {
    name: (
        ['SLGT', 'ENH', 'MDT', 'HIGH']
        if name.startswith('slgt_')
        else ['MDT', 'HIGH']
    )
    for name in lat_dict
}

# Pressure-level variables requested for each configuration (currently identical).
pressure_var_dict = {
    name: [
        "geopotential",
        "potential_vorticity",
        "specific_humidity",
        "temperature",
        "u_component_of_wind",
        "v_component_of_wind",
        "vertical_velocity",
    ]
    for name in lat_dict
}

# Single-level variables requested for each configuration (currently identical).
surface_var_dict = {
    name: [
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
        "2m_dewpoint_temperature",
        "2m_temperature",
        "geopotential_at_surface",
        "toa_incident_solar_radiation",
    ]
    for name in lat_dict
}

In [4]:
# Active configuration: the key used for every `*_dict[detail]` lookup in the
# cells below (region, levels, hour step, risk categories, variables) and as the
# suffix of the output directory name. Must be one of the keys of those dicts.
detail = 'slgt_full'

In [5]:
# --- risk days
# Select the days to download: time stamps in the labelled dataset whose MAX_CAT
# is one of the risk categories configured for `detail`.
pph = xr.load_dataset("data/raw_data/labelled_pph.nc")
# Time stamps that are removed from the selection further down.
# NOTE(review): presumably dates for which source data is unavailable — confirm.
missing_dates = [
    '200204250000', '200208300000', '200304150000', '200304160000',
    '200306250000', '200307270000', '200307280000', '200312280000',
    '200404140000', '200408090000', '200905280000', '201105210000',
    '202005240000', '200510240000'
]
# Keep the time values where MAX_CAT matches one of the configured risk levels.
dates_of_interest = pph["time"][pph["MAX_CAT"].isin(risk_level_dict[detail])]
# Drop everything up to and including "200203310000".
# NOTE(review): assumes pph["time"] holds "YYYYmmddHHMM" strings (as implied by the
# format passed to pd.to_datetime below), so this is a string comparison — confirm.
dates_of_interest = dates_of_interest[dates_of_interest > "200203310000"]
# Exclude the explicitly listed missing dates.
dates_of_interest = dates_of_interest[~(dates_of_interest.isin(missing_dates))]
# Parse to timestamps and normalize each one to midnight of its calendar day.
selected_days = pd.to_datetime(dates_of_interest.values, format="%Y%m%d%H%M").normalize()

# Distinct calendar years present in the selection; drives the outer download loop.
years = np.unique(selected_days.year)

In [6]:
# Destination directory for this configuration's downloads; created (with any
# missing parents) if it does not exist yet.
out_dir = Path("/glade/work/milesep") / f"era5_cds_{detail}"
out_dir.mkdir(parents=True, exist_ok=True)

# Paths of the pressure-level and single-level files handled by the download loop.
pl_files, sfc_files = [], []

# --- derive requested hours directly from thin factor
# "HH:00" strings from 00:00 onward, stepping by the configured hour step.
hours = ["%02d:00" % h for h in range(0, 24, time_thin_dict[detail])]

In [7]:
def safe_retrieve(dataset, request, target, max_retries=5, wait=30):
    """
    Robust CDSAPI download.

    - Downloads into a sibling ``.nc.part`` file first and moves it onto
      ``target`` only after the retrieval call returned without raising, so
      ``target`` never holds a half-written download.
    - On failure, deletes any partial file and retries after a linearly
      growing pause of ``wait * attempt`` seconds.

    Parameters
    ----------
    dataset : str
        Dataset name passed through to ``c.retrieve``.
    request : dict
        Request body passed through to ``c.retrieve``.
    target : str or pathlib.Path
        Final location of the downloaded file.
    max_retries : int, default 5
        Total number of download attempts; must be at least 1.
    wait : int or float, default 30
        Base pause in seconds; attempt ``k`` is followed by ``wait * k`` seconds.

    Returns
    -------
    pathlib.Path
        ``target`` (as a ``Path``) once the download succeeded.

    Raises
    ------
    ValueError
        If ``max_retries`` is smaller than 1 (nothing would be attempted).
    RuntimeError
        If every attempt failed; the last underlying error is attached as
        ``__cause__``.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be >= 1, got {max_retries}")

    target = Path(target)  # also accept plain string paths
    tmp_target = target.with_suffix(".nc.part")

    for attempt in range(1, max_retries + 1):
        try:
            c.retrieve(dataset, request, str(tmp_target))
            # replace() overwrites an existing target on every platform
            tmp_target.replace(target)
        except Exception as e:
            print(f"⚠️ Attempt {attempt} failed for {target}: {e}")
            if tmp_target.exists():
                tmp_target.unlink()  # clean up bad partials
            if attempt == max_retries:
                raise RuntimeError(
                    f"Failed to download {target} after {max_retries} attempts."
                ) from e
            sleep_time = wait * attempt
            print(f"Retrying in {sleep_time}s...")
            time.sleep(sleep_time)
        else:
            # Reported outside the try so a logging problem cannot trigger a re-download.
            print(f"✅ Downloaded: {target}")
            return target

In [8]:
# Bounding box for the requests: [lat start, lon start - 360, lat stop, lon stop - 360].
# Subtracting 360 turns the 0-360 style longitudes of lon_dict into negative values.
# NOTE(review): presumably this matches the CDS [North, West, South, East] order — confirm.
lat_bounds, lon_bounds = lat_dict[detail], lon_dict[detail]
area = [
    lat_bounds.start,
    lon_bounds.start - 360,
    lat_bounds.stop,
    lon_bounds.stop - 360,
]

# Pressure levels are sent as strings.
levels = list(map(str, levels_dict[detail]))

all_sfc_vars = surface_var_dict[detail]
possible_accum_vars = ['toa_incident_solar_radiation']

# Split the single-level variables into accumulated and instantaneous groups in
# one pass; 'geopotential_at_surface' is requested under the name 'geopotential'.
sfc_inst_vars = []
sfc_accum_vars = []
for var_name in all_sfc_vars:
    if var_name in possible_accum_vars:
        sfc_accum_vars.append(var_name)
    elif var_name == 'geopotential_at_surface':
        sfc_inst_vars.append('geopotential')
    else:
        sfc_inst_vars.append(var_name)


def chunk_list(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    Slices are yielded in order; only the last one may be shorter than ``n``.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

In [None]:
def _is_readable_netcdf(path):
    """Return True when xarray can open ``path``; False if opening raises."""
    try:
        xr.open_dataset(path).close()
    except Exception:
        return False
    return True


def _ensure_downloaded(dataset, request, target, collected, download_missing=True):
    """Reuse a readable existing ``target`` or download it, recording it in ``collected``.

    - Existing file that opens cleanly: reported as skipped and appended.
    - Existing file that fails to open: deleted, then treated as missing.
    - Missing file: fetched with ``safe_retrieve`` and appended, unless
      ``download_missing`` is False (nothing to request).
    """
    if target.exists():
        if _is_readable_netcdf(target):
            print(f"Skipping (exists): {target}")
            collected.append(target)
            return
        print(f"Corrupt file detected, redownloading: {target}")
        target.unlink()

    if download_missing:
        safe_retrieve(dataset, request, target)
        collected.append(target)


# Start from empty lists so that re-running this cell (e.g. to resume after an
# interruption) does not record every path a second time.
pl_files.clear()
sfc_files.clear()

# Number of days per pressure-level request; adjust if needed.
# NOTE(review): presumably chosen to keep each request within CDS size limits — confirm.
chunk_size = 10

for year in years:

    days_this_year = selected_days[selected_days.year == year]

    for month in sorted(set(days_this_year.month)):
        days_this_month = days_this_year[days_this_year.month == month]
        # unique, zero-padded day-of-month strings in ascending order
        days = sorted({f"{d.day:02d}" for d in days_this_month})
        month_str = f"{month:02d}"

        print(year, month, len(days_this_month))

        # ------------------ Pressure levels ------------------
        # One file per chunk of days; the chunk index (1-based) is part of the name.
        for i, day_chunk in enumerate(chunk_list(days, chunk_size), start=1):
            pl_file = out_dir / f"era5_pl_{year}_{month_str}_{i}.nc"
            _ensure_downloaded(
                "reanalysis-era5-pressure-levels",
                {
                    "product_type": "reanalysis",
                    "format": "netcdf",
                    "variable": pressure_var_dict[detail],
                    "pressure_level": levels,
                    "year": str(year),
                    "month": month_str,
                    "day": day_chunk,  # request only this chunk of days
                    "time": hours,
                    "area": area,
                },
                pl_file,
                pl_files,
            )

        # ------------------ Single levels: instantaneous, then accumulated ------------------
        # The two variable groups are requested separately, one file per month each.
        # NOTE(review): presumably because they cannot be delivered in one NetCDF file — confirm.
        for kind, variables in (("inst", sfc_inst_vars), ("accum", sfc_accum_vars)):
            sfc_file = out_dir / f"era5_sfc_{kind}_{year}_{month_str}.nc"
            _ensure_downloaded(
                "reanalysis-era5-single-levels",
                {
                    "product_type": "reanalysis",
                    "format": "netcdf",
                    "variable": variables,
                    "year": str(year),
                    "month": month_str,
                    "day": days,
                    "time": hours,
                    "area": area,
                },
                sfc_file,
                sfc_files,
                # with no variables in the group there is nothing to request
                download_missing=bool(variables),
            )

2002 4 23


2025-08-21 13:42:26,019 INFO Request ID is 1d205d67-4475-45db-af4c-4588c43122a2
2025-08-21 13:42:26,191 INFO status has been updated to accepted
2025-08-21 13:42:39,083 INFO status has been updated to running
2025-08-21 13:52:46,731 INFO status has been updated to successful


2dc41427d96700193c8dd34fdec2c09b.nc:   0%|          | 0.00/376M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_04_1.nc


2025-08-21 13:53:30,161 INFO Request ID is 4f761969-06cd-461c-937f-6dad6fd5fab2
2025-08-21 13:53:30,341 INFO status has been updated to accepted
2025-08-21 13:53:38,109 INFO status has been updated to running
2025-08-21 14:01:50,749 INFO status has been updated to successful


aa6387d1c7e13232b67a9137cfbaa786.nc:   0%|          | 0.00/376M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_04_2.nc


2025-08-21 14:07:30,417 INFO Request ID is c02cd512-b7a7-4b78-9c12-774634cd1746
2025-08-21 14:07:30,988 INFO status has been updated to accepted
2025-08-21 14:07:44,125 INFO status has been updated to running
2025-08-21 14:10:23,517 INFO status has been updated to successful


8a326e11ce00b04d286275e8731efaea.nc:   0%|          | 0.00/121M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_04_3.nc


2025-08-21 14:10:36,561 INFO Request ID is d8d3665d-9f23-4a87-9d3c-c9eb370ac803
2025-08-21 14:10:36,732 INFO status has been updated to accepted
2025-08-21 14:11:08,927 INFO status has been updated to running
2025-08-21 14:13:28,723 INFO status has been updated to successful


eccd6627f1521a0c6e5c9ba708b42f94.nc:   0%|          | 0.00/99.3M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2002_04.nc


2025-08-21 14:13:45,141 INFO Request ID is 9fc50e48-33b3-456c-9b8a-d93834c7edf6
2025-08-21 14:13:45,308 INFO status has been updated to accepted
2025-08-21 14:14:06,035 INFO status has been updated to running
2025-08-21 14:15:00,642 INFO status has been updated to successful


2b9f1f689a6cc7f641c2f35a9e36d1cc.nc:   0%|          | 0.00/8.44M [00:00<?, ?B/s]

2025-08-21 14:15:03,734 INFO Request ID is b0cc102d-0919-4da5-83b4-381a5435d574


✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_04.nc
2002 5 28


2025-08-21 14:15:03,935 INFO status has been updated to accepted
2025-08-21 14:15:16,795 INFO status has been updated to running
2025-08-21 14:23:23,880 INFO status has been updated to successful


a5bb88b5d9eacdb36b5b4242bcc73784.nc:   0%|          | 0.00/376M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_05_1.nc


2025-08-21 14:24:01,765 INFO Request ID is 0754ec46-af40-4dd1-bd52-5e4096bc3b31
2025-08-21 14:24:01,936 INFO status has been updated to accepted
2025-08-21 14:24:22,533 INFO status has been updated to running
2025-08-21 14:32:21,956 INFO status has been updated to successful


c40ae7c15087139ecd9d60d1eb956e7e.nc:   0%|          | 0.00/376M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_05_2.nc


2025-08-21 14:32:55,686 INFO Request ID is 65e3a4e0-1f90-4dd8-8c59-68c3176a162d
2025-08-21 14:32:55,839 INFO status has been updated to accepted
2025-08-21 14:33:08,738 INFO status has been updated to running
2025-08-21 14:39:15,527 INFO status has been updated to successful


73114d39310cca09572c431481c172d4.nc:   0%|          | 0.00/306M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_05_3.nc


2025-08-21 14:39:53,434 INFO Request ID is bbcff399-7bef-41e8-affa-b72f40f17352
2025-08-21 14:39:53,614 INFO status has been updated to accepted
2025-08-21 14:40:14,380 INFO status has been updated to running
2025-08-21 14:44:12,794 INFO status has been updated to successful


4e74c3c7fbcca4592c28578344d66cf5.nc:   0%|          | 0.00/121M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2002_05.nc


2025-08-21 14:44:46,231 INFO Request ID is ee757010-3097-4103-b4cf-89f2bb7f8173
2025-08-21 14:44:46,382 INFO status has been updated to accepted
2025-08-21 14:44:59,224 INFO status has been updated to running
2025-08-21 14:46:01,559 INFO status has been updated to successful


⚠️ Attempt 1 failed for /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_05.nc: Result not ready, job is running
Retrying in 30s...


2025-08-21 14:46:32,058 INFO Request ID is df5ee037-01d5-4836-8422-ae38ab37c5e1
2025-08-21 14:46:32,224 INFO status has been updated to accepted
2025-08-21 14:46:52,825 INFO status has been updated to successful


e8cedb0072ae56f17c0c7cf0d039ba5.nc:   0%|          | 0.00/10.7M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_05.nc
2002 6 30


2025-08-21 14:46:56,217 INFO Request ID is dacc4ff7-ea0b-4980-ac0b-1c62128eddc7
2025-08-21 14:46:56,373 INFO status has been updated to accepted
2025-08-21 14:47:09,263 INFO status has been updated to running
2025-08-21 14:55:16,353 INFO status has been updated to successful


a504719d349103e5a4da51702d066746.nc:   0%|          | 0.00/379M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_06_1.nc


2025-08-21 14:56:02,980 INFO Request ID is 8fed9ce8-2a05-4f44-b2e1-ef33ca92ed78
2025-08-21 14:56:03,171 INFO status has been updated to accepted
2025-08-21 14:56:16,162 INFO status has been updated to running
2025-08-21 15:04:24,596 INFO status has been updated to successful


fad2cadbcefc174b84cf424438a27843.nc:   0%|          | 0.00/380M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_06_2.nc


2025-08-21 15:05:18,116 INFO Request ID is 8ab2acd0-1b2e-44e3-b269-38dd4a4b0c85
2025-08-21 15:05:18,293 INFO status has been updated to accepted
2025-08-21 15:05:38,962 INFO status has been updated to running
2025-08-21 15:15:38,892 INFO status has been updated to successful


899d633a6a3f1c001fa83c25b891e809.nc:   0%|          | 0.00/378M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_06_3.nc


2025-08-21 15:17:06,165 INFO Request ID is 0082df54-d670-4b19-b4af-c239571ca820
2025-08-21 15:17:06,332 INFO status has been updated to accepted
2025-08-21 15:18:21,596 INFO status has been updated to running
2025-08-21 15:25:26,277 INFO status has been updated to successful


74a6fafaef02e21ee5919a810e4a3285.nc:   0%|          | 0.00/129M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2002_06.nc


2025-08-21 15:25:56,286 INFO Request ID is 6c75df69-e6eb-4d8b-bd57-2345c0052196
2025-08-21 15:25:56,462 INFO status has been updated to accepted
2025-08-21 15:26:09,326 INFO status has been updated to running
2025-08-21 15:26:17,093 INFO status has been updated to accepted
2025-08-21 15:26:28,660 INFO status has been updated to running
2025-08-21 15:27:50,328 INFO status has been updated to successful


817077a19097ec618d3cdcbe20b6cf4d.nc:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_06.nc
2002 7 30


2025-08-21 15:27:53,665 INFO Request ID is ca89cb89-415c-4339-bbc1-12f4a18555f4
2025-08-21 15:27:54,216 INFO status has been updated to accepted
2025-08-21 15:28:07,105 INFO status has been updated to running
2025-08-21 15:36:14,280 INFO status has been updated to successful


1ccbc3d73274d87d88739890f9ff1d02.nc:   0%|          | 0.00/378M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_07_1.nc


2025-08-21 15:38:24,687 INFO Request ID is f8fde1c6-3ac6-4eec-a1ad-14c9a7addfe0
2025-08-21 15:38:24,857 INFO status has been updated to accepted
2025-08-21 15:38:32,504 INFO status has been updated to running
2025-08-21 15:46:45,130 INFO status has been updated to successful


b3c95e70de34f608f1932b79d21ecc4d.nc:   0%|          | 0.00/375M [00:00<?, ?B/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

2025-08-21 16:09:45,926 INFO status has been updated to successful


af7a234347c5fb49364c3c4d0e0345af.nc:   0%|          | 0.00/378M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_08_1.nc


2025-08-21 16:10:17,364 INFO Request ID is 53ff0822-9057-42b1-8bcc-bd32bc98d70f
2025-08-21 16:10:17,535 INFO status has been updated to accepted
2025-08-21 16:10:32,625 INFO status has been updated to running
2025-08-21 16:18:39,661 INFO status has been updated to successful


7d562f73f4f2780f49028b11d8531cb.nc:   0%|          | 0.00/374M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_08_2.nc


2025-08-21 16:19:10,071 INFO Request ID is 44a105c7-5fc8-40b4-a03d-9488508583c7
2025-08-21 16:19:10,241 INFO status has been updated to accepted
2025-08-21 16:19:30,990 INFO status has been updated to running
2025-08-21 16:27:30,359 INFO status has been updated to successful


eb16f9e282a715b99c12f2d40b572e98.nc:   0%|          | 0.00/374M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_08_3.nc


2025-08-21 16:28:37,500 INFO Request ID is a5b342fa-16df-4151-8413-11e91422cc49
2025-08-21 16:28:37,666 INFO status has been updated to accepted
2025-08-21 16:28:45,347 INFO status has been updated to running
2025-08-21 16:32:56,715 INFO status has been updated to successful


d3ab6a4e3dba65f030428f850a8ca85c.nc:   0%|          | 0.00/129M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2002_08.nc


2025-08-21 16:33:48,337 INFO Request ID is 64d9bedd-a729-447d-831f-53f574d77a23
2025-08-21 16:33:48,515 INFO status has been updated to accepted
2025-08-21 16:34:09,197 INFO status has been updated to running
2025-08-21 16:35:42,440 INFO status has been updated to successful


bbf50318a194f174c333db739e37de4f.nc:   0%|          | 0.00/11.2M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_08.nc
2002 9 19


2025-08-21 16:35:45,963 INFO Request ID is caf7f715-2235-43bd-9ef8-f8707096ef13
2025-08-21 16:35:46,125 INFO status has been updated to accepted
2025-08-21 16:35:59,055 INFO status has been updated to running
2025-08-21 16:36:06,802 INFO status has been updated to accepted
2025-08-21 16:36:18,349 INFO status has been updated to running
2025-08-21 16:44:06,473 INFO status has been updated to successful


3d07f64ff6b2b6f53b61a5ad6f4d76c3.nc:   0%|          | 0.00/376M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_09_1.nc


2025-08-21 16:44:42,077 INFO Request ID is e60cc311-cc35-4377-adde-9acb61830de6
2025-08-21 16:44:42,235 INFO status has been updated to accepted
2025-08-21 16:44:50,013 INFO status has been updated to running
2025-08-21 16:53:02,473 INFO status has been updated to successful


3ae212c6814f676d94511ddb8be02c5e.nc:   0%|          | 0.00/342M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_09_2.nc


2025-08-21 16:54:35,057 INFO Request ID is c9f7c8aa-8d64-47e0-823a-9529f4515ac3
2025-08-21 16:54:35,253 INFO status has been updated to accepted
2025-08-21 16:54:48,780 INFO status has been updated to running
2025-08-21 16:57:27,951 INFO status has been updated to successful


9b192a09ac600e9cc4eec804f93a783f.nc:   0%|          | 0.00/82.0M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2002_09.nc


2025-08-21 16:57:57,205 INFO Request ID is bf7aa932-e766-4cf6-b44d-fa23a857fb2b
2025-08-21 16:57:57,732 INFO status has been updated to accepted
2025-08-21 16:58:11,085 INFO status has been updated to running
2025-08-21 16:59:14,317 INFO status has been updated to successful


6c874b66c9c294556efa24e35f8061ee.nc:   0%|          | 0.00/6.51M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_09.nc
2002 10 18


2025-08-21 16:59:17,583 INFO Request ID is a37a1081-4ae4-48df-991b-f9e1ea7f47b9
2025-08-21 16:59:17,771 INFO status has been updated to accepted
2025-08-21 16:59:25,680 INFO status has been updated to running
2025-08-21 17:09:39,007 INFO status has been updated to successful


40d5c4183a583820d294861519f88ee9.nc:   0%|          | 0.00/374M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_10_1.nc


2025-08-21 17:10:57,440 INFO Request ID is 747ad472-27ad-4272-bd2e-c1ef0c2951c3
2025-08-21 17:10:57,608 INFO status has been updated to accepted
2025-08-21 17:11:10,725 INFO status has been updated to running
2025-08-21 17:19:17,851 INFO status has been updated to successful


18c30822b1a9b95c509785559f95567f.nc:   0%|          | 0.00/302M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_10_2.nc


2025-08-21 17:20:15,893 INFO Request ID is cf77bd29-ca33-4627-9d4b-f224f4230729
2025-08-21 17:20:16,065 INFO status has been updated to accepted
2025-08-21 17:20:29,090 INFO status has been updated to running
2025-08-21 17:23:08,501 INFO status has been updated to successful


a451c9ff9db18cb3d91cf1d7c8f2834c.nc:   0%|          | 0.00/77.6M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2002_10.nc


2025-08-21 17:23:18,166 INFO Request ID is 88465317-3cc3-4223-b051-7ce2e0462c95
2025-08-21 17:23:18,330 INFO status has been updated to accepted
2025-08-21 17:23:31,152 INFO status has been updated to running
2025-08-21 17:24:34,017 INFO status has been updated to successful


5a8626cede10db57dda29010176adc96.nc:   0%|          | 0.00/5.78M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_10.nc
2002 11 9


2025-08-21 17:24:38,316 INFO Request ID is 64aeb476-32e2-4cb1-9736-d10db220a272
2025-08-21 17:24:38,510 INFO status has been updated to accepted
2025-08-21 17:24:46,307 INFO status has been updated to running
2025-08-21 17:32:59,038 INFO status has been updated to successful


1b560438c84090b2a75eecee8e4039d5.nc:   0%|          | 0.00/347M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_11_1.nc


2025-08-21 17:35:21,199 INFO Request ID is 41e0efc6-9849-4bd0-94f7-172bd83cd738
2025-08-21 17:35:21,372 INFO status has been updated to accepted
2025-08-21 17:35:34,252 INFO status has been updated to running
2025-08-21 17:37:15,543 INFO status has been updated to successful


c8f9834f9ba24d0b6565997cd29284c0.nc:   0%|          | 0.00/39.1M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2002_11.nc


2025-08-21 17:37:20,912 INFO Request ID is 1928a18b-2bff-402f-bab0-4df43ae1c7e1
2025-08-21 17:37:21,077 INFO status has been updated to accepted
2025-08-21 17:37:33,979 INFO status has been updated to running
2025-08-21 17:38:36,381 INFO status has been updated to successful


6513f767e4c632689df1969964de8385.nc:   0%|          | 0.00/2.75M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_11.nc
2002 12 16


2025-08-21 17:38:39,713 INFO Request ID is 83d4aad2-2fb2-4284-a9e3-3d95863da00f
2025-08-21 17:38:39,867 INFO status has been updated to accepted
2025-08-21 17:38:52,795 INFO status has been updated to running
2025-08-21 17:47:00,150 INFO status has been updated to successful


7652e6cc665449eeffe6d9beacd04b73.nc:   0%|          | 0.00/375M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_12_1.nc


2025-08-21 17:48:05,876 INFO Request ID is f8ce4f59-d273-4c6b-9409-b5a0b5cb0af4
2025-08-21 17:48:06,052 INFO status has been updated to accepted
2025-08-21 17:48:38,258 INFO status has been updated to running
2025-08-21 17:54:25,747 INFO status has been updated to successful


5e3966bc26928f905ddc214f201eb7c3.nc:   0%|          | 0.00/239M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2002_12_2.nc


2025-08-21 17:55:06,977 INFO Request ID is 7ff4690b-fd3f-4365-a522-3fec1ed10ad1
2025-08-21 17:55:07,146 INFO status has been updated to accepted
2025-08-21 17:55:27,699 INFO status has been updated to running
2025-08-21 17:57:59,054 INFO status has been updated to successful


3eba9e83ed489286fcad094163ab47e2.nc:   0%|          | 0.00/69.4M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2002_12.nc


2025-08-21 17:58:42,379 INFO Request ID is 9e600982-683e-43d7-a5f4-98fb59021f8b
2025-08-21 17:58:42,550 INFO status has been updated to accepted
2025-08-21 17:58:55,471 INFO status has been updated to running
2025-08-21 17:59:32,031 INFO status has been updated to successful


4beabd34a08157dc1bcf9a42f751e442.nc:   0%|          | 0.00/4.42M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2002_12.nc
2003 1 1


2025-08-21 17:59:35,468 INFO Request ID is c5027c79-1c7c-4c28-9fc5-2ee3041fbcd4
2025-08-21 17:59:35,627 INFO status has been updated to accepted
2025-08-21 17:59:48,510 INFO status has been updated to running
2025-08-21 18:00:50,896 INFO status has been updated to successful


8aba9f44766d1a9862d5b42284203c01.nc:   0%|          | 0.00/37.1M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2003_01_1.nc


2025-08-21 18:00:56,563 INFO Request ID is a0fd1911-5c76-4cd3-95e5-55fa19459eb2
2025-08-21 18:00:56,793 INFO status has been updated to accepted
2025-08-21 18:01:09,624 INFO status has been updated to running
2025-08-21 18:01:46,233 INFO status has been updated to successful


429e66959c110aade6d59ea35d86684b.nc:   0%|          | 0.00/4.48M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2003_01.nc


2025-08-21 18:01:49,402 INFO Request ID is 0d73c83f-54d2-43c6-9e4b-d984762b394e
2025-08-21 18:01:49,558 INFO status has been updated to accepted
2025-08-21 18:02:38,998 INFO status has been updated to running
2025-08-21 18:03:04,811 INFO status has been updated to successful


b7607304ddf3b2ea1b7746a2e32a2cfb.nc:   0%|          | 0.00/295k [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2003_01.nc
2003 2 11


2025-08-21 18:03:07,262 INFO Request ID is 44afbe27-4c4e-4e1e-a433-946cfb21763a
2025-08-21 18:03:07,465 INFO status has been updated to accepted
2025-08-21 18:03:20,332 INFO status has been updated to running
2025-08-21 18:11:27,443 INFO status has been updated to successful


c74f477e834ddab83eacc5aee5cb1338.nc:   0%|          | 0.00/373M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2003_02_1.nc


2025-08-21 18:12:21,914 INFO Request ID is 219170b2-2e36-4b28-bd00-f892896ff21c
2025-08-21 18:12:22,084 INFO status has been updated to accepted
2025-08-21 18:12:34,975 INFO status has been updated to running
2025-08-21 18:12:42,719 INFO status has been updated to accepted
2025-08-21 18:12:54,286 INFO status has been updated to running
2025-08-21 18:13:37,324 INFO status has been updated to successful


ba599876feb5157172d85aca9359725c.nc:   0%|          | 0.00/35.4M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2003_02_2.nc


2025-08-21 18:13:55,392 INFO Request ID is 6f8c9901-09ce-4ac6-8014-ffc245f4f407
2025-08-21 18:13:56,086 INFO status has been updated to accepted
2025-08-21 18:14:28,709 INFO status has been updated to running
2025-08-21 18:15:11,791 INFO status has been updated to successful


⚠️ Attempt 1 failed for /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2003_02.nc: Result not ready, job is running
Retrying in 30s...


2025-08-21 18:15:42,776 INFO Request ID is b0563db8-43ee-4e1c-9f16-6966b39d6caf
2025-08-21 18:15:42,937 INFO status has been updated to accepted
2025-08-21 18:16:15,125 INFO status has been updated to successful


be04aa6a11b7c59377dedb2a1d5fcabb.nc:   0%|          | 0.00/47.8M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2003_02.nc


2025-08-21 18:16:20,217 INFO Request ID is 09de3cb5-cdec-4809-8456-2b74ee43898d
2025-08-21 18:16:20,373 INFO status has been updated to accepted
2025-08-21 18:16:33,813 INFO status has been updated to running
2025-08-21 18:17:10,408 INFO status has been updated to successful


5c12e57277f669dd79fb638d5a7551ed.nc:   0%|          | 0.00/3.48M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2003_02.nc
2003 3 21


2025-08-21 18:17:13,157 INFO Request ID is 5e01b64a-57ee-46ab-b86c-0dab136292c6
2025-08-21 18:17:13,310 INFO status has been updated to accepted
2025-08-21 18:17:26,202 INFO status has been updated to running
2025-08-21 18:25:33,420 INFO status has been updated to successful


1df282651fe8c73129ede6b27e50e8bc.nc:   0%|          | 0.00/372M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2003_03_1.nc


2025-08-21 18:26:36,854 INFO Request ID is 1db8862d-087d-44d6-9d6a-13e546a1e2bf
2025-08-21 18:26:37,039 INFO status has been updated to accepted
2025-08-21 18:26:50,059 INFO status has been updated to running
2025-08-21 18:26:57,809 INFO status has been updated to accepted
2025-08-21 18:27:09,353 INFO status has been updated to running
2025-08-21 18:34:57,461 INFO status has been updated to successful


252822ac8fb6b8dc7f12a54ad6559191.nc:   0%|          | 0.00/378M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2003_03_2.nc


2025-08-21 18:35:29,609 INFO Request ID is 6d124f7b-2d16-473c-ae55-9e7eb64275ae
2025-08-21 18:35:29,784 INFO status has been updated to accepted
2025-08-21 18:35:50,388 INFO status has been updated to running
2025-08-21 18:37:23,659 INFO status has been updated to successful


3d9b698e2df6ef599d6984e4212e05a6.nc:   0%|          | 0.00/36.8M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2003_03_3.nc


2025-08-21 18:37:28,441 INFO Request ID is 334bad6b-73f6-4499-87f9-568e86511522
2025-08-21 18:37:28,706 INFO status has been updated to accepted
2025-08-21 18:37:41,620 INFO status has been updated to running
2025-08-21 18:40:20,883 INFO status has been updated to successful


6f37ed267a78663b920b37172f27c347.nc:   0%|          | 0.00/91.4M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2003_03.nc


2025-08-21 18:40:59,225 INFO Request ID is 7f336c84-6d1c-4763-a6df-96ea224fedcb
2025-08-21 18:40:59,395 INFO status has been updated to accepted
2025-08-21 18:41:31,587 INFO status has been updated to running
2025-08-21 18:42:14,817 INFO status has been updated to successful


36e7d9eee6f626b25ce73bb928a30b0d.nc:   0%|          | 0.00/7.04M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_accum_2003_03.nc
2003 4 22


2025-08-21 18:42:17,786 INFO Request ID is d86f2150-2b3c-4875-92b3-8c31f5bfeccc
2025-08-21 18:42:17,956 INFO status has been updated to accepted
2025-08-21 18:42:30,798 INFO status has been updated to running
2025-08-21 18:50:38,071 INFO status has been updated to successful


d778b52ce2e790f624c71c9794699daf.nc:   0%|          | 0.00/374M [00:00<?, ?B/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

2025-08-22 10:18:10,617 INFO status has been updated to successful


b07b55f2dac0d79e890c64677adb412d.nc:   0%|          | 0.00/376M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2005_07_1.nc


2025-08-22 10:18:36,028 INFO Request ID is 51276f4c-d3cb-4cac-bada-bee80b323895
2025-08-22 10:18:36,184 INFO status has been updated to accepted
2025-08-22 10:36:59,836 INFO status has been updated to running
2025-08-22 10:45:01,749 INFO status has been updated to successful


a71e5eafb78e11f3763e366d2d822850.nc:   0%|          | 0.00/374M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2005_07_2.nc


2025-08-22 10:45:53,874 INFO Request ID is 1a97a4e8-048c-4afc-a334-1e641a3f5301
2025-08-22 10:45:54,241 INFO status has been updated to accepted
2025-08-22 11:12:19,614 INFO status has been updated to running
2025-08-22 11:18:21,030 INFO status has been updated to successful


491d9e929c9106c6d12b85dd0767d451.nc:   0%|          | 0.00/346M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_pl_2005_07_3.nc


2025-08-22 11:19:40,473 INFO Request ID is 523a81fe-ef6b-4786-ba00-45491ee53463
2025-08-22 11:19:40,738 INFO status has been updated to accepted
2025-08-22 11:40:03,656 INFO status has been updated to running
2025-08-22 11:44:04,882 INFO status has been updated to successful


ba64e965d41458ee9217ecae45a587e6.nc:   0%|          | 0.00/125M [00:00<?, ?B/s]

✅ Downloaded: /glade/work/milesep/era5_cds_slgt_full/era5_sfc_inst_2005_07.nc


2025-08-22 11:45:02,608 INFO Request ID is 0d36f1bd-874f-4477-aeb0-9b4341bff25d
2025-08-22 11:45:02,787 INFO status has been updated to accepted
2025-08-22 11:53:23,455 INFO status has been updated to running


In [13]:
# Lazily open every pressure-level file as one dataset, aligned by coordinates.
pl = xr.open_mfdataset(pl_files, combine="by_coords")

# Same for the surface files; "z" is renamed to "z_sfc" before merging
# (presumably so it stays distinct from a pressure-level "z" -- confirm
# against the downloaded variable names).
sfc = xr.open_mfdataset(sfc_files, combine="by_coords")
sfc = sfc.rename({"z": "z_sfc"})

ds = xr.merge([pl, sfc])

# Files that carry a "valid_time" axis get it renamed to "time"; in that same
# case the auxiliary "number"/"expver" variables are dropped when present.
if "valid_time" in ds:
    ds = ds.rename({"valid_time": "time"}).drop_vars(
        ["number", "expver"], errors="ignore"
    )

In [14]:
# Keep only timestamps whose calendar day is in `selected_days`, then give the
# vertical axis the shorter name "level".
time_days = ds.time.dt.floor("D")
ds = ds.sel(time=ds.time[np.isin(time_days, selected_days)]).rename(
    {"pressure_level": "level"}
)

# Same stride is applied along both horizontal axes.
spatial_stride = space_thin_dict[detail]

ds = (
    # Split the single time axis into (day, tod): label every timestamp with
    # its day and hour-of-day, index time by that pair, and unstack it.
    ds.assign_coords(day=ds.time.dt.floor("D"), tod=ds.time.dt.hour)
    .set_index(time=["day", "tod"])
    .unstack("time")
    # Remove any leftover "time" variable (no error if there is none).
    .drop_vars(["time"], errors="ignore")
    # Keep every `spatial_stride`-th grid point in latitude and longitude.
    .thin({"latitude": spatial_stride, "longitude": spatial_stride})
    # Rechunk so that the day axis has uniform chunks of 30.
    .chunk({"day": 30})
)
ds

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 484.52 MiB 548.44 kiB Shape (5, 26, 60, 4071, 4) (3, 13, 30, 30, 4) Dask graph 1088 chunks in 606 graph layers Data type float32 numpy.ndarray",26  5  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 484.52 MiB 548.44 kiB Shape (5, 26, 60, 4071, 4) (3, 13, 30, 30, 4) Dask graph 1088 chunks in 606 graph layers Data type float32 numpy.ndarray",26  5  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 484.52 MiB 548.44 kiB Shape (5, 26, 60, 4071, 4) (3, 13, 30, 30, 4) Dask graph 1088 chunks in 606 graph layers Data type float32 numpy.ndarray",26  5  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 484.52 MiB 548.44 kiB Shape (5, 26, 60, 4071, 4) (3, 13, 30, 30, 4) Dask graph 1088 chunks in 606 graph layers Data type float32 numpy.ndarray",26  5  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 484.52 MiB 548.44 kiB Shape (5, 26, 60, 4071, 4) (3, 13, 30, 30, 4) Dask graph 1088 chunks in 606 graph layers Data type float32 numpy.ndarray",26  5  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 484.52 MiB 548.44 kiB Shape (5, 26, 60, 4071, 4) (3, 13, 30, 30, 4) Dask graph 1088 chunks in 606 graph layers Data type float32 numpy.ndarray",26  5  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 484.52 MiB 548.44 kiB Shape (5, 26, 60, 4071, 4) (3, 13, 30, 30, 4) Dask graph 1088 chunks in 606 graph layers Data type float32 numpy.ndarray",26  5  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,484.52 MiB,548.44 kiB
Shape,"(5, 26, 60, 4071, 4)","(3, 13, 30, 30, 4)"
Dask graph,1088 chunks in 606 graph layers,1088 chunks in 606 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 96.90 MiB 731.25 kiB Shape (26, 60, 4071, 4) (26, 60, 30, 4) Dask graph 136 chunks in 524 graph layers Data type float32 numpy.ndarray",26  1  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 96.90 MiB 731.25 kiB Shape (26, 60, 4071, 4) (26, 60, 30, 4) Dask graph 136 chunks in 524 graph layers Data type float32 numpy.ndarray",26  1  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 96.90 MiB 731.25 kiB Shape (26, 60, 4071, 4) (26, 60, 30, 4) Dask graph 136 chunks in 524 graph layers Data type float32 numpy.ndarray",26  1  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 96.90 MiB 731.25 kiB Shape (26, 60, 4071, 4) (26, 60, 30, 4) Dask graph 136 chunks in 524 graph layers Data type float32 numpy.ndarray",26  1  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 96.90 MiB 731.25 kiB Shape (26, 60, 4071, 4) (26, 60, 30, 4) Dask graph 136 chunks in 524 graph layers Data type float32 numpy.ndarray",26  1  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 96.90 MiB 731.25 kiB Shape (26, 60, 4071, 4) (26, 60, 30, 4) Dask graph 136 chunks in 524 graph layers Data type float32 numpy.ndarray",26  1  4  4071  60,

Unnamed: 0,Array,Chunk
Bytes,96.90 MiB,731.25 kiB
Shape,"(26, 60, 4071, 4)","(26, 60, 30, 4)"
Dask graph,136 chunks in 524 graph layers,136 chunks in 524 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [17]:
# Write the dataset to a Zarr store named after the active `detail` setting.
# mode="w" replaces any existing store at that path; consolidated metadata
# is requested as well.
zarr_store_path = f"/glade/work/milesep/convective_outlook_ml/inputs_raw_{detail}_cds.zarr"
ds.to_zarr(zarr_store_path, mode="w", consolidated=True)

<xarray.backends.zarr.ZarrStore at 0x14fd3e5957c0>

In [16]:
def estimate_dataset_size_bytes(ds):
    """Return the summed byte size of every data variable in ``ds``.

    Parameters
    ----------
    ds
        Object exposing ``data_vars`` as a mapping of variables. Each
        variable must provide ``chunks``, ``shape`` and ``dtype``, plus
        ``nbytes`` when ``chunks`` is not ``None``.

    Returns
    -------
    int
        Sum of the per-variable sizes in bytes; ``0`` when there are no
        data variables.
    """

    def _variable_bytes(variable):
        # Variables without chunk information: size = element count * item size.
        if variable.chunks is None:
            # Accumulate the product in int64 so large shapes do not overflow.
            element_count = np.prod(variable.shape, dtype=np.int64)
            item_bytes = np.dtype(variable.dtype).itemsize
            return int(element_count * item_bytes)
        # Chunked variables: use the size the variable reports itself.
        return variable.nbytes

    return sum(_variable_bytes(variable) for variable in ds.data_vars.values())


# Sum the byte sizes of all data variables in `ds` and report the total in
# decimal gigabytes (bytes / 1e9), rounded to two decimals.
size_bytes = estimate_dataset_size_bytes(ds)
print(f"Estimated uncompressed size: {size_bytes / 1e9:.2f} GB")

Estimated uncompressed size: 4.17 GB
