In [None]:
import intake
import easygems.healpix as egh
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib as mpl
import cartopy.crs as ccrs
from datetime import datetime
import glob
import logging
import os
import pathlib
import dask
# Turn on dask's 'array.slicing.split_large_chunks' option. It is set as a plain
# statement rather than inside a `with` block, so it is not scoped to a single cell.
# NOTE(review): presumably needed for the random fancy-index sampling below — confirm.
dask.config.set(**{'array.slicing.split_large_chunks': True})
import global3d_track as g3d
# Short alias for the `scripts.src` namespace of global3d_track.
src = g3d.scripts.src

# Output locations. `outdir` receives the NetCDF file written at the end of the
# notebook; `savedir` is presumably meant for figures (not used in the cells shown).
savedir = pathlib.Path('../figures')
outdir = pathlib.Path('/work/bb1153/b382635/plots/tracked_results_2025/dataset_paper/results_data/acp_submission/')
# Create BOTH directories up front: previously only `savedir` was created, so the
# final `to_netcdf(outdir / ...)` would fail whenever `outdir` did not exist yet.
os.makedirs(savedir, exist_ok=True)
os.makedirs(outdir, exist_ok=True)

In [None]:
# Load the ICON run `ngc4008a` from the nextGEMS catalog (time="PT15M", zoom=9)
# and restrict it to the 2021-07-01 .. 2021-07-08 time window.
logging.info(f"{datetime.now()} loading model data")
cat = intake.open_catalog("https://data.nextgems-h2020.eu/catalog.yaml")
globaldata = (
    cat.ICON.ngc4008a(time="PT15M", zoom=9)
    .to_dask()
    .sel(time=slice('20210701', '20210708'))
)

# Region selection: each entry is (lon_min, lon_max, lat_min, lat_max).
domain_extents = {"amazon": (277, 317, -15, 15)}


def cells_of_domain(ds, domain_name):
    """Return the cells of ``ds`` that lie strictly inside a named bounding box.

    Parameters
    ----------
    ds
        Object exposing ``cell``, ``lon`` and ``lat`` along the ``cell``
        dimension (here: the dataset after coordinates have been attached).
    domain_name
        Key into the module-level ``domain_extents`` mapping.

    Returns
    -------
    ``ds.cell`` with every entry outside the box masked and then dropped
    along ``'cell'``. All four bounds are exclusive.

    Raises
    ------
    KeyError
        If ``domain_name`` is not a key of ``domain_extents``.
    """
    west, east, south, north = domain_extents[domain_name]
    inside_lon = (ds.lon > west) & (ds.lon < east)
    inside_lat = (ds.lat > south) & (ds.lat < north)
    # One combined mask instead of four successive maskings; same cells survive.
    masked_cells = ds.cell.where(inside_lon & inside_lat)
    return masked_cells.dropna('cell')

# Attach coordinates once and reuse the result for both the domain mask and the
# final selection.
globaldata_coords = globaldata.pipe(egh.attach_coords)
c = cells_of_domain(globaldata_coords, domain_name='amazon')
# The selected cells are cast to int before being used as `cell` labels.
data = globaldata_coords.sel(cell=c.astype(int))

In [None]:
# random sample
# A seeded generator makes the draw, and therefore the profile saved below,
# reproducible across "Restart & Run All"; the original used the unseeded
# global NumPy RNG.
SAMPLE_SEED = 42
N_SAMPLE_CELLS = 100000
N_SAMPLE_TIMES = 100
sample_rng = np.random.default_rng(SAMPLE_SEED)
# Indices are drawn with replacement and independently along each dimension
# (upper bound exclusive, same as the previous np.random.randint call).
cell_idx = sample_rng.integers(0, data.cell.size, N_SAMPLE_CELLS)
time_idx = sample_rng.integers(0, data.time.size, N_SAMPLE_TIMES)
data_sample = data[['ta','zg']].isel(cell=cell_idx, time=time_idx)
ta = data_sample.ta
# Average zg over the sampled cells only.
z_prof = data_sample.zg.mean(('cell'))

In [None]:
# Collapse the sampled temperatures over time and cell into mean / min / max,
# add the mean height, and write the result to `outdir`.
sample_dims = ('time', 'cell')
profile_vars = {
    'ta_mean': ta.mean(sample_dims),
    'ta_min': ta.min(sample_dims),
    'ta_max': ta.max(sample_dims),
    'z_mean': z_prof / 1000,  # to km
}
temperature_profile = xr.Dataset(profile_vars)
temperature_profile.to_netcdf(outdir / "domain_mean_temperature_profile.nc")