# Simple pre-processing of HEALPix output for cyclone tracking with TempestExtremes

Modified from `TE_pre-process.ipynb` specifically for preparing files for NodeFileCompose.

In [1]:
import os, intake, datetime
import xarray as xr
import numpy as np
import easygems.healpix as egh
import healpix as hp
from tqdm import tqdm

  _set_context_ca_bundle_path(ca_bundle_path)


In [2]:
# Select simulation and make an output directory (this may need to be on a group workspace or on scratch, rather than ~/)
run = 'um_glm_n1280_GAL9'
#run="um_glm_n2560_RAL3p3"

# NOTE: `type` shadows the Python builtin of the same name; the name is kept
# because the later cells read it to choose between the 2D and 3D workflows.
type="2D"
#type="3D"

working_dir = '/work/scratch-nopw2/sbourdin/'
output_dir = os.path.join(working_dir,run,'data_pp/')
# exist_ok=True creates the directory only if it is missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(output_dir, exist_ok=True)

In [3]:
# Open the intake catalog at the URL below and keep its 'online' entry
catalog_url = 'https://digital-earths-global-hackathon.github.io/catalog/catalog.yaml'
cat = intake.open_catalog(catalog_url)['online']

In [4]:
# Select zoom level, variables and levels
if run == 'um_glm_n1280_GAL9':
    zoom=9
elif run == "um_glm_n2560_RAL3p3":
    zoom=10
else:
    # Fail here with a clear message instead of a NameError on `zoom` below
    raise ValueError("Please define your run's output grid (unknown run: {!r})".format(run))

if type == "2D":
    variables_1h_2d = ['uas', 'vas', 'pr', 'psl']   # TempestExtremes tracking is psl-based, with surface wind maxima added to tracks
    ds_1h_2d = cat[run](zoom=zoom, time='PT1H').to_dask()
elif type == "3D":
    variables_3h_3d = ['zg']   # zg is used by TempestExtremes for warm-core detection and (later, optionally) computing cyclone phase-space parameters (see Stella Bourdin's code)
    plevc = np.array([925,500,250])   # pressure levels selected from zg when writing the 3D files
    ds_3h_3d = cat[run](zoom=zoom, time='PT3H').to_dask()
else:
    # Without this branch no dataset is opened and later cells fail with a NameError
    raise ValueError("type must be '2D' or '3D' (got {!r})".format(type))

  'dims': dict(self._ds.dims),


In [5]:
# Set up grid
# Look up, for every point of a regular lat-lon grid, the index of the matching HEALPix pixel.
# The grid has 3600 x 1800 points (roughly 0.1 degree) for the n1280 run and
# 7200 x 3600 points (roughly 0.05 degree) for the n2560 run. np.linspace includes
# both end points, so lon=0 and lon=360 are both present and the spacing is
# 360/(n-1) rather than exactly 360/n.
# N.B. A 1x1 has lines at lon=90, 180, 270 (need to avoid these).
# NOTE(review): with the end-point-inclusive spacing no interior grid point falls
# exactly on lon=90, 180 or 270 - confirm this is intended before changing the grid.

if run == 'um_glm_n1280_GAL9':
    lon = np.linspace(0, 360, 3600)
    lat = np.linspace(90, -90, 1800)
elif run == "um_glm_n2560_RAL3p3":
    lon = np.linspace(0, 360, 3600*2)
    lat = np.linspace(90, -90, 1800*2)
else:
    # Fail here with a clear message instead of a NameError on `lon` below
    raise ValueError("Please define your run's output grid (unknown run: {!r})".format(run))

# Read nside from whichever dataset was opened for the selected mode;
# ds_1h_2d only exists when type == "2D".
if type == "2D":
    ds_grid = ds_1h_2d
elif type == "3D":
    ds_grid = ds_3h_3d
else:
    raise ValueError("type must be '2D' or '3D' (got {!r})".format(type))

# pix[lat, lon] holds the nested-ordering HEALPix cell index for each grid point;
# it is used later as a 2D indexer along the `cell` dimension.
pix = xr.DataArray(
    hp.ang2pix(ds_grid.crs.healpix_nside, *np.meshgrid(lon, lat), nest=True, lonlat=True),
    coords=(("lat", lat), ("lon", lon)))

In [7]:
# Loop over dataset, saving a file for each timestep (these can be combined later using "cdo cat...", if preferred)

# Pick the dataset, variable list, 6-hourly stride and extra selection for the chosen mode
if type == "2D":
    ds_out = ds_1h_2d
    variables_out = variables_1h_2d
    stride = 6          # hourly data: every 6th step is 6 hourly
    extra_sel = {}
elif type == "3D":
    ds_out = ds_3h_3d
    variables_out = variables_3h_3d
    stride = 2          # 3 hourly data: every 2nd step is 6 hourly
    extra_sel = {"pressure": plevc}
else:
    raise ValueError("type must be '2D' or '3D' (got {!r})".format(type))

# Report the period covered, read from the dataset that is actually exported.
# NOTE(review): the end date is taken from the second-to-last time step, as in
# the original cell - confirm that skipping the last step is intended.
date_start = str(ds_out.time[0].to_numpy())[:10]
date_end = str(ds_out.time[-2].to_numpy())[:10]
print('date start: {}'.format(date_start))
print('date end: {}'.format(date_end))

for var in variables_out:
    print(var)
    for t in tqdm(ds_out.time.values[::stride]):   # select 6 hourly data
        # File name: <run>_<var>_<YYYY-MM-DDTHH>.nc
        output_fn = '_'.join([run,var,str(t)[:13]])+'.nc'
        output_ffp = os.path.join(output_dir,output_fn)
        if os.path.exists(output_ffp):
            continue   # already written by a previous run
        # Select one time step (and the pressure levels in 3D mode), then
        # remap onto the lat-lon grid by indexing `cell` with the 2D pix array
        d = ds_out[var].sel(time=t, **extra_sel).isel(cell=pix)
        # Write under a temporary name and rename once complete, so that an
        # interrupted write never leaves a partial file that the exists-check
        # above would skip on the next run.
        tmp_ffp = output_ffp + '.tmp'
        # expand_dims turns the scalar time coordinate back into a dimension,
        # so both 2D and 3D files carry a time axis.
        d.expand_dims("time").to_netcdf(tmp_ffp)
        os.replace(tmp_ffp, output_ffp)

date start: 2020-01-20
date end: 2021-03-31
uas


100%|██████████| 1749/1749 [00:00<00:00, 3749.49it/s]


vas


100%|██████████| 1749/1749 [00:00<00:00, 4704.18it/s]


pr


100%|██████████| 1749/1749 [00:00<00:00, 4934.00it/s]


psl


100%|██████████| 1749/1749 [00:00<00:00, 4899.06it/s]
