# Download variables for Snapshot retrieval with TempestExtremes
By Stella Bourdin

The corresponding script, `pre-processing.py`, makes it easier to run this over several variables.

In [1]:
import os, intake, datetime
import xarray as xr
import numpy as np
from tqdm import tqdm

In [2]:
# Script parameters.
# Name of the variable to download (used to index the loaded dataset).
var = "pr"
# Zoom level: pick the one closest to the model's actual resolution
# and/or the highest zoom available.
zoom = 9

In [3]:
# Open the hackathon catalog and keep its 'UK' entry
catalog_url = 'https://digital-earths-global-hackathon.github.io/catalog/catalog.yaml'
cat = intake.open_catalog(catalog_url)['UK']
# Show the names of the simulations available under that entry
list(cat.keys())

['CERES_EBAF',
 'ERA5',
 'IR_IMERG',
 'JRA3Q',
 'MERRA2',
 'arp-gem-1p3km',
 'arp-gem-2p6km',
 'casesm2_10km_nocumulus',
 'icon_d3hp003',
 'icon_d3hp003aug',
 'icon_d3hp003feb',
 'icon_ngc4008',
 'ifs_tco3999-ng5_deepoff',
 'ifs_tco3999-ng5_rcbmf',
 'ifs_tco3999-ng5_rcbmf_cf',
 'ifs_tco3999_rcbmf',
 'nicam_220m_test',
 'nicam_gl11',
 'scream-dkrz',
 'tracking-d3hp003',
 'um_Africa_km4p4_RAL3P3_n1280_GAL9_nest',
 'um_CTC_km4p4_RAL3P3_n1280_GAL9_nest',
 'um_SAmer_km4p4_RAL3P3_n1280_GAL9_nest',
 'um_SEA_km4p4_RAL3P3_n1280_GAL9_nest',
 'um_glm_n1280_CoMA9_TBv1p2',
 'um_glm_n1280_GAL9',
 'um_glm_n2560_RAL3p3']

In [4]:
# Define the run you want to track, run-specific info + working directories
run = 'um_glm_n1280_GAL9'
## run-specific info
# Select the time name for the dataset containing the variable you are looking for
time_name = 'PT1H'

# Working directories: <scr_dir>/<run>/data_healpix/
scr_dir = '/work/scratch-nopw2/sbourdin/'
run_dir = os.path.join(scr_dir, run)
# The trailing slash is kept so that plain string concatenation onto
# output_dir still yields a valid file path.
output_dir = os.path.join(run_dir, 'data_healpix/')
# One call is enough: makedirs creates run_dir as an intermediate directory,
# and exist_ok=True makes it a no-op when the directories are already there
# (no separate isdir check, hence no check-then-create race).
os.makedirs(output_dir, exist_ok=True)

In [5]:
# Load data: open the selected run with the chosen zoom and time settings,
# then keep only the variable of interest
entry = cat[run](zoom=zoom, time=time_name)
ds = entry.to_dask()[var]

  _set_context_ca_bundle_path(ca_bundle_path)
  'dims': dict(self._ds.dims),


In [6]:
# Keep only the time steps whose hour is a multiple of 6 (00, 06, 12, 18)
on_six_hourly = ds.time.dt.hour % 6 == 0
ds = ds.isel(time=on_six_hourly)

In [7]:
# Remove the "bounds" and "regional" attributes when they are present.
# NOTE(review): presumably these get in the way of writing the netCDF files
# below -- confirm.
for attr_name in ("bounds", "regional"):
    ds.attrs.pop(attr_name, None)

In [8]:
# Save one file per month
# Month labels ("YYYY-MM") are the first 7 characters of each time stamp.
mth_list = np.unique(ds.time.astype(str).str.slice(0,7))
for mth in tqdm(mth_list):
    fname = os.path.join(output_dir, f"{mth}_{var}_zoom_{zoom}.nc")
    if os.path.exists(fname):
        print(mth, "File already exists")
        continue
    # Write under a temporary name and move it into place only once the write
    # has finished: an interrupted run then never leaves a truncated file that
    # the existence check above would skip on the next run.
    tmp_fname = fname + ".tmp"
    ds.sel(time=mth).to_netcdf(tmp_fname)
    os.replace(tmp_fname, fname)

100%|██████████| 16/16 [04:10<00:00, 15.64s/it]
