### Imports

In [1]:
import glob
import numpy as np
import xarray as xr
import pandas as pd
import metpy.calc

In [2]:
from dask_jobqueue import PBSCluster
from distributed import Client
from dask import delayed

### File paths

In [3]:
# Root of the ERA5 pressure-level analysis archive; files are looked up in
# per-month subdirectories beneath this path.
era5_path = "/glade/campaign/collections/rda/data/d633000/e5.oper.an.pl"

# Directory holding the 449 MHz profiler wind files (one NetCDF file per glob match).
prof449_path = (
    "/glade/campaign/collections/gdex/data/special_projects/pythia_2025/"
    "eol-cookbook/m2hats_iss2_data/prof449Mhz_30min_winds"
)

# Scratch space used for dask worker spill and log directories.
# NOTE(review): this path is user-specific; change it before running as another user.
lustre_scratch = "/lustre/desc1/scratch/myasears"

### Spin up a cluster

In [None]:
# Describe a single PBS job: one core, one worker process, 4 GiB of memory.
# Worker spill files and job logs are kept under the scratch directory.
cluster = PBSCluster(
    job_name='dask-eol-25',
    cores=1,
    memory='4GiB',
    processes=1,
    local_directory=f'{lustre_scratch}/dask/spill',
    log_directory=f'{lustre_scratch}/dask/logs/',
    resource_spec='select=1:ncpus=1:mem=4GB',
    queue='casper',
    walltime='3:00:00',
    interface='ext',
)

In [5]:
# Attach a distributed Client to the PBS cluster defined above; `client` is
# used below to wait for the requested workers to come online.
client = Client(cluster)

In [6]:
# Request the workers, block until every one of them has connected, then
# display the cluster widget (it includes the dashboard URL).
n_workers = 5
cluster.scale(n_workers)
client.wait_for_workers(n_workers=n_workers)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/myasears/proxy/41819/status,Workers: 5
Total threads: 5,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.94:34311,Workers: 5
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/myasears/proxy/41819/status,Total threads: 5
Started: 1 minute ago,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.175:36751,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/myasears/proxy/39127/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.175:36399,
Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-0wj53hsp,Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-0wj53hsp
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.87 MiB,Spilled bytes: 0 B
Read bytes: 19.10 MiB,Write bytes: 12.83 MiB

0,1
Comm: tcp://128.117.208.175:36065,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/myasears/proxy/40965/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.175:42941,
Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-6xn1w6v2,Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-6xn1w6v2
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.94 MiB,Spilled bytes: 0 B
Read bytes: 14.01 MiB,Write bytes: 9.58 MiB

0,1
Comm: tcp://128.117.208.175:40815,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/myasears/proxy/43697/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.175:46575,
Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-etho2tjx,Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-etho2tjx
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.94 MiB,Spilled bytes: 0 B
Read bytes: 3.67 MiB,Write bytes: 2.34 MiB

0,1
Comm: tcp://128.117.208.173:34931,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/myasears/proxy/46115/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.173:34631,
Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-0y2urlim,Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-0y2urlim
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 77.98 MiB,Spilled bytes: 0 B
Read bytes: 165.79 MiB,Write bytes: 216.87 MiB

0,1
Comm: tcp://128.117.208.175:40603,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/myasears/proxy/34509/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.175:39939,
Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-6ty7449_,Local directory: /lustre/desc1/scratch/myasears/dask/spill/dask-scratch-space/worker-6ty7449_
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.97 MiB,Spilled bytes: 0 B
Read bytes: 12.21 MiB,Write bytes: 9.99 MiB


### Load ERA5 data

In [10]:
# Grid point extracted from ERA5.
# NOTE(review): 243.0 assumes the ERA5 longitude axis runs 0-360 degrees east
# (i.e. 117 W) -- confirm against the files.
target_lat = 38.0
target_lon = 243.0

# Campaign period (inclusive) used to slice the ERA5 time axis.
start_date = pd.Timestamp("2023-07-11T00:00:00")
end_date = pd.Timestamp("2023-09-27T23:59:59")

# Month subdirectories ("YYYYMM") to search. Derived from the campaign period
# so the list can never drift out of sync with start_date / end_date.
yyyymm = pd.period_range(start_date, end_date, freq="M").strftime("%Y%m").tolist()

# Data-variable name inside the dataset -> file-name prefix used to find its files.
var_map = {
    "Z": "e5.oper.an.pl.128_129_z",
    "U": "e5.oper.an.pl.128_131_u",
    "V": "e5.oper.an.pl.128_132_v",
    "W": "e5.oper.an.pl.128_135_w",
}

In [11]:
def open_variable(varname, file_prefix, yyyymm):
    """Open one ERA5 variable lazily and reduce it to the target point and period.

    Reads the module-level ``era5_path``, ``target_lat``, ``target_lon``,
    ``start_date`` and ``end_date``.

    Parameters
    ----------
    varname : str
        Name of the data variable to extract from the opened dataset.
    file_prefix : str
        Leading part of the file names to match inside each month directory.
    yyyymm : iterable of str
        Month subdirectory names under ``era5_path`` to search.

    Returns
    -------
    xarray.DataArray
        ``varname`` selected at the target latitude/longitude and sliced to
        the campaign period.

    Raises
    ------
    FileNotFoundError
        If no file matches ``file_prefix`` in any of the requested months.
    """
    months = list(yyyymm)

    # Files are sorted within each month; months keep the order they were given in.
    files = [
        path
        for month in months
        for path in sorted(glob.glob(f'{era5_path}/{month}/{file_prefix}*'))
    ]

    # Fail early with the searched location instead of a generic open error.
    if not files:
        raise FileNotFoundError(
            f"No ERA5 files matching '{file_prefix}*' found under "
            f"{era5_path} for months {months}"
        )

    ds = xr.open_mfdataset(files, combine="by_coords", parallel=True)

    # Subset to single lat/lon point and campaign time slice.
    # The lat/lon selection is an exact label match, so the target must lie on the grid.
    ds_point = ds[varname].sel(
        latitude=target_lat,
        longitude=target_lon,
        time=slice(start_date, end_date),
    )

    return ds_point

In [12]:
# One lazily-opened point series per ERA5 variable listed in var_map
points = [
    open_variable(varname, file_prefix, yyyymm)
    for varname, file_prefix in var_map.items()
]

# Merge them together
combined_era5 = xr.merge(points)
combined_era5

Unnamed: 0,Array,Chunk
Bytes,274.03 kiB,148 B
Shape,"(1896, 37)","(1, 37)"
Dask graph,1896 chunks in 186 graph layers,1896 chunks in 186 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 274.03 kiB 148 B Shape (1896, 37) (1, 37) Dask graph 1896 chunks in 186 graph layers Data type float32 numpy.ndarray",37  1896,

Unnamed: 0,Array,Chunk
Bytes,274.03 kiB,148 B
Shape,"(1896, 37)","(1, 37)"
Dask graph,1896 chunks in 186 graph layers,1896 chunks in 186 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,274.03 kiB,148 B
Shape,"(1896, 37)","(1, 37)"
Dask graph,1896 chunks in 186 graph layers,1896 chunks in 186 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 274.03 kiB 148 B Shape (1896, 37) (1, 37) Dask graph 1896 chunks in 186 graph layers Data type float32 numpy.ndarray",37  1896,

Unnamed: 0,Array,Chunk
Bytes,274.03 kiB,148 B
Shape,"(1896, 37)","(1, 37)"
Dask graph,1896 chunks in 186 graph layers,1896 chunks in 186 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,274.03 kiB,148 B
Shape,"(1896, 37)","(1, 37)"
Dask graph,1896 chunks in 186 graph layers,1896 chunks in 186 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 274.03 kiB 148 B Shape (1896, 37) (1, 37) Dask graph 1896 chunks in 186 graph layers Data type float32 numpy.ndarray",37  1896,

Unnamed: 0,Array,Chunk
Bytes,274.03 kiB,148 B
Shape,"(1896, 37)","(1, 37)"
Dask graph,1896 chunks in 186 graph layers,1896 chunks in 186 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,274.03 kiB,148 B
Shape,"(1896, 37)","(1, 37)"
Dask graph,1896 chunks in 186 graph layers,1896 chunks in 186 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 274.03 kiB 148 B Shape (1896, 37) (1, 37) Dask graph 1896 chunks in 186 graph layers Data type float32 numpy.ndarray",37  1896,

Unnamed: 0,Array,Chunk
Bytes,274.03 kiB,148 B
Shape,"(1896, 37)","(1, 37)"
Dask graph,1896 chunks in 186 graph layers,1896 chunks in 186 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


### Load 449 data

In [13]:
# All profiler NetCDF files, in sorted (lexicographic) path order
files = sorted(glob.glob(f'{prof449_path}/*.nc'))

In [14]:
# Retrieve common height grid (with a step of 100m) using max and min values
step = 100

if not files:
    raise FileNotFoundError(f"No profiler files found under {prof449_path}")

# Open each file once, record its height extremes, and close it again so no
# file handles are left dangling.
file_height_ranges = []
for f in files:
    with xr.open_dataset(f) as ds_heights:
        file_height_ranges.append(
            (ds_heights.height.min().item(), ds_heights.height.max().item())
        )

min_height = min(lowest for lowest, _ in file_height_ranges)
max_height = max(highest for _, highest in file_height_ranges)

# Create common height grid
common_agl = np.arange(min_height, max_height + step, step)

# Retrieve altitude value from files[5] (zero-based index 5, i.e. the sixth file
# in sorted order; chosen after setup -- checked manually).
# .values loads the data into memory, so it stays valid after the file is closed.
with xr.open_dataset(files[5]) as ds_site:
    altitude = ds_site.alt.values

# Use alt to create common MSL, geopotential grids.
# Geopotential = g0 * Re * z / (Re + z); this matches the form of MetPy's
# height_to_geopotential.
standard_gravity = 9.80665      # m s**-2
earth_radius = 6371008.7714     # m
common_msl = common_agl + altitude
common_geopotential = (standard_gravity * earth_radius * common_msl) / (earth_radius + common_msl)

In [15]:
def open_and_regrid(f, common_agl, common_msl, common_geopotential):
    """Open one profiler file lazily and place it on the shared vertical grid.

    Parameters
    ----------
    f : str
        Path of the profiler NetCDF file to open.
    common_agl : array-like
        Shared height grid; the file's ``height`` dimension is reindexed onto
        these values.
    common_msl : array-like
        Heights to attach as the ``height_msl`` coordinate, one per
        ``common_agl`` entry.
    common_geopotential : array-like
        Geopotential values to attach, one per ``common_agl`` entry; this
        becomes the vertical dimension of the result.

    Returns
    -------
    xarray.Dataset
        The dataset with ``geopotential`` as its vertical dimension,
        ``height_agl`` / ``height_msl`` as auxiliary coordinates, and the
        original ``height`` variable dropped.
    """
    ds = xr.open_dataset(f, chunks={})

    # 'height' is indexed by time in the file; use the first time step as the
    # 1-D height axis.
    # NOTE(review): assumes heights do not change within a file -- confirm.
    height_1d = ds['height'].isel(time=0).values
    ds = ds.assign_coords(height=("height", height_1d))

    # Exact-label reindex: grid levels missing from this file are filled with NaN.
    ds = ds.reindex(height=common_agl)

    # Update coords to the reindexed grid
    ds = ds.assign_coords(
        height_agl=("height", common_agl),
        height_msl=("height", common_msl),
        geopotential=("height", common_geopotential)
    )

    # Metadata
    ds.height_agl.attrs.update({"long_name": "Height above ground level", "units": "meters"})
    ds.height_msl.attrs.update({"long_name": "Height above mean sea level", "units": "meters"})
    ds.geopotential.attrs.update({"long_name": "Geopotential", "units": "m**2 s**-2"})

    # Swap to make geopotential the vertical coordinate
    ds = ds.swap_dims({"height": "geopotential"}).drop_vars("height")

    return ds

In [16]:
# Build one lazy task per profiler file.
# NOTE(review): files[2:] skips the first two files, presumably the setup
# period -- confirm.
delayed_datasets = [
    delayed(open_and_regrid)(f, common_agl, common_msl, common_geopotential)
    for f in files[2:]
]

# Submit every task to the cluster at once and collect the results, so files
# are processed in parallel rather than one compute() call after another.
datasets = client.gather(client.compute(delayed_datasets))

combined_profiler = xr.concat(datasets, dim="time")
combined_profiler

Unnamed: 0,Array,Chunk
Bytes,28.88 kiB,384 B
Shape,"(3696,)","(48,)"
Dask graph,77 chunks in 155 graph layers,77 chunks in 155 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 28.88 kiB 384 B Shape (3696,) (48,) Dask graph 77 chunks in 155 graph layers Data type datetime64[ns] numpy.ndarray",3696  1,

Unnamed: 0,Array,Chunk
Bytes,28.88 kiB,384 B
Shape,"(3696,)","(48,)"
Dask graph,77 chunks in 155 graph layers,77 chunks in 155 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 404 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 404 graph layers,385 chunks in 404 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 481 graph layers,385 chunks in 481 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 8.62 kiB Shape (3696, 97) (48, 46) Dask graph 385 chunks in 481 graph layers Data type float32 numpy.ndarray",97  3696,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,8.62 kiB
Shape,"(3696, 97)","(48, 46)"
Dask graph,385 chunks in 481 graph layers,385 chunks in 481 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


### Combine the datasets

ERA5 data are provided on constant-pressure levels, whereas the profiler data are measured at constant heights. This difference must be accounted for when combining the datasets, most likely by interpolating ERA5 to a common height grid and assigning that height as a dependent coordinate before the combination. Other combination methods are being considered in the meantime.

https://unidata.github.io/MetPy/latest/api/generated/metpy.calc.geopotential_to_height.html