## Imports

In [1]:
import xarray as xr
import pathlib
import numpy as np
import pandas as pd
import matplotlib as mpl
import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import os
import xesmf as xe
import time
import src.utils
import copy

## root directory for project data, read from the DATA_FP environment
## variable (a KeyError is raised here if the variable is not set)
DATA_FP = pathlib.Path(os.environ["DATA_FP"])

## plotting style: white axes background with the grid switched off
sns.set(rc={"axes.facecolor": "white", "axes.grid": False})

## figure resolution in dots per inch (raise this for presentation figures)
mpl.rcParams["figure.dpi"] = 100

## Shared functions

In [2]:
def trim(
    data, lon_range=[130, 290], lat_range=[-5, 5], lon_name="TLONG", lat_name="TLAT"
):
    """Crop `data` along "nlon"/"nlat" to the cells touching a lon/lat box.

    Parameters
    ----------
    data : object indexable by coordinate name and supporting ``isel``
        Holds the coordinates `lon_name` and `lat_name`, defined over the
        "nlat" and "nlon" dimensions.
    lon_range, lat_range : two-element sequences
        Inclusive ``[min, max]`` bounds; only the first two entries are read.
    lon_name, lat_name : str
        Names of the longitude and latitude coordinates to test.

    Returns
    -------
    ``data.isel(...)`` restricted to every "nlon" index and every "nlat"
    index that contains at least one cell inside both ranges. Because whole
    rows/columns are kept, individual cells of the result may still lie
    outside the requested box.
    """

    def _within(values, bounds):
        ## inclusive test on both ends of the interval
        lower, upper = bounds[0], bounds[1]
        return (lower <= values) & (values <= upper)

    ## cells that satisfy the longitude AND the latitude constraint
    inside_box = _within(data[lon_name], lon_range) & _within(
        data[lat_name], lat_range
    )

    ## evaluate the mask once, before it is reduced along each dimension
    inside_box.load()

    ## an index along one dimension is kept when any cell along the other
    ## dimension falls inside the box
    keep = {
        kept_dim: inside_box.any(reduced_dim)
        for kept_dim, reduced_dim in (("nlon", "nlat"), ("nlat", "nlon"))
    }

    return data.isel(**keep)

## Subsurface ocean data

In [None]:
def get_ensemble_ids():
    """Return the sorted, de-duplicated ensemble IDs found in the WVEL folder.

    Every ``*.nc`` file in the CESM2-LE monthly ocean "WVEL" directory is
    listed, and a fixed-width slice of its path string is taken as the ID.
    An empty list is returned when no files are found.
    """

    ## root of the CESM2-LE monthly ocean timeseries
    archive_root = pathlib.Path(
        "/glade/campaign/collections/rda/data/d651056/CESM2-LE/ocn/proc/tseries/month_1"
    )

    ## WVEL is an arbitrary choice of variable: only the IDs are needed
    wvel_dir = archive_root / "WVEL"

    ## the ID is the 25-character slice that ends 28 characters before the
    ## end of the path string (presumably fixed by the CESM2-LE file naming
    ## scheme; verify before reusing with other variables)
    unique_ids = {str(nc_file)[-53:-28] for nc_file in wvel_dir.glob("*.nc")}

    return sorted(unique_ids)


def load_var(varname, ensemble_id):
    """Load one ensemble member of `varname` as a latitude-averaged section.

    Parameters
    ----------
    varname : str
        Variable to load; must be "TEMP" or "WVEL".
    ensemble_id : str
        Substring identifying the ensemble member; every file in the
        variable's folder whose name matches ``*{ensemble_id}*.nc`` is opened.

    Returns
    -------
    The lazily-loaded variable, trimmed with ``trim`` to latitudes
    [-1.5, 1.5] and longitudes [130, 290], restricted to the first 27
    vertical levels and every second "nlon" index, then averaged over
    "nlat". A "lon" coordinate holding the "nlat"-mean of "TLONG" is attached.

    Raises
    ------
    ValueError
        If `varname` is not one of the supported variables. (Previously a
        message was printed and ``None`` returned, which only failed later
        in the caller.)
    """

    ## name of the vertical coordinate used by each supported variable
    z_coords = {"TEMP": "z_t", "WVEL": "z_w_top"}

    ## fail fast, before touching the filesystem
    if varname not in z_coords:
        raise ValueError(
            f"Not a valid variable: {varname!r} (expected one of {sorted(z_coords)})"
        )
    z_coord = z_coords[varname]

    ## root of the CESM2-LE monthly ocean timeseries
    cesm2_fp = pathlib.Path(
        "/glade/campaign/collections/rda/data/d651056/CESM2-LE/ocn/proc/tseries/month_1"
    )

    ## folder holding the files for this variable
    data_fp = cesm2_fp / varname

    ## materialize the glob so open_mfdataset receives a concrete, ordered
    ## list of paths rather than a one-shot generator
    member_files = sorted(data_fp.glob(f"*{ensemble_id}*.nc"))

    ## open data for ensemble member
    data = xr.open_mfdataset(
        member_files,
        decode_timedelta=True,
        chunks={"time": 12, z_coord: 60, "nlat": 384, "nlon": 320},
        parallel=True,
    )[varname]

    ## trim to eq Pac
    data = trim(data, lat_range=[-1.5, 1.5], lon_range=[130, 290])

    ## keep the first 27 vertical levels (the top 300 m, per the original
    ## note) and every second longitude index
    data = data.isel({z_coord: slice(None, 27), "nlon": slice(None, None, 2)})

    ## average over latitudes; the mean longitude is stored first because
    ## "TLONG" depends on "nlat" and would not survive the reduction
    data = data.assign_coords({"lon": data["TLONG"].mean("nlat")})
    data = data.mean("nlat")

    return data


def preprocess_ensemble(varname, temp_dir, start_idx=61):
    """Preprocess `varname` for ensemble members and save one file per member.

    Each member is loaded with ``load_var`` and written to
    ``{temp_dir}/{varname}_{ensemble_id}.nc``. Members whose output file
    already exists are skipped, so the function can be re-run to resume.

    Parameters
    ----------
    varname : str
        Variable name passed on to ``load_var``.
    temp_dir : str or path-like
        Output directory; created (with parents) if it does not exist.
    start_idx : int, optional
        Index into the sorted ensemble-ID list at which to start. The default
        of 61 reproduces the original hard-coded slice, which skipped members
        0-60 because index 60 raised "NetCDF: HDF error" for WVEL. Pass 0 to
        process the full ensemble.
    """

    ## make sure the output directory exists before any work is done
    temp_dir = pathlib.Path(temp_dir)
    temp_dir.mkdir(parents=True, exist_ok=True)

    ## get ensemble ids
    ensemble_ids = get_ensemble_ids()

    ## loop through members
    for i in tqdm.tqdm(ensemble_ids[start_idx:]):

        ## save filepath
        save_fp = temp_dir / f"{varname}_{i}.nc"

        ## already processed on an earlier run
        if save_fp.is_file():
            continue

        ## write to a temporary name and rename on success, so that an
        ## interrupted write never leaves a partial file that a later run
        ## would mistake for a finished one
        tmp_fp = save_fp.with_name(f"{save_fp.name}.tmp")
        try:
            data = load_var(varname=varname, ensemble_id=i)
            data.to_netcdf(tmp_fp)
            tmp_fp.replace(save_fp)
        finally:
            ## no-op after a successful rename; removes leftovers on failure
            tmp_fp.unlink(missing_ok=True)

### Initialize cluster

In [None]:
## NOTE(review): this import lives outside the notebook's top import cell
from dask.distributed import LocalCluster, Client

## start a local dask cluster with 16 workers and attach a client to it
cluster = LocalCluster(n_workers=16)
client = Client(cluster)
## show the client summary as the cell output
client

In [None]:
## preprocess WVEL ensemble members, writing one file per member into
## DATA_FP/cesm/wvel_temp_v2; the commented-out call is the same run for TEMP
# preprocess_ensemble(varname="TEMP", temp_dir=pathlib.Path(DATA_FP, "cesm", "temp_temp_v2"))
preprocess_ensemble(
    varname="WVEL", temp_dir=pathlib.Path(DATA_FP, "cesm", "wvel_temp_v2")
)

### Can't open the files for one ensemble member (index 60 raises "NetCDF: HDF error" for WVEL); cause unknown

In [None]:
# ids = get_ensemble_ids()
# temp_dir = "wvel_temp"
# save_fp = pathlib.Path(DATA_FP, "cesm", "wvel_temp", f"WVEL_{ids[60]}.nc")

# ## load data
# d = load_var("WVEL", ids[60])

## Surface data ($u$, $v$)

In [3]:
def get_ensemble_ids():
    """Return the sorted, de-duplicated ensemble IDs found in the UVEL folder.

    Every ``*.nc`` file in the CESM2-LE monthly ocean "UVEL" directory is
    listed, and a fixed-width slice of its path string is taken as the ID.
    An empty list is returned when no files are found.
    """

    ## root of the CESM2-LE monthly ocean timeseries
    archive_root = pathlib.Path(
        "/glade/campaign/collections/rda/data/d651056/CESM2-LE/ocn/proc/tseries/month_1"
    )

    ## UVEL is an arbitrary choice of variable: only the IDs are needed
    uvel_dir = archive_root / "UVEL"

    ## the ID is the 25-character slice that ends 28 characters before the
    ## end of the path string (presumably fixed by the CESM2-LE file naming
    ## scheme; verify before reusing with other variables)
    unique_ids = {str(nc_file)[-53:-28] for nc_file in uvel_dir.glob("*.nc")}

    return sorted(unique_ids)

def load_var(varname, ensemble_id):
    """Open one ensemble member of `varname` and return its top layer, trimmed.

    Every file in the variable's CESM2-LE monthly ocean folder whose name
    matches ``*{ensemble_id}*.nc`` is opened lazily as a single dataset. The
    index-0 slice along "z_t" is then cropped with ``trim`` to "ULAT" in
    [-15, 15] and "ULONG" in [120, 300].
    """

    ## monthly ocean timeseries root; one sub-folder per variable
    archive_root = pathlib.Path(
        "/glade/campaign/collections/rda/data/d651056/CESM2-LE/ocn/proc/tseries/month_1"
    )
    member_files = (archive_root / varname).glob(f"*{ensemble_id}*.nc")

    ## lazily open all matching files as one chunked dataset
    member_ds = xr.open_mfdataset(
        member_files,
        decode_timedelta=True,
        chunks={"time": 60, "nlat": 384, "nlon": 320},
        parallel=True,
    )

    ## keep only the first level along z_t
    top_layer = member_ds[varname].isel(z_t=0)

    ## crop to the lon/lat box, using the ULONG/ULAT coordinates
    return trim(
        top_layer,
        lat_range=[-15, 15],
        lon_range=[120, 300],
        lon_name="ULONG",
        lat_name="ULAT",
    )

def preprocess_ensemble(varname, temp_dir):
    """Preprocess `varname` for every ensemble member; save one file each.

    Each member is loaded with ``load_var`` and written to
    ``{temp_dir}/{varname}_{ensemble_id}.nc``. Members whose output file
    already exists are skipped, so the function can be re-run to resume.

    Parameters
    ----------
    varname : str
        Variable name passed on to ``load_var``.
    temp_dir : str or path-like
        Output directory; created (with parents) if it does not exist.
    """

    ## make sure the output directory exists before any work is done
    temp_dir = pathlib.Path(temp_dir)
    temp_dir.mkdir(parents=True, exist_ok=True)

    ## get ensemble ids
    ensemble_ids = get_ensemble_ids()

    ## loop through members
    for i in tqdm.tqdm(ensemble_ids):

        ## save filepath
        save_fp = temp_dir / f"{varname}_{i}.nc"

        ## already processed on an earlier run
        if save_fp.is_file():
            continue

        ## write to a temporary name and rename on success, so that an
        ## interrupted write never leaves a partial file that a later run
        ## would mistake for a finished one
        tmp_fp = save_fp.with_name(f"{save_fp.name}.tmp")
        try:
            data = load_var(varname=varname, ensemble_id=i)
            data.to_netcdf(tmp_fp)
            tmp_fp.replace(save_fp)
        finally:
            ## no-op after a successful rename; removes leftovers on failure
            tmp_fp.unlink(missing_ok=True)

### Initialize cluster

In [4]:
## NOTE(review): this import lives outside the notebook's top import cell
from dask.distributed import LocalCluster, Client

## start a local dask cluster with 16 workers and attach a client to it
## NOTE(review): if an earlier cluster cell was run in the same kernel, this
## starts a second cluster without closing the first - confirm that is intended
cluster = LocalCluster(n_workers=16)
client = Client(cluster)
## show the client summary as the cell output
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/8787/status,Workers: 16
Total threads: 16,Total memory: 64.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:45427,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:34223,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/36153/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:44917,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-khog54zo,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-khog54zo

0,1
Comm: tcp://127.0.0.1:43169,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/34719/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:35691,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-2s_fzizp,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-2s_fzizp

0,1
Comm: tcp://127.0.0.1:41873,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/45019/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:35939,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-t7zfkzzs,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-t7zfkzzs

0,1
Comm: tcp://127.0.0.1:43769,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/40247/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:44823,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-fmq3ueod,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-fmq3ueod

0,1
Comm: tcp://127.0.0.1:44187,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/40447/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:44767,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-ntg99084,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-ntg99084

0,1
Comm: tcp://127.0.0.1:38027,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/43063/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:33651,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-9ub8k5lb,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-9ub8k5lb

0,1
Comm: tcp://127.0.0.1:41375,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/42867/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:32789,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-4gxe5tf2,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-4gxe5tf2

0,1
Comm: tcp://127.0.0.1:38575,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/42323/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:38535,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-c7ycercq,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-c7ycercq

0,1
Comm: tcp://127.0.0.1:37055,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/43247/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:36097,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-n5738qef,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-n5738qef

0,1
Comm: tcp://127.0.0.1:34181,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/40001/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:34395,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-htjup03c,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-htjup03c

0,1
Comm: tcp://127.0.0.1:46293,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/46491/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:40361,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-28uyyxe5,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-28uyyxe5

0,1
Comm: tcp://127.0.0.1:42053,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/41965/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:35737,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-ed9e1s_s,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-ed9e1s_s

0,1
Comm: tcp://127.0.0.1:45481,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/45643/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:40571,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-v222k6o3,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-v222k6o3

0,1
Comm: tcp://127.0.0.1:41411,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/40075/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:43109,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-1cxt7sx1,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-1cxt7sx1

0,1
Comm: tcp://127.0.0.1:32779,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/43801/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:33471,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-3ilox2rd,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-3ilox2rd

0,1
Comm: tcp://127.0.0.1:34407,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kcarr/preprocess/proxy/46355/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:38327,
Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-_fz9w7sg,Local directory: /glade/derecho/scratch/kcarr/tmp/dask-scratch-space/worker-_fz9w7sg


### Compute

In [None]:
# ## test it works
# ids = get_ensemble_ids()
# d = load_var(varname="UVEL", ensemble_id=ids[0])

## preprocess UVEL for every ensemble member, writing one file per member
## into DATA_FP/cesm/uvel_temp; members already saved there are skipped
preprocess_ensemble(
    varname="UVEL", temp_dir=pathlib.Path(DATA_FP, "cesm", "uvel_temp")
)

 24%|██▍       | 24/100 [25:14<1:19:05, 62.45s/it]