In [1]:
# !pip install -e /home/jovyan/PROJECTS/scale-aware-air-sea

In [2]:
# TODO: dataset attrs should propagate through every processing step; this has to start in the first step of the pipeline.

In [1]:
from dask_gateway import Gateway
gateway = Gateway()

# Start from a clean slate: shut down every cluster the gateway still lists.
open_clusters = gateway.list_clusters()
print(list(open_clusters))
for stale in open_clusters:
    gateway.connect(stale.name).shutdown()
print('setting up new cluster')

# Worker sizing for the new cluster.
# Alternatives kept for reference:
#   options.environment = dict(DASK_DISTRIBUTED__SCHEDULER__WORKER_SATURATION="1.0")
#   options.worker_memory = 12
#   options.worker_cores = 2
options = gateway.cluster_options()
options.worker_cores = 6
options.worker_memory = 23

# Launch the cluster with those options and attach a client to it.
cluster = gateway.new_cluster(options)
client = cluster.get_client()

# Fixed-size pool of workers; cluster.adapt(10, 200) / cluster.scale(200) were the other settings tried.
cluster.scale(20)

# Last expression of the cell: display the client summary.
client

[]
setting up new cluster


0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: /services/dask-gateway/clusters/prod.23be19df7de346c3a08030d721695e72/status,


In [2]:
import gcsfs
import xarray as xr
import numpy as np
from scale_aware_air_sea.utils import filter_inputs_dataset, to_zarr_split, open_zarr, weighted_coarsen
from scale_aware_air_sea.parameters import get_params

In [3]:
fs = gcsfs.GCSFileSystem()

# Global parameters for this version of the analysis.
params = get_params('v0.7.0', test=False)

# One mapper per CM26 store. The target names and the path keys below are listed
# in the same order, so the n-th name receives the mapper for the n-th key.
(
    mapper_filter_fluxes,
    mapper_coarse_fluxes,
    mapper_filter_flux_decomposition_monthly,
    mapper_coarse_flux_decomposition_monthly,
    mapper_filter_flux_decomposition_daily,
    mapper_coarse_flux_decomposition_daily,
    mapper_filter_flux_decomposition_mean,
    mapper_coarse_flux_decomposition_mean,
) = (
    fs.get_mapper(params['paths']['CM26'][path_key])
    for path_key in (
        'filter_fluxes',
        'coarse_fluxes',
        'filter_decomposition_monthly',
        'coarse_decomposition_monthly',
        'filter_decomposition_daily',
        'coarse_decomposition_daily',
        'filter_decomposition_mean',
        'coarse_decomposition_mean',
    )
)

# Data variables that are carried through the decomposition.
save_vars = ['ql', 'qh']

In [4]:
# new scale separation
def decomposition_filter(ds, output_terms):
    """Decompose filtered flux outputs into the terms requested for evaluation.

    Every available term is built from selections along the ``smoothing``
    coordinate of ``ds`` and from spatially filtered versions of those
    selections; only the terms named in ``output_terms`` are returned.

    Parameters
    ----------
    ds : xr.Dataset
        Flux dataset with a ``smoothing`` coordinate containing the entries
        'smooth_none', 'smooth_all', 'smooth_vel_tracer_ocean' and
        'smooth_vel_tracer_atmos'. It is passed to ``filter_inputs_dataset``
        together with the dimension names ``['yt_ocean', 'xt_ocean']``.
    output_terms : sequence of str
        Names of the terms to return, in output order. Available terms:
        'Q_H', 'Q_H_bar', 'Q_L', 'Q_L_bar', 'Q_L_prime', 'Q_star',
        'Q_star_star', 'Q_L_ocean', 'Q_L_ocean_bar', 'Q_L_atmos',
        'Q_L_atmos_bar', 'Q_star_star_ocean', 'Q_star_star_atmos'.

    Returns
    -------
    xr.Dataset
        The requested terms concatenated along a new ``term`` dimension, with
        the dataset-level attrs taken from ``ds``.

    Raises
    ------
    KeyError
        If ``output_terms`` names a term that is not available.
    """
    def filt(field):
        # Spatial filter over the horizontal dimensions, using the global filter scale.
        return filter_inputs_dataset(field, ['yt_ocean', 'xt_ocean'], params['filter_scale'])

    tdict = {}

    # Q_H (AB) - high resolution input
    tdict['Q_H'] = ds.sel(smoothing='smooth_none')
    tdict['Q_H_bar'] = filt(tdict['Q_H'])
    # Q_L low resolution input
    tdict['Q_L'] = ds.sel(smoothing='smooth_all')
    tdict['Q_L_bar'] = filt(tdict['Q_L'])
    tdict['Q_L_prime'] = tdict['Q_L'] - tdict['Q_L_bar']  # TODO: could potentially be computed on the fly

    # Inferred small scale
    tdict['Q_star'] = tdict['Q_H_bar'] - tdict['Q_L']
    tdict['Q_star_star'] = tdict['Q_H_bar'] - tdict['Q_L_bar']

    # Mixed low resolution input
    tdict['Q_L_ocean'] = ds.sel(smoothing='smooth_vel_tracer_ocean')
    tdict['Q_L_ocean_bar'] = filt(tdict['Q_L_ocean'])

    tdict['Q_L_atmos'] = ds.sel(smoothing='smooth_vel_tracer_atmos')
    tdict['Q_L_atmos_bar'] = filt(tdict['Q_L_atmos'])

    #tdict['Q_star_ocean'] = tdict['Q_H_bar'] - tdict['Q_L_ocean']
    #tdict['Q_star_ocean_bar'] = filt(tdict['Q_star_ocean'])
    tdict['Q_star_star_ocean'] = tdict['Q_H_bar'] - tdict['Q_L_ocean_bar']

    #tdict['Q_star_atmos'] = tdict['Q_H_bar'] - tdict['Q_L_atmos']
    #tdict['Q_star_atmos_bar'] = filt(tdict['Q_star_atmos'])
    tdict['Q_star_star_atmos'] = tdict['Q_H_bar'] - tdict['Q_L_atmos_bar']

    #tdict['Q_star_res_wrong'] = tdict['Q_star'] - tdict['Q_star_star_ocean'] - tdict['Q_star_star_atmos']
    #tdict['Q_star_res'] = tdict['Q_star'] - tdict['Q_star_ocean'] - tdict['Q_star_atmos']
    #tdict['Q_star_star_res'] = tdict['Q_star_star'] - tdict['Q_star_star_ocean'] - tdict['Q_star_star_atmos']

    # for testing
    #tdict['Q_H_bar_bar'] = filt(tdict['Q_H_bar'])
    #tdict['Q_star_star_star'] = tdict['Q_H_bar_bar'] - tdict['Q_L_bar']
    #tdict['Q_star_res'] = tdict['Q_star'] - tdict['Q_star_ocean'] - tdict['Q_star_atmos']

    # Fail early with the list of valid names instead of a bare KeyError from the lookup below.
    unknown = [t for t in output_terms if t not in tdict]
    if unknown:
        raise KeyError(f"Unknown decomposition term(s) {unknown}; available terms: {list(tdict)}")

    # Concat into a single dataset. The scalar 'smoothing' coordinate left behind by .sel()
    # is only present on some terms, hence errors='ignore'. drop_vars replaces the
    # deprecated Dataset.drop.
    datasets = [
        tdict[t].assign_coords(term=t).drop_vars('smoothing', errors='ignore')
        for t in output_terms
    ]
    ds_out = xr.concat(datasets, dim='term', combine_attrs="override")
    ds_out.attrs = ds.attrs
    return ds_out


def decomposition_coarse(ds_filter, ds_coarse):
    """Decompose fluxes on the coarse grid into Q_H_bar, Q_L and Q_star.

    Parameters
    ----------
    ds_filter : xr.Dataset
        Flux dataset on the fine grid with a ``smoothing`` coordinate that
        contains 'smooth_none'. That selection is coarsened with
        ``weighted_coarsen`` by ``params['n_coarsen']`` along ``xt_ocean`` and
        ``yt_ocean``, passing 'area_t' as the third argument (presumably the
        weight variable - confirm against ``weighted_coarsen``).
    ds_coarse : xr.Dataset
        Flux dataset with a ``smoothing`` coordinate that contains
        'smooth_all'. It must be compatible with the coarsened ``ds_filter``
        for the subtraction that forms ``Q_star``.

    Returns
    -------
    xr.Dataset
        The terms 'Q_H_bar', 'Q_L' and 'Q_star' (= Q_H_bar - Q_L) concatenated
        along a new ``term`` dimension, with the dataset-level attrs taken
        from ``ds_coarse``.
    """
    def smooth(field):
        return weighted_coarsen(
            field,
            {'xt_ocean': params['n_coarsen'], 'yt_ocean': params['n_coarsen']},
            'area_t'
        )

    decomp = {}
    # Q_H (AB) - high resolution input; not stored in the dict because it has a different size
    Q_H = ds_filter.sel(smoothing='smooth_none')
    decomp['Q_H_bar'] = smooth(Q_H)
    # Q_L low resolution input
    decomp['Q_L'] = ds_coarse.sel(smoothing='smooth_all')

    # Inferred small scale
    decomp['Q_star'] = decomp['Q_H_bar'] - decomp['Q_L']

    # Concat into a single dataset. The scalar 'smoothing' coordinate is only present on
    # some terms, hence errors='ignore'. drop_vars replaces the deprecated Dataset.drop.
    datasets = [
        term_ds.drop_vars('smoothing', errors='ignore').assign_coords(term=name)
        for name, term_ds in decomp.items()
    ]
    ds_out = xr.concat(datasets, dim='term', combine_attrs="override")
    ds_out.attrs = ds_coarse.attrs
    return ds_out

In [6]:
# Open both flux stores and keep only the variables listed in save_vars.
# The coarse store is opened with an explicit chunk size of 3000 along time.
ds_coarse_fluxes = open_zarr(mapper_coarse_fluxes, chunks=dict(time=3000))[save_vars]
ds_filter_fluxes = open_zarr(mapper_filter_fluxes)[save_vars]

In [8]:
# long terms means of both
ds_coarse_fluxes_mean = ds_coarse_fluxes.mean('time')
ds_filter_fluxes_mean = ds_filter_fluxes.mean('time')

In [11]:
# persist the means to reduce computation later
ds_coarse_fluxes_mean = ds_coarse_fluxes_mean.persist()
ds_filter_fluxes_mean = ds_filter_fluxes_mean.persist()

In [9]:
# Monthly means of both flux datasets, using month-start ('1MS') resampling bins.
ds_coarse_fluxes_monthly = ds_coarse_fluxes.resample(indexer={'time': '1MS'}).mean()
ds_filter_fluxes_monthly = ds_filter_fluxes.resample(indexer={'time': '1MS'}).mean()

In [19]:
# Filter-based decomposition of the long-term mean fluxes.
filter_decomp_mean = decomposition_filter(
    ds=ds_filter_fluxes_mean,
    output_terms=[
        'Q_star',
        'Q_star_star',
        'Q_star_star_ocean',
        'Q_star_star_atmos',
        'Q_H_bar',
        'Q_L_bar',
    ],
)

NameError: name 'ds_filter_fluxes_mean' is not defined

In [52]:
# Some checks inside the weighted coarsening trigger a computation of the first slice
# (which here is the full dataset); both means were persisted earlier so that this
# only happens once.
coarse_decomp_mean = decomposition_coarse(
    ds_filter=ds_filter_fluxes_mean,
    ds_coarse=ds_coarse_fluxes_mean,
)
coarse_decomp_mean

[<xarray.Dataset>
Dimensions:   (algo: 5, yt_ocean: 54, xt_ocean: 72)
Coordinates:
  * algo      (algo) <U8 'ncar' 'ecmwf' 'coare3p0' 'coare3p6' 'andreas'
    area_t    (yt_ocean, xt_ocean) float64 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0
    dyt       (yt_ocean, xt_ocean) float64 4.697e+03 4.697e+03 ... 370.3
    geolat_t  (yt_ocean, xt_ocean) float32 0.0 0.0 0.0 0.0 ... 52.3 0.0 0.0 0.0
    geolon_t  (yt_ocean, xt_ocean) float32 0.0 0.0 0.0 0.0 ... 57.11 0.0 0.0 0.0
    ht        (yt_ocean, xt_ocean) float32 0.0 0.0 0.0 0.0 ... 14.79 0.0 0.0 0.0
    kmt       (yt_ocean, xt_ocean) float32 0.0 0.0 0.0 0.0 ... 1.462 0.0 0.0 0.0
    wet       (yt_ocean, xt_ocean) float64 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0
  * xt_ocean  (xt_ocean) float64 -277.5 -272.5 -267.5 -262.5 ... 67.5 72.5 77.5
  * yt_ocean  (yt_ocean) float64 -80.07 -77.96 -75.85 ... 84.72 86.83 88.94
    dxt       (yt_ocean, xt_ocean) float64 1.917e+03 1.917e+03 ... 4.525e+03
    term      <U7 'Q_H_bar'
Data variables:
    ql        (algo, y

Unnamed: 0,Array,Chunk
Bytes,455.62 kiB,30.38 kiB
Shape,"(3, 5, 54, 72)","(1, 1, 54, 72)"
Dask graph,15 chunks in 27 graph layers,15 chunks in 27 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 455.62 kiB 30.38 kiB Shape (3, 5, 54, 72) (1, 1, 54, 72) Dask graph 15 chunks in 27 graph layers Data type float64 numpy.ndarray",3  1  72  54  5,

Unnamed: 0,Array,Chunk
Bytes,455.62 kiB,30.38 kiB
Shape,"(3, 5, 54, 72)","(1, 1, 54, 72)"
Dask graph,15 chunks in 27 graph layers,15 chunks in 27 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,455.62 kiB,30.38 kiB
Shape,"(3, 5, 54, 72)","(1, 1, 54, 72)"
Dask graph,15 chunks in 27 graph layers,15 chunks in 27 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 455.62 kiB 30.38 kiB Shape (3, 5, 54, 72) (1, 1, 54, 72) Dask graph 15 chunks in 27 graph layers Data type float64 numpy.ndarray",3  1  72  54  5,

Unnamed: 0,Array,Chunk
Bytes,455.62 kiB,30.38 kiB
Shape,"(3, 5, 54, 72)","(1, 1, 54, 72)"
Dask graph,15 chunks in 27 graph layers,15 chunks in 27 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [10]:
# Filter-based decomposition of the monthly means.
# Note: writing out only the *_bar terms would be enough, since Q_star_star
# (= Q_H_bar - Q_L_bar) is easy to recompute afterwards.
filter_decomp_monthly = decomposition_filter(
    ds=ds_filter_fluxes_monthly,
    output_terms=['Q_star_star', 'Q_H_bar', 'Q_L_bar'],
)

In [10]:
# Coarsening-based decomposition of the monthly means.
coarse_decomp_monthly = decomposition_coarse(
    ds_filter=ds_filter_fluxes_monthly,
    ds_coarse=ds_coarse_fluxes_monthly,
)

In [8]:
# Decompositions of the un-averaged (daily) fluxes.
coarse_decomp_daily = decomposition_coarse(
    ds_filter=ds_filter_fluxes,
    ds_coarse=ds_coarse_fluxes,
)

# Only Q_star_star_ocean is requested here; it is used by case_study.ipynb.
# TODO: also add the daily Q** term for the histogram.
filter_decomp_daily = decomposition_filter(
    ds=ds_filter_fluxes,
    output_terms=['Q_star_star_ocean'],
)

### Save out all decompositions

In [12]:
# Remove output stores that are about to be rewritten. Uncomment the entries
# that match the datasets enabled in the save loop below.
# fs.rm(mapper_filter_flux_decomposition_mean.root, recursive=True)
# fs.rm(mapper_coarse_flux_decomposition_mean.root, recursive=True)
# fs.rm(mapper_coarse_flux_decomposition_daily.root, recursive=True)
# fs.rm(mapper_filter_flux_decomposition_monthly.root, recursive=True)
# fs.rm(mapper_coarse_flux_decomposition_monthly.root, recursive=True)

# Only delete when the store exists: fs.rm raises FileNotFoundError on a missing path,
# which would break a fresh "Restart & Run All" before anything has been written.
if fs.exists(mapper_filter_flux_decomposition_daily.root):
    fs.rm(mapper_filter_flux_decomposition_daily.root, recursive=True)

In [13]:
# Each entry is (dataset, target mapper, label for the progress message).
# Enable/disable outputs by (un)commenting entries.
for ds, mapper, msg in [
    # (filter_decomp_mean, mapper_filter_flux_decomposition_mean, 'Long Term Mean Filter'),
    # (coarse_decomp_mean, mapper_coarse_flux_decomposition_mean, 'Long Term Mean Coarse'),
    # (coarse_decomp_daily, mapper_coarse_flux_decomposition_daily, 'Daily Coarse'),
    (filter_decomp_daily.isel(time=slice(0,10)), mapper_filter_flux_decomposition_daily, 'Daily Filter (Shortened)'),
    # (coarse_decomp_monthly, mapper_coarse_flux_decomposition_monthly, 'Monthly Coarse'),
    # (filter_decomp_monthly, mapper_filter_flux_decomposition_monthly, 'Monthly Filter'),
]:
    # Strip any stored 'chunks' encoding from every variable before writing
    # (presumably so stale chunk sizes do not conflict with the current dask chunks - confirm).
    for va in ds.variables:
        ds[va].encoding.pop('chunks', None)

    print(f"Saving {msg}: {ds.nbytes/1e9}GB")
    if 'time' not in ds.dims:
        # No time axis: write the whole dataset in a single call.
        ds.to_zarr(mapper)
    else:
        # Time-dependent output goes through to_zarr_split with split_interval=60.
        # Per the original note this keeps memory pressure on the scheduler low
        # (split_interval=200 was the alternative setting).
        to_zarr_split(ds, mapper, split_interval=60)

Saving Daily Filter (Shortened): 4.354610708GB
Writing to leap-persistent/jbusecke/scale-aware-air-sea/results/CM26_fluxes_filter_decomposed_daily_v0.7.0.zarr ...


0it [00:00, ?it/s]

In [13]:
# cluster.shutdown()