In [1]:
import xarray as xr
import zarr
from xmitgcm import open_mdsdataset
import lzma
from numcodecs import LZMA
from dask.diagnostics import ProgressBar

In [2]:
# Raw LZMA stream: a delta filter (distance 1) followed by LZMA2 at preset 1.
# `compressor` is attached to every mask variable's zarr encoding below.
lzma_filters = [
    {'id': lzma.FILTER_DELTA, 'dist': 1},
    {'id': lzma.FILTER_LZMA2, 'preset': 1},
]
compressor = LZMA(filters=lzma_filters, format=lzma.FORMAT_RAW)

In [14]:
def create_mask_ds(ds):
    """Build a dataset of boolean masks from the hFac fields of ``ds``.

    Parameters
    ----------
    ds : dataset exposing ``hFacC``, ``hFacW`` and ``hFacS`` variables.

    Returns
    -------
    Dataset with variables ``mask_c``, ``mask_w`` and ``mask_s`` (True where
    the matching hFac value is > 0), coordinates cast to int16, chunked with
    one ``k`` level per chunk and all faces together, and the module-level
    ``compressor`` set in each variable's encoding.
    """
    # (source variable, output mask name); non-index coords are dropped so
    # the merged dataset only carries the masks themselves.
    sources = (('hFacC', 'mask_c'), ('hFacW', 'mask_w'), ('hFacS', 'mask_s'))
    masks = [
        (ds[hfac_name] > 0).reset_coords(drop=True).rename(mask_name)
        for hfac_name, mask_name in sources
    ]
    ds_mask = xr.merge(masks)

    # Store the remaining coordinates as 16-bit integers.
    for coord_name in ds_mask.coords:
        ds_mask[coord_name] = ds_mask[coord_name].astype('i2')

    ds_mask = ds_mask.chunk({'k': 1, 'face': -1})

    # Ask the zarr writer to use the shared LZMA compressor for every mask.
    for var_name in ds_mask.data_vars:
        ds_mask[var_name].encoding['compressor'] = compressor
    return ds_mask

In [57]:
# Grid directories to process, keyed by the size label used in the output
# store names. Entries that are commented out are skipped by the write loop.
dsets = {
    # '4320': '/home6/dmenemen/llc_4320/grid',
    # '2160': '/home6/dmenemen/llc_2160/grid',
    '1080': '/home6/dmenemen/llc_1080/grid',
}

In [63]:
# Write one consolidated zarr mask store per entry in `dsets`.
outdir = '/nobackup/rpaberna/llc/masks/'
for name, grid_dir in dsets.items():
    print(name, grid_dir)
    ds = open_mdsdataset(grid_dir, iters=None, geometry='llc')
    grid_ds = create_mask_ds(ds)
    # `outdir` already ends with '/', so strip it before joining; otherwise
    # the store path contains a doubled separator ('masks//llc_...').
    outname = f"{outdir.rstrip('/')}/llc_{name}_masks.zarr"
    with ProgressBar():
        # mode='w' overwrites any existing store at `outname`.
        grid_ds.to_zarr(outname, consolidated=True, mode='w')

1080 /home6/dmenemen/llc_1080/grid


  "in %s or %s. Using default version." % (data_dir, grid_dir))


[########################################] | 100% Completed | 14.4s


In [75]:
def check_mask_length(ds, itemsize=4):
    """Print, per data variable, the sum of the variable times ``itemsize``.

    For a boolean mask the sum is the number of True points, so the printed
    value is the byte size those points would occupy at ``itemsize`` bytes
    each.

    Parameters
    ----------
    ds : dataset-like object exposing ``data_vars`` and item access.
    itemsize : int, optional
        Bytes per element. Defaults to 4, the value previously hard-coded
        (presumably 4-byte floats in the target data -- TODO confirm).

    Returns
    -------
    None. One ``name nbytes`` line is printed per variable.
    """
    for vname in ds.data_vars:
        with ProgressBar():
            # Reading .values inside the context lets the progress bar track
            # the reduction when the data are lazily evaluated.
            n_selected = ds[vname].sum().values
        nbytes = n_selected * itemsize
        print(vname, nbytes)
        
from llcreader import random_utils
def generate_indexes(ds):
    """Build the facet index list for each of the 'c', 'w' and 's' masks.

    For every grid point type the underlying array of ``ds['mask_<point>']``
    is passed to ``random_utils.face_mask_to_facet_index_list``.

    Returns
    -------
    dict mapping 'c', 'w' and 's' to whatever that helper returns.
    """
    facet_indexes = {}
    for grid_point in ('c', 'w', 's'):
        mask_array = ds[f'mask_{grid_point}'].data
        # One progress bar per mask, matching one helper call each.
        with ProgressBar():
            facet_indexes[grid_point] = random_utils.face_mask_to_facet_index_list(mask_array)
    return facet_indexes

In [101]:
# Rebuild the facet index lists from mask stores that are already on disk.
# NOTE(review): this reads the '4320' store, which must already exist; the
# dataset-writing cell currently has only '1080' enabled -- confirm before a
# fresh Restart & Run All.
all_indexes = {}
for name in ['4320']:
    fname = f'/nobackup/rpaberna/llc/masks/llc_{name}_masks.zarr/'
    ds_zarr = xr.open_zarr(fname, consolidated=True)
    # Prints one "name nbytes" line per mask variable.
    check_mask_length(ds_zarr)
    # Keyed by the integer grid size rather than the string label.
    all_indexes[int(name)] = generate_indexes(ds_zarr)

[########################################] | 100% Completed |  9.0s
mask_c 39893288036
[########################################] | 100% Completed |  8.0s
mask_s 39833916904
[########################################] | 100% Completed |  7.7s
mask_w 39836455336
[########################################] | 100% Completed |  7.8s
[########################################] | 100% Completed |  7.7s
[########################################] | 100% Completed |  7.8s


In [102]:
from pprint import pprint
# Emit an importable module: the header line ends in a backslash continuation
# so the pretty-printed dict that follows becomes the value of
# `all_index_data`.
with open('llcreader/shrunk_index.py', mode='w') as f:
    print('all_index_data = \\', file=f)
    pprint(all_indexes, stream=f, compact=True)