In [1]:
import numpy as np
import pandas as pd
import xarray as xr
from dask.diagnostics import ProgressBar

from hurricanes.models import rtofs

In [2]:
# Bounding box of the region to subset. The first two entries are matched
# against the model longitudes and the last two against the model latitudes
# (lower bound first, then upper bound; presumably decimal degrees).
extent = [
    -71, -62,  # longitude bounds
    15, 22,    # latitude bounds
]

In [3]:
with rtofs() as ds:
    # Pull the coordinate axes out as plain numpy arrays once, so the index
    # lookup below does not go back to the dataset for every interpolation.
    # Only the first row of `lon` and the first column of `lat` are used --
    # assumes the grid is rectilinear so they are representative (TODO confirm).
    grid_x, grid_y = ds.x.values, ds.y.values
    grid_lons = ds.lon.values[0, :]
    grid_lats = ds.lat.values[:, 0]

    # Map the lon/lat bounds of `extent` onto (fractional) x / y grid indexes.
    lons_ind = np.interp(extent[:2], grid_lons, grid_x)
    lats_ind = np.interp(extent[2:], grid_lats, grid_y)

    # Round the lower bound of each axis down and the upper bound up so the
    # resulting [x0, x1, y0, y1] index window is slightly wider than `extent`.
    extent_ind = [
        rounder(bound).astype(int)
        for bounds in (lons_ind, lats_ind)
        for rounder, bound in zip((np.floor, np.ceil), bounds)
    ]

In [4]:
# Remote THREDDS (dodsC) endpoint of the RTOFS dataset.
url = "https://tds.marine.rutgers.edu/thredds/dodsC/cool/rtofs/rtofs_us_east_scraped"

# Open the dataset with explicit chunk sizes along x, y and time, and drop
# the `u` and `v` variables, which are not needed for this export.
rds = xr.open_dataset(
    url,
    chunks=dict(x=371, y=342, time=20),
).drop_vars(["u", "v"])
rds

Unnamed: 0,Array,Chunk
Bytes,34.96 kiB,160 B
Shape,"(4475,)","(20,)"
Count,225 Tasks,224 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 34.96 kiB 160 B Shape (4475,) (20,) Count 225 Tasks 224 Chunks Type float64 numpy.ndarray",4475  1,

Unnamed: 0,Array,Chunk
Bytes,34.96 kiB,160 B
Shape,"(4475,)","(20,)"
Count,225 Tasks,224 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.84 MiB,495.63 kiB
Shape,"(1710, 742)","(342, 371)"
Count,11 Tasks,10 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.84 MiB 495.63 kiB Shape (1710, 742) (342, 371) Count 11 Tasks 10 Chunks Type float32 numpy.ndarray",742  1710,

Unnamed: 0,Array,Chunk
Bytes,4.84 MiB,495.63 kiB
Shape,"(1710, 742)","(342, 371)"
Count,11 Tasks,10 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.84 MiB,495.63 kiB
Shape,"(1710, 742)","(342, 371)"
Count,11 Tasks,10 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.84 MiB 495.63 kiB Shape (1710, 742) (342, 371) Count 11 Tasks 10 Chunks Type float32 numpy.ndarray",742  1710,

Unnamed: 0,Array,Chunk
Bytes,4.84 MiB,495.63 kiB
Shape,"(1710, 742)","(342, 371)"
Count,11 Tasks,10 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,846.08 GiB,387.21 MiB
Shape,"(4475, 40, 1710, 742)","(20, 40, 342, 371)"
Count,2241 Tasks,2240 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 846.08 GiB 387.21 MiB Shape (4475, 40, 1710, 742) (20, 40, 342, 371) Count 2241 Tasks 2240 Chunks Type float32 numpy.ndarray",4475  1  742  1710  40,

Unnamed: 0,Array,Chunk
Bytes,846.08 GiB,387.21 MiB
Shape,"(4475, 40, 1710, 742)","(20, 40, 342, 371)"
Count,2241 Tasks,2240 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,846.08 GiB,387.21 MiB
Shape,"(4475, 40, 1710, 742)","(20, 40, 342, 371)"
Count,2241 Tasks,2240 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 846.08 GiB 387.21 MiB Shape (4475, 40, 1710, 742) (20, 40, 342, 371) Count 2241 Tasks 2240 Chunks Type float32 numpy.ndarray",4475  1  742  1710  40,

Unnamed: 0,Array,Chunk
Bytes,846.08 GiB,387.21 MiB
Shape,"(4475, 40, 1710, 742)","(20, 40, 342, 371)"
Count,2241 Tasks,2240 Chunks
Type,float32,numpy.ndarray


In [7]:
# The x/y index window is already known (extent_ind = [x0, x1, y0, y1]),
# so subset positionally with .isel rather than by coordinate value.
ds_slice = rds.isel(
    x=slice(*extent_ind[:2]),
    y=slice(*extent_ind[2:]),
)
ds_slice

Unnamed: 0,Array,Chunk
Bytes,34.96 kiB,160 B
Shape,"(4475,)","(20,)"
Count,225 Tasks,224 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 34.96 kiB 160 B Shape (4475,) (20,) Count 225 Tasks 224 Chunks Type float64 numpy.ndarray",4475  1,

Unnamed: 0,Array,Chunk
Bytes,34.96 kiB,160 B
Shape,"(4475,)","(20,)"
Count,225 Tasks,224 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,38.55 kiB
Shape,"(94, 113)","(94, 105)"
Count,13 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 41.49 kiB 38.55 kiB Shape (94, 113) (94, 105) Count 13 Tasks 2 Chunks Type float32 numpy.ndarray",113  94,

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,38.55 kiB
Shape,"(94, 113)","(94, 105)"
Count,13 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,38.55 kiB
Shape,"(94, 113)","(94, 105)"
Count,13 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 41.49 kiB 38.55 kiB Shape (94, 113) (94, 105) Count 13 Tasks 2 Chunks Type float32 numpy.ndarray",113  94,

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,38.55 kiB
Shape,"(94, 113)","(94, 105)"
Count,13 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.08 GiB,30.12 MiB
Shape,"(4475, 40, 94, 113)","(20, 40, 94, 105)"
Count,2689 Tasks,448 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.08 GiB 30.12 MiB Shape (4475, 40, 94, 113) (20, 40, 94, 105) Count 2689 Tasks 448 Chunks Type float32 numpy.ndarray",4475  1  113  94  40,

Unnamed: 0,Array,Chunk
Bytes,7.08 GiB,30.12 MiB
Shape,"(4475, 40, 94, 113)","(20, 40, 94, 105)"
Count,2689 Tasks,448 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.08 GiB,30.12 MiB
Shape,"(4475, 40, 94, 113)","(20, 40, 94, 105)"
Count,2689 Tasks,448 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.08 GiB 30.12 MiB Shape (4475, 40, 94, 113) (20, 40, 94, 105) Count 2689 Tasks 448 Chunks Type float32 numpy.ndarray",4475  1  113  94  40,

Unnamed: 0,Array,Chunk
Bytes,7.08 GiB,30.12 MiB
Shape,"(4475, 40, 94, 113)","(20, 40, 94, 105)"
Count,2689 Tasks,448 Chunks
Type,float32,numpy.ndarray


In [8]:
# One timestamp per day at 12:00, from 2019-07-17 through 2021-03-04 inclusive.
trange = pd.date_range(
    start=pd.Timestamp(2019, 7, 17, 12, 0, 0),
    end=pd.Timestamp(2021, 3, 4, 12, 0, 0),
    freq='1D',
)
trange

DatetimeIndex(['2019-07-17 12:00:00', '2019-07-18 12:00:00',
               '2019-07-19 12:00:00', '2019-07-20 12:00:00',
               '2019-07-21 12:00:00', '2019-07-22 12:00:00',
               '2019-07-23 12:00:00', '2019-07-24 12:00:00',
               '2019-07-25 12:00:00', '2019-07-26 12:00:00',
               ...
               '2021-02-23 12:00:00', '2021-02-24 12:00:00',
               '2021-02-25 12:00:00', '2021-02-26 12:00:00',
               '2021-02-27 12:00:00', '2021-02-28 12:00:00',
               '2021-03-01 12:00:00', '2021-03-02 12:00:00',
               '2021-03-03 12:00:00', '2021-03-04 12:00:00'],
              dtype='datetime64[ns]', length=597, freq='D')

In [9]:
# Pick, for every timestamp in `trange`, the closest time step available in
# the spatial subset. NOTE(review): no tolerance is given, so a requested
# time with no nearby data still gets matched to its nearest neighbour.
ds_slice_trange = ds_slice.sel(
    {"time": trange},
    method='nearest',
)
ds_slice_trange

Unnamed: 0,Array,Chunk
Bytes,4.66 kiB,64 B
Shape,"(597,)","(8,)"
Count,342 Tasks,117 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 4.66 kiB 64 B Shape (597,) (8,) Count 342 Tasks 117 Chunks Type float64 numpy.ndarray",597  1,

Unnamed: 0,Array,Chunk
Bytes,4.66 kiB,64 B
Shape,"(597,)","(8,)"
Count,342 Tasks,117 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,38.55 kiB
Shape,"(94, 113)","(94, 105)"
Count,13 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 41.49 kiB 38.55 kiB Shape (94, 113) (94, 105) Count 13 Tasks 2 Chunks Type float32 numpy.ndarray",113  94,

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,38.55 kiB
Shape,"(94, 113)","(94, 105)"
Count,13 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,38.55 kiB
Shape,"(94, 113)","(94, 105)"
Count,13 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 41.49 kiB 38.55 kiB Shape (94, 113) (94, 105) Count 13 Tasks 2 Chunks Type float32 numpy.ndarray",113  94,

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,38.55 kiB
Shape,"(94, 113)","(94, 105)"
Count,13 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 GiB,12.05 MiB
Shape,"(597, 40, 94, 113)","(8, 40, 94, 105)"
Count,2923 Tasks,234 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.94 GiB 12.05 MiB Shape (597, 40, 94, 113) (8, 40, 94, 105) Count 2923 Tasks 234 Chunks Type float32 numpy.ndarray",597  1  113  94  40,

Unnamed: 0,Array,Chunk
Bytes,0.94 GiB,12.05 MiB
Shape,"(597, 40, 94, 113)","(8, 40, 94, 105)"
Count,2923 Tasks,234 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 GiB,12.05 MiB
Shape,"(597, 40, 94, 113)","(8, 40, 94, 105)"
Count,2923 Tasks,234 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.94 GiB 12.05 MiB Shape (597, 40, 94, 113) (8, 40, 94, 105) Count 2923 Tasks 234 Chunks Type float32 numpy.ndarray",597  1  113  94  40,

Unnamed: 0,Array,Chunk
Bytes,0.94 GiB,12.05 MiB
Shape,"(597, 40, 94, 113)","(8, 40, 94, 105)"
Count,2923 Tasks,234 Chunks
Type,float32,numpy.ndarray


In [10]:
# Export the spatial/temporal subset to a local NetCDF file.
# `compute=False` makes to_netcdf return a delayed object instead of writing
# immediately, so the actual download + write can run under a progress bar.
# The recorded run of this cell took about 1hr 27min.
#
# ProgressBar is imported in the notebook's import cell
# (or use distributed.progress when using the distributed scheduler).
delayed_obj = ds_slice_trange.to_netcdf("rtofs_201907_20210304.nc", compute=False)

with ProgressBar():
    results = delayed_obj.compute()

[########################################] | 100% Completed |  1hr 27min  5.7s


In [11]:
# Re-open the same remote dataset, this time without a `chunks` argument
# (unlike the earlier `rds` load), again dropping the unused `u` / `v` variables.
rds2 = (
    xr.open_dataset(url)
    .drop_vars(["u", "v"])
)
rds2

In [12]:
# Same positional x/y window as before (extent_ind = [x0, x1, y0, y1]),
# applied to the dataset that was opened without explicit chunking.
ds2_slice = rds2.isel(
    x=slice(*extent_ind[:2]),
    y=slice(*extent_ind[2:]),
)
ds2_slice

In [13]:
# Time range for the second export: daily at 12:00 from 2019-07-17 through
# 2022-08-23. The saved output of the following re-chunking cell shows 1134
# time steps, which the 597-step `trange` defined earlier cannot produce; the
# range must have been extended in a cell that is no longer in the notebook.
# Defining it here, under its own name, makes the cell reproducible from a
# fresh kernel and leaves the original `trange` untouched.
trange_extended = pd.date_range(
    start=pd.Timestamp(2019, 7, 17, 12, 0, 0),
    end=pd.Timestamp(2022, 8, 23, 12, 0, 0),
    freq='1D',
)
assert len(trange_extended) == 1134, "extended range should match the saved output"

# Nearest-neighbour match of each requested timestamp against the time axis
# of the spatial subset.
ds2_slice_trange = ds2_slice.sel(time=trange_extended, method='nearest')
ds2_slice_trange

In [15]:
# Split the selection into blocks of 100 steps along `time`; the other
# dimensions are left as single chunks.
ds2_slice_trange_chunked = ds2_slice_trange.chunk(dict(time=100))
ds2_slice_trange_chunked

Unnamed: 0,Array,Chunk
Bytes,8.86 kiB,800 B
Shape,"(1134,)","(100,)"
Count,13 Tasks,12 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 8.86 kiB 800 B Shape (1134,) (100,) Count 13 Tasks 12 Chunks Type float64 numpy.ndarray",1134  1,

Unnamed: 0,Array,Chunk
Bytes,8.86 kiB,800 B
Shape,"(1134,)","(100,)"
Count,13 Tasks,12 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,41.49 kiB
Shape,"(94, 113)","(94, 113)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 41.49 kiB 41.49 kiB Shape (94, 113) (94, 113) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",113  94,

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,41.49 kiB
Shape,"(94, 113)","(94, 113)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,41.49 kiB
Shape,"(94, 113)","(94, 113)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 41.49 kiB 41.49 kiB Shape (94, 113) (94, 113) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",113  94,

Unnamed: 0,Array,Chunk
Bytes,41.49 kiB,41.49 kiB
Shape,"(94, 113)","(94, 113)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.79 GiB,162.08 MiB
Shape,"(1134, 40, 94, 113)","(100, 40, 94, 113)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.79 GiB 162.08 MiB Shape (1134, 40, 94, 113) (100, 40, 94, 113) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",1134  1  113  94  40,

Unnamed: 0,Array,Chunk
Bytes,1.79 GiB,162.08 MiB
Shape,"(1134, 40, 94, 113)","(100, 40, 94, 113)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.79 GiB,162.08 MiB
Shape,"(1134, 40, 94, 113)","(100, 40, 94, 113)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.79 GiB 162.08 MiB Shape (1134, 40, 94, 113) (100, 40, 94, 113) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",1134  1  113  94  40,

Unnamed: 0,Array,Chunk
Bytes,1.79 GiB,162.08 MiB
Shape,"(1134, 40, 94, 113)","(100, 40, 94, 113)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray


In [None]:
# Export the second (re-chunked) subset to a local NetCDF file.
# This cell previously wrote `ds_slice_trange`, i.e. the first pipeline whose
# time axis ends on 2021-03-04 and which was already saved to
# "rtofs_201907_20210304.nc". The dataset prepared for this file is
# `ds2_slice_trange_chunked`, so that is what gets written here.
#
# `compute=False` defers the write so it can run under a progress bar.
# ProgressBar is imported in the notebook's import cell
# (or use distributed.progress when using the distributed scheduler).
delayed_obj = ds2_slice_trange_chunked.to_netcdf("rtofs_201907_20220823.nc", compute=False)

with ProgressBar():
    results = delayed_obj.compute()