From 0fb433d5066e638b1825575d829d0f19608f1c27 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Mon, 2 May 2022 15:16:39 -0400
Subject: [PATCH 01/19] added inline_array kwarg

---
 xarray/backends/api.py   | 20 ++++++++++++++++++++
 xarray/core/dataarray.py | 17 ++++++++++++++++-
 xarray/core/dataset.py   |  9 ++++++++-
 xarray/core/variable.py  | 17 +++++++++++++++--
 4 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 9967b0a08c0..f474d8bfb2b 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -274,6 +274,7 @@ def _chunk_ds(
     engine,
     chunks,
     overwrite_encoded_chunks,
+    inline_array,
     **extra_tokens,
 ):
     from dask.base import tokenize
@@ -282,6 +283,8 @@ def _chunk_ds(
     token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens)
     name_prefix = f"open_dataset-{token}"

+    print(f"inline_array={inline_array}")
+
     variables = {}
     for name, var in backend_ds.variables.items():
         var_chunks = _get_chunk(var, chunks)
@@ -292,6 +295,7 @@ def _chunk_ds(
             overwrite_encoded_chunks=overwrite_encoded_chunks,
             name_prefix=name_prefix,
             token=token,
+            inline_array=inline_array,
         )
     return backend_ds._replace(variables)

@@ -303,6 +307,7 @@ def _dataset_from_backend_dataset(
     chunks,
     cache,
     overwrite_encoded_chunks,
+    inline_array,
     **extra_tokens,
 ):
     if not isinstance(chunks, (int, dict)) and chunks not in {None, "auto"}:
@@ -320,6 +325,7 @@ def _dataset_from_backend_dataset(
         engine,
         chunks,
         overwrite_encoded_chunks,
+        inline_array,
         **extra_tokens,
     )

@@ -444,6 +450,11 @@ def open_dataset(
         appropriate locks are chosen to safely read and write files with the
         currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
         "scipy", "pynio", "pseudonetcdf", "cfgrib".
+      - 'inline_array': How to include the array in the dask task graph. By
+        default (``inline_array=False``) the array is included in a task by
+        itself, and each chunk refers to that task by its key. With
+        ``inline_array=True``, Dask will instead inline the array directly
+        in the values of the task graph. See `dask.array.from_array()`.

         See engine open function for kwargs accepted by each specific engine.

@@ -463,6 +474,8 @@ def open_dataset(
     --------
     open_mfdataset
     """
+    print("using altered version of xr.open_dataset")
+
     if len(args) > 0:
         raise TypeError(
             "open_dataset() takes only 1 positional argument starting from version 0.18.0, "
@@ -492,6 +505,7 @@ def open_dataset(
     )

     overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
+    inline_array = kwargs.pop("inline_array", False)
     backend_ds = backend.open_dataset(
         filename_or_obj,
         drop_variables=drop_variables,
@@ -505,6 +519,7 @@ def open_dataset(
         chunks,
         cache,
         overwrite_encoded_chunks,
+        inline_array,
         drop_variables=drop_variables,
         **decoders,
         **kwargs,
@@ -627,6 +642,11 @@ def open_dataarray(
         appropriate locks are chosen to safely read and write files with the
         currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
         "scipy", "pynio", "pseudonetcdf", "cfgrib".
+      - 'inline_array': How to include the array in the dask task graph. By
+        default (``inline_array=False``) the array is included in a task by
+        itself, and each chunk refers to that task by its key. With
+        ``inline_array=True``, Dask will instead inline the array directly
+        in the values of the task graph. See `dask.array.from_array()`.

         See engine open function for kwargs accepted by each specific engine.
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index d15cbd00c0d..1bca3e6d87a 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -1113,6 +1113,7 @@ def chunk(
         name_prefix: str = "xarray-",
         token: str = None,
         lock: bool = False,
+        inline_array: bool = False,
         **chunks_kwargs: Any,
     ) -> DataArray:
         """Coerce this array's data into a dask array with the given chunks.
@@ -1137,6 +1138,9 @@ def chunk(
         lock : optional
             Passed on to :py:func:`dask.array.from_array`, if the array is not
             already a dask array.
+        inline_array : optional
+            Passed on to :py:func:`dask.array.from_array`, if the array is not
+            already a dask array.
         **chunks_kwargs : {dim: chunks, ...}, optional
             The keyword arguments form of ``chunks``.
             One of chunks or chunks_kwargs must be provided.

@@ -1144,6 +1148,13 @@ def chunk(
         Returns
         -------
         chunked : xarray.DataArray
+
+        See Also
+        --------
+        DataArray.chunks
+        DataArray.chunksizes
+        xarray.unify_chunks
+        dask.array.from_array
         """
         if chunks is None:
             warnings.warn(
@@ -1162,7 +1173,11 @@ def chunk(

         chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")
         ds = self._to_temp_dataset().chunk(
-            chunks, name_prefix=name_prefix, token=token, lock=lock
+            chunks,
+            name_prefix=name_prefix,
+            token=token,
+            lock=lock,
+            inline_array=inline_array,
         )
         return self._from_temp_dataset(ds)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 76776b4bc44..9ef1ae0ebf1 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -239,6 +239,7 @@ def _maybe_chunk(
     lock=None,
     name_prefix="xarray-",
     overwrite_encoded_chunks=False,
+    inline_array=False,
 ):
     from dask.base import tokenize

@@ -250,7 +251,8 @@ def _maybe_chunk(
         # subtle bugs result otherwise. see GH3350
         token2 = tokenize(name, token if token else var._data, chunks)
         name2 = f"{name_prefix}{name}-{token2}"
-        var = var.chunk(chunks, name=name2, lock=lock)
+        print(type(var))
+        var = var.chunk(chunks, name=name2, lock=lock, inline_array=inline_array)

         if overwrite_encoded_chunks and var.chunks is not None:
             var.encoding["chunks"] = tuple(x[0] for x in var.chunks)
@@ -1994,6 +1996,7 @@ def chunk(
         name_prefix: str = "xarray-",
         token: str = None,
         lock: bool = False,
+        inline_array: bool = False,
         **chunks_kwargs: Any,
     ) -> Dataset:
         """Coerce all arrays in this dataset into dask arrays with the given
@@ -2018,6 +2021,9 @@ def chunk(
         lock : optional
             Passed on to :py:func:`dask.array.from_array`, if the array is not
             already a dask array.
+        inline_array : optional
+            Passed on to :py:func:`dask.array.from_array`, if the array is not
+            already a dask array.
         **chunks_kwargs : {dim: chunks, ...}, optional
             The keyword arguments form of ``chunks``.
             One of chunks or chunks_kwargs must be provided
@@ -2031,6 +2037,7 @@ def chunk(
         Dataset.chunks
         Dataset.chunksizes
         xarray.unify_chunks
+        dask.array.from_array
         """
         if chunks is None and chunks_kwargs is None:
             warnings.warn(
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 05c70390b46..2445921f4a0 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1023,6 +1023,7 @@ def chunk(
         ) = {},
         name: str = None,
         lock: bool = False,
+        inline_array: bool = False,
         **chunks_kwargs: Any,
     ) -> Variable:
         """Coerce this array's data into a dask array with the given chunks.
@@ -1046,6 +1047,9 @@ def chunk(
         lock : optional
             Passed on to :py:func:`dask.array.from_array`, if the array is not
             already a dask array.
+        inline_array : optional
+            Passed on to :py:func:`dask.array.from_array`, if the array is not
+            already a dask array.
         **chunks_kwargs : {dim: chunks, ...}, optional
             The keyword arguments form of ``chunks``.
             One of chunks or chunks_kwargs must be provided.

@@ -1053,6 +1057,13 @@ def chunk(
         Returns
         -------
         chunked : xarray.Variable
+
+        See Also
+        --------
+        Variable.chunks
+        Variable.chunksizes
+        xarray.unify_chunks
+        dask.array.from_array
         """
         import dask.array as da

@@ -1098,7 +1109,9 @@ def chunk(
             if utils.is_dict_like(chunks):
                 chunks = tuple(chunks.get(n, s) for n, s in enumerate(self.shape))

-            data = da.from_array(data, chunks, name=name, lock=lock, **kwargs)
+            data = da.from_array(
+                data, chunks, name=name, lock=lock, inline_array=inline_array, **kwargs
+            )

         return self._replace(data=data)

@@ -2710,7 +2723,7 @@ def values(self, values):
             f"Please use DataArray.assign_coords, Dataset.assign_coords or Dataset.assign as appropriate."
         )

-    def chunk(self, chunks={}, name=None, lock=False):
+    def chunk(self, chunks={}, name=None, lock=False, inline_array=False):
        # Dummy - do not chunk. This method is invoked e.g. by Dataset.chunk()
        return self.copy(deep=False)
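For readers following along, a minimal sketch (not part of the patch series) of what the plumbing above changes. By default, dask's `from_array` stores the source array as one standalone task that every chunk references by key; with `inline_array=True` the array is embedded directly in each chunk's task. The dataset, chunk size, and printed counts here are illustrative, assuming xarray with this patch applied plus dask and numpy installed:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"foo": ("x", np.arange(100))})

    # Default: one extra task holds the source array; chunks reference it by key.
    referenced = ds.chunk({"x": 10}, inline_array=False)
    # Inlined: the source array is embedded directly in each chunk's task.
    inlined = ds.chunk({"x": 10}, inline_array=True)

    # Inlining drops the shared source-array node, so the inlined graph
    # should contain one task fewer per variable.
    print(len(referenced.foo.data.__dask_graph__()))  # e.g. 11
    print(len(inlined.foo.data.__dask_graph__()))     # e.g. 10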
From 8765acbc2f1a1630d408a799f02d532272267805 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Mon, 2 May 2022 15:20:09 -0400
Subject: [PATCH 02/19] remove cheeky print statements

---
 xarray/backends/api.py | 2 --
 xarray/core/dataset.py | 1 -
 2 files changed, 3 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index f474d8bfb2b..0d94db1483d 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -283,8 +283,6 @@ def _chunk_ds(
     token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens)
     name_prefix = f"open_dataset-{token}"

-    print(f"inline_array={inline_array}")
-
     variables = {}
     for name, var in backend_ds.variables.items():
         var_chunks = _get_chunk(var, chunks)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 9ef1ae0ebf1..987248bf7ff 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -251,7 +251,6 @@ def _maybe_chunk(
         # subtle bugs result otherwise. see GH3350
         token2 = tokenize(name, token if token else var._data, chunks)
         name2 = f"{name_prefix}{name}-{token2}"
-        print(type(var))
         var = var.chunk(chunks, name=name2, lock=lock, inline_array=inline_array)

         if overwrite_encoded_chunks and var.chunks is not None:

From 480fd8c8eb68f75b33f623d1115ae672c1454111 Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Tue, 3 May 2022 12:02:07 -0400
Subject: [PATCH 03/19] Remove another rogue print statement

---
 xarray/backends/api.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 0d94db1483d..3fa2b5e3832 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -472,8 +472,6 @@ def open_dataset(
     --------
     open_mfdataset
     """
-    print("using altered version of xr.open_dataset")
-
     if len(args) > 0:
         raise TypeError(
             "open_dataset() takes only 1 positional argument starting from version 0.18.0, "

From b6afdd471be1fc551e7b1cbc84a2a6c053a27071 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 4 May 2022 11:03:08 -0400
Subject: [PATCH 04/19] bump dask dependency

---
 ci/requirements/min-all-deps.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index 76e2b28093d..3fe65b00ad0 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -16,7 +16,7 @@ dependencies:
   - cfgrib=0.9
   - cftime=1.2
   - coveralls
-  - dask-core=2.30
+  - dask-core=2021.01.0
   - distributed=2.30
   - h5netcdf=0.8
   - h5py=2.10

From ecb5cc21fea8b619f4955ef9a9ef18404a0ec4db Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 4 May 2022 11:27:35 -0400
Subject: [PATCH 05/19] update multiple dependencies based on min-deps-check.py

---
 ci/requirements/min-all-deps.yml         | 38 ++++++++++++------------
 doc/getting-started-guide/installing.rst |  2 +-
 setup.cfg                                |  2 +-
 3 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index 3fe65b00ad0..06bd396a897 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -8,34 +8,34 @@ dependencies:
   # When upgrading python, numpy, or pandas, must also change
   # doc/installing.rst and setup.py.
   - python=3.8
-  - boto3=1.13
+  - boto3=1.17
   - bottleneck=1.3
   # cartopy 0.18 conflicts with pynio
-  - cartopy=0.17
+  - cartopy=0.19
   - cdms2=3.1
   - cfgrib=0.9
-  - cftime=1.2
+  - cftime=1.4
   - coveralls
-  - dask-core=2021.01.0
-  - distributed=2.30
+  - dask-core=2021.4
+  - distributed=2021.4
-  - h5netcdf=0.8
-  - h5py=2.10
+  - h5netcdf=0.11
+  - h5py=3.2
   # hdf5 1.12 conflicts with h5py=2.10
-  - hdf5=1.10
+  - hdf5=1.12
   - hypothesis
-  - iris=2.4
+  - iris=3.0
   - lxml=4.6  # Optional dep of pydap
-  - matplotlib-base=3.3
+  - matplotlib-base=3.4
   - nc-time-axis=1.2
   # netcdf follows a 1.major.minor[.patch] convention
   # (see https://github.com/Unidata/netcdf4-python/issues/1090)
   # bumping the netCDF4 version is currently blocked by #4491
   - netcdf4=1.5.3
-  - numba=0.51
-  - numpy=1.18
-  - packaging=20.0
-  - pandas=1.1
-  - pint=0.16
+  - numba=0.53
+  - numpy=1.19
+  - packaging=20.9
+  - pandas=1.2
+  - pint=0.17
   - pip
   - pseudonetcdf=3.1
   - pydap=3.2
@@ -44,12 +44,12 @@ dependencies:
   - pytest-cov
   - pytest-env
   - pytest-xdist
-  - rasterio=1.1
+  - rasterio=1.2
-  - scipy=1.5
+  - scipy=1.6
   - seaborn=0.11
-  - sparse=0.11
+  - sparse=0.12
   - toolz=0.11
   - typing_extensions=3.7
-  - zarr=2.5
+  - zarr=2.8
   - pip:
     - numbagg==0.1
diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst
index 0668853946f..bd9e1c8b53d 100644
--- a/doc/getting-started-guide/installing.rst
+++ b/doc/getting-started-guide/installing.rst
@@ -9,7 +9,7 @@ Required dependencies
 - Python (3.8 or later)
 - `numpy `__ (1.18 or later)
 - `packaging `__ (20.0 or later)
-- `pandas `__ (1.1 or later)
+- `pandas `__ (1.2 or later)

 .. _optional-dependencies:

diff --git a/setup.cfg b/setup.cfg
index 05b202810b4..b3290c4543c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -76,7 +76,7 @@ include_package_data = True
 python_requires = >=3.8
 install_requires =
     numpy >= 1.18
-    pandas >= 1.1
+    pandas >= 1.2
     packaging >= 20.0

 [options.extras_require]

From a2db21f687939396e8b175c24b7e5c4d1ff8939e Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 4 May 2022 16:54:56 -0400
Subject: [PATCH 06/19] update environment to match #6559

---
 ci/requirements/min-all-deps.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index 06bd396a897..d8ee5c19757 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -8,7 +8,7 @@ dependencies:
   # When upgrading python, numpy, or pandas, must also change
   # doc/installing.rst and setup.py.
   - python=3.8
-  - boto3=1.17
+  - boto3=1.13
   - bottleneck=1.3
   # cartopy 0.18 conflicts with pynio
   - cartopy=0.19
   - cdms2=3.1
   - cfgrib=0.9
   - cftime=1.4
   - coveralls
-  - dask-core=2021.4
-  - distributed=2021.4
+  - dask-core=2021.04
+  - distributed=2021.04
   - h5netcdf=0.11
-  - h5py=3.2
+  - h5py=3.1
   # hdf5 1.12 conflicts with h5py=2.10
-  - hdf5=1.12
+  - hdf5=1.10
   - hypothesis
-  - iris=3.0
+  - iris=2.4
   - lxml=4.6  # Optional dep of pydap
   - matplotlib-base=3.4
   - nc-time-axis=1.2
   # netcdf follows a 1.major.minor[.patch] convention
   # (see https://github.com/Unidata/netcdf4-python/issues/1090)
   # bumping the netCDF4 version is currently blocked by #4491
   - netcdf4=1.5.3
   - numba=0.53
   - numpy=1.19
-  - packaging=20.9
+  - packaging=20.0
   - pandas=1.2
   - pint=0.17
   - pip

From 032d9f30c5139aef1638911fef2e81305d799d85 Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Wed, 4 May 2022 17:08:04 -0400
Subject: [PATCH 07/19] Update h5py in ci/requirements/min-all-deps.yml

---
 ci/requirements/min-all-deps.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index d8ee5c19757..fe94a62acb7 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -19,7 +19,7 @@ dependencies:
   - dask-core=2021.04
   - distributed=2021.04
   - h5netcdf=0.11
-  - h5py=3.1
+  - h5py=2.10
   # hdf5 1.12 conflicts with h5py=2.10
   - hdf5=1.10
   - hypothesis

From cae84eacde63074009e94ca65a7b8e9846feb39e Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Wed, 4 May 2022 17:44:50 -0400
Subject: [PATCH 08/19] Update ci/requirements/min-all-deps.yml

---
 ci/requirements/min-all-deps.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index fe94a62acb7..d454ac01c0a 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -19,8 +19,8 @@ dependencies:
   - dask-core=2021.04
   - distributed=2021.04
   - h5netcdf=0.11
-  - h5py=2.10
-  # hdf5 1.12 conflicts with h5py=2.10
+  - h5py=3.1
+  # hdf5 1.12 conflicts with h5py=3.1
   - hdf5=1.10
   - hypothesis
   - iris=2.4

From 43995694eeef7f6b4395aecab12ee9f9c2f5e03a Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 4 May 2022 18:25:23 -0400
Subject: [PATCH 09/19] remove pynio from test env

---
 ci/requirements/min-all-deps.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index d454ac01c0a..fbe073dee69 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -39,7 +39,7 @@ dependencies:
   - pip
   - pseudonetcdf=3.1
   - pydap=3.2
-  - pynio=1.5
+  # - pynio=1.5.5
   - pytest
   - pytest-cov
   - pytest-env

From d582576742c068f4d732348d2386f7a85f8483fb Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Fri, 6 May 2022 10:51:34 -0400
Subject: [PATCH 10/19] Update ci/requirements/min-all-deps.yml

---
 ci/requirements/min-all-deps.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index fbe073dee69..ecabde06622 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -10,7 +10,6 @@ dependencies:
   - python=3.8
   - boto3=1.13
   - bottleneck=1.3
-  # cartopy 0.18 conflicts with pynio
   - cartopy=0.19
   - cdms2=3.1
   - cfgrib=0.9
From a2a2419e1fed4405204c6eb237f421e7e180c952 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 11 May 2022 12:43:00 -0400
Subject: [PATCH 11/19] promote inline_array kwarg to be top-level kwarg

---
 xarray/backends/api.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 3fa2b5e3832..b345ce10da0 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -350,6 +350,7 @@ def open_dataset(
     concat_characters=None,
     decode_coords=None,
     drop_variables=None,
+    inline_array=False,
     backend_kwargs=None,
     **kwargs,
 ):
@@ -434,6 +435,12 @@ def open_dataset(
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
+    inline_array: bool, optional
+        How to include the array in the dask task graph.
+        By default (``inline_array=False``) the array is included in a task by
+        itself, and each chunk refers to that task by its key. With
+        ``inline_array=True``, Dask will instead inline the array directly
+        in the values of the task graph. See `dask.array.from_array()`.
     backend_kwargs: dict
         Additional keyword arguments passed on to the engine open function,
         equivalent to `**kwargs`.
@@ -501,7 +508,6 @@ def open_dataset(
     )

     overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
-    inline_array = kwargs.pop("inline_array", False)
     backend_ds = backend.open_dataset(
         filename_or_obj,
         drop_variables=drop_variables,
@@ -537,6 +543,7 @@ def open_dataarray(
     concat_characters=None,
     decode_coords=None,
     drop_variables=None,
+    inline_array=False,
     backend_kwargs=None,
     **kwargs,
 ):
@@ -624,6 +631,12 @@ def open_dataarray(
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
+    inline_array: bool, optional
+        How to include the array in the dask task graph.
+        By default (``inline_array=False``) the array is included in a task by
+        itself, and each chunk refers to that task by its key. With
+        ``inline_array=True``, Dask will instead inline the array directly
+        in the values of the task graph. See `dask.array.from_array()`.
    backend_kwargs: dict
        Additional keyword arguments passed on to the engine open function,
        equivalent to `**kwargs`.
@@ -676,6 +689,7 @@ def open_dataarray(
         chunks=chunks,
         cache=cache,
         drop_variables=drop_variables,
+        inline_array=inline_array,
         backend_kwargs=backend_kwargs,
         use_cftime=use_cftime,
         decode_timedelta=decode_timedelta,
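With the kwarg now a real parameter rather than something popped out of `**kwargs`, a hedged usage sketch (the filename is illustrative, and `inline_array` only has an effect when `chunks` makes the load dask-backed):

    import xarray as xr

    # Open lazily with dask chunks; embed the on-disk array wrapper directly
    # in each chunk's task instead of as one shared task in the graph.
    ds = xr.open_dataset(
        "data.nc",        # assumed to exist; any netCDF file will do
        chunks={"x": 10},
        inline_array=True,
    )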
From 07e2c8dcc22fd52ee59ef42a1620a5e92bf6f906 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 11 May 2022 13:07:57 -0400
Subject: [PATCH 12/19] whatsnew

---
 doc/whats-new.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4882402073c..005211c0530 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -41,6 +41,9 @@ New Features
 - Allow passing chunks in ``**kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and
   :py:meth:`Variable.chunk`. (:pull:`6471`)
   By `Tom Nicholas `_.
+- Expose the `inline_array` kwarg from `dask.array.from_array` in :py:func:`open_dataset`, :py:meth:`Dataset.chunk`,
+  :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`)
+  By `Tom Nicholas `_.

 Breaking changes
 ~~~~~~~~~~~~~~~~

From 070b45aae989f03a9fa64178eafe7ca6c4898b6b Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 11 May 2022 13:24:28 -0400
Subject: [PATCH 13/19] add test

---
 xarray/tests/test_backends.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 81bfeb11a1e..fa7f4e33f5f 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -3825,6 +3825,23 @@ def test_load_dataarray(self):
             # load_dataarray
             ds.to_netcdf(tmp)

+    def test_inline_array(self):
+        with create_tmp_file() as tmp:
+            original = Dataset({"foo": ("x", np.random.randn(10))})
+            original.to_netcdf(tmp)
+            chunks = {"time": 10}
+
+            def num_graph_nodes(obj):
+                return len(obj.__dask_graph__())
+
+            not_inlined = open_dataset(tmp, inline_array=False, chunks=chunks)
+            inlined = open_dataset(tmp, inline_array=True, chunks=chunks)
+            assert num_graph_nodes(inlined) < num_graph_nodes(not_inlined)
+
+            not_inlined = open_dataarray(tmp, inline_array=False, chunks=chunks)
+            inlined = open_dataarray(tmp, inline_array=True, chunks=chunks)
+            assert num_graph_nodes(inlined) < num_graph_nodes(not_inlined)
+

 @requires_scipy_or_netCDF4
 @requires_pydap

From 91a955ff0deed1098f1c948f88857cdf33e3059c Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Wed, 11 May 2022 13:38:29 -0400
Subject: [PATCH 14/19] Remove repeated docstring entry

Co-authored-by: Deepak Cherian

---
 xarray/backends/api.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index b345ce10da0..1308bd9540a 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -455,11 +455,6 @@ def open_dataset(
         appropriate locks are chosen to safely read and write files with the
         currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
         "scipy", "pynio", "pseudonetcdf", "cfgrib".
-      - 'inline_array': How to include the array in the dask task graph. By
-        default (``inline_array=False``) the array is included in a task by
-        itself, and each chunk refers to that task by its key. With
-        ``inline_array=True``, Dask will instead inline the array directly
-        in the values of the task graph. See `dask.array.from_array()`.

         See engine open function for kwargs accepted by each specific engine.

From 8bed2bb0aee4266fd43d525ecc4e88975d165fbd Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Wed, 11 May 2022 13:38:39 -0400
Subject: [PATCH 15/19] Remove repeated docstring entry

Co-authored-by: Deepak Cherian

---
 xarray/backends/api.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 1308bd9540a..60db847c244 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -646,11 +646,6 @@ def open_dataarray(
         appropriate locks are chosen to safely read and write files with the
         currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
         "scipy", "pynio", "pseudonetcdf", "cfgrib".
-      - 'inline_array': How to include the array in the dask task graph. By
-        default (``inline_array=False``) the array is included in a task by
-        itself, and each chunk refers to that task by its key. With
-        ``inline_array=True``, Dask will instead inline the array directly
-        in the values of the task graph. See `dask.array.from_array()`.

         See engine open function for kwargs accepted by each specific engine.
From cebd89aac3180f4eca70c132ec2d970b69e428c1 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 11 May 2022 13:42:59 -0400
Subject: [PATCH 16/19] hyperlink to dask functions

---
 xarray/backends/api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 60db847c244..1672d4f7ad2 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -440,7 +440,7 @@ def open_dataset(
         By default (``inline_array=False``) the array is included in a task by
         itself, and each chunk refers to that task by its key. With
         ``inline_array=True``, Dask will instead inline the array directly
-        in the values of the task graph. See `dask.array.from_array()`.
+        in the values of the task graph. See :py:func:`dask.array.from_array`.
     backend_kwargs: dict
         Additional keyword arguments passed on to the engine open function,
         equivalent to `**kwargs`.
@@ -631,7 +631,7 @@ def open_dataarray(
         By default (``inline_array=False``) the array is included in a task by
         itself, and each chunk refers to that task by its key. With
         ``inline_array=True``, Dask will instead inline the array directly
-        in the values of the task graph. See `dask.array.from_array()`.
+        in the values of the task graph. See :py:func:`dask.array.from_array`.
     backend_kwargs: dict
         Additional keyword arguments passed on to the engine open function,
         equivalent to `**kwargs`.

From 7dbe364d8830cd45c9896608acf7094cd109dc7b Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 11 May 2022 14:37:44 -0400
Subject: [PATCH 17/19] skip test if on windows

---
 xarray/tests/test_backends.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index fa7f4e33f5f..4b9aa3e4f79 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -3825,6 +3825,10 @@ def test_load_dataarray(self):
             # load_dataarray
             ds.to_netcdf(tmp)

+    @pytest.skiif(
+        ON_WINDOWS,
+        reason="counting number of tasks in graph fails on windows for some reason",
+    )
     def test_inline_array(self):

From 7eb0569d11c82787087830528651bab7fd6de37a Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 11 May 2022 14:38:42 -0400
Subject: [PATCH 18/19] correct spelling

---
 xarray/tests/test_backends.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 4b9aa3e4f79..b1bf494f10a 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -3825,7 +3825,7 @@ def test_load_dataarray(self):
             # load_dataarray
             ds.to_netcdf(tmp)

-    @pytest.skiif(
+    @pytest.skipif(
         ON_WINDOWS,
         reason="counting number of tasks in graph fails on windows for some reason",
     )

From 058630f31a9b3a1ee817eb5c12b2c04b48610be8 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 11 May 2022 14:38:42 -0400
Subject: [PATCH 19/19] correct spelling

---
 xarray/tests/test_backends.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 4b9aa3e4f79..e3ed220faaf 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -3825,7 +3825,7 @@ def test_load_dataarray(self):
             # load_dataarray
             ds.to_netcdf(tmp)

-    @pytest.skiif(
+    @pytest.mark.skipif(
         ON_WINDOWS,
         reason="counting number of tasks in graph fails on windows for some reason",
     )
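To reproduce by hand the property the new test asserts (a smaller task graph when inlining), a rough sketch along the lines of `test_inline_array`; the file path and chunk size are illustrative, assuming the full series is applied:

    import numpy as np
    import xarray as xr

    xr.Dataset({"foo": ("x", np.random.randn(10))}).to_netcdf("tmp.nc")

    def num_graph_nodes(obj):
        # Same helper as the test: total number of tasks in the dask graph.
        return len(obj.__dask_graph__())

    chunks = {"x": 5}
    not_inlined = xr.open_dataset("tmp.nc", inline_array=False, chunks=chunks)
    inlined = xr.open_dataset("tmp.nc", inline_array=True, chunks=chunks)
    assert num_graph_nodes(inlined) < num_graph_nodes(not_inlined)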