From 84fde3f07c331abf9cb0fcc6b2b5480c64d8a2ba Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Mon, 6 Nov 2023 21:35:38 -0500 Subject: [PATCH 01/18] ENH: query_max_dist option to speedup interpolate.NearestNDInterpolator This allows for far quicker queries for large datasets where prior information about when/where to interpolate is known. --- scipy/interpolate/_ndgriddata.py | 18 +++++++++++++++--- scipy/interpolate/tests/test_ndgriddata.py | 16 ++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index 5fe556e918ef..d2ff6b26ac21 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -44,6 +44,9 @@ class NearestNDInterpolator(NDInterpolatorBase): Options passed to the underlying ``cKDTree``. .. versionadded:: 0.17.0 + query_max_dist : float, optional + Option to truncate ``cKDTree`` nearest neighbor query to some + ``query_max_dist``. This is useful for larger interpolation problems. See Also -------- @@ -89,13 +92,14 @@ class NearestNDInterpolator(NDInterpolatorBase): """ - def __init__(self, x, y, rescale=False, tree_options=None): + def __init__(self, x, y, rescale=False, tree_options=None, query_max_dist=np.inf): NDInterpolatorBase.__init__(self, x, y, rescale=rescale, need_contiguous=False, need_values=False) if tree_options is None: tree_options = dict() self.tree = cKDTree(self.points, **tree_options) + self.query_max_dist = query_max_dist self.values = np.asarray(y) def __call__(self, *args): @@ -116,8 +120,16 @@ def __call__(self, *args): xi = _ndim_coords_from_arrays(args, ndim=self.points.shape[1]) xi = self._check_call_shape(xi) xi = self._scale_x(xi) - dist, i = self.tree.query(xi) - return self.values[i] + dist, i = self.tree.query(xi, distance_upper_bound=self.query_max_dist) + if np.isfinite(self.query_max_dist): + # dist will be infinite for points where NN returned no neighbors. set those to nan, and mask in arr. + valid_mask = np.isfinite(dist) + interp_values = np.zeros(i.shape) + interp_values[~valid_mask] = np.nan + interp_values[valid_mask] = self.values[i[valid_mask]] + return interp_values + else: + return self.values[i] #------------------------------------------------------------------------------ diff --git a/scipy/interpolate/tests/test_ndgriddata.py b/scipy/interpolate/tests/test_ndgriddata.py index 8f0a310e29d1..648d52449c1f 100644 --- a/scipy/interpolate/tests/test_ndgriddata.py +++ b/scipy/interpolate/tests/test_ndgriddata.py @@ -196,6 +196,22 @@ def test_nearest_list_argument(self): NI = NearestNDInterpolator((d[0], d[1]), list(d[2])) assert_array_equal(NI([0.1, 0.9], [0.1, 0.9]), [0, 2]) + def test_nearest_max_dist(self): + nd = np.array([[0, 1, 0, 1], + [0, 0, 1, 1], + [0, 1, 1, 2]]) + delta = 0.1 + query_points = [0 + delta, 1 + delta], [0 + delta, 1 + delta] + + # case 1 - query max_dist is smaller than the query points' nearest distance to nd. + NI = NearestNDInterpolator((nd[0], nd[1]), nd[2], query_max_dist=np.sqrt(delta ** 2 + delta ** 2) - 1e-7) + assert_array_equal(NI(query_points), [np.nan, np.nan]) + + # case 2 - query max_dist is larger, so should return non np.nan + NI = NearestNDInterpolator((nd[0], nd[1]), nd[2], query_max_dist=np.sqrt(delta ** 2 + delta ** 2) + 1e-7) + assert_array_equal(NI(query_points), [0, 2]) + + class TestNDInterpolators: @parametrize_interpolators From ef4e6c55b7aa15f9ac8e08cfc37e253dbb2b350c Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Mon, 6 Nov 2023 21:39:12 -0500 Subject: [PATCH 02/18] ENH: query_max_dist option to speedup interpolate.NearestNDInterpolator This allows for far quicker queries for large datasets where prior information about when/where to interpolate is known. --- scipy/interpolate/_ndgriddata.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index d2ff6b26ac21..7e5fb7811744 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -48,6 +48,8 @@ class NearestNDInterpolator(NDInterpolatorBase): Option to truncate ``cKDTree`` nearest neighbor query to some ``query_max_dist``. This is useful for larger interpolation problems. + .. versionadded:: 1.11.4 + See Also -------- griddata : From cf2e6b7959fa9828b43d719550f265554b844b8f Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Tue, 7 Nov 2023 20:01:34 -0500 Subject: [PATCH 03/18] MAINT: apply suggestions from code review Co-authored-by: Jake Bowhay <60778417+j-bowhay@users.noreply.github.com> --- scipy/interpolate/_ndgriddata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index 7e5fb7811744..1ab0bd0b0b21 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -94,7 +94,7 @@ class NearestNDInterpolator(NDInterpolatorBase): """ - def __init__(self, x, y, rescale=False, tree_options=None, query_max_dist=np.inf): + def __init__(self, x, y, rescale=False, tree_options=None, *, query_max_dist=np.inf): NDInterpolatorBase.__init__(self, x, y, rescale=rescale, need_contiguous=False, need_values=False) From 2fa01c9110a7c4db730625ac2efb57e94b8d7f3d Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Tue, 7 Nov 2023 20:06:52 -0500 Subject: [PATCH 04/18] ENH: distance_upper_bound & workers for NearestNDInterpolator This allows for far quicker queries for large datasets where prior information about when/where to interpolate is known. --- scipy/interpolate/_ndgriddata.py | 25 +++++++++++++++------- scipy/interpolate/tests/test_ndgriddata.py | 6 ++++-- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index 1ab0bd0b0b21..a20a4e58c557 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -44,11 +44,16 @@ class NearestNDInterpolator(NDInterpolatorBase): Options passed to the underlying ``cKDTree``. .. versionadded:: 0.17.0 - query_max_dist : float, optional + distance_upper_bound : float, optional Option to truncate ``cKDTree`` nearest neighbor query to some - ``query_max_dist``. This is useful for larger interpolation problems. + ``distance_upper_bound``. This is useful for larger interpolation problems. - .. versionadded:: 1.11.4 + .. versionadded:: 1.12.0 + workers : int, optional + Number of workers to use for parallel processing during nearest neighbor searches. + If -1 is given all CPU threads are used. Default: 1. + + .. versionadded:: 1.12.0 See Also -------- @@ -94,14 +99,15 @@ class NearestNDInterpolator(NDInterpolatorBase): """ - def __init__(self, x, y, rescale=False, tree_options=None, *, query_max_dist=np.inf): + def __init__(self, x, y, rescale=False, tree_options=None, *, distance_upper_bound=np.inf, workers=1): NDInterpolatorBase.__init__(self, x, y, rescale=rescale, need_contiguous=False, need_values=False) if tree_options is None: tree_options = dict() self.tree = cKDTree(self.points, **tree_options) - self.query_max_dist = query_max_dist + self.distance_upper_bound = distance_upper_bound + self.workers = workers self.values = np.asarray(y) def __call__(self, *args): @@ -122,9 +128,12 @@ def __call__(self, *args): xi = _ndim_coords_from_arrays(args, ndim=self.points.shape[1]) xi = self._check_call_shape(xi) xi = self._scale_x(xi) - dist, i = self.tree.query(xi, distance_upper_bound=self.query_max_dist) - if np.isfinite(self.query_max_dist): - # dist will be infinite for points where NN returned no neighbors. set those to nan, and mask in arr. + dist, i = self.tree.query(xi, distance_upper_bound=self.distance_upper_bound, workers=workers) + if np.isfinite(self.distance_upper_bound): + # if distance_upper_bound is set to not be infinite, then we need to consider the case where cKDtree + # does not find any points within distance_upper_bound to return. It marks those points as having infinte + # distance, which is what will be used below to mask the array and return only the points that were deemed + # to have a close enough neighbor to return something useful. valid_mask = np.isfinite(dist) interp_values = np.zeros(i.shape) interp_values[~valid_mask] = np.nan diff --git a/scipy/interpolate/tests/test_ndgriddata.py b/scipy/interpolate/tests/test_ndgriddata.py index 648d52449c1f..f74c51157402 100644 --- a/scipy/interpolate/tests/test_ndgriddata.py +++ b/scipy/interpolate/tests/test_ndgriddata.py @@ -204,11 +204,13 @@ def test_nearest_max_dist(self): query_points = [0 + delta, 1 + delta], [0 + delta, 1 + delta] # case 1 - query max_dist is smaller than the query points' nearest distance to nd. - NI = NearestNDInterpolator((nd[0], nd[1]), nd[2], query_max_dist=np.sqrt(delta ** 2 + delta ** 2) - 1e-7) + NI = NearestNDInterpolator((nd[0], nd[1]), nd[2], + distance_upper_bound=np.sqrt(delta ** 2 + delta ** 2) - 1e-7) assert_array_equal(NI(query_points), [np.nan, np.nan]) # case 2 - query max_dist is larger, so should return non np.nan - NI = NearestNDInterpolator((nd[0], nd[1]), nd[2], query_max_dist=np.sqrt(delta ** 2 + delta ** 2) + 1e-7) + NI = NearestNDInterpolator((nd[0], nd[1]), nd[2], + distance_upper_bound=np.sqrt(delta ** 2 + delta ** 2) + 1e-7) assert_array_equal(NI(query_points), [0, 2]) From b625a2892f688899e1cb481bca36ad68ad477ba7 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Tue, 7 Nov 2023 20:52:11 -0500 Subject: [PATCH 05/18] BUG: add missing self for workers --- scipy/interpolate/_ndgriddata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index a20a4e58c557..57d34811f5c5 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -128,7 +128,7 @@ def __call__(self, *args): xi = _ndim_coords_from_arrays(args, ndim=self.points.shape[1]) xi = self._check_call_shape(xi) xi = self._scale_x(xi) - dist, i = self.tree.query(xi, distance_upper_bound=self.distance_upper_bound, workers=workers) + dist, i = self.tree.query(xi, distance_upper_bound=self.distance_upper_bound, workers=self.workers) if np.isfinite(self.distance_upper_bound): # if distance_upper_bound is set to not be infinite, then we need to consider the case where cKDtree # does not find any points within distance_upper_bound to return. It marks those points as having infinte From ff1d52ffe1a827bf4941ccbc213dd5c21939f767 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Fri, 10 Nov 2023 21:07:06 -0500 Subject: [PATCH 06/18] ENH: supply query_options to NearestNDInterpolator --- scipy/interpolate/_ndgriddata.py | 38 +++++++++++++--------- scipy/interpolate/tests/test_ndgriddata.py | 34 +++++++++++++------ 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index 57d34811f5c5..a51376cfc581 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -44,16 +44,6 @@ class NearestNDInterpolator(NDInterpolatorBase): Options passed to the underlying ``cKDTree``. .. versionadded:: 0.17.0 - distance_upper_bound : float, optional - Option to truncate ``cKDTree`` nearest neighbor query to some - ``distance_upper_bound``. This is useful for larger interpolation problems. - - .. versionadded:: 1.12.0 - workers : int, optional - Number of workers to use for parallel processing during nearest neighbor searches. - If -1 is given all CPU threads are used. Default: 1. - - .. versionadded:: 1.12.0 See Also -------- @@ -99,37 +89,53 @@ class NearestNDInterpolator(NDInterpolatorBase): """ - def __init__(self, x, y, rescale=False, tree_options=None, *, distance_upper_bound=np.inf, workers=1): + def __init__(self, x, y, rescale=False, tree_options=None): NDInterpolatorBase.__init__(self, x, y, rescale=rescale, need_contiguous=False, need_values=False) if tree_options is None: tree_options = dict() self.tree = cKDTree(self.points, **tree_options) - self.distance_upper_bound = distance_upper_bound - self.workers = workers self.values = np.asarray(y) - def __call__(self, *args): + def __call__(self, *args, query_options=None): """ Evaluate interpolator at given points. Parameters ---------- + query_options : dict, optional + This allows `eps`, `p`, `distance_upper_bound`, and `workers` being passed to the cKDTree's query function + to be explicitly set. See the `scipy.spatial.cKDTree.query` for an overview of the different options. Note + that k is restricted to 1 since NearestNDInterpolator has to have k=1 by definition. + + ..versionadded:: 1.12.0 + x1, x2, ... xn : array-like of float Points where to interpolate data at. x1, x2, ... xn can be array-like of float with broadcastable shape. or x1 can be array-like of float with shape ``(..., ndim)`` """ + if query_options is not None and not isinstance(query_options, dict): + raise TypeError("query_options must be a dictionary") + + # gather the query options to pass to cKDTree.query + query_options = query_options or {} + eps = query_options.get('eps', 0) + p = query_options.get('p', 2) + distance_upper_bound = query_options.get('distance_upper_bound', np.inf) + workers = query_options.get('workers', 1) + # For the sake of enabling subclassing, NDInterpolatorBase._set_xi performs some operations # which are not required by NearestNDInterpolator.__call__, hence here we operate # on xi directly, without calling a parent class function. xi = _ndim_coords_from_arrays(args, ndim=self.points.shape[1]) xi = self._check_call_shape(xi) xi = self._scale_x(xi) - dist, i = self.tree.query(xi, distance_upper_bound=self.distance_upper_bound, workers=self.workers) - if np.isfinite(self.distance_upper_bound): + dist, i = self.tree.query(xi, eps=eps, p=p, distance_upper_bound=distance_upper_bound, workers=workers) + + if np.isfinite(distance_upper_bound): # if distance_upper_bound is set to not be infinite, then we need to consider the case where cKDtree # does not find any points within distance_upper_bound to return. It marks those points as having infinte # distance, which is what will be used below to mask the array and return only the points that were deemed diff --git a/scipy/interpolate/tests/test_ndgriddata.py b/scipy/interpolate/tests/test_ndgriddata.py index f74c51157402..f7aed5cf9000 100644 --- a/scipy/interpolate/tests/test_ndgriddata.py +++ b/scipy/interpolate/tests/test_ndgriddata.py @@ -196,7 +196,7 @@ def test_nearest_list_argument(self): NI = NearestNDInterpolator((d[0], d[1]), list(d[2])) assert_array_equal(NI([0.1, 0.9], [0.1, 0.9]), [0, 2]) - def test_nearest_max_dist(self): + def test_nearest_query_options(self): nd = np.array([[0, 1, 0, 1], [0, 0, 1, 1], [0, 1, 1, 2]]) @@ -204,15 +204,29 @@ def test_nearest_max_dist(self): query_points = [0 + delta, 1 + delta], [0 + delta, 1 + delta] # case 1 - query max_dist is smaller than the query points' nearest distance to nd. - NI = NearestNDInterpolator((nd[0], nd[1]), nd[2], - distance_upper_bound=np.sqrt(delta ** 2 + delta ** 2) - 1e-7) - assert_array_equal(NI(query_points), [np.nan, np.nan]) - - # case 2 - query max_dist is larger, so should return non np.nan - NI = NearestNDInterpolator((nd[0], nd[1]), nd[2], - distance_upper_bound=np.sqrt(delta ** 2 + delta ** 2) + 1e-7) - assert_array_equal(NI(query_points), [0, 2]) - + NI = NearestNDInterpolator((nd[0], nd[1]), nd[2]) + distance_upper_bound = np.sqrt(delta ** 2 + delta ** 2) - 1e-7 + assert_array_equal(NI(query_points, query_options={"distance_upper_bound": distance_upper_bound}), + [np.nan, np.nan]) + + # case 2 - query p is inf, will return [0, 2] + distance_upper_bound = np.sqrt(delta ** 2 + delta ** 2) - 1e-7 + p = np.inf + assert_array_equal(NI(query_points, query_options={"distance_upper_bound": distance_upper_bound, "p": p}), + [0, 2]) + + # case 3 - query max_dist is larger, so should return non np.nan + distance_upper_bound = np.sqrt(delta ** 2 + delta ** 2) + 1e-7 + assert_array_equal(NI(query_points, query_options={"distance_upper_bound": distance_upper_bound}), + [0, 2]) + + def test_nearest_query_valid_inputs(self): + nd = np.array([[0, 1, 0, 1], + [0, 0, 1, 1], + [0, 1, 1, 2]]) + NI = NearestNDInterpolator((nd[0], nd[1]), nd[2]) + with self.assertRaises(TypeError): + NI([0.5, 0.5], query_options="not a dictionary") class TestNDInterpolators: From b8671121dccd2736ccbe41a81a80120b6271d695 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Tue, 14 Nov 2023 21:20:43 -0500 Subject: [PATCH 07/18] MAINT: apply suggestions from code review Co-authored-by: Jake Bowhay <60778417+j-bowhay@users.noreply.github.com> --- scipy/interpolate/_ndgriddata.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index a51376cfc581..ad297e9e0acd 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -98,7 +98,7 @@ def __init__(self, x, y, rescale=False, tree_options=None): self.tree = cKDTree(self.points, **tree_options) self.values = np.asarray(y) - def __call__(self, *args, query_options=None): + def __call__(self, *args, **query_options): """ Evaluate interpolator at given points. @@ -117,23 +117,13 @@ def __call__(self, *args, query_options=None): or x1 can be array-like of float with shape ``(..., ndim)`` """ - if query_options is not None and not isinstance(query_options, dict): - raise TypeError("query_options must be a dictionary") - - # gather the query options to pass to cKDTree.query - query_options = query_options or {} - eps = query_options.get('eps', 0) - p = query_options.get('p', 2) - distance_upper_bound = query_options.get('distance_upper_bound', np.inf) - workers = query_options.get('workers', 1) - # For the sake of enabling subclassing, NDInterpolatorBase._set_xi performs some operations # which are not required by NearestNDInterpolator.__call__, hence here we operate # on xi directly, without calling a parent class function. xi = _ndim_coords_from_arrays(args, ndim=self.points.shape[1]) xi = self._check_call_shape(xi) xi = self._scale_x(xi) - dist, i = self.tree.query(xi, eps=eps, p=p, distance_upper_bound=distance_upper_bound, workers=workers) + dist, i = self.tree.query(xi, **query_options) if np.isfinite(distance_upper_bound): # if distance_upper_bound is set to not be infinite, then we need to consider the case where cKDtree From 0041cac9ad9ed2b6e7b9b5abbbda1f62edf4a198 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Tue, 14 Nov 2023 21:28:42 -0500 Subject: [PATCH 08/18] MAINT: update tests and doc with new API --- scipy/interpolate/_ndgriddata.py | 12 +++++------- scipy/interpolate/tests/test_ndgriddata.py | 6 +++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index ad297e9e0acd..29c61ce2bdb6 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -104,17 +104,15 @@ def __call__(self, *args, **query_options): Parameters ---------- - query_options : dict, optional - This allows `eps`, `p`, `distance_upper_bound`, and `workers` being passed to the cKDTree's query function - to be explicitly set. See the `scipy.spatial.cKDTree.query` for an overview of the different options. Note - that k is restricted to 1 since NearestNDInterpolator has to have k=1 by definition. - - ..versionadded:: 1.12.0 - x1, x2, ... xn : array-like of float Points where to interpolate data at. x1, x2, ... xn can be array-like of float with broadcastable shape. or x1 can be array-like of float with shape ``(..., ndim)`` + query_options : dict, optional + This allows `eps`, `p`, `distance_upper_bound`, and `workers` being passed to the cKDTree's query function + to be explicitly set. See the `scipy.spatial.cKDTree.query` for an overview of the different options. + + ..versionadded:: 1.12.0 """ # For the sake of enabling subclassing, NDInterpolatorBase._set_xi performs some operations diff --git a/scipy/interpolate/tests/test_ndgriddata.py b/scipy/interpolate/tests/test_ndgriddata.py index f7aed5cf9000..8cebc177583b 100644 --- a/scipy/interpolate/tests/test_ndgriddata.py +++ b/scipy/interpolate/tests/test_ndgriddata.py @@ -206,18 +206,18 @@ def test_nearest_query_options(self): # case 1 - query max_dist is smaller than the query points' nearest distance to nd. NI = NearestNDInterpolator((nd[0], nd[1]), nd[2]) distance_upper_bound = np.sqrt(delta ** 2 + delta ** 2) - 1e-7 - assert_array_equal(NI(query_points, query_options={"distance_upper_bound": distance_upper_bound}), + assert_array_equal(NI(query_points, distance_upper_bound=distance_upper_bound), [np.nan, np.nan]) # case 2 - query p is inf, will return [0, 2] distance_upper_bound = np.sqrt(delta ** 2 + delta ** 2) - 1e-7 p = np.inf - assert_array_equal(NI(query_points, query_options={"distance_upper_bound": distance_upper_bound, "p": p}), + assert_array_equal(NI(query_points, distance_upper_bound=distance_upper_bound, p=p), [0, 2]) # case 3 - query max_dist is larger, so should return non np.nan distance_upper_bound = np.sqrt(delta ** 2 + delta ** 2) + 1e-7 - assert_array_equal(NI(query_points, query_options={"distance_upper_bound": distance_upper_bound}), + assert_array_equal(NI(query_points, distance_upper_bound=distance_upper_bound), [0, 2]) def test_nearest_query_valid_inputs(self): From 2f9f2f231f1c4ba85b425902580c1a62dbb94f49 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Tue, 14 Nov 2023 22:05:18 -0500 Subject: [PATCH 09/18] BUG: check all dists instead of if. --- scipy/interpolate/_ndgriddata.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index 29c61ce2bdb6..ef47d24dcccd 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -123,18 +123,15 @@ def __call__(self, *args, **query_options): xi = self._scale_x(xi) dist, i = self.tree.query(xi, **query_options) - if np.isfinite(distance_upper_bound): - # if distance_upper_bound is set to not be infinite, then we need to consider the case where cKDtree - # does not find any points within distance_upper_bound to return. It marks those points as having infinte - # distance, which is what will be used below to mask the array and return only the points that were deemed - # to have a close enough neighbor to return something useful. - valid_mask = np.isfinite(dist) - interp_values = np.zeros(i.shape) - interp_values[~valid_mask] = np.nan - interp_values[valid_mask] = self.values[i[valid_mask]] - return interp_values - else: - return self.values[i] + # if distance_upper_bound is set to not be infinite, then we need to consider the case where cKDtree + # does not find any points within distance_upper_bound to return. It marks those points as having infinte + # distance, which is what will be used below to mask the array and return only the points that were deemed + # to have a close enough neighbor to return something useful. + valid_mask = np.isfinite(dist) + interp_values = np.zeros(i.shape) + interp_values[~valid_mask] = np.nan + interp_values[valid_mask] = self.values[i[valid_mask]] + return interp_values #------------------------------------------------------------------------------ From 43ea043d25a5f46b78605acc05ac425dc0077c45 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Wed, 15 Nov 2023 22:44:07 -0500 Subject: [PATCH 10/18] BUG: update old test. --- scipy/interpolate/tests/test_ndgriddata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/interpolate/tests/test_ndgriddata.py b/scipy/interpolate/tests/test_ndgriddata.py index 8cebc177583b..bfcf428f46ae 100644 --- a/scipy/interpolate/tests/test_ndgriddata.py +++ b/scipy/interpolate/tests/test_ndgriddata.py @@ -225,7 +225,7 @@ def test_nearest_query_valid_inputs(self): [0, 0, 1, 1], [0, 1, 1, 2]]) NI = NearestNDInterpolator((nd[0], nd[1]), nd[2]) - with self.assertRaises(TypeError): + with assert_raises(TypeError): NI([0.5, 0.5], query_options="not a dictionary") From 3d750bfbc6af2e3892828e000ff256a37e1dab59 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Wed, 22 Nov 2023 01:45:35 -0500 Subject: [PATCH 11/18] BUG: fix nD case --- scipy/interpolate/_ndgriddata.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index ef47d24dcccd..fdd87b29de43 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -128,9 +128,20 @@ def __call__(self, *args, **query_options): # distance, which is what will be used below to mask the array and return only the points that were deemed # to have a close enough neighbor to return something useful. valid_mask = np.isfinite(dist) - interp_values = np.zeros(i.shape) - interp_values[~valid_mask] = np.nan - interp_values[valid_mask] = self.values[i[valid_mask]] + + if self.values.ndim == 1: + interp_shape = xi.shape[0] + else: + interp_shape = (xi.shape[0],) + self.values.shape[1:] + + interp_values = np.full(interp_shape, np.nan, dtype=self.values.dtype) + + # Indexing self.values - need to consider case where values is nD + if self.values.ndim == 1: + interp_values[valid_mask] = self.values[i[valid_mask]] + else: + interp_values[valid_mask] = self.values[i[valid_mask], ...] + return interp_values From 2e497d3fa26a2341b83908329c35cd2159452275 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Wed, 22 Nov 2023 06:31:07 -0500 Subject: [PATCH 12/18] BUG: fix tests and multidimensional query & y points --- scipy/interpolate/_ndgriddata.py | 30 +++++++++++++++------- scipy/interpolate/tests/test_ndgriddata.py | 4 +-- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index fdd87b29de43..c70a2f80fd60 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -30,9 +30,9 @@ class NearestNDInterpolator(NDInterpolatorBase): Parameters ---------- - x : (npoints, ndims) 2-D ndarray of floats + x : (npoints, ndims) n-D ndarray of floats Data point coordinates. - y : (npoints, ) 1-D ndarray of float or complex + y : (npoints, ...) n-D ndarray of floats or complex Data values. rescale : boolean, optional Rescale points to unit cube before performing interpolation. @@ -121,27 +121,38 @@ def __call__(self, *args, **query_options): xi = _ndim_coords_from_arrays(args, ndim=self.points.shape[1]) xi = self._check_call_shape(xi) xi = self._scale_x(xi) - dist, i = self.tree.query(xi, **query_options) + + # We need to handle two important cases for compatibility with a flexible griddata: + # (1) the case where xi is of some dimension (n, m, ..., D), where D is the coordinate dimension, and + # (2) the case where y is multidimensional (npoints, k, l, ...). + # We will first flatten xi to deal with case (1) and build an intermediate return array with shape + # (n*m*.., k, l, ...) and then reshape back to (n, m, ..., k, l, ...). + + # Flatten xi for the query + xi_flat = xi.reshape(-1, xi.shape[-1]) + original_shape = xi.shape + flattened_shape = xi_flat.shape # if distance_upper_bound is set to not be infinite, then we need to consider the case where cKDtree # does not find any points within distance_upper_bound to return. It marks those points as having infinte # distance, which is what will be used below to mask the array and return only the points that were deemed # to have a close enough neighbor to return something useful. + dist, i = self.tree.query(xi_flat, **query_options) valid_mask = np.isfinite(dist) - if self.values.ndim == 1: - interp_shape = xi.shape[0] - else: - interp_shape = (xi.shape[0],) + self.values.shape[1:] - + # create a holder interp_values array with shape (n*m*.., k, l, ...) and fill with nans. + interp_shape = flattened_shape[:-1] + self.values.shape[1:] if self.values.ndim > 1 else flattened_shape[:-1] interp_values = np.full(interp_shape, np.nan, dtype=self.values.dtype) - # Indexing self.values - need to consider case where values is nD if self.values.ndim == 1: interp_values[valid_mask] = self.values[i[valid_mask]] else: interp_values[valid_mask] = self.values[i[valid_mask], ...] + # (n*m*.., k, l, ...) -> (n, m, ..., k, l, ...) + new_shape = original_shape[:-1] + self.values.shape[1:] if self.values.ndim > 1 else original_shape[:-1] + interp_values = interp_values.reshape(new_shape) + return interp_values @@ -149,6 +160,7 @@ def __call__(self, *args, **query_options): # Convenience interface function #------------------------------------------------------------------------------ + def griddata(points, values, xi, method='linear', fill_value=np.nan, rescale=False): """ diff --git a/scipy/interpolate/tests/test_ndgriddata.py b/scipy/interpolate/tests/test_ndgriddata.py index bfcf428f46ae..b5d87b9671e5 100644 --- a/scipy/interpolate/tests/test_ndgriddata.py +++ b/scipy/interpolate/tests/test_ndgriddata.py @@ -197,8 +197,8 @@ def test_nearest_list_argument(self): assert_array_equal(NI([0.1, 0.9], [0.1, 0.9]), [0, 2]) def test_nearest_query_options(self): - nd = np.array([[0, 1, 0, 1], - [0, 0, 1, 1], + nd = np.array([[0, 0.5, 0, 1], + [0, 0, 0.5, 1], [0, 1, 1, 2]]) delta = 0.1 query_points = [0 + delta, 1 + delta], [0 + delta, 1 + delta] From 4d01d49cc0e73444d2936a214a4d173d81556a2e Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Wed, 22 Nov 2023 08:02:04 -0500 Subject: [PATCH 13/18] BUG: fix test case for complex dtypes. np.full with int dtype and nan-setting leads to weirdness. --- scipy/interpolate/_ndgriddata.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index c70a2f80fd60..fecba327b713 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -142,7 +142,11 @@ def __call__(self, *args, **query_options): # create a holder interp_values array with shape (n*m*.., k, l, ...) and fill with nans. interp_shape = flattened_shape[:-1] + self.values.shape[1:] if self.values.ndim > 1 else flattened_shape[:-1] - interp_values = np.full(interp_shape, np.nan, dtype=self.values.dtype) + + if np.issubdtype(self.values.dtype, np.complexfloating): + interp_values = np.full(interp_shape, np.nan, dtype=self.values.dtype) + else: + interp_values = np.full(interp_shape, np.nan) if self.values.ndim == 1: interp_values[valid_mask] = self.values[i[valid_mask]] From 7c3367a39488cdfb055b4842dab856a06fd63f13 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Wed, 22 Nov 2023 13:46:38 -0500 Subject: [PATCH 14/18] DOC: fix documentation for dimensionality to be more accurate --- scipy/interpolate/_ndgriddata.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index fecba327b713..ba8433a77762 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -30,9 +30,9 @@ class NearestNDInterpolator(NDInterpolatorBase): Parameters ---------- - x : (npoints, ndims) n-D ndarray of floats + x : (npoints, ndims) 2-D ndarray of floats Data point coordinates. - y : (npoints, ...) n-D ndarray of floats or complex + y : (m1, …, mn, …) n-D ndarray of floats or complex Data values. rescale : boolean, optional Rescale points to unit cube before performing interpolation. @@ -109,8 +109,9 @@ def __call__(self, *args, **query_options): x1, x2, ... xn can be array-like of float with broadcastable shape. or x1 can be array-like of float with shape ``(..., ndim)`` query_options : dict, optional - This allows `eps`, `p`, `distance_upper_bound`, and `workers` being passed to the cKDTree's query function - to be explicitly set. See the `scipy.spatial.cKDTree.query` for an overview of the different options. + This allows ``eps``, ``p``, ``distance_upper_bound``, and ``workers`` being passed to the cKDTree's query + function to be explicitly set. See the `scipy.spatial.cKDTree.query` for an overview of the different + options. ..versionadded:: 1.12.0 @@ -122,11 +123,11 @@ def __call__(self, *args, **query_options): xi = self._check_call_shape(xi) xi = self._scale_x(xi) - # We need to handle two important cases for compatibility with a flexible griddata: - # (1) the case where xi is of some dimension (n, m, ..., D), where D is the coordinate dimension, and - # (2) the case where y is multidimensional (npoints, k, l, ...). - # We will first flatten xi to deal with case (1) and build an intermediate return array with shape - # (n*m*.., k, l, ...) and then reshape back to (n, m, ..., k, l, ...). + # We need to handle two important cases: + # (1) the case where xi has trailing dimensions (..., ndim), where D is the coordinate dimension, and + # (2) the case where y is multidimensional (m1, …, mn, …). + # We will first flatten xi to deal with case (1), do the computation in flattened array while retaining y's + # dimensionality, and then reshape the interpolated values back to match xi's shape. # Flatten xi for the query xi_flat = xi.reshape(-1, xi.shape[-1]) @@ -140,7 +141,7 @@ def __call__(self, *args, **query_options): dist, i = self.tree.query(xi_flat, **query_options) valid_mask = np.isfinite(dist) - # create a holder interp_values array with shape (n*m*.., k, l, ...) and fill with nans. + # create a holder interp_values array and fill with nans. interp_shape = flattened_shape[:-1] + self.values.shape[1:] if self.values.ndim > 1 else flattened_shape[:-1] if np.issubdtype(self.values.dtype, np.complexfloating): @@ -153,7 +154,6 @@ def __call__(self, *args, **query_options): else: interp_values[valid_mask] = self.values[i[valid_mask], ...] - # (n*m*.., k, l, ...) -> (n, m, ..., k, l, ...) new_shape = original_shape[:-1] + self.values.shape[1:] if self.values.ndim > 1 else original_shape[:-1] interp_values = interp_values.reshape(new_shape) From 35c9577aef6ce4562f580ee00ea6d8250bda1684 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Wed, 22 Nov 2023 13:48:08 -0500 Subject: [PATCH 15/18] Update scipy/interpolate/_ndgriddata.py Co-authored-by: Jake Bowhay <60778417+j-bowhay@users.noreply.github.com> --- scipy/interpolate/_ndgriddata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index ba8433a77762..ed795aacb7d7 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -108,7 +108,7 @@ def __call__(self, *args, **query_options): Points where to interpolate data at. x1, x2, ... xn can be array-like of float with broadcastable shape. or x1 can be array-like of float with shape ``(..., ndim)`` - query_options : dict, optional + **query_options This allows ``eps``, ``p``, ``distance_upper_bound``, and ``workers`` being passed to the cKDTree's query function to be explicitly set. See the `scipy.spatial.cKDTree.query` for an overview of the different options. From b142521b43f86d4ae8dd26039a8d75e832cc9d34 Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Thu, 23 Nov 2023 06:12:34 -0500 Subject: [PATCH 16/18] DOC: fix documentation to be clearer for y values --- scipy/interpolate/_ndgriddata.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index ba8433a77762..64c23eec6acb 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -32,7 +32,7 @@ class NearestNDInterpolator(NDInterpolatorBase): ---------- x : (npoints, ndims) 2-D ndarray of floats Data point coordinates. - y : (m1, …, mn, …) n-D ndarray of floats or complex + y : 1-D ndarray of float or complex Data values. rescale : boolean, optional Rescale points to unit cube before performing interpolation. @@ -124,8 +124,8 @@ def __call__(self, *args, **query_options): xi = self._scale_x(xi) # We need to handle two important cases: - # (1) the case where xi has trailing dimensions (..., ndim), where D is the coordinate dimension, and - # (2) the case where y is multidimensional (m1, …, mn, …). + # (1) the case where xi has trailing dimensions (..., ndim), and + # (2) the case where y has trailing dimensions # We will first flatten xi to deal with case (1), do the computation in flattened array while retaining y's # dimensionality, and then reshape the interpolated values back to match xi's shape. @@ -149,10 +149,7 @@ def __call__(self, *args, **query_options): else: interp_values = np.full(interp_shape, np.nan) - if self.values.ndim == 1: - interp_values[valid_mask] = self.values[i[valid_mask]] - else: - interp_values[valid_mask] = self.values[i[valid_mask], ...] + interp_values[valid_mask] = self.values[i[valid_mask], ...] new_shape = original_shape[:-1] + self.values.shape[1:] if self.values.ndim > 1 else original_shape[:-1] interp_values = interp_values.reshape(new_shape) From ca3f52f4110c2b8f93a876ce1b55868711f83dcf Mon Sep 17 00:00:00 2001 From: harshilkamdar Date: Thu, 23 Nov 2023 06:13:44 -0500 Subject: [PATCH 17/18] DOC: forgot dim --- scipy/interpolate/_ndgriddata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index fe81661ac999..0e0cb86708b1 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -32,7 +32,7 @@ class NearestNDInterpolator(NDInterpolatorBase): ---------- x : (npoints, ndims) 2-D ndarray of floats Data point coordinates. - y : 1-D ndarray of float or complex + y : (npoints, ) 1-D ndarray of float or complex Data values. rescale : boolean, optional Rescale points to unit cube before performing interpolation. From fd067957d6e9529e2a7dcdbda09083f8b183aa7c Mon Sep 17 00:00:00 2001 From: Evgeni Burovski Date: Thu, 30 Nov 2023 10:50:56 +0300 Subject: [PATCH 18/18] DOC: a trivial doc tweak --- scipy/interpolate/_ndgriddata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/interpolate/_ndgriddata.py b/scipy/interpolate/_ndgriddata.py index 0e0cb86708b1..69115389d1cf 100644 --- a/scipy/interpolate/_ndgriddata.py +++ b/scipy/interpolate/_ndgriddata.py @@ -113,7 +113,7 @@ def __call__(self, *args, **query_options): function to be explicitly set. See the `scipy.spatial.cKDTree.query` for an overview of the different options. - ..versionadded:: 1.12.0 + .. versionadded:: 1.12.0 """ # For the sake of enabling subclassing, NDInterpolatorBase._set_xi performs some operations