Skip to content

Commit

Permalink
Merge pull request #505 from SwamyDev/main
Browse files Browse the repository at this point in the history
Add pre-allocation option to `get_neighbour_info` to improve performance on large raster data
  • Loading branch information
djhoese committed Mar 23, 2023
2 parents 53ed600 + b36e047 commit e3d7a2f
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 17 deletions.
29 changes: 12 additions & 17 deletions pyresample/kd_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

from .future.resamplers._transform_utils import lonlat2xyz
from .future.resamplers.nearest import _my_index, query_no_distance
from .utils.row_appendable_array import RowAppendableArray

logger = getLogger(__name__)

Expand Down Expand Up @@ -301,7 +302,6 @@ def get_neighbour_info(source_geo_def, target_geo_def, radius_of_influence,
segments : int or None
Number of segments to use when resampling.
If set to None an estimate will be calculated
Returns
-------
(valid_input_index, valid_output_index,
Expand Down Expand Up @@ -339,8 +339,11 @@ def get_neighbour_info(source_geo_def, target_geo_def, radius_of_influence,

if segments > 1:
# Iterate through segments
for i, target_slice in enumerate(geometry._get_slice(segments,
target_geo_def.shape)):
appendable_valid_output_index = RowAppendableArray(target_geo_def.size)
appendable_index_array = RowAppendableArray(target_geo_def.size)
appendable_distance_array = RowAppendableArray(target_geo_def.size)

for target_slice in geometry._get_slice(segments, target_geo_def.shape):

# Query on slice of target coordinates
next_voi, next_ia, next_da = \
Expand All @@ -352,20 +355,12 @@ def get_neighbour_info(source_geo_def, target_geo_def, radius_of_influence,
reduce_data=reduce_data,
nprocs=nprocs)

# Build result iteratively
if i == 0:
# First iteration
valid_output_index = next_voi
index_array = next_ia
distance_array = next_da
else:
valid_output_index = np.append(valid_output_index, next_voi)
if neighbours > 1:
index_array = np.row_stack((index_array, next_ia))
distance_array = np.row_stack((distance_array, next_da))
else:
index_array = np.append(index_array, next_ia)
distance_array = np.append(distance_array, next_da)
appendable_valid_output_index.append_row(next_voi)
appendable_index_array.append_row(next_ia)
appendable_distance_array.append_row(next_da)
valid_output_index = appendable_valid_output_index.to_array()
index_array = appendable_index_array.to_array()
distance_array = appendable_distance_array.to_array()
else:
# Query kd-tree with full target coordinate set
full_slice = slice(None)
Expand Down
50 changes: 50 additions & 0 deletions pyresample/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
import os
import unittest
import uuid
from timeit import timeit

import numpy as np
from pyproj import CRS

from pyresample.test.utils import create_test_latitude, create_test_longitude
from pyresample.utils.row_appendable_array import RowAppendableArray


def tmptiff(width=100, height=100, transform=None, crs=None, dtype=np.uint8):
Expand Down Expand Up @@ -724,3 +726,51 @@ def test_check_slice_orientation():
slice_in = slice(start, stop, step)
res = check_slice_orientation(slice_in)
assert res == slice(start, stop, -1)


class TestRowAppendableArray(unittest.TestCase):
    """Test appending numpy arrays to a possibly pre-allocated buffer."""

    def _assert_zeros_then_ones(self, reserved_capacity, segment_shape, expected):
        """Append a zeros and a ones segment of the given shape and compare the result."""
        buffer = RowAppendableArray(reserved_capacity)
        buffer.append_row(np.zeros(segment_shape))
        buffer.append_row(np.ones(segment_shape))
        self.assertTrue(np.array_equal(buffer.to_array(), expected))

    # --- reserved capacity larger than the appended data: unused tail is trimmed ---

    def test_append_1d_arrays_and_trim_remaining_buffer(self):
        self._assert_zeros_then_ones(7, 3, np.array([0, 0, 0, 1, 1, 1]))

    def test_append_rows_of_nd_arrays_and_trim_remaining_buffer(self):
        self._assert_zeros_then_ones(7, (3, 2), np.vstack([np.zeros((3, 2)), np.ones((3, 2))]))

    # --- reserved capacity smaller than the appended data: buffer has to grow ---

    def test_append_more_1d_arrays_than_expected(self):
        self._assert_zeros_then_ones(5, 3, np.array([0, 0, 0, 1, 1, 1]))

    def test_append_more_rows_of_nd_arrays_than_expected(self):
        self._assert_zeros_then_ones(2, (3, 2), np.vstack([np.zeros((3, 2)), np.ones((3, 2))]))

    # --- reserved capacity exactly matching the appended data ---

    def test_append_1d_arrays_pre_allocated_appendable_array(self):
        self._assert_zeros_then_ones(6, 3, np.array([0, 0, 0, 1, 1, 1]))

    def test_append_rows_of_nd_arrays_to_pre_allocated_appendable_array(self):
        self._assert_zeros_then_ones(6, (3, 2), np.vstack([np.zeros((3, 2)), np.ones((3, 2))]))

    def test_pre_allocation_can_double_appending_performance(self):
        # Time the same number of single-element appends against a buffer
        # with no reserved rows and one with a row reserved for every append.
        growing = RowAppendableArray(0)
        reserved = RowAppendableArray(10000)

        def append_to_growing():
            growing.append_row(np.array([42]))

        def append_to_reserved():
            reserved.append_row(np.array([42]))

        growing_seconds = timeit(append_to_growing, number=10000)
        reserved_seconds = timeit(append_to_reserved, number=10000)
        self.assertGreater(growing_seconds / reserved_seconds, 2)
56 changes: 56 additions & 0 deletions pyresample/utils/row_appendable_array.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2010-2023 Pyresample developers
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Appendable numpy array which allows for efficient pre-allocation."""

import numpy as np


class RowAppendableArray:
    """Helper class which allows efficient concatenation of numpy arrays by pre-allocating buffers.

    Arrays are appended along their first axis. The buffer is allocated lazily
    on the first ``append_row`` call, using the reserved number of rows together
    with the trailing shape and dtype of that first array. Appends that fit into
    the buffer are plain slice assignments; an append that does not fit fills
    the buffer and then grows it by concatenation. With a reserved capacity of
    zero this class therefore behaves the same as subsequent array
    concatenations.
    """

    def __init__(self, reserved_capacity):
        """Create an appendable array with a pre-allocated buffer.

        The size of the buffer depends also on the shape after the first axis of the first segment.

        Parameters
        ----------
        reserved_capacity : int
            Number of rows (length of the first axis) to reserve up front.
        """
        self._reserved_capacity = reserved_capacity
        # Allocated on first append, once trailing shape and dtype are known.
        self._data = None
        # Index of the first unused row in ``_data``.
        self._cursor = 0

    def append_row(self, next_array):
        """Append the specified array along the first axis.

        Parameters
        ----------
        next_array : numpy.ndarray
            Array with at least one dimension. Its shape after the first axis
            has to match the one of the first appended array.
        """
        if self._data is None:
            self._data = np.empty((self._reserved_capacity, *next_array.shape[1:]), dtype=next_array.dtype)
        cursor_end = self._cursor + next_array.shape[0]
        capacity = self._data.shape[0]
        if cursor_end > capacity:
            # Fill what is left of the buffer, then grow it by exactly the
            # overflowing rows. Concatenation along axis 0 covers both the
            # 1-D and the N-D case.
            remaining = capacity - self._cursor
            self._data[self._cursor:] = next_array[:remaining]
            self._data = np.concatenate((self._data, next_array[remaining:]))
        else:
            self._data[self._cursor:cursor_end] = next_array
        self._cursor = cursor_end

    def to_array(self):
        """Return the numpy array with all the data appended until now.

        Returns
        -------
        numpy.ndarray
            The filled part of the internal buffer (unused reserved rows are
            trimmed). If nothing has been appended yet an empty 1-D array is
            returned; it has numpy's default dtype since no dtype is known.
        """
        if self._data is None:
            return np.empty((0,))
        return self._data[:self._cursor]

0 comments on commit e3d7a2f

Please sign in to comment.