Skip to content

Commit

Permalink
Merge pull request #353 from emlys/bugfix/352
Browse files Browse the repository at this point in the history
Add support for int64, uint64, and int8
  • Loading branch information
phargogh committed Oct 6, 2023
2 parents 58f2e0c + ddee08f commit 66a1f59
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 163 deletions.
52 changes: 13 additions & 39 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,46 +13,20 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [3.7, 3.8, 3.9]
gdal: ["3.2.2", "3.3.0", "3.4.3", "3.5"]
python-version: [3.8, 3.9, "3.10", "3.11"]
gdal: ["3.2.2", "3.3.0", "3.4.3", "3.5", "3.6", "3.7"]
os: [ubuntu-latest, windows-latest, macos-latest]

include:
- python-version: "3.10"
gdal: "3.4.3"
os: ubuntu-latest

- python-version: "3.10"
gdal: "3.4.3"
os: windows-latest

- python-version: "3.10"
gdal: "3.4.3"
os: macos-latest

- python-version: "3.10"
gdal: "3.5"
os: ubuntu-latest

- python-version: "3.10"
gdal: "3.5"
os: windows-latest

- python-version: "3.10"
gdal: "3.5"
os: macos-latest

- python-version: "3.11"
gdal: "3.5"
os: ubuntu-latest

- python-version: "3.11"
gdal: "3.5"
os: windows-latest

- python-version: "3.11"
gdal: "3.5"
os: macos-latest
exclude:
- gdal: "3.2.2"
python-version: 3.10
- gdal: "3.2.2"
python-version: 3.11
- gdal: "3.3.0"
python-version: 3.10
- gdal: "3.3.0"
python-version: 3.11
- gdal: "3.4.3"
python-version: 3.11

steps:
- uses: actions/checkout@v3
Expand Down
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ Unreleased Changes
from the input vector.
* Added a new function, ``pygeoprocessing.align_bbox``, which pads a bounding
box to align with the grid of a given geotransform.
* Added support for int64 and uint64 (GDAL 3.5+)
https://github.com/natcap/pygeoprocessing/issues/352
* Added support for signed bytes (GDAL 3.7+)
https://github.com/natcap/pygeoprocessing/issues/329

2.4.1 (2023-09-05)
------------------
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ build-backend = "setuptools.build_meta"

[tool.pytest.ini_options]
# Raise warnings to exceptions.
filterwarnings = 'error'

filterwarnings = ["error", "default::DeprecationWarning", "default::FutureWarning"]

[tool.setuptools_scm]
version_scheme = "post-release"
Expand Down
151 changes: 65 additions & 86 deletions src/pygeoprocessing/geoprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import shapely.prepared
import shapely.wkb
from osgeo import gdal
from osgeo import gdal_array
from osgeo import gdalconst
from osgeo import ogr
from osgeo import osr
Expand All @@ -40,19 +41,7 @@
if sys.version_info >= (3, 8):
import multiprocessing.shared_memory

NUMPY_TO_GDAL_TYPE = {
numpy.dtype(bool): gdal.GDT_Byte,
numpy.dtype(numpy.int8): gdal.GDT_Byte,
numpy.dtype(numpy.uint8): gdal.GDT_Byte,
numpy.dtype(numpy.int16): gdal.GDT_Int16,
numpy.dtype(numpy.int32): gdal.GDT_Int32,
numpy.dtype(numpy.uint16): gdal.GDT_UInt16,
numpy.dtype(numpy.uint32): gdal.GDT_UInt32,
numpy.dtype(numpy.float32): gdal.GDT_Float32,
numpy.dtype(numpy.float64): gdal.GDT_Float64,
numpy.dtype(numpy.csingle): gdal.GDT_CFloat32,
numpy.dtype(numpy.complex64): gdal.GDT_CFloat64,
}
GDAL_VERSION = tuple(int(_) for _ in gdal.__version__.split('.'))


class ReclassificationMissingValuesError(Exception):
Expand Down Expand Up @@ -86,18 +75,6 @@ def __init__(self, missing_values, raster_path, value_map):
_LOGGING_PERIOD = 5.0 # min 5.0 seconds per update log message for the module
_LARGEST_ITERBLOCK = 2**16 # largest block for iterblocks to read in cells

_GDAL_TYPE_TO_NUMPY_LOOKUP = {
gdal.GDT_Byte: numpy.uint8,
gdal.GDT_Int16: numpy.int16,
gdal.GDT_Int32: numpy.int32,
gdal.GDT_UInt16: numpy.uint16,
gdal.GDT_UInt32: numpy.uint32,
gdal.GDT_Float32: numpy.float32,
gdal.GDT_Float64: numpy.float64,
gdal.GDT_CFloat32: numpy.csingle,
gdal.GDT_CFloat64: numpy.complex64,
}

_GDAL_WARP_ALGORITHMS = []
for _warp_algo in (_attrname for _attrname in dir(gdalconst)
if _attrname.startswith('GRA_')):
Expand Down Expand Up @@ -772,8 +749,8 @@ def raster_map(op, rasters, target_path, target_nodata=None, target_dtype=None,
f'the target dtype {target_dtype}')

driver, options = raster_driver_creation_tuple
if target_dtype == numpy.int8 and 'PIXELTYPE=SIGNEDBYTE' not in options:
options += ('PIXELTYPE=SIGNEDBYTE',)
gdal_type, type_creation_options = _numpy_to_gdal_type(target_dtype)
options = list(options) + type_creation_options

def apply_op(*arrays):
"""Apply the function ``op`` to the input arrays.
Expand Down Expand Up @@ -802,7 +779,7 @@ def apply_op(*arrays):
[(path, 1) for path in rasters], # assume the first band
apply_op,
target_path,
NUMPY_TO_GDAL_TYPE[numpy.dtype(target_dtype)],
gdal_type,
target_nodata,
raster_driver_creation_tuple=(driver, options))

Expand Down Expand Up @@ -1222,19 +1199,8 @@ def new_raster_from_base(
driver = gdal.GetDriverByName(raster_driver_creation_tuple[0])

local_raster_creation_options = list(raster_driver_creation_tuple[1])
# PIXELTYPE is sometimes used to define signed vs. unsigned bytes and
# the only place that is stored is in the IMAGE_STRUCTURE metadata
# copy it over if it exists and it not already defined by the input
# creation options. It's okay to get this info from the first band since
# all bands have the same datatype
numpy_dtype = _gdal_to_numpy_type(datatype, local_raster_creation_options)
base_band = base_raster.GetRasterBand(1)
metadata = base_band.GetMetadata('IMAGE_STRUCTURE')
if 'PIXELTYPE' in metadata and not any(
['PIXELTYPE' in option for option in
local_raster_creation_options]):
local_raster_creation_options.append(
'PIXELTYPE=' + metadata['PIXELTYPE'])

block_size = base_band.GetBlockSize()
# It's not clear how or IF we can determine if the output should be
# striped or tiled. Here we leave it up to the default inputs or if its
Expand Down Expand Up @@ -1284,7 +1250,6 @@ def new_raster_from_base(
timed_logger = TimedLoggingAdapter(_LOGGING_PERIOD)
pixels_processed = 0
n_pixels = n_cols * n_rows
numpy_dtype = _GDAL_TYPE_TO_NUMPY_LOOKUP[datatype]
if fill_value_list is not None:
for index, fill_value in enumerate(fill_value_list):
if fill_value is None:
Expand Down Expand Up @@ -2168,15 +2133,9 @@ def get_raster_info(raster_path):

# datatype is the same for the whole raster, but is associated with band
band = raster.GetRasterBand(1)
band_datatype = band.DataType
raster_properties['datatype'] = band_datatype
raster_properties['numpy_type'] = (
_GDAL_TYPE_TO_NUMPY_LOOKUP[band_datatype])
# this part checks to see if the byte is signed or not
if band_datatype == gdal.GDT_Byte:
metadata = band.GetMetadata('IMAGE_STRUCTURE')
if 'PIXELTYPE' in metadata and metadata['PIXELTYPE'] == 'SIGNEDBYTE':
raster_properties['numpy_type'] = numpy.int8
raster_properties['datatype'] = band.DataType
raster_properties['numpy_type'] = _gdal_to_numpy_type(
band.DataType, band.GetMetadata('IMAGE_STRUCTURE'))
band = None
raster = None
return raster_properties
Expand Down Expand Up @@ -2414,11 +2373,14 @@ def reclassify_raster(
keys = sorted(numpy.array(list(value_map_copy.keys())))
values = numpy.array([value_map_copy[x] for x in keys])

numpy_dtype = _gdal_to_numpy_type(
target_datatype, raster_driver_creation_tuple[1])

def _map_dataset_to_value_op(original_values):
"""Convert a block of original values to the lookup values."""
out_array = numpy.full(
original_values.shape, target_nodata,
dtype=_GDAL_TYPE_TO_NUMPY_LOOKUP[target_datatype])
dtype=numpy_dtype)
if nodata is None:
valid_mask = numpy.full(original_values.shape, True)
else:
Expand Down Expand Up @@ -2620,9 +2582,9 @@ def warp_raster(
base_raster = gdal.OpenEx(base_raster_path, gdal.OF_RASTER)

raster_creation_options = list(raster_driver_creation_tuple[1])
if (base_raster_info['numpy_type'] == numpy.int8 and
'PIXELTYPE' not in ' '.join(raster_creation_options)):
raster_creation_options.append('PIXELTYPE=SIGNEDBYTE')
_, type_creation_options = _numpy_to_gdal_type(
base_raster_info['numpy_type'])
raster_creation_options += type_creation_options

if resample_method.lower() not in _GDAL_WARP_ALGORITHMS:
raise ValueError(
Expand Down Expand Up @@ -3704,40 +3666,54 @@ def mask_op(base_array, mask_array):
os.remove(mask_raster_path)


def _gdal_to_numpy_type(band):
"""Calculate the equivalent numpy datatype from a GDAL raster band type.
This function doesn't handle complex or unknown types. If they are
passed in, this function will raise a ValueError.
def _gdal_to_numpy_type(gdal_type, metadata):
"""Calculate the equivalent numpy datatype from a GDAL type and metadata.
Args:
band (gdal.Band): GDAL Band
gdal_type: GDAL.GDT_* data type code
metadata: mapping or list of strings to check for the existence of
the 'PIXELTYPE=SIGNEDBYTE' flag
Return:
numpy_datatype (numpy.dtype): equivalent of band.DataType
Returns:
numpy.dtype that is the equivalent of the input gdal type
Raises:
ValueError if an unsupported data type is entered
"""
# doesn't include GDT_Byte because that's a special case
base_gdal_type_to_numpy = {
gdal.GDT_Int16: numpy.int16,
gdal.GDT_Int32: numpy.int32,
gdal.GDT_UInt16: numpy.uint16,
gdal.GDT_UInt32: numpy.uint32,
gdal.GDT_Float32: numpy.float32,
gdal.GDT_Float64: numpy.float64,
}
if (GDAL_VERSION < (3, 7, 0) and gdal_type == gdal.GDT_Byte and
(('PIXELTYPE=SIGNEDBYTE' in metadata) or
('PIXELTYPE' in metadata and metadata['PIXELTYPE'] == 'SIGNEDBYTE'))):
return numpy.int8

if band.DataType in base_gdal_type_to_numpy:
return base_gdal_type_to_numpy[band.DataType]
numpy_type = gdal_array.GDALTypeCodeToNumericTypeCode(gdal_type)
if numpy_type is None:
raise ValueError(f"Unsupported DataType: {gdal_type}")
return numpy_type

if band.DataType != gdal.GDT_Byte:
raise ValueError("Unsupported DataType: %s" % str(band.DataType))

# band must be GDT_Byte type, check if it is signed/unsigned
metadata = band.GetMetadata('IMAGE_STRUCTURE')
if 'PIXELTYPE' in metadata and metadata['PIXELTYPE'] == 'SIGNEDBYTE':
return numpy.int8
return numpy.uint8
def _numpy_to_gdal_type(numpy_type):
"""Calculate the equivalent GDAL type and metadata from a numpy type.
Args:
numpy_type: numpy data type
Returns:
(gdal type, metadata) tuple. gdal type is a gdal.GDT_* type code.
metadata is an empty list in most cases, or ['PIXELTYPE=SIGNEDBYTE']
if needed to indicate a signed byte type.
Raises:
ValueError if an unsupported data type is entered
"""
numpy_dtype = numpy.dtype(numpy_type)

if GDAL_VERSION < (3, 7, 0) and numpy_dtype == numpy.dtype(numpy.int8):
return gdal.GDT_Byte, ['PIXELTYPE=SIGNEDBYTE']

gdal_type = gdal_array.NumericTypeCodeToGDALTypeCode(numpy_dtype)
if gdal_type is None:
raise ValueError(f"Unsupported DataType: {numpy_type}")
return gdal_type, []


def merge_bounding_box_list(bounding_box_list, bounding_box_mode):
Expand Down Expand Up @@ -4196,12 +4172,13 @@ def numpy_array_to_raster(
Return:
None
"""

raster_driver = gdal.GetDriverByName(raster_driver_creation_tuple[0])
driver_name, creation_options = raster_driver_creation_tuple
raster_driver = gdal.GetDriverByName(driver_name)
ny, nx = base_array.shape
gdal_type, type_creation_options = _numpy_to_gdal_type(base_array.dtype)
new_raster = raster_driver.Create(
target_path, nx, ny, 1, NUMPY_TO_GDAL_TYPE[base_array.dtype],
options=raster_driver_creation_tuple[1])
target_path, nx, ny, 1, gdal_type,
options=list(creation_options) + type_creation_options)
if projection_wkt is not None:
new_raster.SetProjection(projection_wkt)
if origin is not None and pixel_size is not None:
Expand Down Expand Up @@ -4421,14 +4398,16 @@ def _mult_op(base_array, base_nodata, scale, datatype):
scaled_raster_path = os.path.join(
workspace_dir,
f'scaled_{os.path.basename(base_stitch_raster_path)}')
gdal_type = _gdal_to_numpy_type(
target_band.DataType,
target_band.GetMetadata('IMAGE_STRUCTURE'))
# multiply the pixels in the resampled raster by the ratio of
# the pixel area in the wgs84 units divided by the area of the
# original pixel
raster_calculator(
[(base_stitch_raster_path, 1), (base_stitch_nodata, 'raw'),
m2_area_per_lat/base_pixel_area_m2,
(_GDAL_TYPE_TO_NUMPY_LOOKUP[
target_raster_info['datatype']], 'raw')], _mult_op,
(gdal_type, 'raw')], _mult_op,
scaled_raster_path,
target_raster_info['datatype'], base_stitch_nodata)

Expand Down
12 changes: 5 additions & 7 deletions src/pygeoprocessing/geoprocessing_core.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -656,22 +656,20 @@ def raster_band_percentile(
will select the next element higher than the percentile cutoff).
"""
raster_type = pygeoprocessing.get_raster_info(
base_raster_path_band[0])['datatype']
if raster_type in (
gdal.GDT_Byte, gdal.GDT_Int16, gdal.GDT_UInt16, gdal.GDT_Int32,
gdal.GDT_UInt32):
numpy_type = pygeoprocessing.get_raster_info(
base_raster_path_band[0])['numpy_type']
if numpy.issubdtype(numpy_type, numpy.integer):
return _raster_band_percentile_int(
base_raster_path_band, working_sort_directory, percentile_list,
heap_buffer_size, ffi_buffer_size)
elif raster_type in (gdal.GDT_Float32, gdal.GDT_Float64):
elif numpy.issubdtype(numpy_type, numpy.floating):
return _raster_band_percentile_double(
base_raster_path_band, working_sort_directory, percentile_list,
heap_buffer_size, ffi_buffer_size)
else:
raise ValueError(
'Cannot process raster type %s (not a known integer nor float '
'type)', raster_type)
'type)', numpy_type)


def _raster_band_percentile_int(
Expand Down

0 comments on commit 66a1f59

Please sign in to comment.