Skip to content

Commit

Permalink
Merge pull request #338 from phargogh/bugfix/254-watershed-delineatio…
Browse files Browse the repository at this point in the history
…n-requires-srs

Watershed delineation: handling the case where flow_dir doesn't have an SRS.
  • Loading branch information
emlys committed Aug 30, 2023
2 parents 1db6ef1 + 5ab4a46 commit 879fd70
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 105 deletions.
3 changes: 3 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ Unreleased Changes
``create_raster_from_bounding_box`` for determining the dimensions of the
target raster given a target bounding box and pixel sizes.
https://github.com/natcap/pygeoprocessing/issues/321
* ``pygeoprocessing.routing.delineate_watersheds_d8`` now handles the case
where the input flow direction raster does not have a defined spatial
reference. https://github.com/natcap/pygeoprocessing/issues/254
* Updating internal documentation describing TauDEM flow directions, and adding
for how to convert from a flow direction raster from what TauDEM expects to
what pygeoprocessing expects.
Expand Down
19 changes: 13 additions & 6 deletions src/pygeoprocessing/routing/watershed.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -628,10 +628,16 @@ def delineate_watersheds_d8(
target_layer_name='watersheds'):
"""Delineate watersheds for a vector of geometries using D8 flow dir.
Note:
The ``d8_flow_dir_raster_path_band`` and ``outflow_vector_path`` files
must have the same spatial reference system. The output watersheds
vector will use this same spatial reference system.
Args:
d8_flow_dir_raster_path_band (tuple): A (path, band_id) tuple
to a D8 flow direction raster. This raster must be a tiled raster
with block sizes being a power of 2.
with block sizes being a power of 2. The output watersheds vector
will have its spatial reference copied from this raster.
outflow_vector_path (str): The path to a vector on disk containing
features with valid geometries from which watersheds will be
delineated. Only those parts of the geometry that overlap valid
Expand Down Expand Up @@ -701,15 +707,17 @@ def delineate_watersheds_d8(

gtiff_driver = gdal.GetDriverByName('GTiff')
flow_dir_srs = osr.SpatialReference()
flow_dir_srs.ImportFromWkt(flow_dir_info['projection_wkt'])
if flow_dir_info['projection_wkt']:
flow_dir_srs.ImportFromWkt(flow_dir_info['projection_wkt'])

outflow_vector = gdal.OpenEx(outflow_vector_path, gdal.OF_VECTOR)
if outflow_vector is None:
raise ValueError(u'Could not open outflow vector %s' % outflow_vector_path)

driver = ogr.GetDriverByName('GPKG')
watersheds_srs = osr.SpatialReference()
watersheds_srs.ImportFromWkt(flow_dir_info['projection_wkt'])
if flow_dir_info['projection_wkt']:
watersheds_srs.ImportFromWkt(flow_dir_info['projection_wkt'])
watersheds_vector = driver.CreateDataSource(target_watersheds_vector_path)
polygonized_watersheds_layer = watersheds_vector.CreateLayer(
'polygonized_watersheds', watersheds_srs, ogr.wkbPolygon)
Expand Down Expand Up @@ -743,8 +751,6 @@ def delineate_watersheds_d8(
LOGGER.info('Delineating watersheds')
outflow_layer = outflow_vector.GetLayer()
outflow_feature_count = outflow_layer.GetFeatureCount()
flow_dir_srs = osr.SpatialReference()
flow_dir_srs.ImportFromWkt(flow_dir_info['projection_wkt'])
for feature in outflow_layer:
# Some vectors start indexing their FIDs at 0.
# The mask raster input to polygonization, however, only regards pixels
Expand Down Expand Up @@ -818,7 +824,8 @@ def delineate_watersheds_d8(
gdal.GDT_UInt32,
options=GTIFF_CREATION_OPTIONS)
scratch_raster.SetGeoTransform(source_gt)
scratch_raster.SetProjection(flow_dir_info['projection_wkt'])
if flow_dir_info['projection_wkt']:
scratch_raster.SetProjection(flow_dir_info['projection_wkt'])
# strictly speaking, there's no need to set the nodata value on the band.
scratch_raster = None

Expand Down
213 changes: 114 additions & 99 deletions tests/test_watershed_delineation.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,105 +116,120 @@ def test_watersheds_trivial(self):
srs.ImportFromEPSG(32731) # WGS84 / UTM zone 31s
srs_wkt = srs.ExportToWkt()

flow_dir_path = os.path.join(self.workspace_dir, 'flow_dir.tif')
pygeoprocessing.numpy_array_to_raster(
base_array=flow_dir_array,
target_nodata=255,
pixel_size=(2, -2),
origin=(2, -2),
projection_wkt=srs_wkt,
target_path=flow_dir_path)

# These geometries test:
# * Delineation works with varying geometry types
# * That we exclude seed pixels that are over nodata
# * That we exclude seed pixels off the bounds of the raster
horizontal_line = shapely.geometry.LineString([(19, -11), (25, -11)])
vertical_line = shapely.geometry.LineString([(21, -9), (21, -13)])
square = shapely.geometry.box(17, -13, 21, -9)
point = shapely.geometry.Point(21, -11)

outflow_vector_path = os.path.join(self.workspace_dir, 'outflow.gpkg')
pygeoprocessing.shapely_geometry_to_vector(
[horizontal_line, vertical_line, square, point],
outflow_vector_path, srs_wkt,
'GPKG',
{
'polygon_id': ogr.OFTInteger,
'field_string': ogr.OFTString,
'other': ogr.OFTReal,
# We use ws_id internally, so make sure that this field is
# copied over into the final vector since it's present in the
# source vector.
'ws_id': ogr.OFTInteger,
},
[
{'polygon_id': 1, 'field_string': 'hello world',
'other': 1.111, 'ws_id': 1},
{'polygon_id': 2, 'field_string': 'hello foo', 'other': 2.222,
'ws_id': 2},
{'polygon_id': 3, 'field_string': 'hello bar', 'other': 3.333,
'ws_id': 3},
{'polygon_id': 4, 'field_string': 'hello baz', 'other': 4.444,
'ws_id': 4},
],
ogr_geom_type=ogr.wkbUnknown)

target_watersheds_path = os.path.join(
self.workspace_dir, 'watersheds.gpkg')

pygeoprocessing.routing.delineate_watersheds_d8(
(flow_dir_path, 1), outflow_vector_path, target_watersheds_path,
target_layer_name='watersheds_something')

watersheds_vector = gdal.OpenEx(target_watersheds_path, gdal.OF_VECTOR)
watersheds_layer = watersheds_vector.GetLayer('watersheds_something')
self.assertEqual(watersheds_layer.GetFeatureCount(), 4)

# All features should have the same watersheds, both in area and
# geometry.
flow_dir_bbox = pygeoprocessing.get_raster_info(
flow_dir_path)['bounding_box']
expected_watershed_geometry = shapely.geometry.box(*flow_dir_bbox)
expected_watershed_geometry = expected_watershed_geometry.difference(
shapely.geometry.box(20, -2, 22, -10))
expected_watershed_geometry = expected_watershed_geometry.difference(
shapely.geometry.box(20, -12, 22, -22))
pygeoprocessing.shapely_geometry_to_vector(
[expected_watershed_geometry],
os.path.join(self.workspace_dir, 'foo.gpkg'), srs_wkt,
'GPKG', ogr_geom_type=ogr.wkbGeometryCollection)

id_to_fields = {}
for feature in watersheds_layer:
geometry = feature.GetGeometryRef()
shapely_geom = shapely.wkb.loads(bytes(geometry.ExportToWkb()))
self.assertEqual(
shapely_geom.area, expected_watershed_geometry.area)
self.assertEqual(
shapely_geom.intersection(
expected_watershed_geometry).area,
expected_watershed_geometry.area)
self.assertEqual(
shapely_geom.difference(
expected_watershed_geometry).area, 0)

field_values = feature.items()
id_to_fields[field_values['polygon_id']] = field_values

outflow_vector = gdal.OpenEx(outflow_vector_path, gdal.OF_VECTOR)
outflow_layer = outflow_vector.GetLayer()
found_ws_ids = set() # make sure the ws_id field is copied over
try:
for feature in outflow_layer:
self.assertEqual(
id_to_fields[feature.GetField('polygon_id')],
feature.items())
found_ws_ids.add(feature.GetField('ws_id'))
finally:
outflow_layer = None
outflow_vector = None
self.assertEqual(found_ws_ids, set([1, 2, 3, 4]))
for srs in (srs_wkt, None):
flow_dir_path = os.path.join(self.workspace_dir, 'flow_dir.tif')
pygeoprocessing.numpy_array_to_raster(
base_array=flow_dir_array,
target_nodata=255,
pixel_size=(2, -2),
origin=(2, -2),
projection_wkt=srs,
target_path=flow_dir_path)

# These geometries test:
# * Delineation works with varying geometry types
# * That we exclude seed pixels that are over nodata
# * That we exclude seed pixels off the bounds of the raster
horizontal_line = shapely.geometry.LineString(
[(19, -11), (25, -11)])
vertical_line = shapely.geometry.LineString([(21, -9), (21, -13)])
square = shapely.geometry.box(17, -13, 21, -9)
point = shapely.geometry.Point(21, -11)

outflow_vector_path = os.path.join(
self.workspace_dir, 'outflow.gpkg')
pygeoprocessing.shapely_geometry_to_vector(
[horizontal_line, vertical_line, square, point],
outflow_vector_path, srs_wkt,
'GPKG',
{
'polygon_id': ogr.OFTInteger,
'field_string': ogr.OFTString,
'other': ogr.OFTReal,
# We use ws_id internally, so make sure that this field is
# copied over into the final vector since it's present in
# the source vector.
'ws_id': ogr.OFTInteger,
},
[
{'polygon_id': 1, 'field_string': 'hello world',
'other': 1.111, 'ws_id': 1},
{'polygon_id': 2, 'field_string': 'hello foo',
'other': 2.222, 'ws_id': 2},
{'polygon_id': 3, 'field_string': 'hello bar',
'other': 3.333, 'ws_id': 3},
{'polygon_id': 4, 'field_string': 'hello baz',
'other': 4.444, 'ws_id': 4},
],
ogr_geom_type=ogr.wkbUnknown)

target_watersheds_path = os.path.join(
self.workspace_dir, 'watersheds.gpkg')

pygeoprocessing.routing.delineate_watersheds_d8(
(flow_dir_path, 1), outflow_vector_path,
target_watersheds_path,
target_layer_name='watersheds_something')

try:
watersheds_vector = gdal.OpenEx(target_watersheds_path,
gdal.OF_VECTOR)
watersheds_layer = watersheds_vector.GetLayer(
'watersheds_something')
self.assertEqual(watersheds_layer.GetFeatureCount(), 4)

# All features should have the same watersheds, both in area
# and geometry.
flow_dir_bbox = pygeoprocessing.get_raster_info(
flow_dir_path)['bounding_box']
expected_watershed_geometry = shapely.geometry.box(
*flow_dir_bbox)
expected_watershed_geometry = (
expected_watershed_geometry.difference(
shapely.geometry.box(20, -2, 22, -10)))
expected_watershed_geometry = (
expected_watershed_geometry.difference(
shapely.geometry.box(20, -12, 22, -22)))
pygeoprocessing.shapely_geometry_to_vector(
[expected_watershed_geometry],
os.path.join(self.workspace_dir, 'foo.gpkg'), srs_wkt,
'GPKG', ogr_geom_type=ogr.wkbGeometryCollection)

id_to_fields = {}
for feature in watersheds_layer:
geometry = feature.GetGeometryRef()
shapely_geom = shapely.wkb.loads(
bytes(geometry.ExportToWkb()))
self.assertEqual(
shapely_geom.area, expected_watershed_geometry.area)
self.assertEqual(
shapely_geom.intersection(
expected_watershed_geometry).area,
expected_watershed_geometry.area)
self.assertEqual(
shapely_geom.difference(
expected_watershed_geometry).area, 0)

field_values = feature.items()
id_to_fields[field_values['polygon_id']] = field_values
finally:
watersheds_vector = None
watersheds_layer = None

try:
outflow_vector = gdal.OpenEx(
outflow_vector_path, gdal.OF_VECTOR)
outflow_layer = outflow_vector.GetLayer()
found_ws_ids = set() # make sure the ws_id field is copied
for feature in outflow_layer:
self.assertEqual(
id_to_fields[feature.GetField('polygon_id')],
feature.items())
found_ws_ids.add(feature.GetField('ws_id'))
finally:
outflow_layer = None
outflow_vector = None
self.assertEqual(found_ws_ids, set([1, 2, 3, 4]))

def test_split_geometry_into_seeds(self):
"""PGP watersheds: Test geometry-to-seed extraction."""
Expand Down

0 comments on commit 879fd70

Please sign in to comment.