diff --git a/CHANGELOG.md b/CHANGELOG.md index 37b7d924..5ad2fd09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,17 +9,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Add `autofix_paging` parameter to `StacApiIO` to allow auto-fix of invalid links returned by servers when iterating paged responses ([#852](https://github.com/stac-utils/pystac-client/pull/852)). - Add comprehensive test coverage for warning context managers (`ignore()` and `strict()`) ([#832](https://github.com/stac-utils/pystac-client/pull/832)) - Moved -Werror to pyproject.toml ([#841](https://github.com/stac-utils/pystac-client/pull/841)) +- Add comprehensive test coverage for ConformanceClasses enum ([#834](https://github.com/stac-utils/pystac-client/pull/834)) ### Changed - Make `get_collection` raise if `collection_id` is empty ([#809](https://github.com/stac-utils/pystac-client/pull/809)) -### Added - -- Add comprehensive test coverage for ConformanceClasses enum ([#834](https://github.com/stac-utils/pystac-client/pull/834)) - ### Documentation - Update contributing guide to consistently use `uv` workflow ([#822](https://github.com/stac-utils/pystac-client/pull/822)) diff --git a/pystac_client/stac_api_io.py b/pystac_client/stac_api_io.py index 0daf73e5..44e7bc12 100644 --- a/pystac_client/stac_api_io.py +++ b/pystac_client/stac_api_io.py @@ -47,6 +47,7 @@ def __init__( request_modifier: Callable[[Request], Request | None] | None = None, timeout: Timeout | None = None, max_retries: int | Retry | None = 5, + autofix_paging: bool = True, ): """Initialize class for API IO @@ -69,11 +70,15 @@ def __init__( `__. max_retries: The number of times to retry requests. Set to ``None`` to disable retries. + autofix_paging: Whether to automatically retry a failed paged request with + the original URL and HTTP method when the 'next' link returned by the + server does not match them. Defaults to ``True``. 
Return: StacApiIO : StacApiIO instance """ - # TODO - this should super() to parent class + super().__init__(headers) + self.autofix_paging = autofix_paging if conformance is not None: warnings.warn( @@ -311,7 +316,29 @@ def get_pages( ) while next_link: link = Link.from_dict(next_link) - page = self.read_json(link, parameters=parameters) + try: + page = self.read_json(link, parameters=parameters) + except APIError as exc: + # retry with fixes if enabled and changes could be identified + params = link.to_dict() + meth = params.get("method") + if link.href == url and meth == method: + raise # other unidentified error + if self.autofix_paging: + logger.warning( + "Error retrieving paged results from 'next' link " + f"due to incompatible URL {link.href} or HTTP {meth} method. " + "Retrying with original request parameters." + ) + params["method"] = method + page = self.read_json(url, method=method, parameters=params) + else: + logger.error( + "Error retrieving paged results from 'next' link " + f"due to incompatible URL {link.href} or HTTP {meth} method.", + exc_info=exc, + ) + raise if not (page.get("features") or page.get("collections")): return None yield page diff --git a/tests/cassettes/test_item_search/TestItemSearch.test_result_paging_bad_next_link_autofix_disabled.yaml b/tests/cassettes/test_item_search/TestItemSearch.test_result_paging_bad_next_link_autofix_disabled.yaml new file mode 100644 index 00000000..f97b0444 --- /dev/null +++ b/tests/cassettes/test_item_search/TestItemSearch.test_result_paging_bad_next_link_autofix_disabled.yaml @@ -0,0 +1,72 @@ +version: 1 +interactions: +- request: + body: '{"limit": 1, "bbox": [-73.21, 43.99, -73.12, 44.05], "collections": ["naip"]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '78' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.4 + method: POST + uri: https://planetarycomputer.microsoft.com/api/stac/v1/search + 
response: + body: + string: '{"type":"FeatureCollection","features":[ + {"id":"vt_m_4307308_nw_18_030_20230622_20231113","bbox":[-73.130782,43.933691,-73.056636,44.003801],"type":"Feature","links":[{"rel":"collection","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip"},{"rel":"parent","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip"},{"rel":"root","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/"},{"rel":"self","type":"application/geo+json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/vt_m_4307308_nw_18_030_20230622_20231113"},{"rel":"preview","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=naip&item=vt_m_4307308_nw_18_030_20230622_20231113","title":"Map + of item","type":"text/html"}],"assets":{"image":{"href":"https://naipeuwest.blob.core.windows.net/naip/v002/vt/2023/vt_030cm_2023/43073/m_4307308_nw_18_030_20230622_20231113.tif","type":"image/tiff; + application=geotiff; profile=cloud-optimized","roles":["data"],"title":"RGBIR + COG tile","eo:bands":[{"name":"Red","common_name":"red"},{"name":"Green","common_name":"green"},{"name":"Blue","common_name":"blue"},{"name":"NIR","common_name":"nir","description":"near-infrared"}]},"thumbnail":{"href":"https://naipeuwest.blob.core.windows.net/naip/v002/vt/2023/vt_030cm_2023/43073/m_4307308_nw_18_030_20230622_20231113.200.jpg","type":"image/jpeg","roles":["thumbnail"],"title":"Thumbnail"},"tilejson":{"title":"TileJSON + with default rendering","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=naip&item=vt_m_4307308_nw_18_030_20230622_20231113&assets=image&asset_bidx=image%7C1%2C2%2C3&format=png","type":"application/json","roles":["tiles"]},"rendered_preview":{"title":"Rendered + 
preview","rel":"preview","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=naip&item=vt_m_4307308_nw_18_030_20230622_20231113&assets=image&asset_bidx=image%7C1%2C2%2C3&format=png","roles":["overview"],"type":"image/png"}},"geometry":{"type":"Polygon","coordinates":[[[-73.058882,43.933691],[-73.056636,44.002599],[-73.128619,44.003801],[-73.130782,43.93489],[-73.058882,43.933691]]]},"collection":"naip","properties":{"gsd":0.3,"datetime":"2023-06-22T16:00:00Z","naip:year":"2023","proj:bbox":[650027.7,4866339.600000001,655801.7999999999,4873997.4],"proj:epsg":26918,"providers":[{"url":"https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/","name":"USDA + Farm Service Agency","roles":["producer","licensor"]}],"naip:state":"vt","proj:shape":[25526,19247],"proj:centroid":{"lat":43.96875,"lon":-73.09373},"proj:transform":[0.3,0.0,650027.7,0.0,-0.3,4873997.4,0.0,0.0,1.0]},"stac_extensions":["https://stac-extensions.github.io/eo/v1.0.0/schema.json","https://stac-extensions.github.io/projection/v1.0.0/schema.json"],"stac_version":"1.0.0"} + ], + "links":[{"rel":"next","type":"application/geo+json","method":"POST","href":"https://planetarycomputer.microsoft.com/api/stac/","body":{"limit":1,"bbox":[-73.21,43.99,-73.12,44.05],"collections":["naip"],"token":"next:naip:vt_m_4307307_nw_18_060_20211029"}},{"rel":"root","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/"},{"rel":"self","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/search"}]}' + headers: + Connection: + - keep-alive + Content-Length: + - '2443' + Content-Type: + - application/geo+json + content-encoding: + - gzip + status: + code: 200 + message: OK +- request: + body: '{"limit": 1, "bbox": [-73.21, 43.99, -73.12, 44.05], "collections": ["naip"], + "token": "next:naip:vt_m_4307307_nw_18_060_20211029"}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, 
deflate + Connection: + - keep-alive + Content-Length: + - '132' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.4 + method: POST + uri: https://planetarycomputer.microsoft.com/api/stac/ # invalid URI returned by previous response 'next' link + response: + body: + string: 'Method not allowed' + headers: + Content-Length: + - '18' + Content-Type: + - application/geo+json + status: + code: 405 + message: 'Method not allowed' diff --git a/tests/cassettes/test_item_search/TestItemSearch.test_result_paging_bad_next_link_autofix_enabled.yaml b/tests/cassettes/test_item_search/TestItemSearch.test_result_paging_bad_next_link_autofix_enabled.yaml new file mode 100644 index 00000000..96095378 --- /dev/null +++ b/tests/cassettes/test_item_search/TestItemSearch.test_result_paging_bad_next_link_autofix_enabled.yaml @@ -0,0 +1,110 @@ +version: 1 +interactions: +- request: + body: '{"limit": 1, "bbox": [-73.21, 43.99, -73.12, 44.05], "collections": ["naip"]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '78' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.4 + method: POST + uri: https://planetarycomputer.microsoft.com/api/stac/v1/search + response: + body: + string: '{"type":"FeatureCollection","features":[ + 
{"id":"vt_m_4307308_nw_18_030_20230622_20231113","bbox":[-73.130782,43.933691,-73.056636,44.003801],"type":"Feature","links":[{"rel":"collection","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip"},{"rel":"parent","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip"},{"rel":"root","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/"},{"rel":"self","type":"application/geo+json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/vt_m_4307308_nw_18_030_20230622_20231113"},{"rel":"preview","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=naip&item=vt_m_4307308_nw_18_030_20230622_20231113","title":"Map + of item","type":"text/html"}],"assets":{"image":{"href":"https://naipeuwest.blob.core.windows.net/naip/v002/vt/2023/vt_030cm_2023/43073/m_4307308_nw_18_030_20230622_20231113.tif","type":"image/tiff; + application=geotiff; profile=cloud-optimized","roles":["data"],"title":"RGBIR + COG tile","eo:bands":[{"name":"Red","common_name":"red"},{"name":"Green","common_name":"green"},{"name":"Blue","common_name":"blue"},{"name":"NIR","common_name":"nir","description":"near-infrared"}]},"thumbnail":{"href":"https://naipeuwest.blob.core.windows.net/naip/v002/vt/2023/vt_030cm_2023/43073/m_4307308_nw_18_030_20230622_20231113.200.jpg","type":"image/jpeg","roles":["thumbnail"],"title":"Thumbnail"},"tilejson":{"title":"TileJSON + with default rendering","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=naip&item=vt_m_4307308_nw_18_030_20230622_20231113&assets=image&asset_bidx=image%7C1%2C2%2C3&format=png","type":"application/json","roles":["tiles"]},"rendered_preview":{"title":"Rendered + 
preview","rel":"preview","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=naip&item=vt_m_4307308_nw_18_030_20230622_20231113&assets=image&asset_bidx=image%7C1%2C2%2C3&format=png","roles":["overview"],"type":"image/png"}},"geometry":{"type":"Polygon","coordinates":[[[-73.058882,43.933691],[-73.056636,44.002599],[-73.128619,44.003801],[-73.130782,43.93489],[-73.058882,43.933691]]]},"collection":"naip","properties":{"gsd":0.3,"datetime":"2023-06-22T16:00:00Z","naip:year":"2023","proj:bbox":[650027.7,4866339.600000001,655801.7999999999,4873997.4],"proj:epsg":26918,"providers":[{"url":"https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/","name":"USDA + Farm Service Agency","roles":["producer","licensor"]}],"naip:state":"vt","proj:shape":[25526,19247],"proj:centroid":{"lat":43.96875,"lon":-73.09373},"proj:transform":[0.3,0.0,650027.7,0.0,-0.3,4873997.4,0.0,0.0,1.0]},"stac_extensions":["https://stac-extensions.github.io/eo/v1.0.0/schema.json","https://stac-extensions.github.io/projection/v1.0.0/schema.json"],"stac_version":"1.0.0"} + ], + "links":[{"rel":"next","type":"application/geo+json","method":"POST","href":"https://planetarycomputer.microsoft.com/api/stac/","body":{"limit":1,"bbox":[-73.21,43.99,-73.12,44.05],"collections":["naip"],"token":"next:naip:vt_m_4307307_nw_18_060_20211029"}},{"rel":"root","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/"},{"rel":"self","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/search"}]}' + headers: + Connection: + - keep-alive + Content-Length: + - '3464' + Content-Type: + - application/geo+json + content-encoding: + - gzip + status: + code: 200 + message: OK +- request: + body: '{"limit": 1, "bbox": [-73.21, 43.99, -73.12, 44.05], "collections": ["naip"], + "token": "next:naip:vt_m_4307307_nw_18_060_20211029"}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, 
deflate + Connection: + - keep-alive + Content-Length: + - '132' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.4 + method: POST + uri: https://planetarycomputer.microsoft.com/api/stac/ # invalid URI returned by previous response 'next' link + response: + body: + string: 'Method not allowed' + headers: + Content-Length: + - '18' + Content-Type: + - text/plain + status: + code: 405 + message: 'Method not allowed' +- request: + body: '{"limit": 1, "bbox": [-73.21, 43.99, -73.12, 44.05], "collections": ["naip"], + "token": "next:naip:vt_m_4307307_nw_18_060_20211029"}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '132' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.4 + method: POST + uri: https://planetarycomputer.microsoft.com/api/stac/v1/search # fixed URI by client + response: + body: + string: '{"type":"FeatureCollection","features":[ + {"id":"vt_m_4307307_ne_18_h_20160805","bbox":[-73.193221,43.933726,-73.119201,44.003771],"type":"Feature","links":[{"rel":"collection","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip"},{"rel":"parent","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip"},{"rel":"root","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/"},{"rel":"self","type":"application/geo+json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/vt_m_4307307_ne_18_h_20160805"},{"rel":"preview","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=naip&item=vt_m_4307307_ne_18_h_20160805","title":"Map + of item","type":"text/html"}],"assets":{"image":{"href":"https://naipeuwest.blob.core.windows.net/naip/v002/vt/2016/vt_060cm_2016/43073/m_4307307_ne_18_h_20160805.tif","type":"image/tiff; + application=geotiff; 
profile=cloud-optimized","roles":["data"],"title":"RGBIR + COG tile","eo:bands":[{"name":"Red","common_name":"red"},{"name":"Green","common_name":"green"},{"name":"Blue","common_name":"blue"},{"name":"NIR","common_name":"nir","description":"near-infrared"}]},"metadata":{"href":"https://naipeuwest.blob.core.windows.net/naip/v002/vt/2016/vt_fgdc_2016/43073/m_4307307_ne_18_h_20160805.txt","type":"text/plain","roles":["metadata"],"title":"FGDC + Metdata"},"thumbnail":{"href":"https://naipeuwest.blob.core.windows.net/naip/v002/vt/2016/vt_060cm_2016/43073/m_4307307_ne_18_h_20160805.200.jpg","type":"image/jpeg","roles":["thumbnail"],"title":"Thumbnail"},"tilejson":{"title":"TileJSON + with default rendering","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=naip&item=vt_m_4307307_ne_18_h_20160805&assets=image&asset_bidx=image%7C1%2C2%2C3&format=png","type":"application/json","roles":["tiles"]},"rendered_preview":{"title":"Rendered + preview","rel":"preview","href":"https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=naip&item=vt_m_4307307_ne_18_h_20160805&assets=image&asset_bidx=image%7C1%2C2%2C3&format=png","roles":["overview"],"type":"image/png"}},"geometry":{"type":"Polygon","coordinates":[[[-73.121374,43.933726],[-73.119201,44.002609],[-73.19113,44.003771],[-73.193221,43.934885],[-73.121374,43.933726]]]},"collection":"naip","properties":{"gsd":0.6000000000000096,"datetime":"2016-08-05T00:00:00Z","naip:year":"2016","proj:bbox":[645016.2,4866227.4,650785.8,4873882.2],"proj:epsg":26918,"naip:state":"vt","proj:shape":[12758,9616],"proj:transform":[0.6000000000000096,0.0,645016.2,0.0,-0.5999999999999854,4873882.2,0.0,0.0,1.0]},"stac_extensions":["https://stac-extensions.github.io/eo/v1.0.0/schema.json","https://stac-extensions.github.io/projection/v1.0.0/schema.json"],"stac_version":"1.0.0"} + ], + 
"links":[{"rel":"previous","type":"application/geo+json","method":"POST","href":"https://planetarycomputer.microsoft.com/api/stac/","body":{"limit":1,"bbox":[-73.21,43.99,-73.12,44.05],"collections":["naip"],"token":"prev:naip:vt_m_4307307_ne_18_060_20211029"}},{"rel":"root","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/"},{"rel":"self","type":"application/json","href":"https://planetarycomputer.microsoft.com/api/stac/v1/search"}]}' + headers: + Content-Length: + - '3378' + Content-Type: + - application/geo+json + status: + code: 200 + message: OK diff --git a/tests/test_item_search.py b/tests/test_item_search.py index 11dc0179..1650807e 100644 --- a/tests/test_item_search.py +++ b/tests/test_item_search.py @@ -11,7 +11,9 @@ from requests_mock import Mocker from pystac_client import Client +from pystac_client.exceptions import APIError from pystac_client.item_search import ItemSearch +from pystac_client.stac_api_io import StacApiIO from .helpers import STAC_URLS, read_data_file @@ -240,6 +242,39 @@ def test_result_paging_max_items(self) -> None: assert num_pages == 3 assert len(items) == 25 + @pytest.mark.vcr + def test_result_paging_bad_next_link_autofix_enabled(self, vcr) -> None: + search = ItemSearch( + url=SEARCH_URL, + method="POST", + collections="naip", + limit=1, + ) + num_pages = 0 + items = list() + for page in search.pages_as_dicts(): + num_pages += 1 + items.extend(page["features"]) + assert num_pages == 2 + assert len(items) == 2 + assert vcr.play_count == 3, ( + "should have made the 2 valid requests and 1 autofix request" + ) + + @pytest.mark.vcr + def test_result_paging_bad_next_link_autofix_disabled(self, vcr) -> None: + search = ItemSearch( + url=SEARCH_URL, + method="POST", + collections="naip", + limit=1, + stac_io=StacApiIO(autofix_paging=False), + ) + with pytest.raises(APIError, match="Method not allowed"): + for _ in search.pages_as_dicts(): + pass + assert vcr.play_count == 2 + @pytest.mark.vcr def 
test_item_collection(self) -> None: search = ItemSearch(