From 573dc1bfc05e716bf8fd1b1374cc90750d8021d0 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Wed, 28 May 2025 15:35:56 +0800 Subject: [PATCH 1/8] Check for reversed coordinates --- CHANGELOG.md | 3 ++ README.md | 6 ++- stac_check/lint.py | 56 ++++++++++++++++++++ stac_check/stac-check.config.yml | 2 + tests/test.config.yml | 2 + tests/test_lint.py | 91 ++++++++++++++++++++++++++++++++ 6 files changed, 158 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5456677..5aa0e8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) - Added sponsors and supporters section with logos ([#122](https://github.com/stac-utils/stac-check/pull/122)) - Added configuration documentation to README ([#124](https://github.com/stac-utils/stac-check/pull/124)) +- Added validation for geometry coordinates order to detect reversed lat/lon coordinates ([#125](https://github.com/stac-utils/stac-check/pull/125)) + - Checks that coordinates follow the GeoJSON specification with [longitude, latitude] order + - Detects when coordinates are accidentally reversed by checking if latitude values exceed ±90 degrees ### Updated diff --git a/README.md b/README.md index 5371fb8..95c960d 100644 --- a/README.md +++ b/README.md @@ -116,8 +116,8 @@ linting: null_datetime: true # Check unlocated items to make sure bbox field is not set check_unlocated: true - # Check if bbox matches the bounds of the geometry - check_bbox_geometry_match: true + # Recommend items have a geometry + check_geometry: true # Check to see if there are too many links bloated_links: true # Check for bloated metadata in properties @@ -128,6 +128,8 @@ linting: links_title: true # Ensure that links in catalogs and collections include self link links_self: true + # Check if geometry coordinates are in the correct order (longitude, latitude) + geometry_coordinates_order: true settings: # Number of links before the bloated 
links warning is shown diff --git a/stac_check/lint.py b/stac_check/lint.py index cdfdd17..a5c720a 100644 --- a/stac_check/lint.py +++ b/stac_check/lint.py @@ -553,6 +553,54 @@ def check_catalog_file_name(self) -> bool: else: return True + def check_geometry_coordinates_order(self) -> bool: + """Checks if the coordinates in a geometry are in the correct order (longitude, latitude). + + This function verifies that coordinates follow the GeoJSON specification where positions are in + [longitude, latitude] order. It detects cases where coordinates might be accidentally reversed + by checking if latitude values (which should be the second element in each coordinate pair) + are within the valid range of -90 to 90 degrees. + + Returns: + bool: True if coordinates appear to be in the correct order, False if they seem reversed. + """ + if "geometry" not in self.data or self.data["geometry"] is None: + return True + + geometry = self.data["geometry"] + + # Function to check a single coordinate pair + def is_valid_coordinate(coord): + if len(coord) < 2: + return True # Not enough elements to check + + lon, lat = coord[0], coord[1] + + # Check if latitude (second value) is outside the valid range + # This could indicate reversed coordinates + if abs(lat) > 90: + return False + + # Check if longitude (first value) is outside the valid range + # This is another indicator of possible coordinate reversal + if abs(lon) > 180: + return False + + return True + + # Function to recursively check all coordinates in a geometry + def check_coordinates(coords): + if isinstance(coords, list): + if coords and isinstance(coords[0], (int, float)): + # This is a single coordinate + return is_valid_coordinate(coords) + else: + # This is a list of coordinates or a list of lists of coordinates + return all(check_coordinates(coord) for coord in coords) + return True + + return check_coordinates(geometry.get("coordinates", [])) + def create_best_practices_dict(self) -> Dict: """Creates a dictionary 
of best practices violations for the current STAC object. The violations are determined by a set of configurable linting rules specified in the config file. @@ -653,6 +701,14 @@ def create_best_practices_dict(self) -> Dict: msg_1 = "A link to 'self' in links is strongly recommended" best_practices_dict["check_links_self"] = [msg_1] + # best practices - ensure that geometry coordinates are in the correct order + if ( + not self.check_geometry_coordinates_order() + and config["geometry_coordinates_order"] == True + ): + msg_1 = "Geometry coordinates should be in the correct order (longitude, latitude)" + best_practices_dict["geometry_coordinates_order"] = [msg_1] + return best_practices_dict def create_best_practices_msg(self) -> List[str]: diff --git a/stac_check/stac-check.config.yml b/stac_check/stac-check.config.yml index bccdfd9..16578ce 100644 --- a/stac_check/stac-check.config.yml +++ b/stac_check/stac-check.config.yml @@ -25,6 +25,8 @@ linting: links_title: true # best practices - ensure that links in catalogs and collections include self link links_self: true + # check if geometry coordinates are in the correct order (longitude, latitude) + geometry_coordinates_order: true settings: # number of links before the bloated links warning is shown diff --git a/tests/test.config.yml b/tests/test.config.yml index 031ac62..625ea39 100644 --- a/tests/test.config.yml +++ b/tests/test.config.yml @@ -25,6 +25,8 @@ linting: links_title: true # best practices - ensure that links in catalogs and collections include self link links_self: true + # check if geometry coordinates are in the correct order (longitude, latitude) + geometry_coordinates_order: true settings: # number of links before the bloated links warning is shown diff --git a/tests/test_lint.py b/tests/test_lint.py index 92b1bd4..2a5a1a4 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -568,3 +568,94 @@ def test_lint_assets_no_links(): "request_invalid": [], }, } + + +def 
test_geometry_coordinates_order(): + """Test the check_geometry_coordinates_order method for detecting incorrectly ordered coordinates.""" + # Create a test item with coordinates in the correct order (longitude, latitude) + correct_item = { + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "test-coordinates-correct", + "bbox": [10.0, -10.0, 20.0, 10.0], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [10.0, -10.0], # lon, lat + [20.0, -10.0], + [20.0, 10.0], + [10.0, 10.0], + [10.0, -10.0], + ] + ], + }, + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + } + + # Create a test item with coordinates in the wrong order (latitude, longitude) + # This will have "latitude" values outside the valid range (-90 to 90) + incorrect_item = { + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "test-coordinates-incorrect", + "bbox": [10.0, -10.0, 20.0, 10.0], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-10.0, 10.0], # lat, lon (reversed) + [-10.0, 20.0], + [10.0, 20.0], + [10.0, 10.0], + [-10.0, 10.0], + ] + ], + }, + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + } + + # Create a test item with coordinates that are clearly reversed (latitude > 90) + clearly_incorrect_item = { + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "test-coordinates-clearly-incorrect", + "bbox": [10.0, -10.0, 20.0, 10.0], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [10.0, 100.0], # Second value (latitude) > 90 + [20.0, 100.0], + [20.0, 100.0], + [10.0, 100.0], + [10.0, 100.0], + ] + ], + }, + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + } + + # Test with correct coordinates - this should pass + linter = Linter(correct_item) + assert linter.check_geometry_coordinates_order() == True + + # Test with incorrect coordinates - this should fail + linter = Linter(incorrect_item) + assert ( + linter.check_geometry_coordinates_order() == True + ) # 
This will still pass because values are within valid ranges + + # Test with clearly incorrect coordinates - this should fail + linter = Linter(clearly_incorrect_item) + assert linter.check_geometry_coordinates_order() == False + + # Test that the best practices dictionary contains the error message + best_practices = linter.create_best_practices_dict() + assert "geometry_coordinates_order" in best_practices + assert best_practices["geometry_coordinates_order"] == [ + "Geometry coordinates should be in the correct order (longitude, latitude)" + ] From 1ecfb5e38fd4cf56628e3a31995601924aea81d6 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Wed, 28 May 2025 23:29:33 +0800 Subject: [PATCH 2/8] make message more ambiguous --- stac_check/lint.py | 29 ++++++++++++++++++++--------- tests/test_lint.py | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/stac_check/lint.py b/stac_check/lint.py index a5c720a..f57fa38 100644 --- a/stac_check/lint.py +++ b/stac_check/lint.py @@ -554,15 +554,22 @@ def check_catalog_file_name(self) -> bool: return True def check_geometry_coordinates_order(self) -> bool: - """Checks if the coordinates in a geometry are in the correct order (longitude, latitude). + """Checks if the coordinates in a geometry may be in the incorrect order. - This function verifies that coordinates follow the GeoJSON specification where positions are in - [longitude, latitude] order. It detects cases where coordinates might be accidentally reversed - by checking if latitude values (which should be the second element in each coordinate pair) - are within the valid range of -90 to 90 degrees. + This function attempts to detect cases where coordinates might not follow the GeoJSON + specification where positions should be in [longitude, latitude] order. It uses several + heuristics to identify potentially problematic coordinates: + + 1. Checks if latitude values (second element) exceed ±90 degrees + 2. 
Checks if longitude values (first element) exceed ±180 degrees + 3. Uses a heuristic to detect when coordinates are likely reversed + (when first value > 90, second value < 90, and first value > second value*2) + + Note that this check can never definitively determine if coordinates are reversed + or simply contain errors, it can only flag suspicious patterns. Returns: - bool: True if coordinates appear to be in the correct order, False if they seem reversed. + bool: True if coordinates appear to be in the expected order, False if they may be reversed. """ if "geometry" not in self.data or self.data["geometry"] is None: return True @@ -577,15 +584,19 @@ def is_valid_coordinate(coord): lon, lat = coord[0], coord[1] # Check if latitude (second value) is outside the valid range - # This could indicate reversed coordinates if abs(lat) > 90: return False # Check if longitude (first value) is outside the valid range - # This is another indicator of possible coordinate reversal if abs(lon) > 180: return False + # Additional heuristic for likely reversed coordinates + # If the first value (supposed longitude) is > 90, second value (supposed latitude) is < 90, + # and first value is significantly larger than second value, they may be reversed + if abs(lon) > 90 and abs(lat) < 90 and abs(lon) > abs(lat) * 2: + return False + return True # Function to recursively check all coordinates in a geometry @@ -706,7 +717,7 @@ def create_best_practices_dict(self) -> Dict: not self.check_geometry_coordinates_order() and config["geometry_coordinates_order"] == True ): - msg_1 = "Geometry coordinates should be in the correct order (longitude, latitude)" + msg_1 = "Geometry coordinates may be reversed or contain errors (expected order: longitude, latitude)" best_practices_dict["geometry_coordinates_order"] = [msg_1] return best_practices_dict diff --git a/tests/test_lint.py b/tests/test_lint.py index 2a5a1a4..f916a3a 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -639,23 
+639,51 @@ def test_geometry_coordinates_order(): "properties": {"datetime": "2023-01-01T00:00:00Z"}, } + # Create a test item with coordinates that may be reversed based on heuristic + # (first value > 90, second value < 90, first value > second value*2) + heuristic_incorrect_item = { + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "test-coordinates-heuristic-incorrect", + "bbox": [10.0, -10.0, 20.0, 10.0], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [120.0, 40.0], # First value > 90, second < 90, first > second*2 + [120.0, 40.0], + [120.0, 40.0], + [120.0, 40.0], + [120.0, 40.0], + ] + ], + }, + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + } + # Test with correct coordinates - this should pass linter = Linter(correct_item) assert linter.check_geometry_coordinates_order() == True - # Test with incorrect coordinates - this should fail + # Test with incorrect coordinates that are within valid ranges + # This will now fail with our enhanced heuristic linter = Linter(incorrect_item) assert ( linter.check_geometry_coordinates_order() == True - ) # This will still pass because values are within valid ranges + ) # Still passes as values are within valid ranges # Test with clearly incorrect coordinates - this should fail linter = Linter(clearly_incorrect_item) assert linter.check_geometry_coordinates_order() == False + # Test with coordinates that trigger the heuristic - this should fail + linter = Linter(heuristic_incorrect_item) + assert linter.check_geometry_coordinates_order() == False + # Test that the best practices dictionary contains the error message best_practices = linter.create_best_practices_dict() assert "geometry_coordinates_order" in best_practices assert best_practices["geometry_coordinates_order"] == [ - "Geometry coordinates should be in the correct order (longitude, latitude)" + "Geometry coordinates may be reversed or contain errors (expected order: longitude, latitude)" ] From 
8eb352eda30bf7bc1bdc9c325655b4180a8faaf5 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Wed, 28 May 2025 23:38:25 +0800 Subject: [PATCH 3/8] comment change --- README.md | 2 +- stac_check/stac-check.config.yml | 2 +- tests/test.config.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 95c960d..eaf2e5d 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ linting: links_title: true # Ensure that links in catalogs and collections include self link links_self: true - # Check if geometry coordinates are in the correct order (longitude, latitude) + # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) geometry_coordinates_order: true settings: diff --git a/stac_check/stac-check.config.yml b/stac_check/stac-check.config.yml index 16578ce..3ddbc6f 100644 --- a/stac_check/stac-check.config.yml +++ b/stac_check/stac-check.config.yml @@ -25,7 +25,7 @@ linting: links_title: true # best practices - ensure that links in catalogs and collections include self link links_self: true - # check if geometry coordinates are in the correct order (longitude, latitude) + # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) geometry_coordinates_order: true settings: diff --git a/tests/test.config.yml b/tests/test.config.yml index 625ea39..75ab7d9 100644 --- a/tests/test.config.yml +++ b/tests/test.config.yml @@ -25,7 +25,7 @@ linting: links_title: true # best practices - ensure that links in catalogs and collections include self link links_self: true - # check if geometry coordinates are in the correct order (longitude, latitude) + # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) geometry_coordinates_order: true settings: From c98090846fc8fc89d6fbbec58c784aa940ffaf13 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Wed, 28 May 2025 23:43:09 +0800 Subject: [PATCH 4/8] update changelog --- CHANGELOG.md | 5 +++-- 1 file changed, 
3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5aa0e8f..66a22bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,10 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) - Added sponsors and supporters section with logos ([#122](https://github.com/stac-utils/stac-check/pull/122)) - Added configuration documentation to README ([#124](https://github.com/stac-utils/stac-check/pull/124)) -- Added validation for geometry coordinates order to detect reversed lat/lon coordinates ([#125](https://github.com/stac-utils/stac-check/pull/125)) +- Added validation for geometry coordinates order to detect potentially reversed lat/lon coordinates ([#125](https://github.com/stac-utils/stac-check/pull/125)) - Checks that coordinates follow the GeoJSON specification with [longitude, latitude] order - - Detects when coordinates are accidentally reversed by checking if latitude values exceed ±90 degrees + - Uses heuristics to identify coordinates that may be reversed or contain errors + - Provides nuanced error messages acknowledging the uncertainty in coordinate validation ### Updated From aecb03929599a5a4fb2e3a3b1396726baf9e6b0e Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 1 Jun 2025 01:02:06 +0800 Subject: [PATCH 5/8] lint --- stac_check/lint.py | 2 +- tests/test_lint.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/stac_check/lint.py b/stac_check/lint.py index 367f2ae..f23cd06 100644 --- a/stac_check/lint.py +++ b/stac_check/lint.py @@ -859,7 +859,7 @@ def create_best_practices_dict(self) -> Dict: ): msg_1 = "Geometry coordinates may be reversed or contain errors (expected order: longitude, latitude)" best_practices_dict["geometry_coordinates_order"] = [msg_1] - + # Check if a bbox that crosses the antimeridian is correctly formatted if not self.check_bbox_antimeridian() and config.get( "check_bbox_antimeridian", True diff --git a/tests/test_lint.py b/tests/test_lint.py index 
d22a5e4..966c5ab 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -779,7 +779,6 @@ def test_geometry_coordinates_order(): "Geometry coordinates may be reversed or contain errors (expected order: longitude, latitude)" ] - def test_bbox_antimeridian(): """Test the check_bbox_antimeridian method for detecting incorrectly formatted bboxes that cross the antimeridian.""" From b6fd37f703b915fd56688bf40d5ff1e4ccf2ab91 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 1 Jun 2025 15:26:53 +0800 Subject: [PATCH 6/8] make test clearer --- tests/test_lint.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 966c5ab..edba56d 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -686,18 +686,18 @@ def test_geometry_coordinates_order(): } # Create a test item with coordinates in the wrong order (latitude, longitude) - # This will have "latitude" values outside the valid range (-90 to 90) - incorrect_item = { + # but with values that don't trigger the validation checks + undetectable_reversed_item = { "stac_version": "1.0.0", "stac_extensions": [], "type": "Feature", - "id": "test-coordinates-incorrect", + "id": "test-coordinates-undetectable-reversed", "bbox": [10.0, -10.0, 20.0, 10.0], "geometry": { "type": "Polygon", "coordinates": [ [ - [-10.0, 10.0], # lat, lon (reversed) + [-10.0, 10.0], # lat, lon (reversed) but within valid ranges [-10.0, 20.0], [10.0, 20.0], [10.0, 10.0], @@ -757,12 +757,12 @@ def test_geometry_coordinates_order(): linter = Linter(correct_item) assert linter.check_geometry_coordinates_order() == True - # Test with incorrect coordinates that are within valid ranges - # This will now fail with our enhanced heuristic - linter = Linter(incorrect_item) + # Test with reversed coordinates that are within valid ranges + # Current implementation can't detect this case, so the test passes + linter = Linter(undetectable_reversed_item) assert ( 
linter.check_geometry_coordinates_order() == True - ) # Still passes as values are within valid ranges + ) # Passes because values are within valid ranges # Test with clearly incorrect coordinates - this should fail linter = Linter(clearly_incorrect_item) From f241681156a4431dcc5b39cf51ebeeaefb2b0b3d Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 1 Jun 2025 19:19:50 +0800 Subject: [PATCH 7/8] definite error check --- stac_check/lint.py | 157 ++++++++++++++++++++++++++++----------------- tests/test_lint.py | 51 +++++++++++---- 2 files changed, 139 insertions(+), 69 deletions(-) diff --git a/stac_check/lint.py b/stac_check/lint.py index f23cd06..4d26176 100644 --- a/stac_check/lint.py +++ b/stac_check/lint.py @@ -633,31 +633,27 @@ def check_catalog_file_name(self) -> bool: else: return True - def check_geometry_coordinates_order(self) -> bool: - """Checks if the coordinates in a geometry may be in the incorrect order. + def check_geometry_coordinates_definite_errors(self) -> bool: + """Checks if the coordinates in a geometry contain definite errors. - This function attempts to detect cases where coordinates might not follow the GeoJSON - specification where positions should be in [longitude, latitude] order. It uses several - heuristics to identify potentially problematic coordinates: + This function checks for coordinates that definitely violate the GeoJSON specification: - 1. Checks if latitude values (second element) exceed ±90 degrees - 2. Checks if longitude values (first element) exceed ±180 degrees - 3. Uses a heuristic to detect when coordinates are likely reversed - (when first value > 90, second value < 90, and first value > second value*2) + 1. Latitude values (second element) exceed ±90 degrees + 2. Longitude values (first element) exceed ±180 degrees - Note that this check can never definitively determine if coordinates are reversed - or simply contain errors, it can only flag suspicious patterns. 
+ This check focuses on definite errors rather than potential/likely errors. + For checking potential errors (likely reversed coordinates), use check_geometry_coordinates_order(). Returns: - bool: True if coordinates appear to be in the expected order, False if they may be reversed. + bool: True if coordinates are within valid ranges, False if they contain definite errors. """ - if "geometry" not in self.data or self.data["geometry"] is None: + if "geometry" not in self.data or self.data.get("geometry") is None: return True - geometry = self.data["geometry"] + geometry = self.data.get("geometry") - # Function to check a single coordinate pair - def is_valid_coordinate(coord): + # Function to check a single coordinate pair for definite errors + def is_within_valid_ranges(coord): if len(coord) < 2: return True # Not enough elements to check @@ -671,9 +667,49 @@ def is_valid_coordinate(coord): if abs(lon) > 180: return False - # Additional heuristic for likely reversed coordinates - # If the first value (supposed longitude) is > 90, second value (supposed latitude) is < 90, - # and first value is significantly larger than second value, they may be reversed + return True + + # Function to recursively check all coordinates in a geometry + def check_coordinates(coords): + if isinstance(coords, list): + if coords and isinstance(coords[0], (int, float)): + # This is a single coordinate + return is_within_valid_ranges(coords) + else: + # This is a list of coordinates or a list of lists of coordinates + return all(check_coordinates(coord) for coord in coords) + return True + + return check_coordinates(geometry.get("coordinates", [])) + + def check_geometry_coordinates_order(self) -> bool: + """Checks if the coordinates in a geometry may be in the incorrect order. + + This function uses a heuristic to detect coordinates that are likely in the wrong order + (latitude, longitude instead of longitude, latitude). 
It looks for cases where: + - The first value (supposed to be longitude) is > 90 degrees + - The second value (supposed to be latitude) is < 90 degrees + - The first value is more than twice the second value + + For checking definite errors (values outside valid ranges), use check_geometry_coordinates_definite_errors(). + + Returns: + bool: True if coordinates appear to be in the correct order, False if they may be reversed. + """ + if "geometry" not in self.data or self.data.get("geometry") is None: + return True + + geometry = self.data.get("geometry") + + # Function to check if a single coordinate pair is likely in the correct order + def is_likely_correct_order(coord): + if len(coord) < 2: + return True # Not enough elements to check + + lon, lat = coord[0], coord[1] + + # Heuristic: If the supposed longitude is > 90 and the supposed latitude is < 90, + # and the longitude is more than twice the latitude, the pair is likely reversed (latitude, longitude) if abs(lon) > 90 and abs(lat) < 90 and abs(lon) > abs(lat) * 2: return False @@ -684,7 +720,7 @@ def check_coordinates(coords): if isinstance(coords, list): if coords and isinstance(coords[0], (int, float)): # This is a single coordinate - return is_valid_coordinate(coords) + return is_likely_correct_order(coords) else: # This is a list of coordinates or a list of lists of coordinates return all(check_coordinates(coord) for coord in coords) @@ -692,6 +728,40 @@ def check_coordinates(coords): return check_coordinates(geometry.get("coordinates", [])) + def check_bbox_antimeridian(self) -> bool: + """ + Checks if a bbox that crosses the antimeridian is correctly formatted. + + According to the GeoJSON spec, when a bbox crosses the antimeridian (180°/-180° longitude), + the minimum longitude (bbox[0]) should be greater than the maximum longitude (bbox[2]). + This method checks if this convention is followed correctly.
+ + Returns: + bool: True if the bbox is valid (either doesn't cross antimeridian or crosses it correctly), + False if it incorrectly crosses the antimeridian. + """ + if "bbox" not in self.data: + return True + + bbox = self.data.get("bbox") + + # Extract the 2D part of the bbox (ignoring elevation if present) + if len(bbox) == 4: # 2D bbox [west, south, east, north] + west, _, east, _ = bbox + elif len(bbox) == 6: # 3D bbox [west, south, min_elev, east, north, max_elev] + west, _, _, east, _, _ = bbox + + # Check if the bbox appears to cross the antimeridian + # This is the case when west > east in a valid bbox that crosses the antimeridian + # For example: [170, -10, -170, 10] crosses the antimeridian correctly + # But [-170, -10, 170, 10] is incorrectly belting the globe + + # Invalid if bbox "belts the globe" (too wide) + if west < east and (east - west) > 180: + return False + # Otherwise, valid (normal or valid antimeridian crossing) + return True + def create_best_practices_dict(self) -> Dict: """Creates a dictionary of best practices violations for the current STAC object. The violations are determined by a set of configurable linting rules specified in the config file. 
@@ -857,9 +927,17 @@ def create_best_practices_dict(self) -> Dict: not self.check_geometry_coordinates_order() and config["geometry_coordinates_order"] == True ): - msg_1 = "Geometry coordinates may be reversed or contain errors (expected order: longitude, latitude)" + msg_1 = "Geometry coordinates may be in the wrong order (required order: longitude, latitude)" best_practices_dict["geometry_coordinates_order"] = [msg_1] + # best practices - check if geometry coordinates contain definite errors + if ( + not self.check_geometry_coordinates_definite_errors() + and config["geometry_coordinates_order"] == True + ): + msg_1 = "Geometry coordinates contain invalid values that violate the GeoJSON specification (latitude must be between -90 and 90, longitude between -180 and 180)" + best_practices_dict["geometry_coordinates_definite_errors"] = [msg_1] + # Check if a bbox that crosses the antimeridian is correctly formatted if not self.check_bbox_antimeridian() and config.get( "check_bbox_antimeridian", True @@ -881,43 +959,6 @@ def create_best_practices_dict(self) -> Dict: return best_practices_dict - def check_bbox_antimeridian(self) -> bool: - """ - Checks if a bbox that crosses the antimeridian is correctly formatted. - - According to the GeoJSON spec, when a bbox crosses the antimeridian (180°/-180° longitude), - the minimum longitude (bbox[0]) should be greater than the maximum longitude (bbox[2]). - This method checks if this convention is followed correctly. - - Returns: - bool: True if the bbox is valid (either doesn't cross antimeridian or crosses it correctly), - False if it incorrectly crosses the antimeridian. 
- """ - if "bbox" not in self.data: - return True - - bbox = self.data["bbox"] - - # Extract the 2D part of the bbox (ignoring elevation if present) - if len(bbox) == 4: # 2D bbox [west, south, east, north] - west, south, east, north = bbox - elif len(bbox) == 6: # 3D bbox [west, south, min_elev, east, north, max_elev] - west, south, _, east, north, _ = bbox - else: - # Invalid bbox format, can't check - return True - - # Check if the bbox appears to cross the antimeridian - # This is the case when west > east in a valid bbox that crosses the antimeridian - # For example: [170, -10, -170, 10] crosses the antimeridian correctly - # But [-170, -10, 170, 10] is incorrectly belting the globe - - # Invalid if bbox "belts the globe" (too wide) - if west < east and (east - west) > 180: - return False - # Otherwise, valid (normal or valid antimeridian crossing) - return True - def create_best_practices_msg(self) -> List[str]: """ Generates a list of best practices messages based on the results of the 'create_best_practices_dict' method. 
diff --git a/tests/test_lint.py b/tests/test_lint.py index edba56d..b96ae92 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -662,7 +662,7 @@ def test_lint_assets_no_links(): def test_geometry_coordinates_order(): - """Test the check_geometry_coordinates_order method for detecting incorrectly ordered coordinates.""" + """Test the check_geometry_coordinates_order method for detecting potentially incorrectly ordered coordinates.""" # Create a test item with coordinates in the correct order (longitude, latitude) correct_item = { "stac_version": "1.0.0", @@ -753,31 +753,60 @@ def test_geometry_coordinates_order(): "properties": {"datetime": "2023-01-01T00:00:00Z"}, } - # Test with correct coordinates - this should pass + # Test with correct coordinates - this should pass both checks linter = Linter(correct_item) assert linter.check_geometry_coordinates_order() == True + assert linter.check_geometry_coordinates_definite_errors() == True # Test with reversed coordinates that are within valid ranges - # Current implementation can't detect this case, so the test passes + # Current implementation can't detect this case, so both checks pass linter = Linter(undetectable_reversed_item) assert ( linter.check_geometry_coordinates_order() == True ) # Passes because values are within valid ranges + assert ( + linter.check_geometry_coordinates_definite_errors() == True + ) # Passes because values are within valid ranges - # Test with clearly incorrect coordinates - this should fail + # Test with clearly incorrect coordinates (latitude > 90) + # This should fail the definite errors check but pass the order check (which now only uses heuristic) linter = Linter(clearly_incorrect_item) - assert linter.check_geometry_coordinates_order() == False + assert ( + linter.check_geometry_coordinates_order() == True + ) # Now passes because it only checks heuristic + assert ( + linter.check_geometry_coordinates_definite_errors() == False + ) # Fails because latitude > 90 - # Test with 
coordinates that trigger the heuristic - this should fail + # Test with coordinates that trigger the heuristic + # This should fail the order check but pass the definite errors check linter = Linter(heuristic_incorrect_item) - assert linter.check_geometry_coordinates_order() == False + assert ( + linter.check_geometry_coordinates_order() == False + ) # Fails because of heuristic + assert ( + linter.check_geometry_coordinates_definite_errors() == True + ) # Passes because values are within valid ranges - # Test that the best practices dictionary contains the error message + # Test that the best practices dictionary contains the appropriate error messages + best_practices = linter.create_best_practices_dict() + + # For heuristic-based detection + linter = Linter(heuristic_incorrect_item) best_practices = linter.create_best_practices_dict() assert "geometry_coordinates_order" in best_practices - assert best_practices["geometry_coordinates_order"] == [ - "Geometry coordinates may be reversed or contain errors (expected order: longitude, latitude)" - ] + assert ( + "may be in the wrong order" in best_practices["geometry_coordinates_order"][0] + ) + + # For definite errors detection + linter = Linter(clearly_incorrect_item) + best_practices = linter.create_best_practices_dict() + assert "geometry_coordinates_definite_errors" in best_practices + assert ( + "contain invalid values" + in best_practices["geometry_coordinates_definite_errors"][0] + ) def test_bbox_antimeridian(): From 9e4ef9abc6d6e9a800ec1a3d0fd3c9c089bb66a3 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 1 Jun 2025 23:24:07 +0800 Subject: [PATCH 8/8] create geometry checks config --- CHANGELOG.md | 12 +++ README.md | 56 ++++++++--- stac_check/cli.py | 9 ++ stac_check/lint.py | 154 ++++++++++++++++++++++++++----- stac_check/stac-check.config.yml | 13 ++- tests/test.config.yml | 17 +++- tests/test_config.py | 9 +- tests/test_lint.py | 12 ++- 8 files changed, 231 insertions(+), 51 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 920ed06..318a4f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) ## Unreleased +## [v1.7.0] - 2025-06-01 + ### Added - Added validation for bounding boxes that cross the antimeridian (180°/-180° longitude) ([#121](https://github.com/stac-utils/stac-check/pull/121)) @@ -19,6 +21,16 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) - Checks that coordinates follow the GeoJSON specification with [longitude, latitude] order - Uses heuristics to identify coordinates that may be reversed or contain errors - Provides nuanced error messages acknowledging the uncertainty in coordinate validation +- Added validation for definite geometry coordinate errors ([#125](https://github.com/stac-utils/stac-check/pull/125)) + - Detects coordinates with latitude values exceeding ±90 degrees + - Detects coordinates with longitude values exceeding ±180 degrees + - Returns detailed information about invalid coordinates +- Added dedicated geometry validation configuration section ([#125](https://github.com/stac-utils/stac-check/pull/125)) + - Created a new `geometry_validation` section in the configuration file + - Added a master enable/disable switch for all geometry validation checks + - Reorganized geometry validation options into the new section + - Separated geometry validation errors in CLI output with a [BETA] label + - Added detailed documentation for geometry validation features - Added `--pydantic` option for validating STAC objects using stac-pydantic models, providing enhanced type checking and validation ([#126](https://github.com/stac-utils/stac-check/pull/126)) ### Enhanced diff --git a/README.md b/README.md index 263d5f5..4dcea5d 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ The intent of this project is to provide a validation tool that also follows the - [Usage](#usage) - [CLI Usage](#cli-usage) - 
[Configuration](#configuration) + - [Geometry Validation](#geometry-validation) - [Python API Usage](#python-api-usage) - [Examples](#examples) - [Basic Validation](#basic-validation) @@ -94,10 +95,11 @@ Options: stac-check uses a configuration file to control which validation checks are performed. By default, it uses the built-in configuration at `stac_check/stac-check.config.yml`. You can customize the validation behavior by creating your own configuration file. -The configuration file has two main sections: +The configuration file has three main sections: -1. **linting**: Controls which best practices checks are enabled -2. **settings**: Configures thresholds for certain checks +1. **linting**: Controls which general best practices checks are enabled +2. **geometry_validation**: Controls geometry-specific validation checks [BETA] +3. **settings**: Configures thresholds for certain checks Here's an example of the configuration options: @@ -105,7 +107,7 @@ Here's an example of the configuration options: linting: # Identifiers should consist of only lowercase characters, numbers, '_', and '-' searchable_identifiers: true - # Item name should not contain ':' or '/' + # Item IDs should not contain ':' or '/' percent_encoded: true # Item file names should match their ids item_id_file_name: true @@ -115,29 +117,37 @@ linting: check_summaries: true # Datetime fields should not be set to null null_datetime: true - # Check unlocated items to make sure bbox field is not set + # best practices - check unlocated items to make sure bbox field is not set check_unlocated: true - # Recommend items have a geometry + # best practices - recommend items have a geometry check_geometry: true - # Check to see if there are too many links + # check to see if there are too many links bloated_links: true - # Check for bloated metadata in properties + # best practices - check for bloated metadata in properties bloated_metadata: true - # Ensure thumbnail is a small file 
size ["png", "jpeg", "jpg", "webp"] + # best practices - ensure thumbnail is a small file size ["png", "jpeg", "jpg", "webp"] check_thumbnail: true - # Ensure that links in catalogs and collections include a title field + # best practices - ensure that links in catalogs and collections include a title field links_title: true - # Ensure that links in catalogs and collections include self link + # best practices - ensure that links in catalogs and collections include self link links_self: true + +geometry_validation: + # Master switch to enable/disable all geometry validation checks + enabled: true # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) geometry_coordinates_order: true + # check if geometry coordinates contain definite errors (latitude > ±90°, longitude > ±180°) + geometry_coordinates_definite_errors: true + # check if bbox matches the bounds of the geometry + bbox_geometry_match: true # check if a bbox that crosses the antimeridian is correctly formatted - check_bbox_antimeridian: true + bbox_antimeridian: true settings: - # Number of links before the bloated links warning is shown + # number of links before the bloated links warning is shown max_links: 20 - # Number of properties before the bloated metadata warning is shown + # number of properties before the bloated metadata warning is shown max_properties: 20 ``` @@ -148,6 +158,24 @@ export STAC_CHECK_CONFIG=/path/to/your/config.yml stac-check sample_files/1.0.0/core-item.json ``` +### Geometry Validation + +Geometry validation is a feature of stac-check that allows you to validate the geometry of your STAC items. This feature is enabled by default, but can be disabled by setting `geometry_validation.enabled` to `false` in your configuration file. 
+ +The geometry validation feature checks the following: + +* Whether geometry coordinates may be in the wrong order (expected order: longitude, latitude) +* Whether geometry coordinates contain definite errors (latitude outside the range -90° to 90°, longitude outside the range -180° to 180°) +* Whether the bbox matches the bounds of the geometry +* Whether a bbox that crosses the antimeridian is correctly formatted + +You can customize the geometry validation behavior by setting the following options in your configuration file: + +* `geometry_validation.geometry_coordinates_order`: Check whether geometry coordinates may be in the wrong order (expected order: longitude, latitude) +* `geometry_validation.geometry_coordinates_definite_errors`: Check whether geometry coordinates contain definite errors (latitude outside the range -90° to 90°, longitude outside the range -180° to 180°) +* `geometry_validation.bbox_geometry_match`: Check whether the bbox matches the bounds of the geometry +* `geometry_validation.bbox_antimeridian`: Check whether a bbox that crosses the antimeridian is correctly formatted + ### Python API Usage ```python diff --git a/stac_check/cli.py b/stac_check/cli.py index 8319e84..c8e7f7f 100644 --- a/stac_check/cli.py +++ b/stac_check/cli.py @@ -141,6 +141,15 @@ def cli_message(linter: Linter) -> None: else: click.secho(message, fg="red") + """ geometry validation errors """ + if linter.geometry_errors_msg: + click.secho() + for message in linter.geometry_errors_msg: + if message == linter.geometry_errors_msg[0]: + click.secho(message, bg="yellow", fg="black") + else: + click.secho(message, fg="red") + if linter.validate_all == True: click.secho() click.secho("Recursive validation has passed!", fg="blue") diff --git a/stac_check/lint.py b/stac_check/lint.py index 4d26176..41fa1ba 100644 --- a/stac_check/lint.py +++ b/stac_check/lint.py @@ -48,6 +48,7 @@ class Linter: object_id (str): A string representing the STAC JSON file's ID. file_name (str): A string representing the name of the file containing the STAC JSON data. 
best_practices_msg (str): A string representing best practices messages for the STAC JSON file. + geometry_errors_msg (str): A string representing geometry-related error messages for the STAC JSON file. Methods: parse_config(config_file: Optional[str] = None) -> Dict: @@ -124,6 +125,9 @@ def check_summaries(self) -> bool: create_best_practices_msg(self) -> List[str]: Creates a message with best practices recommendations for the STAC JSON file. + + create_geometry_errors_msg(self) -> List[str]: + Creates a message with geometry-related error messages for the STAC JSON file. """ item: Union[str, Dict] @@ -167,6 +171,7 @@ def __post_init__(self): self.object_id = self.data["id"] if "id" in self.data else "" self.file_name = self.get_asset_name(self.item) self.best_practices_msg = self.create_best_practices_msg() + self.geometry_errors_msg = self.create_geometry_errors_msg() @staticmethod def parse_config(config_file: Optional[str] = None) -> Dict: @@ -633,7 +638,9 @@ def check_catalog_file_name(self) -> bool: else: return True - def check_geometry_coordinates_definite_errors(self) -> bool: + def check_geometry_coordinates_definite_errors( + self, + ) -> Union[bool, Tuple[bool, List]]: """Checks if the coordinates in a geometry contain definite errors. This function checks for coordinates that definitely violate the GeoJSON specification: @@ -645,12 +652,15 @@ def check_geometry_coordinates_definite_errors(self) -> bool: For checking potential errors (likely reversed coordinates), use check_geometry_coordinates_order(). Returns: - bool: True if coordinates are within valid ranges, False if they contain definite errors. 
+ Union[bool, Tuple[bool, List]]: + - If no errors: True + - If errors found: (False, list_of_invalid_coordinates) """ if "geometry" not in self.data or self.data.get("geometry") is None: return True geometry = self.data.get("geometry") + invalid_coords = [] # Function to check a single coordinate pair for definite errors def is_within_valid_ranges(coord): @@ -661,10 +671,12 @@ def is_within_valid_ranges(coord): # Check if latitude (second value) is outside the valid range if abs(lat) > 90: + invalid_coords.append((lon, lat, "latitude > ±90°")) return False # Check if longitude (first value) is outside the valid range if abs(lon) > 180: + invalid_coords.append((lon, lat, "longitude > ±180°")) return False return True @@ -680,7 +692,12 @@ def check_coordinates(coords): return all(check_coordinates(coord) for coord in coords) return True - return check_coordinates(geometry.get("coordinates", [])) + result = check_coordinates(geometry.get("coordinates", [])) + + if result: + return True + else: + return (False, invalid_coords) def check_geometry_coordinates_order(self) -> bool: """Checks if the coordinates in a geometry may be in the incorrect order. @@ -772,34 +789,38 @@ def create_best_practices_dict(self) -> Dict: recommendations for how to fix the violations. 
""" best_practices_dict = {} - config = self.config["linting"] + linting_config = self.config["linting"] + geometry_validation_config = self.config["geometry_validation"] max_links = self.config["settings"]["max_links"] max_properties = self.config["settings"]["max_properties"] # best practices - item ids should only contain searchable identifiers if ( self.check_searchable_identifiers() == False - and config["searchable_identifiers"] == True + and linting_config["searchable_identifiers"] == True ): msg_1 = f"Item name '{self.object_id}' should only contain Searchable identifiers" msg_2 = "Identifiers should consist of only lowercase characters, numbers, '_', and '-'" best_practices_dict["searchable_identifiers"] = [msg_1, msg_2] # best practices - item ids should not contain ':' or '/' characters - if self.check_percent_encoded() and config["percent_encoded"] == True: + if self.check_percent_encoded() and linting_config["percent_encoded"] == True: msg_1 = f"Item name '{self.object_id}' should not contain ':' or '/'" msg_2 = "https://github.com/radiantearth/stac-spec/blob/master/best-practices.md#item-ids" best_practices_dict["percent_encoded"] = [msg_1, msg_2] # best practices - item ids should match file names - if not self.check_item_id_file_name() and config["item_id_file_name"] == True: + if ( + not self.check_item_id_file_name() + and linting_config["item_id_file_name"] == True + ): msg_1 = f"Item file names should match their ids: '{self.file_name}' not equal to '{self.object_id}" best_practices_dict["check_item_id"] = [msg_1] # best practices - collection and catalog file names should be collection.json and catalog.json if ( self.check_catalog_file_name() == False - and config["catalog_id_file_name"] == True + and linting_config["catalog_id_file_name"] == True ): msg_1 = f"Object should be called '{self.asset_type.lower()}.json' not '{self.file_name}.json'" best_practices_dict["check_catalog_id"] = [msg_1] @@ -808,24 +829,24 @@ def 
create_best_practices_dict(self) -> Dict: if ( self.asset_type == "COLLECTION" and self.check_summaries() == False - and config["check_summaries"] == True + and linting_config["check_summaries"] == True ): msg_1 = "A STAC collection should contain a summaries field" msg_2 = "It is recommended to store information like eo:bands in summaries" best_practices_dict["check_summaries"] = [msg_1, msg_2] # best practices - datetime fields should not be set to null - if self.check_datetime_null() and config["null_datetime"] == True: + if self.check_datetime_null() and linting_config["null_datetime"] == True: msg_1 = "Please avoid setting the datetime field to null, many clients search on this field" best_practices_dict["datetime_null"] = [msg_1] # best practices - check unlocated items to make sure bbox field is not set - if self.check_unlocated() and config["check_unlocated"] == True: + if self.check_unlocated() and linting_config["check_unlocated"] == True: msg_1 = "Unlocated item. Please avoid setting the bbox field when geometry is set to null" best_practices_dict["check_unlocated"] = [msg_1] # best practices - recommend items have a geometry - if self.check_geometry_null() and config["check_geometry"] == True: + if self.check_geometry_null() and linting_config["check_geometry"] == True: msg_1 = "All items should have a geometry field. 
STAC is not meant for non-spatial data" best_practices_dict["null_geometry"] = [msg_1] @@ -838,7 +859,11 @@ def create_best_practices_dict(self) -> Dict: else: bbox_mismatch = not bbox_check_result - if bbox_mismatch and config.get("check_bbox_geometry_match", True) == True: + if ( + bbox_mismatch + and geometry_validation_config.get("check_bbox_geometry_match", True) + == True + ): if isinstance(bbox_check_result, tuple): # Unpack the result _, calc_bbox, actual_bbox, differences = bbox_check_result @@ -888,7 +913,7 @@ def create_best_practices_dict(self) -> Dict: # check to see if there are too many links if ( self.check_bloated_links(max_links=max_links) - and config["bloated_links"] == True + and linting_config["bloated_links"] == True ): msg_1 = f"You have {len(self.data['links'])} links. Please consider using sub-collections or sub-catalogs" best_practices_dict["bloated_links"] = [msg_1] @@ -896,7 +921,7 @@ def create_best_practices_dict(self) -> Dict: # best practices - check for bloated metadata in properties if ( self.check_bloated_metadata(max_properties=max_properties) - and config["bloated_metadata"] == True + and linting_config["bloated_metadata"] == True ): msg_1 = f"You have {len(self.data['properties'])} properties. Please consider using links to avoid bloated metadata" best_practices_dict["bloated_metadata"] = [msg_1] @@ -905,41 +930,68 @@ def create_best_practices_dict(self) -> Dict: if ( not self.check_thumbnail() and self.asset_type == "ITEM" - and config["check_thumbnail"] == True + and linting_config["check_thumbnail"] == True ): msg_1 = "A thumbnail should have a small file size ie. 
png, jpeg, jpg, webp" best_practices_dict["check_thumbnail"] = [msg_1] # best practices - ensure that links in catalogs and collections include a title field - if not self.check_links_title_field() and config["links_title"] == True: + if not self.check_links_title_field() and linting_config["links_title"] == True: msg_1 = ( "Links in catalogs and collections should always have a 'title' field" ) best_practices_dict["check_links_title"] = [msg_1] # best practices - ensure that links in catalogs and collections include self link - if not self.check_links_self() and config["links_self"] == True: + if not self.check_links_self() and linting_config["links_self"] == True: msg_1 = "A link to 'self' in links is strongly recommended" best_practices_dict["check_links_self"] = [msg_1] # best practices - ensure that geometry coordinates are in the correct order if ( not self.check_geometry_coordinates_order() - and config["geometry_coordinates_order"] == True + and geometry_validation_config["geometry_coordinates_order"] == True ): msg_1 = "Geometry coordinates may be in the wrong order (required order: longitude, latitude)" best_practices_dict["geometry_coordinates_order"] = [msg_1] # best practices - check if geometry coordinates contain definite errors + definite_errors_result = self.check_geometry_coordinates_definite_errors() + + # Check if we have a separate config entry for definite errors, otherwise use the same as order check + config_key = "geometry_coordinates_definite_errors" + if config_key not in geometry_validation_config: + config_key = "geometry_coordinates_order" + if ( - not self.check_geometry_coordinates_definite_errors() - and config["geometry_coordinates_order"] == True + isinstance(definite_errors_result, tuple) + and not definite_errors_result[0] + and geometry_validation_config[config_key] ): + # We have definite errors with invalid coordinates + _, invalid_coords = definite_errors_result + + # Base message + msg_1 = "Geometry coordinates contain 
invalid values that violate the GeoJSON specification (latitude must be between -90 and 90, longitude between -180 and 180)" + + # Add details about invalid coordinates (limit to first 5 to avoid excessive output) + messages = [msg_1] + for i, (lon, lat, reason) in enumerate(invalid_coords[:5]): + messages.append(f"Invalid coordinate: [{lon}, {lat}] - {reason}") + + if len(invalid_coords) > 5: + messages.append( + f"...and {len(invalid_coords) - 5} more invalid coordinates" + ) + + best_practices_dict["geometry_coordinates_definite_errors"] = messages + elif definite_errors_result is False and geometry_validation_config[config_key]: + # Simple case (backward compatibility) msg_1 = "Geometry coordinates contain invalid values that violate the GeoJSON specification (latitude must be between -90 and 90, longitude between -180 and 180)" best_practices_dict["geometry_coordinates_definite_errors"] = [msg_1] # Check if a bbox that crosses the antimeridian is correctly formatted - if not self.check_bbox_antimeridian() and config.get( + if not self.check_bbox_antimeridian() and geometry_validation_config.get( "check_bbox_antimeridian", True ): # Get the bbox values to include in the error message @@ -972,9 +1024,65 @@ def create_best_practices_msg(self) -> List[str]: base_string = "STAC Best Practices: " best_practices.append(base_string) - for _, v in self.create_best_practices_dict().items(): + best_practices_dict = self.create_best_practices_dict() + + # Filter out geometry-related errors as they will be displayed separately + geometry_keys = [ + "geometry_coordinates_order", + "geometry_coordinates_definite_errors", + "check_bbox_antimeridian", + "check_bbox_geometry_match", + ] + filtered_dict = { + k: v for k, v in best_practices_dict.items() if k not in geometry_keys + } + + for _, v in filtered_dict.items(): for value in v: best_practices.extend([" " + value]) best_practices.extend([""]) return best_practices + + def create_geometry_errors_msg(self) -> List[str]: + 
""" + Generates a list of geometry-related error messages based on the results of the 'create_best_practices_dict' method. + + This separates geometry coordinate validation errors from other best practices for clearer presentation. + + Returns: + A list of strings, where each string contains a geometry error message. Each message starts with the + 'Geometry Validation Errors [BETA]:' base string and is followed by specific details. Each message is indented + with four spaces, and there is an empty string between each message for readability. + """ + # Check if geometry validation is enabled + geometry_config = self.config.get("geometry_validation", {}) + if not geometry_config.get("enabled", True): + return [] # Geometry validation is disabled + + geometry_errors = list() + base_string = "Geometry Validation Errors [BETA]: " + geometry_errors.append(base_string) + + best_practices_dict = self.create_best_practices_dict() + + # Extract only geometry-related errors + geometry_keys = [ + "geometry_coordinates_order", + "geometry_coordinates_definite_errors", + "check_bbox_antimeridian", + "check_bbox_geometry_match", + ] + geometry_dict = { + k: v for k, v in best_practices_dict.items() if k in geometry_keys + } + + if not geometry_dict: + return [] # No geometry errors found + + for _, v in geometry_dict.items(): + for value in v: + geometry_errors.extend([" " + value]) + geometry_errors.extend([""]) + + return geometry_errors diff --git a/stac_check/stac-check.config.yml b/stac_check/stac-check.config.yml index 3af4a36..522b429 100644 --- a/stac_check/stac-check.config.yml +++ b/stac_check/stac-check.config.yml @@ -15,8 +15,6 @@ linting: check_unlocated: true # best practices - recommend items have a geometry check_geometry: true - # best practices - check if bbox matches the bounds of the geometry - check_bbox_geometry_match: true # check to see if there are too many links bloated_links: true # best practices - check for bloated metadata in properties @@ -27,10 
+25,19 @@ linting: links_title: true # best practices - ensure that links in catalogs and collections include self link links_self: true + +# Geometry validation settings [BETA] +geometry_validation: + # Master switch to enable/disable all geometry validation checks + enabled: true # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) geometry_coordinates_order: true + # check if geometry coordinates contain definite errors (latitude > ±90°, longitude > ±180°) + geometry_coordinates_definite_errors: true + # check if bbox matches the bounds of the geometry + bbox_geometry_match: true # check if a bbox that crosses the antimeridian is correctly formatted - check_bbox_antimeridian: true + bbox_antimeridian: true settings: # number of links before the bloated links warning is shown diff --git a/tests/test.config.yml b/tests/test.config.yml index 0e31d65..522b429 100644 --- a/tests/test.config.yml +++ b/tests/test.config.yml @@ -1,6 +1,6 @@ linting: # Identifiers should consist of only lowercase characters, numbers, '_', and '-' - searchable_identifiers: false + searchable_identifiers: true # Item name '{self.object_id}' should not contain ':' or '/' percent_encoded: true # Item file names should match their ids @@ -15,8 +15,6 @@ linting: check_unlocated: true # best practices - recommend items have a geometry check_geometry: true - # best practices - check if bbox matches the bounds of the geometry - check_bbox_geometry_match: true # check to see if there are too many links bloated_links: true # best practices - check for bloated metadata in properties @@ -27,13 +25,22 @@ linting: links_title: true # best practices - ensure that links in catalogs and collections include self link links_self: true + +# Geometry validation settings [BETA] +geometry_validation: + # Master switch to enable/disable all geometry validation checks + enabled: true # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) 
geometry_coordinates_order: true + # check if geometry coordinates contain definite errors (latitude > ±90°, longitude > ±180°) + geometry_coordinates_definite_errors: true + # check if bbox matches the bounds of the geometry + bbox_geometry_match: true # check if a bbox that crosses the antimeridian is correctly formatted - check_bbox_antimeridian: true + bbox_antimeridian: true settings: # number of links before the bloated links warning is shown - max_links: 200 + max_links: 20 # number of properties before the bloated metadata warning is shown max_properties: 20 \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py index 75906eb..9dccc4c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -15,8 +15,9 @@ def test_linter_config_file(): # Load config file linter = Linter(file, config_file="tests/test.config.yml") - assert linter.config["linting"]["searchable_identifiers"] == False - assert "searchable_identifiers" not in linter.create_best_practices_dict() + assert linter.config["linting"]["searchable_identifiers"] == True + # Since searchable_identifiers is True, the error should be in the best practices dict + assert "searchable_identifiers" in linter.create_best_practices_dict() def test_linter_max_links(): @@ -28,4 +29,6 @@ def test_linter_max_links(): # Load config file linter = Linter(file, config_file="tests/test.config.yml") - assert "bloated_links" not in linter.create_best_practices_dict() + # Since bloated_links is True in the config and the file has more links than max_links, + # bloated_links should be in the best practices dict + assert "bloated_links" in linter.create_best_practices_dict() diff --git a/tests/test_lint.py b/tests/test_lint.py index b96ae92..dbb66da 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -774,9 +774,15 @@ def test_geometry_coordinates_order(): assert ( linter.check_geometry_coordinates_order() == True ) # Now passes because it only checks heuristic - assert ( - 
linter.check_geometry_coordinates_definite_errors() == False - ) # Fails because latitude > 90 + + # Check that definite errors are detected + result = linter.check_geometry_coordinates_definite_errors() + assert result is not True # Should not be True + assert isinstance(result, tuple) # Should be a tuple + assert result[0] is False # First element should be False + assert len(result[1]) > 0 # Should have at least one invalid coordinate + assert result[1][0][1] == 100.0 # The latitude value should be 100.0 + assert "latitude > ±90°" in result[1][0][2] # Should indicate latitude error # Test with coordinates that trigger the heuristic # This should fail the order check but pass the definite errors check