diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d1d8cf..318a4f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) ## Unreleased +## [v1.7.0] - 2025-06-01 + ### Added - Added validation for bounding boxes that cross the antimeridian (180°/-180° longitude) ([#121](https://github.com/stac-utils/stac-check/pull/121)) @@ -15,6 +17,20 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) - Added sponsors and supporters section with logos ([#122](https://github.com/stac-utils/stac-check/pull/122)) - Added check to verify that bbox matches item's polygon geometry ([#123](https://github.com/stac-utils/stac-check/pull/123)) - Added configuration documentation to README ([#124](https://github.com/stac-utils/stac-check/pull/124)) +- Added validation for geometry coordinates order to detect potentially reversed lat/lon coordinates ([#125](https://github.com/stac-utils/stac-check/pull/125)) + - Checks that coordinates follow the GeoJSON specification with [longitude, latitude] order + - Uses heuristics to identify coordinates that may be reversed or contain errors + - Provides nuanced error messages acknowledging the uncertainty in coordinate validation +- Added validation for definite geometry coordinate errors ([#125](https://github.com/stac-utils/stac-check/pull/125)) + - Detects coordinates with latitude values exceeding ±90 degrees + - Detects coordinates with longitude values exceeding ±180 degrees + - Returns detailed information about invalid coordinates +- Added dedicated geometry validation configuration section ([#125](https://github.com/stac-utils/stac-check/pull/125)) + - Created a new `geometry_validation` section in the configuration file + - Added a master enable/disable switch for all geometry validation checks + - Reorganized geometry validation options into the new section + - Separated geometry validation errors in CLI output with a [BETA] 
label + - Added detailed documentation for geometry validation features - Added `--pydantic` option for validating STAC objects using stac-pydantic models, providing enhanced type checking and validation ([#126](https://github.com/stac-utils/stac-check/pull/126)) ### Enhanced diff --git a/README.md b/README.md index 3c525fb..4dcea5d 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ The intent of this project is to provide a validation tool that also follows the - [Usage](#usage) - [CLI Usage](#cli-usage) - [Configuration](#configuration) + - [Geometry Validation](#geometry-validation) - [Python API Usage](#python-api-usage) - [Examples](#examples) - [Basic Validation](#basic-validation) @@ -94,10 +95,11 @@ Options: stac-check uses a configuration file to control which validation checks are performed. By default, it uses the built-in configuration at `stac_check/stac-check.config.yml`. You can customize the validation behavior by creating your own configuration file. -The configuration file has two main sections: +The configuration file has three main sections: -1. **linting**: Controls which best practices checks are enabled -2. **settings**: Configures thresholds for certain checks +1. **linting**: Controls which general best practices checks are enabled +2. **geometry_validation**: Controls geometry-specific validation checks [BETA] +3. 
**settings**: Configures thresholds for certain checks Here's an example of the configuration options: @@ -105,7 +107,7 @@ Here's an example of the configuration options: linting: # Identifiers should consist of only lowercase characters, numbers, '_', and '-' searchable_identifiers: true - # Item name should not contain ':' or '/' + # Item name '{self.object_id}' should not contain ':' or '/' percent_encoded: true # Item file names should match their ids item_id_file_name: true @@ -115,27 +117,37 @@ linting: check_summaries: true # Datetime fields should not be set to null null_datetime: true - # Check unlocated items to make sure bbox field is not set + # best practices - check unlocated items to make sure bbox field is not set check_unlocated: true - # Check if bbox matches the bounds of the geometry - check_bbox_geometry_match: true - # Check to see if there are too many links + # best practices - recommend items have a geometry + check_geometry: true + # check to see if there are too many links bloated_links: true - # Check for bloated metadata in properties + # best practices - check for bloated metadata in properties bloated_metadata: true - # Ensure thumbnail is a small file size ["png", "jpeg", "jpg", "webp"] + # best practices - ensure thumbnail is a small file size ["png", "jpeg", "jpg", "webp"] check_thumbnail: true - # Ensure that links in catalogs and collections include a title field + # best practices - ensure that links in catalogs and collections include a title field links_title: true - # Ensure that links in catalogs and collections include self link + # best practices - ensure that links in catalogs and collections include self link links_self: true + +geometry_validation: + # Master switch to enable/disable all geometry validation checks + enabled: true + # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) + geometry_coordinates_order: true + # check if geometry coordinates contain definite errors 
(latitude > ±90°, longitude > ±180°) + geometry_coordinates_definite_errors: true + # check if bbox matches the bounds of the geometry + bbox_geometry_match: true # check if a bbox that crosses the antimeridian is correctly formatted - check_bbox_antimeridian: true + bbox_antimeridian: true settings: - # Number of links before the bloated links warning is shown + # number of links before the bloated links warning is shown max_links: 20 - # Number of properties before the bloated metadata warning is shown + # number of properties before the bloated metadata warning is shown max_properties: 20 ``` @@ -146,6 +158,24 @@ export STAC_CHECK_CONFIG=/path/to/your/config.yml stac-check sample_files/1.0.0/core-item.json ``` +### Geometry Validation + +Geometry validation is a feature of stac-check that checks the geometry and bbox of your STAC items for likely mistakes. This feature is enabled by default, but can be disabled by setting `geometry_validation.enabled` to `false` in your configuration file. + +The geometry validation feature reports the following problems: + +* Geometry coordinates that may be in the wrong order (the required order is longitude, latitude) +* Geometry coordinates that contain definite errors (latitude outside ±90°, longitude outside ±180°) +* A bbox that does not match the bounds of the geometry +* A bbox that crosses the antimeridian but is not correctly formatted + +You can enable or disable each check individually by setting the following options in your configuration file: + +* `geometry_validation.geometry_coordinates_order`: Check if geometry coordinates may be in the wrong order (required order: longitude, latitude) +* `geometry_validation.geometry_coordinates_definite_errors`: Check if geometry coordinates contain definite errors (latitude outside ±90°, longitude outside ±180°) +* `geometry_validation.bbox_geometry_match`: Check if bbox matches the bounds of the geometry +* `geometry_validation.bbox_antimeridian`: Check if a bbox that crosses the antimeridian is correctly formatted + ### Python API Usage ```python diff --git
a/stac_check/cli.py b/stac_check/cli.py index 8319e84..c8e7f7f 100644 --- a/stac_check/cli.py +++ b/stac_check/cli.py @@ -141,6 +141,15 @@ def cli_message(linter: Linter) -> None: else: click.secho(message, fg="red") + """ geometry validation errors """ + if linter.geometry_errors_msg: + click.secho() + for message in linter.geometry_errors_msg: + if message == linter.geometry_errors_msg[0]: + click.secho(message, bg="yellow", fg="black") + else: + click.secho(message, fg="red") + if linter.validate_all == True: click.secho() click.secho("Recursive validation has passed!", fg="blue") diff --git a/stac_check/lint.py b/stac_check/lint.py index 7fbc564..41fa1ba 100644 --- a/stac_check/lint.py +++ b/stac_check/lint.py @@ -48,6 +48,7 @@ class Linter: object_id (str): A string representing the STAC JSON file's ID. file_name (str): A string representing the name of the file containing the STAC JSON data. best_practices_msg (str): A string representing best practices messages for the STAC JSON file. + geometry_errors_msg (List[str]): A list of strings containing geometry-related error messages for the STAC JSON file. Methods: parse_config(config_file: Optional[str] = None) -> Dict: @@ -124,6 +125,9 @@ def check_summaries(self) -> bool: create_best_practices_msg(self) -> List[str]: Creates a message with best practices recommendations for the STAC JSON file. + + create_geometry_errors_msg(self) -> List[str]: + Creates a message with geometry-related error messages for the STAC JSON file. 
""" item: Union[str, Dict] @@ -167,6 +171,7 @@ def __post_init__(self): self.object_id = self.data["id"] if "id" in self.data else "" self.file_name = self.get_asset_name(self.item) self.best_practices_msg = self.create_best_practices_msg() + self.geometry_errors_msg = self.create_geometry_errors_msg() @staticmethod def parse_config(config_file: Optional[str] = None) -> Dict: @@ -633,6 +638,147 @@ def check_catalog_file_name(self) -> bool: else: return True + def check_geometry_coordinates_definite_errors( + self, + ) -> Union[bool, Tuple[bool, List]]: + """Checks if the coordinates in a geometry contain definite errors. + + This function checks for coordinates that definitely violate the GeoJSON specification: + + 1. Latitude values (second element) exceed ±90 degrees + 2. Longitude values (first element) exceed ±180 degrees + + This check focuses on definite errors rather than potential/likely errors. + For checking potential errors (likely reversed coordinates), use check_geometry_coordinates_order(). 
+ + Returns: + Union[bool, Tuple[bool, List]]: + - If no errors: True + - If errors found: (False, list_of_invalid_coordinates) + """ + if "geometry" not in self.data or self.data.get("geometry") is None: + return True + + geometry = self.data.get("geometry") + invalid_coords = [] + + # Function to check a single coordinate pair for definite errors + def is_within_valid_ranges(coord): + if len(coord) < 2: + return True # Not enough elements to check + + lon, lat = coord[0], coord[1] + + # Check if latitude (second value) is outside the valid range + if abs(lat) > 90: + invalid_coords.append((lon, lat, "latitude > ±90°")) + return False + + # Check if longitude (first value) is outside the valid range + if abs(lon) > 180: + invalid_coords.append((lon, lat, "longitude > ±180°")) + return False + + return True + + # Function to recursively check all coordinates in a geometry + def check_coordinates(coords): + if isinstance(coords, list): + if coords and isinstance(coords[0], (int, float)): + # This is a single coordinate + return is_within_valid_ranges(coords) + else: + # Nested list: build a real list so all() cannot short-circuit and every invalid coordinate is recorded + return all([check_coordinates(coord) for coord in coords]) + return True + + result = check_coordinates(geometry.get("coordinates", [])) + + if result: + return True + else: + return (False, invalid_coords) + + def check_geometry_coordinates_order(self) -> bool: + """Checks if the coordinates in a geometry may be in the incorrect order. + + This function uses a heuristic to detect coordinates that are likely in the wrong order + (latitude, longitude instead of longitude, latitude). It looks for cases where: + - The first value (supposed to be longitude) is > 90 degrees + - The second value (supposed to be latitude) is < 90 degrees + - The first value is more than twice the second value + + For checking definite errors (values outside valid ranges), use check_geometry_coordinates_definite_errors(). 
+ + Returns: + bool: True if coordinates appear to be in the correct order, False if they may be reversed. + """ + if "geometry" not in self.data or self.data.get("geometry") is None: + return True + + geometry = self.data.get("geometry") + + # Function to check if a single coordinate pair is likely in the correct order + def is_likely_correct_order(coord): + if len(coord) < 2: + return True # Not enough elements to check + + lon, lat = coord[0], coord[1] + + # Heuristic: if |first value| > 90, |second value| < 90 and |first value| > 2 * |second value|, + # flag the pair as possibly reversed (NOTE(review): valid [lon, lat] pairs such as [120, 40] match too) + if abs(lon) > 90 and abs(lat) < 90 and abs(lon) > abs(lat) * 2: + return False + + return True + + # Function to recursively check all coordinates in a geometry + def check_coordinates(coords): + if isinstance(coords, list): + if coords and isinstance(coords[0], (int, float)): + # This is a single coordinate + return is_likely_correct_order(coords) + else: + # This is a list of coordinates or a list of lists of coordinates + return all(check_coordinates(coord) for coord in coords) + return True + + return check_coordinates(geometry.get("coordinates", [])) + + def check_bbox_antimeridian(self) -> bool: + """ + Checks if a bbox that crosses the antimeridian is correctly formatted. + + According to the GeoJSON spec, when a bbox crosses the antimeridian (180°/-180° longitude), + the minimum longitude (bbox[0]) should be greater than the maximum longitude (bbox[2]). + This method checks if this convention is followed correctly. + + Returns: + bool: True if the bbox is valid (either doesn't cross antimeridian or crosses it correctly), + False if it incorrectly crosses the antimeridian. 
+ """ + if self.data.get("bbox") is None: + return True + + bbox = self.data.get("bbox") + + # Extract the 2D part of the bbox (ignoring elevation if present) + if len(bbox) == 4: # 2D bbox [west, south, east, north] + west, _, east, _ = bbox + elif len(bbox) == 6: # 3D bbox [west, south, min_elev, east, north, max_elev] + west, _, _, east, _, _ = bbox + else: # Invalid bbox format (neither 4 nor 6 values), can't check + return True + # Check if the bbox appears to cross the antimeridian + # This is the case when west > east in a valid bbox that crosses the antimeridian + # For example: [170, -10, -170, 10] crosses the antimeridian correctly + # But [-170, -10, 170, 10] is incorrectly belting the globe + # Invalid if bbox "belts the globe" (too wide) + if west < east and (east - west) > 180: + return False + # Otherwise, valid (normal or valid antimeridian crossing) + return True + def create_best_practices_dict(self) -> Dict: """Creates a dictionary of best practices violations for the current STAC object. The violations are determined by a set of configurable linting rules specified in the config file. @@ -643,34 +789,38 @@ def create_best_practices_dict(self) -> Dict: recommendations for how to fix the violations. 
""" best_practices_dict = {} - config = self.config["linting"] + linting_config = self.config["linting"] + geometry_validation_config = self.config["geometry_validation"] max_links = self.config["settings"]["max_links"] max_properties = self.config["settings"]["max_properties"] # best practices - item ids should only contain searchable identifiers if ( self.check_searchable_identifiers() == False - and config["searchable_identifiers"] == True + and linting_config["searchable_identifiers"] == True ): msg_1 = f"Item name '{self.object_id}' should only contain Searchable identifiers" msg_2 = "Identifiers should consist of only lowercase characters, numbers, '_', and '-'" best_practices_dict["searchable_identifiers"] = [msg_1, msg_2] # best practices - item ids should not contain ':' or '/' characters - if self.check_percent_encoded() and config["percent_encoded"] == True: + if self.check_percent_encoded() and linting_config["percent_encoded"] == True: msg_1 = f"Item name '{self.object_id}' should not contain ':' or '/'" msg_2 = "https://github.com/radiantearth/stac-spec/blob/master/best-practices.md#item-ids" best_practices_dict["percent_encoded"] = [msg_1, msg_2] # best practices - item ids should match file names - if not self.check_item_id_file_name() and config["item_id_file_name"] == True: + if ( + not self.check_item_id_file_name() + and linting_config["item_id_file_name"] == True + ): msg_1 = f"Item file names should match their ids: '{self.file_name}' not equal to '{self.object_id}" best_practices_dict["check_item_id"] = [msg_1] # best practices - collection and catalog file names should be collection.json and catalog.json if ( self.check_catalog_file_name() == False - and config["catalog_id_file_name"] == True + and linting_config["catalog_id_file_name"] == True ): msg_1 = f"Object should be called '{self.asset_type.lower()}.json' not '{self.file_name}.json'" best_practices_dict["check_catalog_id"] = [msg_1] @@ -679,24 +829,24 @@ def 
create_best_practices_dict(self) -> Dict: if ( self.asset_type == "COLLECTION" and self.check_summaries() == False - and config["check_summaries"] == True + and linting_config["check_summaries"] == True ): msg_1 = "A STAC collection should contain a summaries field" msg_2 = "It is recommended to store information like eo:bands in summaries" best_practices_dict["check_summaries"] = [msg_1, msg_2] # best practices - datetime fields should not be set to null - if self.check_datetime_null() and config["null_datetime"] == True: + if self.check_datetime_null() and linting_config["null_datetime"] == True: msg_1 = "Please avoid setting the datetime field to null, many clients search on this field" best_practices_dict["datetime_null"] = [msg_1] # best practices - check unlocated items to make sure bbox field is not set - if self.check_unlocated() and config["check_unlocated"] == True: + if self.check_unlocated() and linting_config["check_unlocated"] == True: msg_1 = "Unlocated item. Please avoid setting the bbox field when geometry is set to null" best_practices_dict["check_unlocated"] = [msg_1] # best practices - recommend items have a geometry - if self.check_geometry_null() and config["check_geometry"] == True: + if self.check_geometry_null() and linting_config["check_geometry"] == True: msg_1 = "All items should have a geometry field. 
STAC is not meant for non-spatial data" best_practices_dict["null_geometry"] = [msg_1] @@ -709,7 +859,11 @@ def create_best_practices_dict(self) -> Dict: else: bbox_mismatch = not bbox_check_result - if bbox_mismatch and config.get("check_bbox_geometry_match", True) == True: + if ( + bbox_mismatch + and geometry_validation_config.get("bbox_geometry_match", True) + == True + ): if isinstance(bbox_check_result, tuple): # Unpack the result _, calc_bbox, actual_bbox, differences = bbox_check_result @@ -759,7 +913,7 @@ def create_best_practices_dict(self) -> Dict: # check to see if there are too many links if ( self.check_bloated_links(max_links=max_links) - and config["bloated_links"] == True + and linting_config["bloated_links"] == True ): msg_1 = f"You have {len(self.data['links'])} links. Please consider using sub-collections or sub-catalogs" best_practices_dict["bloated_links"] = [msg_1] @@ -767,7 +921,7 @@ def create_best_practices_dict(self) -> Dict: # best practices - check for bloated metadata in properties if ( self.check_bloated_metadata(max_properties=max_properties) - and config["bloated_metadata"] == True + and linting_config["bloated_metadata"] == True ): msg_1 = f"You have {len(self.data['properties'])} properties. Please consider using links to avoid bloated metadata" best_practices_dict["bloated_metadata"] = [msg_1] @@ -776,25 +930,68 @@ def create_best_practices_dict(self) -> Dict: if ( not self.check_thumbnail() and self.asset_type == "ITEM" - and config["check_thumbnail"] == True + and linting_config["check_thumbnail"] == True ): msg_1 = "A thumbnail should have a small file size ie. 
png, jpeg, jpg, webp" best_practices_dict["check_thumbnail"] = [msg_1] # best practices - ensure that links in catalogs and collections include a title field - if not self.check_links_title_field() and config["links_title"] == True: + if not self.check_links_title_field() and linting_config["links_title"] == True: msg_1 = ( "Links in catalogs and collections should always have a 'title' field" ) best_practices_dict["check_links_title"] = [msg_1] # best practices - ensure that links in catalogs and collections include self link - if not self.check_links_self() and config["links_self"] == True: + if not self.check_links_self() and linting_config["links_self"] == True: msg_1 = "A link to 'self' in links is strongly recommended" best_practices_dict["check_links_self"] = [msg_1] + # best practices - ensure that geometry coordinates are in the correct order + if ( + not self.check_geometry_coordinates_order() + and geometry_validation_config["geometry_coordinates_order"] == True + ): + msg_1 = "Geometry coordinates may be in the wrong order (required order: longitude, latitude)" + best_practices_dict["geometry_coordinates_order"] = [msg_1] + + # best practices - check if geometry coordinates contain definite errors + definite_errors_result = self.check_geometry_coordinates_definite_errors() + + # Check if we have a separate config entry for definite errors, otherwise use the same as order check + config_key = "geometry_coordinates_definite_errors" + if config_key not in geometry_validation_config: + config_key = "geometry_coordinates_order" + + if ( + isinstance(definite_errors_result, tuple) + and not definite_errors_result[0] + and geometry_validation_config[config_key] + ): + # We have definite errors with invalid coordinates + _, invalid_coords = definite_errors_result + + # Base message + msg_1 = "Geometry coordinates contain invalid values that violate the GeoJSON specification (latitude must be between -90 and 90, longitude between -180 and 180)" + + # Add details 
about invalid coordinates (limit to first 5 to avoid excessive output) + messages = [msg_1] + for lon, lat, reason in invalid_coords[:5]: + messages.append(f"Invalid coordinate: [{lon}, {lat}] - {reason}") + + if len(invalid_coords) > 5: + messages.append( + f"...and {len(invalid_coords) - 5} more invalid coordinates" + ) + + best_practices_dict["geometry_coordinates_definite_errors"] = messages + elif definite_errors_result is False and geometry_validation_config[config_key]: + # Simple case (backward compatibility) + msg_1 = "Geometry coordinates contain invalid values that violate the GeoJSON specification (latitude must be between -90 and 90, longitude between -180 and 180)" + best_practices_dict["geometry_coordinates_definite_errors"] = [msg_1] + # Check if a bbox that crosses the antimeridian is correctly formatted - if not self.check_bbox_antimeridian() and config.get( - "check_bbox_antimeridian", True + if not self.check_bbox_antimeridian() and geometry_validation_config.get( + "bbox_antimeridian", True ): # Get the bbox values to include in the error message @@ -814,43 +1011,6 @@ def create_best_practices_dict(self) -> Dict: return best_practices_dict - def check_bbox_antimeridian(self) -> bool: - """ - Checks if a bbox that crosses the antimeridian is correctly formatted. - - According to the GeoJSON spec, when a bbox crosses the antimeridian (180°/-180° longitude), - the minimum longitude (bbox[0]) should be greater than the maximum longitude (bbox[2]). - This method checks if this convention is followed correctly. - - Returns: - bool: True if the bbox is valid (either doesn't cross antimeridian or crosses it correctly), - False if it incorrectly crosses the antimeridian. 
- """ - if "bbox" not in self.data: - return True - - bbox = self.data["bbox"] - - # Extract the 2D part of the bbox (ignoring elevation if present) - if len(bbox) == 4: # 2D bbox [west, south, east, north] - west, south, east, north = bbox - elif len(bbox) == 6: # 3D bbox [west, south, min_elev, east, north, max_elev] - west, south, _, east, north, _ = bbox - else: - # Invalid bbox format, can't check - return True - - # Check if the bbox appears to cross the antimeridian - # This is the case when west > east in a valid bbox that crosses the antimeridian - # For example: [170, -10, -170, 10] crosses the antimeridian correctly - # But [-170, -10, 170, 10] is incorrectly belting the globe - - # Invalid if bbox "belts the globe" (too wide) - if west < east and (east - west) > 180: - return False - # Otherwise, valid (normal or valid antimeridian crossing) - return True - def create_best_practices_msg(self) -> List[str]: """ Generates a list of best practices messages based on the results of the 'create_best_practices_dict' method. @@ -864,9 +1024,65 @@ def create_best_practices_msg(self) -> List[str]: base_string = "STAC Best Practices: " best_practices.append(base_string) - for _, v in self.create_best_practices_dict().items(): + best_practices_dict = self.create_best_practices_dict() + + # Filter out geometry-related errors as they will be displayed separately + geometry_keys = [ + "geometry_coordinates_order", + "geometry_coordinates_definite_errors", + "check_bbox_antimeridian", + "check_bbox_geometry_match", + ] + filtered_dict = { + k: v for k, v in best_practices_dict.items() if k not in geometry_keys + } + + for _, v in filtered_dict.items(): for value in v: best_practices.extend([" " + value]) best_practices.extend([""]) return best_practices + + def create_geometry_errors_msg(self) -> List[str]: + """ + Generates a list of geometry-related error messages based on the results of the 'create_best_practices_dict' method. 
+ + This separates geometry coordinate validation errors from other best practices for clearer presentation. + + Returns: + A list of strings, where each string contains a geometry error message. Each message starts with the + 'Geometry Validation Errors [BETA]:' base string and is followed by specific details. Each message is indented + with four spaces, and there is an empty string between each message for readability. + """ + # Check if geometry validation is enabled + geometry_config = self.config.get("geometry_validation", {}) + if not geometry_config.get("enabled", True): + return [] # Geometry validation is disabled + + geometry_errors = list() + base_string = "Geometry Validation Errors [BETA]: " + geometry_errors.append(base_string) + + best_practices_dict = self.create_best_practices_dict() + + # Extract only geometry-related errors + geometry_keys = [ + "geometry_coordinates_order", + "geometry_coordinates_definite_errors", + "check_bbox_antimeridian", + "check_bbox_geometry_match", + ] + geometry_dict = { + k: v for k, v in best_practices_dict.items() if k in geometry_keys + } + + if not geometry_dict: + return [] # No geometry errors found + + for _, v in geometry_dict.items(): + for value in v: + geometry_errors.extend([" " + value]) + geometry_errors.extend([""]) + + return geometry_errors diff --git a/stac_check/stac-check.config.yml b/stac_check/stac-check.config.yml index f90dc5c..522b429 100644 --- a/stac_check/stac-check.config.yml +++ b/stac_check/stac-check.config.yml @@ -15,8 +15,6 @@ linting: check_unlocated: true # best practices - recommend items have a geometry check_geometry: true - # best practices - check if bbox matches the bounds of the geometry - check_bbox_geometry_match: true # check to see if there are too many links bloated_links: true # best practices - check for bloated metadata in properties @@ -27,8 +25,19 @@ linting: links_title: true # best practices - ensure that links in catalogs and collections include self link 
links_self: true + +# Geometry validation settings [BETA] +geometry_validation: + # Master switch to enable/disable all geometry validation checks + enabled: true + # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) + geometry_coordinates_order: true + # check if geometry coordinates contain definite errors (latitude > ±90°, longitude > ±180°) + geometry_coordinates_definite_errors: true + # check if bbox matches the bounds of the geometry + bbox_geometry_match: true # check if a bbox that crosses the antimeridian is correctly formatted - check_bbox_antimeridian: true + bbox_antimeridian: true settings: # number of links before the bloated links warning is shown diff --git a/tests/test.config.yml b/tests/test.config.yml index 8872352..522b429 100644 --- a/tests/test.config.yml +++ b/tests/test.config.yml @@ -1,6 +1,6 @@ linting: # Identifiers should consist of only lowercase characters, numbers, '_', and '-' - searchable_identifiers: false + searchable_identifiers: true # Item name '{self.object_id}' should not contain ':' or '/' percent_encoded: true # Item file names should match their ids @@ -15,8 +15,6 @@ linting: check_unlocated: true # best practices - recommend items have a geometry check_geometry: true - # best practices - check if bbox matches the bounds of the geometry - check_bbox_geometry_match: true # check to see if there are too many links bloated_links: true # best practices - check for bloated metadata in properties @@ -27,11 +25,22 @@ linting: links_title: true # best practices - ensure that links in catalogs and collections include self link links_self: true + +# Geometry validation settings [BETA] +geometry_validation: + # Master switch to enable/disable all geometry validation checks + enabled: true + # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) + geometry_coordinates_order: true + # check if geometry coordinates contain definite errors (latitude > ±90°, 
longitude > ±180°) + geometry_coordinates_definite_errors: true + # check if bbox matches the bounds of the geometry + bbox_geometry_match: true # check if a bbox that crosses the antimeridian is correctly formatted - check_bbox_antimeridian: true + bbox_antimeridian: true settings: # number of links before the bloated links warning is shown - max_links: 200 + max_links: 20 # number of properties before the bloated metadata warning is shown max_properties: 20 \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py index 75906eb..9dccc4c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -15,8 +15,9 @@ def test_linter_config_file(): # Load config file linter = Linter(file, config_file="tests/test.config.yml") - assert linter.config["linting"]["searchable_identifiers"] == False - assert "searchable_identifiers" not in linter.create_best_practices_dict() + assert linter.config["linting"]["searchable_identifiers"] == True + # Since searchable_identifiers is True, the error should be in the best practices dict + assert "searchable_identifiers" in linter.create_best_practices_dict() def test_linter_max_links(): @@ -28,4 +29,6 @@ def test_linter_max_links(): # Load config file linter = Linter(file, config_file="tests/test.config.yml") - assert "bloated_links" not in linter.create_best_practices_dict() + # Since bloated_links is True in the config and the file has more links than max_links, + # bloated_links should be in the best practices dict + assert "bloated_links" in linter.create_best_practices_dict() diff --git a/tests/test_lint.py b/tests/test_lint.py index a56f1f7..dbb66da 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -661,6 +661,160 @@ def test_lint_assets_no_links(): } +def test_geometry_coordinates_order(): + """Test the check_geometry_coordinates_order method for detecting potentially incorrectly ordered coordinates.""" + # Create a test item with coordinates in the correct order (longitude, latitude) + 
correct_item = { + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "test-coordinates-correct", + "bbox": [10.0, -10.0, 20.0, 10.0], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [10.0, -10.0], # lon, lat + [20.0, -10.0], + [20.0, 10.0], + [10.0, 10.0], + [10.0, -10.0], + ] + ], + }, + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + } + + # Create a test item with coordinates in the wrong order (latitude, longitude) + # but with values that don't trigger the validation checks + undetectable_reversed_item = { + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "test-coordinates-undetectable-reversed", + "bbox": [10.0, -10.0, 20.0, 10.0], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-10.0, 10.0], # lat, lon (reversed) but within valid ranges + [-10.0, 20.0], + [10.0, 20.0], + [10.0, 10.0], + [-10.0, 10.0], + ] + ], + }, + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + } + + # Create a test item with coordinates that are clearly reversed (latitude > 90) + clearly_incorrect_item = { + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "test-coordinates-clearly-incorrect", + "bbox": [10.0, -10.0, 20.0, 10.0], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [10.0, 100.0], # Second value (latitude) > 90 + [20.0, 100.0], + [20.0, 100.0], + [10.0, 100.0], + [10.0, 100.0], + ] + ], + }, + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + } + + # Create a test item with coordinates that may be reversed based on heuristic + # (first value > 90, second value < 90, first value > second value*2) + heuristic_incorrect_item = { + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "test-coordinates-heuristic-incorrect", + "bbox": [10.0, -10.0, 20.0, 10.0], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [120.0, 40.0], # First value > 90, second < 90, first > second*2 + [120.0, 40.0], + 
[120.0, 40.0], + [120.0, 40.0], + [120.0, 40.0], + ] + ], + }, + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + } + + # Test with correct coordinates - this should pass both checks + linter = Linter(correct_item) + assert linter.check_geometry_coordinates_order() == True + assert linter.check_geometry_coordinates_definite_errors() == True + + # Test with reversed coordinates that are within valid ranges + # Current implementation can't detect this case, so both checks pass + linter = Linter(undetectable_reversed_item) + assert ( + linter.check_geometry_coordinates_order() == True + ) # Passes because values are within valid ranges + assert ( + linter.check_geometry_coordinates_definite_errors() == True + ) # Passes because values are within valid ranges + + # Test with clearly incorrect coordinates (latitude > 90) + # This should fail the definite errors check but pass the order check (which now only uses heuristic) + linter = Linter(clearly_incorrect_item) + assert ( + linter.check_geometry_coordinates_order() == True + ) # Now passes because it only checks heuristic + + # Check that definite errors are detected + result = linter.check_geometry_coordinates_definite_errors() + assert result is not True # Should not be True + assert isinstance(result, tuple) # Should be a tuple + assert result[0] is False # First element should be False + assert len(result[1]) > 0 # Should have at least one invalid coordinate + assert result[1][0][1] == 100.0 # The latitude value should be 100.0 + assert "latitude > ±90°" in result[1][0][2] # Should indicate latitude error + + # Test with coordinates that trigger the heuristic + # This should fail the order check but pass the definite errors check + linter = Linter(heuristic_incorrect_item) + assert ( + linter.check_geometry_coordinates_order() == False + ) # Fails because of heuristic + assert ( + linter.check_geometry_coordinates_definite_errors() == True + ) # Passes because values are within valid ranges + + # Test that 
the best practices dictionary contains the appropriate error messages + best_practices = linter.create_best_practices_dict() + + # For heuristic-based detection + linter = Linter(heuristic_incorrect_item) + best_practices = linter.create_best_practices_dict() + assert "geometry_coordinates_order" in best_practices + assert ( + "may be in the wrong order" in best_practices["geometry_coordinates_order"][0] + ) + + # For definite errors detection + linter = Linter(clearly_incorrect_item) + best_practices = linter.create_best_practices_dict() + assert "geometry_coordinates_definite_errors" in best_practices + assert ( + "contain invalid values" + in best_practices["geometry_coordinates_definite_errors"][0] + ) + + def test_bbox_antimeridian(): """Test the check_bbox_antimeridian method for detecting incorrectly formatted bboxes that cross the antimeridian.""" # Create a test item with an incorrectly formatted bbox that belts the globe