Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/)
- Added sponsors and supporters section with logos ([#122](https://github.com/stac-utils/stac-check/pull/122))
- Added check to verify that bbox matches item's polygon geometry ([#123](https://github.com/stac-utils/stac-check/pull/123))
- Added configuration documentation to README ([#124](https://github.com/stac-utils/stac-check/pull/124))
- Added `--pydantic` option for validating STAC objects using stac-pydantic models, providing enhanced type checking and validation ([#126](https://github.com/stac-utils/stac-check/pull/126))

### Enhanced

- Improved bbox validation output to show detailed information about mismatches between bbox and geometry bounds, including which specific coordinates differ and by how much ([#126](https://github.com/stac-utils/stac-check/pull/126))

### Updated

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ Options:
(enabled by default).
--header KEY VALUE HTTP header to include in the requests. Can be used
multiple times.
--pydantic Use stac-pydantic for enhanced validation with Pydantic models.
--help Show this message and exit.
```

Expand Down
2 changes: 1 addition & 1 deletion sample_files/1.0.0/bad-item.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
-122.59750209,
37.48803556,
-122.2880486,
37.613537207
37.613531207
],
"geometry": {
"type": "Polygon",
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from setuptools import find_packages, setup

__version__ = "1.6.0"
__version__ = "1.7.0"

with open("README.md", "r") as fh:
long_description = fh.read()
Expand All @@ -20,7 +20,7 @@
"requests>=2.32.3",
"jsonschema>=4.23.0",
"click>=8.1.8",
"stac-validator>=3.6.0",
"stac-validator[pydantic]>=3.7.0",
"PyYAML",
"python-dotenv",
],
Expand Down
27 changes: 25 additions & 2 deletions stac_check/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ def intro_message(linter: Linter) -> None:
f"Validator: stac-validator {linter.validator_version}", bg="blue", fg="white"
)

# Always show validation method
validation_method = (
"Pydantic" if hasattr(linter, "pydantic") and linter.pydantic else "JSONSchema"
)
click.secho()
click.secho(f"Validation method: {validation_method}", bg="yellow", fg="black")

click.secho()


Expand All @@ -111,7 +118,17 @@ def cli_message(linter: Linter) -> None:

""" schemas validated for core object """
click.secho()
if len(linter.schema) > 0:

# Determine if we're using Pydantic validation
using_pydantic = hasattr(linter, "pydantic") and linter.pydantic

# For Pydantic validation, always show the appropriate schema model
if using_pydantic:
click.secho("Schemas validated: ", fg="blue")
asset_type = linter.asset_type.capitalize() if linter.asset_type else "Item"
click.secho(f" stac-pydantic {asset_type} model")
# For JSONSchema validation or when schemas are available
elif len(linter.schema) > 0:
click.secho("Schemas validated: ", fg="blue")
for schema in linter.schema:
click.secho(f" {schema}")
Expand Down Expand Up @@ -194,10 +211,15 @@ def cli_message(linter: Linter) -> None:
multiple=True,
help="HTTP header to include in the requests. Can be used multiple times.",
)
@click.option(
"--pydantic",
is_flag=True,
help="Use stac-pydantic for enhanced validation with Pydantic models.",
)
@click.command()
@click.argument("file")
@click.version_option(version=importlib.metadata.distribution("stac-check").version)
def main(file, recursive, max_depth, assets, links, no_assets_urls, header):
def main(file, recursive, max_depth, assets, links, no_assets_urls, header, pydantic):
linter = Linter(
file,
assets=assets,
Expand All @@ -206,6 +228,7 @@ def main(file, recursive, max_depth, assets, links, no_assets_urls, header):
max_depth=max_depth,
assets_open_urls=not no_assets_urls,
headers=dict(header),
pydantic=pydantic,
)
intro_message(linter)
if recursive > 0:
Expand Down
102 changes: 83 additions & 19 deletions stac_check/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import os
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import requests
import yaml
Expand All @@ -27,6 +27,7 @@ class Linter:
max_depth (Optional[int], optional): An optional integer indicating the maximum depth to validate recursively. Defaults to None.
assets_open_urls (bool): Whether to open assets URLs when validating assets. Defaults to True.
headers (dict): HTTP headers to include in the requests.
pydantic (bool, optional): A boolean value indicating whether to use pydantic validation. Defaults to False.

Attributes:
data (dict): A dictionary representing the STAC JSON file.
Expand Down Expand Up @@ -122,14 +123,15 @@ def check_summaries(self) -> bool:
Creates a message with best practices recommendations for the STAC JSON file.
"""

item: Union[str, dict] # url, file name, or dictionary
item: Union[str, Dict]
config_file: Optional[str] = None
assets: bool = False
links: bool = False
recursive: bool = False
max_depth: Optional[int] = None
assets_open_urls: bool = True
headers: dict = field(default_factory=dict)
headers: Dict = field(default_factory=dict)
pydantic: bool = False

def __post_init__(self):
self.data = self.load_data(self.item)
Expand Down Expand Up @@ -270,16 +272,21 @@ def validate_file(self, file: Union[str, dict]) -> Dict[str, Any]:
assets=self.assets,
assets_open_urls=self.assets_open_urls,
headers=self.headers,
pydantic=self.pydantic,
)
stac.run()
elif isinstance(file, dict):
stac = StacValidate(
assets_open_urls=self.assets_open_urls, headers=self.headers
assets_open_urls=self.assets_open_urls,
headers=self.headers,
pydantic=self.pydantic,
)
stac.validate_dict(file)
else:
raise ValueError("Input must be a file path or STAC dictionary.")
return stac.message[0]

message = stac.message[0]
return message

def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str:
"""Recursively validate a STAC item or catalog file and its child items.
Expand All @@ -302,6 +309,7 @@ def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str:
max_depth=self.max_depth,
assets_open_urls=self.assets_open_urls,
headers=self.headers,
pydantic=self.pydantic,
)
stac.run()
else:
Expand All @@ -310,6 +318,7 @@ def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str:
max_depth=self.max_depth,
assets_open_urls=self.assets_open_urls,
headers=self.headers,
pydantic=self.pydantic,
)
stac.validate_dict(file)
return stac.message
Expand Down Expand Up @@ -454,16 +463,20 @@ def check_geometry_null(self) -> bool:
else:
return False

def check_bbox_matches_geometry(self) -> bool:
def check_bbox_matches_geometry(
self,
) -> Union[bool, Tuple[bool, List[float], List[float], List[float]]]:
"""Checks if the bbox of a STAC item matches its geometry.

This function verifies that the bounding box (bbox) accurately represents
the minimum bounding rectangle of the item's geometry. It only applies to
items with non-null geometry of type Polygon or MultiPolygon.

Returns:
bool: True if the bbox matches the geometry or if the check is not applicable
(e.g., null geometry or non-polygon type). False if there's a mismatch.
Union[bool, Tuple[bool, List[float], List[float], List[float]]]:
- True if the bbox matches the geometry or if the check is not applicable
(e.g., null geometry or non-polygon type).
- When there's a mismatch: a tuple containing (False, calculated_bbox, actual_bbox, differences)
"""
# Skip check if geometry is null or bbox is not present
if (
Expand Down Expand Up @@ -504,11 +517,14 @@ def check_bbox_matches_geometry(self) -> bool:

calc_bbox = [min(lons), min(lats), max(lons), max(lats)]

# Allow for small floating point differences (epsilon)
epsilon = 1e-8
for i in range(4):
if abs(bbox[i] - calc_bbox[i]) > epsilon:
return False
# Tolerate differences of up to 5e-7, i.e. half of 1e-6, which is the smallest
# step visible when coordinates are displayed with 6 decimal places.
epsilon = 5e-7
differences = [abs(bbox[i] - calc_bbox[i]) for i in range(4)]

if any(diff > epsilon for diff in differences):
# Return False along with the calculated bbox, actual bbox, and the differences
return (False, calc_bbox, bbox, differences)

return True

Expand Down Expand Up @@ -675,12 +691,60 @@ def create_best_practices_dict(self) -> Dict:
best_practices_dict["null_geometry"] = [msg_1]

# best practices - check if bbox matches geometry
if (
not self.check_bbox_matches_geometry()
and config.get("check_bbox_geometry_match", True) == True
):
msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
best_practices_dict["bbox_geometry_mismatch"] = [msg_1]
bbox_check_result = self.check_bbox_matches_geometry()
bbox_mismatch = False

if isinstance(bbox_check_result, tuple):
bbox_mismatch = not bbox_check_result[0]
else:
bbox_mismatch = not bbox_check_result

if bbox_mismatch and config.get("check_bbox_geometry_match", True) == True:
if isinstance(bbox_check_result, tuple):
# Unpack the result
_, calc_bbox, actual_bbox, differences = bbox_check_result

# Format the bbox values for display
calc_bbox_str = ", ".join([f"{v:.6f}" for v in calc_bbox])
actual_bbox_str = ", ".join([f"{v:.6f}" for v in actual_bbox])

# Create a more detailed message about which coordinates differ
coordinate_labels = [
"min longitude",
"min latitude",
"max longitude",
"max latitude",
]
mismatch_details = []

# Use the same epsilon threshold as in check_bbox_matches_geometry
epsilon = 5e-7

for i, (diff, label) in enumerate(zip(differences, coordinate_labels)):
if diff > epsilon:
mismatch_details.append(
f"{label}: calculated={calc_bbox[i]:.6f}, actual={actual_bbox[i]:.6f}, diff={diff:.7f}"
)

msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
msg_2 = f"Calculated bbox from geometry: [{calc_bbox_str}]"
msg_3 = f"Actual bbox in metadata: [{actual_bbox_str}]"

messages = [msg_1, msg_2, msg_3]
if mismatch_details:
messages.append("Mismatched coordinates:")
messages.extend(mismatch_details)
else:
# If we got here but there are no visible differences at 6 decimal places,
# add a note explaining that the differences are too small to matter
messages.append(
"Note: The differences are too small to be visible at 6 decimal places and can be ignored."
)

best_practices_dict["bbox_geometry_mismatch"] = messages
else:
msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
best_practices_dict["bbox_geometry_mismatch"] = [msg_1]

# check to see if there are too many links
if (
Expand Down
64 changes: 60 additions & 4 deletions tests/test_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def test_bbox_matches_geometry():
# Test with matching bbox and geometry
file = "sample_files/1.0.0/core-item.json"
linter = Linter(file)
assert linter.check_bbox_matches_geometry() == True
assert linter.check_bbox_matches_geometry() is True

# Test with mismatched bbox and geometry
mismatched_item = {
Expand All @@ -306,7 +306,30 @@ def test_bbox_matches_geometry():
"properties": {"datetime": "2020-12-11T22:38:32.125Z"},
}
linter = Linter(mismatched_item)
assert linter.check_bbox_matches_geometry() == False
result = linter.check_bbox_matches_geometry()

# Check that the result is a tuple and the first element is False
assert isinstance(result, tuple)
assert result[0] is False

# Check that the tuple contains the expected elements (calculated bbox, actual bbox, differences)
assert len(result) == 4
calc_bbox, actual_bbox, differences = result[1], result[2], result[3]

# Verify the calculated bbox matches the geometry coordinates
assert calc_bbox == [
172.91173669923782,
1.3438851951615003,
172.95469614953714,
1.3690476620161975,
]

# Verify the actual bbox is what we provided
assert actual_bbox == [100.0, 0.0, 105.0, 1.0]

# Verify the differences are calculated correctly
expected_differences = [abs(actual_bbox[i] - calc_bbox[i]) for i in range(4)]
assert differences == expected_differences

# Test with null geometry (should return True as check is not applicable)
null_geom_item = {
Expand All @@ -318,7 +341,7 @@ def test_bbox_matches_geometry():
"properties": {"datetime": "2020-12-11T22:38:32.125Z"},
}
linter = Linter(null_geom_item)
assert linter.check_bbox_matches_geometry() == True
assert linter.check_bbox_matches_geometry() is True

# Test with missing bbox (should return True as check is not applicable)
no_bbox_item = {
Expand All @@ -340,7 +363,7 @@ def test_bbox_matches_geometry():
"properties": {"datetime": "2020-12-11T22:38:32.125Z"},
}
linter = Linter(no_bbox_item)
assert linter.check_bbox_matches_geometry() == True
assert linter.check_bbox_matches_geometry() is True


def test_bloated_item():
Expand Down Expand Up @@ -633,3 +656,36 @@ def test_lint_assets_no_links():
"request_invalid": [],
},
}


def test_lint_pydantic_validation_valid():
    """Check that a valid STAC item lints as valid when pydantic validation is enabled.

    Loads the core sample item with ``pydantic=True`` and verifies the
    validity flag, the detected asset type, the reported schema entry and
    the validation method recorded in the linter message.
    """
    file = "sample_files/1.0.0/core-item.json"
    linter = Linter(file, pydantic=True)

    # valid_stac is expected to be a real bool, so compare by identity
    # instead of `== True` (which would also accept 1 or 1.0).
    assert linter.valid_stac is True
    assert linter.asset_type == "ITEM"
    assert "stac-pydantic Item model" in linter.message["schema"]
    assert linter.message["validation_method"] == "pydantic"


def test_lint_pydantic_validation_invalid():
    """Check that an invalid STAC item is rejected when pydantic validation is enabled.

    Loads the bad sample item with ``pydantic=True`` and verifies that the
    linter reports it as invalid, with a pydantic error type and a message
    pointing at the missing required ``id`` field.
    """
    file = "sample_files/1.0.0/bad-item.json"
    linter = Linter(file, pydantic=True)

    # valid_stac is expected to be a real bool, so compare by identity
    # instead of `== False` (which would also accept 0 or 0.0).
    assert linter.valid_stac is False
    assert "PydanticValidationError" in linter.message["error_type"]
    assert "id: Field required" in linter.message["error_message"]
    assert linter.message["validation_method"] == "pydantic"


def test_lint_pydantic_validation_recursive():
    """Check pydantic validation of a collection with the recursive option enabled.

    Loads the sample collection with ``recursive=True``, ``max_depth=1`` and
    ``pydantic=True`` and verifies the validity flag, the detected asset
    type, the reported schema entry and the recorded validation method.
    """
    file = "sample_files/1.0.0/collection.json"
    linter = Linter(file, recursive=True, max_depth=1, pydantic=True)

    # valid_stac is expected to be a real bool, so compare by identity
    # instead of `== True` (which would also accept 1 or 1.0).
    assert linter.valid_stac is True
    assert linter.asset_type == "COLLECTION"
    assert "stac-pydantic Collection model" in linter.message["schema"]
    assert linter.message["validation_method"] == "pydantic"