diff --git a/CHANGELOG.md b/CHANGELOG.md index da0efd9..3f5f55c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [v0.1.3] - 2022-01-23 - +### Added +- Check for bloated metadata, too many fields in properties +- Check for geometry field, recommend that STAC not be used for non-spatial data + +### Changed +- Changed bloated links check to a boolean to mirror bloated metadata + ## [v0.1.2] - 2022-01-17 - 2022-01-22 ### Added - Check for null datetime diff --git a/sample_files/1.0.0/core-item-bloated.json b/sample_files/1.0.0/core-item-bloated.json new file mode 100644 index 0000000..124d268 --- /dev/null +++ b/sample_files/1.0.0/core-item-bloated.json @@ -0,0 +1,355 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [], + "type": "Feature", + "id": "20201211_223832_CS2", + "bbox": [ + 172.91173669923782, + 1.3438851951615003, + 172.95469614953714, + 1.3690476620161975 + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 172.91173669923782, + 1.3438851951615003 + ], + [ + 172.95469614953714, + 1.3438851951615003 + ], + [ + 172.95469614953714, + 1.3690476620161975 + ], + [ + 172.91173669923782, + 1.3690476620161975 + ], + [ + 172.91173669923782, + 1.3438851951615003 + ] + ] + ] + }, + "properties": { + "title": "Core Item", + "description": "A sample STAC Item that includes examples of all common metadata", + "datetime": null, + "start_datetime": "2020-12-11T22:38:32.125Z", + "end_datetime": "2020-12-11T22:38:32.327Z", + "created": "2020-12-12T01:48:13.725Z", + "updated": "2020-12-12T01:48:13.725Z", + "platform": "cool_sat1", + "instruments": [ + "cool_sensor_v1" + ], + "constllation": "ion", + "missin": "collection 5624", + "gs": 0.512, + "tile": "Core Item", + "desciption": "A sample STAC Item that includes examples of all common metadata", + "dattime": null, + "startdatetime": "2020-12-11T22:38:32.125Z", + "end_dtetime": "2020-12-11T22:38:32.327Z", + "creted": "2020-12-12T01:48:13.725Z", + "updted": "2020-12-12T01:48:13.725Z", + "platorm": "cool_sat1", + "instrments": [ + "cool_sensor_v1" + ], + "constellation": "ion", + "mission": "collection 5624", + "gsd": 0.512 + }, + "collection": "simple-collection", + "links": [ + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + }, + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + } + ], + "assets": { + "analytic": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_analytic.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "4-Band Analytic", + "roles": [ + "data" + ] + }, + "thumbnail": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.jpg", + "title": "Thumbnail", + "type": "image/png", + "roles": [ + "thumbnail" + ] + }, + "visual": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "3-Band Visual", + "roles": [ + "visual" + ] + }, + "udm": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_analytic_udm.tif", + "title": "Unusable Data Mask", + "type": "image/tiff; application=geotiff;" + }, + "json-metadata": { + "href": "http://remotedata.io/catalog/20201211_223832_CS2/extended-metadata.json", + "title": "Extended Metadata", + "type": "application/json", + "roles": [ + "metadata" + ] + }, + "ephemeris": { + "href": "http://cool-sat.com/catalog/20201211_223832_CS2/20201211_223832_CS2.EPH", + "title": "Satellite Ephemeris Metadata" + } + } + } \ No newline at end of file diff --git a/setup.py b/setup.py index f096db6..f293421 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ """ from setuptools import setup, find_packages -__version__ = "0.1.2" +__version__ = "0.1.3" with open("README.md", "r") as fh: long_description = fh.read() diff --git a/stac_check/cli.py b/stac_check/cli.py index 41092c9..bd00e85 100644 --- a/stac_check/cli.py +++ b/stac_check/cli.py @@ -97,7 +97,7 @@ def cli_message(linter): click.secho() - click.secho(f"This object has {linter.num_links} links") + click.secho(f"This object has {len(linter.data['links'])} links") click.secho() diff --git a/stac_check/lint.py b/stac_check/lint.py index 1daf55e..d4e29bf 100644 --- a/stac_check/lint.py +++ b/stac_check/lint.py @@ -30,10 +30,12 @@ def __post_init__(self): self.invalid_link_request = self.check_links_assets(10, "links", "request") if self.links else None self.schema = self.check_schema() self.summaries = self.check_summaries() - self.num_links = self.get_num_links() + self.bloated_links = self.get_bloated_links() + self.bloated_metadata = self.get_bloated_metadata() self.recursive_error_msg = "" self.datetime_null = self.check_datetime() - self.unlocated = self.check_geometry() + self.unlocated = self.check_unlocated() + self.geometry = self.check_geometry() self.validate_all = self.recursive_validation(self.load_data(self.item)) self.object_id = self.return_id() self.file_name = self.get_file_name() @@ -115,11 +117,13 @@ def check_error_message(self): def check_summaries(self): return "summaries" in self.data - def get_num_links(self): + def get_bloated_links(self): if "links" in self.data: - return len(self.data["links"]) - else: - return 0 + return len(self.data["links"]) > 20 + + def get_bloated_metadata(self): + if "properties" in self.data: + return len(self.data["properties"].keys()) > 20 def return_id(self): if "id" in self.data: @@ -135,10 +139,14 @@ def check_datetime(self): else: return False - def check_geometry(self): + def check_unlocated(self): if "geometry" in self.data: return self.data["geometry"] is None and self.data["bbox"] is not None + def check_geometry(self): + if "geometry" in self.data: + return self.data["geometry"] is not None + def get_file_name(self): return os.path.basename(self.item).split('.')[0] @@ -190,13 +198,23 @@ def create_best_practices_msg(self): # best practices - check unlocated items to make sure bbox field is not set if self.unlocated: - string_1 = f" Unlocated item. Please avoid setting the bbox field when goemetry is set to null" + string_1 = f" Unlocated item. Please avoid setting the bbox field when geometry is set to null" + best_practices.extend([string_1, ""]) + + # best practices - recommend items have a geometry + if not self.geometry and self.asset_type == "ITEM": + string_1 = f" All items should have a geometry field. STAC is not meant for non-spatial data" best_practices.extend([string_1, ""]) # check to see if there are too many links - if self.num_links >= 20: - string_1 = f" You have {self.num_links} links. Please consider using sub-collections or sub-catalogs" + if self.bloated_links: + string_1 = f" You have {len(self.data['links'])} links. Please consider using sub-collections or sub-catalogs" string_2 = f" https://github.com/radiantearth/stac-spec/blob/master/best-practices.md#catalog--collection-practices" best_practices.extend([string_1, string_2, ""]) + # best practices - check for bloated metadata in properties + if self.bloated_metadata: + string_1 = f" You have {len(self.data['properties'])} properties. Please consider using links to avoid bloated metadata" + best_practices.extend([string_1, ""]) + return best_practices \ No newline at end of file diff --git a/tests/test_lint.py b/tests/test_lint.py index 5392b9e..2414772 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -98,7 +98,7 @@ def test_linter_catalog(): assert linter.version == "1.0.0" assert linter.valid_stac == True assert linter.asset_type == "CATALOG" - assert linter.num_links == 6 + assert linter.bloated_links == False def test_linter_collection_recursive_remote(): file = "https://raw.githubusercontent.com/stac-utils/pystac/main/tests/data-files/examples/0.9.0/collection-spec/examples/landsat-collection.json" @@ -129,4 +129,15 @@ def test_unlocated_item(): file = "sample_files/1.0.0/core-item-unlocated.json" linter = Linter(file) assert linter.unlocated == True + assert linter.geometry == False +def test_bloated_item(): + file = "sample_files/1.0.0/core-item-bloated.json" + linter = Linter(file) + + assert linter.bloated_metadata == True + assert len(linter.data["properties"]) > 20 + + assert linter.bloated_links == True + assert len(linter.data["links"]) > 20 +