diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..63f1fe2e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,32 @@ +minimum_pre_commit_version: 1.15.0 +default_language_version: + python: python3.6 +repos: + - + repo: 'https://github.com/ambv/black' + rev: 19.3b0 + hooks: + - id: black + name: Black + exclude: templates/ + args: ['--safe', '-l 100', '.'] + + - + repo: 'https://github.com/timothycrosley/isort.git' + rev: 4.3.21 + hooks: + - id: isort + name: Sort Imports + exclude: templates/ + args: ['-rc', '-m 3', '-tc', '-w 100', '-e'] + - + repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: debug-statements + - id: check-merge-conflict + - id: check-docstring-first + language_version: python3.6 diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..fdc28a6b --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +install: + pip install . + +code-check: + pre-commit run --all-files + +test: + pytest --mypy --cov --cov-report=html --verbose diff --git a/__init__.py b/__init__.py index d12a0301..59d9c1d1 100644 --- a/__init__.py +++ b/__init__.py @@ -3,4 +3,3 @@ """ __author__ = "James Banting" - diff --git a/requirements.txt b/requirements.txt index 32d9d6b9..53f54e01 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ requests>=2.19.1 pytest>=3.8.0 +pytest-mypy==0.4.2 +pytest-cov==2.8.1 docopt>=0.6.2 jsonschema>=3.0.1 - +pystac==0.3.3 diff --git a/setup.py b/setup.py index a629e3b6..dc721c34 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ setup( name="stac_validator", version=__version__, - author="James Banting", + author="James Banting, Darren Wiens", author_email="jbanting@sparkgeo.com", description="A package to validate STAC files", license="MIT", @@ -36,8 +36,6 @@ url="https://github.com/sparkgeo/stac-validator", install_requires=requires, packages=["stac_validator"], - entry_points={ - "console_scripts": ["stac_validator = stac_validator.stac_validator:main"] - }, + entry_points={"console_scripts": ["stac_validator = stac_validator.stac_validator:main"]}, tests_require=["pytest"], ) diff --git a/stac_validator/__init__.py b/stac_validator/__init__.py index d18f4098..0a6ea796 100644 --- a/stac_validator/__init__.py +++ b/stac_validator/__init__.py @@ -1 +1,30 @@ -__version__ = '0.0.2' +__version__ = "0.0.2" +import boto3 +import json +import urllib.request +from urllib.parse import urlparse +from pystac import STAC_IO + + +def read_remote_stacs(uri): + """ + Reads STACs from a remote location. To be used to set STAC_IO + Defaults to local storage. + """ + parsed = urlparse(uri) + if parsed.scheme == "s3": + bucket = parsed.netloc + key = parsed.path[1:] + s3 = boto3.resource("s3") + obj = s3.Object(bucket, key) + return obj.get()["Body"].read().decode("utf-8") + if parsed.scheme in ["http", "https"]: + with urllib.request.urlopen(uri) as url: + stac = url.read().decode() + return stac + else: + return STAC_IO.default_read_text_method(uri) + +STAC_IO.read_text_method = read_remote_stacs + +from . import stac_validator \ No newline at end of file diff --git a/stac_validator/stac_utilities.py b/stac_validator/stac_utilities.py index 1589e6ee..41bdb3ab 100644 --- a/stac_validator/stac_utilities.py +++ b/stac_validator/stac_utilities.py @@ -43,14 +43,24 @@ def _determine_verison(self): self.COLLECTION_URL = os.path.join(cdn_base_url, self.filename) else: if self.version in old_versions: - self.CATALOG_URL = os.path.join(git_base_url, f"static-catalog/{self.input_type}/{self.filename}") - self.ITEM_URL = os.path.join(git_base_url, f"json-spec/{self.input_type}/{self.filename}") + self.CATALOG_URL = os.path.join( + git_base_url, f"static-catalog/{self.input_type}/{self.filename}" + ) + self.ITEM_URL = os.path.join( + git_base_url, f"json-spec/{self.input_type}/{self.filename}" + ) else: - self.CATALOG_URL = os.path.join(git_base_url, f"catalog-spec/{self.input_type}/{self.filename}") - self.COLLECTION_URL = os.path.join(git_base_url, f"collection-spec/{self.input_type}/{self.filename}") - self.ITEM_URL = os.path.join(git_base_url, f"item-spec/{self.input_type}/{self.filename}") + self.CATALOG_URL = os.path.join( + git_base_url, f"catalog-spec/{self.input_type}/{self.filename}" + ) + self.COLLECTION_URL = os.path.join( + git_base_url, f"collection-spec/{self.input_type}/{self.filename}" + ) + self.ITEM_URL = os.path.join( + git_base_url, f"item-spec/{self.input_type}/{self.filename}" + ) @staticmethod def fix_stac_item(version, filename): diff --git a/stac_validator/stac_validator.py b/stac_validator/stac_validator.py index c417c7af..930e7ac2 100755 --- a/stac_validator/stac_validator.py +++ b/stac_validator/stac_validator.py @@ -2,20 +2,18 @@ Description: Validate a STAC item or catalog against the STAC specification. Usage: - stac_validator [--spec_dirs STAC_SPEC_DIRS] [--version STAC_VERSION] [--threads NTHREADS] [--verbose] [--timer] [--log_level LOGLEVEL] [--follow] + stac_validator [--spec_host stac_spec_host] [--version STAC_VERSION] [--verbose] [--timer] [--log_level LOGLEVEL] Arguments: stac_file Fully qualified path or url to a STAC file. Options: - -v, --version STAC_VERSION Version to validate against. [default: master] + -v, --version STAC_VERSION Version to validate against. [default: v0.9.0] -h, --help Show this screen. - --spec_dirs STAC_SPEC_DIRS Path(s) to local directory containing specification files. Separate paths with a comma. [default: None] - --threads NTHREADS Number of threads to use. [default: 10] + --spec_host stac_spec_host Path to directory containing specification files. [default: https://cdn.staclint.com] --verbose Verbose output. [default: False] --timer Reports time to validate the STAC. (seconds) --log_level LOGLEVEL Standard level of logging to report. [default: CRITICAL] - --follow Follow any child links and validate those links. [default: False] """ import json @@ -29,11 +27,13 @@ from json.decoder import JSONDecodeError from pathlib import Path from timeit import default_timer +from typing import Tuple from urllib.parse import urljoin, urlparse import requests from docopt import docopt from jsonschema import RefResolutionError, RefResolver, ValidationError, validate +from pystac.serialization import identify_stac_object from .stac_utilities import StacVersion @@ -45,27 +45,31 @@ class VersionException(Exception): class StacValidate: - def __init__(self, stac_file, stac_spec_dirs=None, version="master", log_level="CRITICAL", follow=False): - """ - Validate a STAC file. - :param stac_file: File to validate - :param stac_spec_dirs: List of local specification directories to check for JSON schema files. - :param version: STAC version to validate against. Uses github tags from the stac-spec repo. ex: v0.6.2 - :param log_level: Level of logging to report - :param follow: Follow links in STAC - """ - """ + def __init__( + self, + stac_file: str, + stac_spec_host: str = "https://cdn.staclint.com", + version: str = "0.9.0", + log_level: str = "CRITICAL", + ): + """Validate a STAC file. - :param stac_file: file to validate - :param version: github tag - defaults to master + :param stac_file: File to validate + :type stac_file: str + :param stac_spec_host: Schema host location, defaults to "https://cdn.staclint.com" + :type stac_spec_host: str, optional + :param version: STAC version to validate against, defaults to "0.9.0" + :type version: str, optional + :param log_level: Level of logging to report, defaults to "CRITICAL" + :type log_level: str, optional + :raises ValueError: [description] """ - numeric_log_level = getattr(logging, log_level.upper(), None) if not isinstance(numeric_log_level, int): raise ValueError("Invalid log level: %s" % log_level) logging.basicConfig( - format="%(asctime)s : %(levelname)s : %(thread)d : %(message)s", + format="%(asctime)s : %(levelname)s : %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=numeric_log_level, ) @@ -73,10 +77,7 @@ def __init__(self, stac_file, stac_spec_dirs=None, version="master", log_level=" self.stac_version = version self.stac_file = stac_file.strip() self.dirpath = tempfile.mkdtemp() - self.stac_spec_dirs = self.check_none(stac_spec_dirs) - - self.follow = follow - + self.stac_spec_host = stac_spec_host self.message = [] self.status = { "catalogs": {"valid": 0, "invalid": 0}, @@ -85,175 +86,39 @@ def __init__(self, stac_file, stac_spec_dirs=None, version="master", log_level=" "unknown": 0, } - @staticmethod - def check_none(input): - """ - Checks if the string is None - :param input: input string to check - :return: - """ - if input == "None": - return None - try: - return input.split(",") - except AttributeError as e: - return input - except Exception as e: - logger.Warning("Could not find input file.") + def get_stac_type(self, stac_content: dict) -> str: + """Identify the STAC object type - @lru_cache(maxsize=48) - def fetch_spec(self, spec): - """ - Get the spec file and cache it. - :param spec: name of spec to get - :return: STAC spec in json format + :param stac_content: STAC content dictionary + :type stac_content: dict + :return: STAC object type + :rtype: str """ + stac_object = identify_stac_object(stac_content) + return stac_object.object_type.lower() - if spec == "geojson": - spec_name = "geojson" - elif spec == "catalog": - spec_name = "catalog" - elif spec == "collection": - spec_name = "collection" - else: - spec_name = "item" - - if self.stac_spec_dirs is None: - try: - logging.debug("Gathering STAC specs from remote.") - url = getattr(StacVersion, f"{spec_name}_schema_url") - spec = requests.get(url(self.stac_version)).json() - valid_dir = True - except Exception as error: - logger.exception("STAC Download Error") - raise VersionException(f"Could not download STAC specification files for version: {self.stac_version}") - else: - valid_dir = False - for stac_spec_dir in self.stac_spec_dirs: - # needed for old local specs - if self.stac_version in ["v0.4.0", "v0.4.1", "v0.5.0", "v0.5.1", "v0.5.2"] and spec_name == "item": - spec_name = "stac-item" - if os.path.isfile(os.path.join(stac_spec_dir, spec_name + ".json")): - valid_dir = True - try: - logging.debug("Gathering STAC specs from local directory.") - with open(os.path.join(stac_spec_dir, spec_name + ".json"), "r") as f: - spec = json.load(f) - except FileNotFoundError as error: - try: - logger.critical("Something big messed up") - url = getattr(StacVersion, f"{spec_name}_schema_url") - spec = requests.get(url(self.stac_version)).json() - except: - logger.exception( - "The STAC specification file does not exist or does not match the STAC file you are trying " - "to validate. Please check your stac_spec_dirs path." - ) - sys.exit(1) - except Exception as error: - logging.exception(error) - - # Write the stac file to a filepath. used as absolute links for geojson schmea - if valid_dir: - if spec_name == "geojson": - file_name = os.path.join(self.dirpath, "geojson.json") - else: - file_name = os.path.join(self.dirpath, f"{spec_name}_{self.stac_version.replace('.','_')}.json") - - with open(file_name, "w") as fp: - logging.debug(f"Copying {spec_name} spec from local file to cache") - fp.write(json.dumps(spec)) - - else: - logger.exception( - "The STAC specification file does not exist or does not match the STAC file you are trying " - "to validate. Please check your stac_spec_dirs path." - ) - logging.critical("Exiting.") - sys.exit(1) + def fetch_common_schemas(self, stac_json: dict): + """Fetch additional schemas, linked within a parent schema - return spec - - def validate_json(self, stac_content, stac_schema): - """ - Validate STAC. - :param stac_content: input STAC file content - :param stac_schema of STAC (item, catalog, collection) - :return: validation message + :param stac_json: STAC content dictionary + :type stac_json: dict """ + for i in stac_json["definitions"]["common_metadata"]["allOf"]: + stac_schema = requests.get( + os.path.join(self.stac_spec_host, self.stac_version, i["$ref"]) + ).json() - try: - if "title" in stac_schema and "item" in stac_schema["title"].lower(): - logger.debug("Changing GeoJson definition to reference local file") - # rewrite relative reference to use local geojson file - stac_schema['id'] = f"item_{self.stac_version.replace('.', '_')}.json" - stac_schema["definitions"]["core"]["allOf"][0]["oneOf"][0][ - "$ref" - ] = f"file://{self.dirpath}/geojson.json#/definitions/feature" - logging.info("Validating STAC") - validate(stac_content, stac_schema) - return True, None - except RefResolutionError as error: - # See https://github.com/Julian/jsonschema/issues/362 - # See https://github.com/Julian/jsonschema/issues/313 - # See https://github.com/Julian/jsonschema/issues/98 - # See https://github.com/Julian/jsonschema/issues/343 - try: - self.fetch_spec("geojson") - self.geojson_resolver = RefResolver( - base_uri=f"file://{self.dirpath}/geojson.json", referrer="geojson.json" - ) - validate(stac_content, stac_schema, resolver=self.geojson_resolver) - return True, None - except Exception as error: - logger.exception("A reference resolution error") - return False, f"{error.args}" - except ValidationError as error: - logger.warning("STAC Validation Error") - return False, f"{error.message} of {list(error.path)}" - except Exception as error: - logger.exception("STAC error") - return False, f"{error}" + tmp_schema = os.path.join(self.dirpath, i["$ref"]) + i["$ref"] = f"file://{tmp_schema}" - @staticmethod - def _update_status(old_status, new_status): - """ - Set status messages. - :param old_status: original status - :param new_status: changed status - :return: status dictionary - """ - - old_status["catalogs"]["valid"] += new_status["catalogs"]["valid"] - old_status["catalogs"]["invalid"] += new_status["catalogs"]["invalid"] - old_status["collections"]["valid"] += new_status["collections"]["valid"] - old_status["collections"]["invalid"] += new_status["collections"]["invalid"] - old_status["items"]["valid"] += new_status["items"]["valid"] - old_status["items"]["invalid"] += new_status["items"]["invalid"] - old_status["unknown"] += new_status["unknown"] - return old_status + with open(tmp_schema, "w") as f: + json.dump(stac_schema, f) @staticmethod - def _get_children_urls(stac_content, stac_path): - """ - Return children items or catalog urls. - :param stac_content: contents of STAC file - :param stac_path: path to STAC file - :return: list of urls - """ - - urls = [] + def is_valid_url(url: str) -> bool: + """Check if path is URL or not. - for link in stac_content.get("links", []): - if link["rel"] in ["child", "item"]: - urls.append(urljoin(stac_path, link["href"]).strip()) - return urls - - @staticmethod - def is_valid_url(url): - """ - Check if path is URL or not. - :param url: path to check + :param url: Path to check :return: boolean """ try: @@ -262,11 +127,26 @@ def is_valid_url(url): except Exception as e: return False - def fetch_and_parse_file(self, input_path): + @staticmethod + def create_err_msg(err_type: str, err_msg: str) -> dict: + """Format error message dictionary + + :param err_type: Error type + :type err_type: str + :param err_msg: Error message + :type err_msg: str + :return: Formatted message + :rtype: dict """ - Fetch and parse STAC file. - :param input_path: STAC file to get and read - :return: content or error message + return {"valid_stac": False, "error_type": err_type, "error_message": err_msg} + + def fetch_and_parse_file(self, input_path: str) -> Tuple[dict, dict]: + """Fetch and parse STAC file + + :param input_path: Path to STAC file + :type str: str + :return: STAC content and error message, if necessary + :rtype: Tuple[dict, dict] """ err_message = {} @@ -284,145 +164,69 @@ def fetch_and_parse_file(self, input_path): except JSONDecodeError as e: logger.exception("JSON Decode Error") - err_message["valid_stac"] = False - err_message["error_type"] = "InvalidJSON" - err_message["error_message"] = f"{input_path} is not Valid JSON" + err_message = self.create_err_msg("InvalidJSON", f"{input_path} is not Valid JSON") except FileNotFoundError as e: logger.exception("STAC File Not Found") - err_message["valid_stac"] = False - err_message["error_type"] = "FileNotFoundError" - err_message["error_message"] = f"{input_path} cannot be found" + err_message = self.create_err_msg("FileNotFoundError", f"{input_path} cannot be found") return data, err_message - def _validate(self, stac_path): + def run(self): + """ - Check STAC type and appropriate schema to validate against. - :param stac_path: path to STAC file - :return: JSON message and list of children to (potentially) validate + Entry point. + :return: message json """ - fpath = Path(stac_path) + message = {"path": self.stac_file} - Collections_Fields = ["keywords", "license", "title", "provider", "version", "description", "stac_version"] + stac_content, err_message = self.fetch_and_parse_file(self.stac_file) - message = {} - status = { - "catalogs": {"valid": 0, "invalid": 0}, - "collections": {"valid": 0, "invalid": 0}, - "items": {"valid": 0, "invalid": 0}, - "unknown": 0, - } - - stac_content, err_message = self.fetch_and_parse_file(stac_path) if err_message: - status["unknown"] = 1 - return err_message, status, [] - - # Check STAC Type - if "catalog" in fpath.stem: - # Congratulations, It's a Catalog! - logger.info("STAC is a Catalog") - message["asset_type"] = "catalog" - is_valid_stac, err_message = self.validate_json(stac_content, self.fetch_spec("catalog")) - message["valid_stac"] = is_valid_stac - message["error_message"] = err_message - - if message["valid_stac"]: - status["catalogs"]["valid"] = 1 - else: - status["catalogs"]["invalid"] = 1 + self.status["unknown"] = 1 + message.update(err_message) + self.message = [message] + return json.dumps(self.message) - if self.follow: - children = self._get_children_urls(stac_content, stac_path) - else: - children = [] + self.stac_type = self.get_stac_type(stac_content) + message["asset_type"] = self.stac_type - elif type(stac_content) is dict and any(field in Collections_Fields for field in stac_content.keys()): - # Congratulations, It's a Collection! - # Collections will validate as catalog as well. - logger.info("STAC is a Collection") - message["asset_type"] = "collection" - is_valid_stac, err_message = self.validate_json(stac_content, self.fetch_spec("collection")) + schema_url = os.path.join(self.stac_spec_host, self.stac_version, f"{self.stac_type}.json") + schema_json = requests.get(schema_url).json() + message["schema"] = schema_url - message["valid_stac"] = is_valid_stac - message["error_message"] = err_message + if self.stac_type == "item": + self.fetch_common_schemas(schema_json) - if message["valid_stac"]: - status["collections"]["valid"] = 1 - else: - status["collections"]["invalid"] = 1 - - if self.follow: - children = self._get_children_urls(stac_content, stac_path) - else: - children = [] - - elif "error_type" in message: - pass - - else: - # Congratulations, It's an Item! - logger.info("STAC is an Item") - message["asset_type"] = "item" - self.fetch_spec("geojson") - is_valid_stac, err_message = self.validate_json(stac_content, self.fetch_spec("item")) - message["valid_stac"] = is_valid_stac - message["error_message"] = err_message - - if message["valid_stac"]: - status["items"]["valid"] = 1 - else: - status["items"]["invalid"] = 1 - - children = [] - - message["path"] = stac_path - - return message, status, children - - def run(self, concurrent=10): - """ - Entry point. - :param concurrent: number of threads to use - :return: message json - """ + try: + result = validate(stac_content, schema_json) + self.status[f"{self.stac_type}s"]["valid"] += 1 + message["valid_stac"] = True + except ValidationError as e: + self.status[f"{self.stac_type}s"]["invalid"] += 1 + message.update(self.create_err_msg("ValidationError", e.message)) - children = [self.stac_file] - logger.info(f"Using {concurrent} threads") - while True: - with futures.ThreadPoolExecutor(max_workers=int(concurrent)) as executor: - future_tasks = [executor.submit(self._validate, url) for url in children] - children = [] - for task in futures.as_completed(future_tasks): - message, status, new_children = task.result() - self.status = self._update_status(self.status, status) - self.message.append(message) - children.extend(new_children) - - if not children: - break + self.message.append(message) return json.dumps(self.message) def main(): args = docopt(__doc__) - follow = args.get("--follow") stac_file = args.get("") - stac_spec_dirs = args.get("--spec_dirs", None) + stac_spec_host = args.get("--spec_host", "https://cdn.staclint.com/") version = args.get("--version") verbose = args.get("--verbose") - nthreads = args.get("--threads", 10) timer = args.get("--timer") - log_level = args.get("--log_level", "CRITICAL") + log_level = args.get("--log_level", "DEBUG") if timer: start = default_timer() - stac = StacValidate(stac_file, stac_spec_dirs, version, log_level, follow) - _ = stac.run(nthreads) + stac = StacValidate(stac_file, stac_spec_host, version, log_level) + + _ = stac.run() shutil.rmtree(stac.dirpath) if verbose: diff --git a/tests/__init__.py b/tests/__init__.py index d12a0301..60d16ecc 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -2,5 +2,4 @@ Description: """ -__author__ = "James Banting" - +__author__ = "James Banting, Darren Wiens" diff --git a/tests/test_data/bad_item_v090.json b/tests/test_data/bad_item_v090.json new file mode 100644 index 00000000..9e41858a --- /dev/null +++ b/tests/test_data/bad_item_v090.json @@ -0,0 +1,87 @@ +{ + "stac_version": "0.9.0", + "stac_extensions": [ + "eo", + "view", + "https://example.com/cs-extension/1.0/schema.json" + ], + "type": "Feature", + "bbox": [-122.59750209, 37.48803556, -122.2880486, 37.613537207], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-122.308150179, 37.488035566], + [-122.597502109, 37.538869539], + [-122.576687533, 37.613537207], + [-122.288048600, 37.562818007], + [-122.308150179, 37.488035566] + ] + ] + }, + "properties": { + "datetime": "2016-05-03T13:22:30Z", + "title": "A CS3 item", + "license": "PDDL-1.0", + "providers": [ + { + "name": "CoolSat", + "roles": [ + "producer", + "licensor" + ], + "url": "https://cool-sat.com/" + } + ], + "created": "2016-05-04T00:00:01Z", + "updated": "2017-01-01T00:30:55Z", + "view:sun_azimuth": 168.7, + "eo:cloud_cover": 0.12, + "view:off_nadir": 1.4, + "platform": "coolsat2", + "instruments": ["cool_sensor_v1"], + "eo:bands": [], + "view:sun_elevation": 33.4, + "eo:gsd": 0.512, + "cs:type": "scene", + "cs:anomalous_pixels": 0.14, + "cs:earth_sun_distance": 1.0141560, + "cs:sat_id": "CS3", + "cs:product_level": "LV1B" + }, + "collection": "CS3", + "links": [ + {"rel": "self", "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/CS3-20160503_132130_04.json"}, + {"rel": "root", "href": "http://cool-sat.com/catalog/catalog.json"}, + {"rel": "parent", "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/catalog.json"}, + {"rel": "collection", "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/catalog.json"}, + {"rel": "acquisition", "href": "http://cool-sat.com/catalog/acquisitions/20160503_56"} + ], + "assets": { + "analytic": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/analytic.tif", + "title": "4-Band Analytic" + }, + "thumbnail": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/thumbnail.png", + "title": "Thumbnail", + "type": "image/png", + "roles": [ "thumbnail" ] + }, + "udm": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/UDM.tif", + "title": "Unusable Data Mask" + }, + "json-metadata": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/extended-metadata.json", + "title": "Extended Metadata", + "type": "application/json", + "roles": [ "thumbnail" ] + }, + "ephemeris": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/S3-20160503_132130_04.EPH", + "title": "Satellite Ephemeris Metadata" + } + } + + } diff --git a/tests/test_data/good_item_v090.json b/tests/test_data/good_item_v090.json new file mode 100644 index 00000000..ad1a4d5d --- /dev/null +++ b/tests/test_data/good_item_v090.json @@ -0,0 +1,88 @@ +{ + "stac_version": "0.9.0", + "stac_extensions": [ + "eo", + "view", + "https://example.com/cs-extension/1.0/schema.json" + ], + "type": "Feature", + "id" : "CS3-20160503_132131_05", + "bbox": [-122.59750209, 37.48803556, -122.2880486, 37.613537207], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-122.308150179, 37.488035566], + [-122.597502109, 37.538869539], + [-122.576687533, 37.613537207], + [-122.288048600, 37.562818007], + [-122.308150179, 37.488035566] + ] + ] + }, + "properties": { + "datetime": "2016-05-03T13:22:30Z", + "title": "A CS3 item", + "license": "PDDL-1.0", + "providers": [ + { + "name": "CoolSat", + "roles": [ + "producer", + "licensor" + ], + "url": "https://cool-sat.com/" + } + ], + "created": "2016-05-04T00:00:01Z", + "updated": "2017-01-01T00:30:55Z", + "view:sun_azimuth": 168.7, + "eo:cloud_cover": 0.12, + "view:off_nadir": 1.4, + "platform": "coolsat2", + "instruments": ["cool_sensor_v1"], + "eo:bands": [], + "view:sun_elevation": 33.4, + "eo:gsd": 0.512, + "cs:type": "scene", + "cs:anomalous_pixels": 0.14, + "cs:earth_sun_distance": 1.0141560, + "cs:sat_id": "CS3", + "cs:product_level": "LV1B" + }, + "collection": "CS3", + "links": [ + {"rel": "self", "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/CS3-20160503_132130_04.json"}, + {"rel": "root", "href": "http://cool-sat.com/catalog/catalog.json"}, + {"rel": "parent", "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/catalog.json"}, + {"rel": "collection", "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/catalog.json"}, + {"rel": "acquisition", "href": "http://cool-sat.com/catalog/acquisitions/20160503_56"} + ], + "assets": { + "analytic": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/analytic.tif", + "title": "4-Band Analytic" + }, + "thumbnail": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/thumbnail.png", + "title": "Thumbnail", + "type": "image/png", + "roles": [ "thumbnail" ] + }, + "udm": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/UDM.tif", + "title": "Unusable Data Mask" + }, + "json-metadata": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/extended-metadata.json", + "title": "Extended Metadata", + "type": "application/json", + "roles": [ "thumbnail" ] + }, + "ephemeris": { + "href": "http://cool-sat.com/catalog/CS3-20160503_132130_04/S3-20160503_132130_04.EPH", + "title": "Satellite Ephemeris Metadata" + } + } + + } diff --git a/tests/test_stac_validator.py b/tests/test_stac_validator.py index dc9391fe..8d2e1d93 100644 --- a/tests/test_stac_validator.py +++ b/tests/test_stac_validator.py @@ -8,9 +8,9 @@ def _run_validate( - url, stac_spec_dirs=None, version="master", log_level="DEBUG", follow=False + url, stac_spec_dirs="https://cdn.staclint.com/", version="v0.9.0", log_level="DEBUG" ): - stac = stac_validator.StacValidate(url, stac_spec_dirs, version, log_level, follow) + stac = stac_validator.StacValidate(url, stac_spec_dirs, version, log_level) stac.run() return stac @@ -32,58 +32,43 @@ def test_item_master(): @pytest.mark.item -def test_good_item_validation_v052_verbose(): - stac = _run_validate(url="tests/test_data/good_item_v052.json", version="v0.5.2") +def test_good_item_validation_v090_verbose(): + stac = _run_validate(url="tests/test_data/good_item_v090.json") + print(stac.message) assert stac.message == [ { "asset_type": "item", + "path": "tests/test_data/good_item_v090.json", + "schema": "https://cdn.staclint.com/v0.9.0/item.json", "valid_stac": True, - "error_message": None, - "path": "tests/test_data/good_item_v052.json", } ] @pytest.mark.item -def test_good_item_validation_v060_verbose(): - stac = _run_validate(url="tests/test_data/good_item_v060.json", version="v0.6.0") +def test_bad_item_validation_v090_verbose(): + stac = _run_validate(url="tests/test_data/bad_item_v090.json") assert stac.message == [ { + "path": "tests/test_data/bad_item_v090.json", "asset_type": "item", - "valid_stac": True, - "error_message": None, - "path": "tests/test_data/good_item_v060.json", - } - ] - - -@pytest.mark.item -def test_good_item_validation_v061_verbose(): - stac = _run_validate(url="tests/test_data/good_item_v061.json", version="v0.6.1") - assert stac.message == [ - { - "asset_type": "item", - "valid_stac": True, - "error_message": None, - "path": "tests/test_data/good_item_v061.json", + "schema": "https://cdn.staclint.com/v0.9.0/item.json", + "valid_stac": False, + "error_type": "ValidationError", + "error_message": "'id' is a required property", } ] -@pytest.mark.local_schema @pytest.mark.item -def test_local_schema_item(): - stac = _run_validate( - url="tests/test_data/good_item_v061.json", - version="v0.6.1", - stac_spec_dirs="tests/test_data/local_schema/item_v061/json-schema", - ) +def test_missing_item(): + stac = _run_validate(url="tests/test_data/missing_item_v090.json") assert stac.message == [ { - "asset_type": "item", - "valid_stac": True, - "error_message": None, - "path": "tests/test_data/good_item_v061.json", + "path": "tests/test_data/missing_item_v090.json", + "valid_stac": False, + "error_type": "FileNotFoundError", + "error_message": "tests/test_data/missing_item_v090.json cannot be found", } ] @@ -104,82 +89,6 @@ def test_catalog_master(): } -@pytest.mark.catalog -def test_good_catalog_validation_v052_verbose(): - stac = _run_validate(url="tests/test_data/good_catalog_v052.json", version="v0.5.2") - assert stac.message == [ - { - "asset_type": "catalog", - "valid_stac": True, - "error_message": None, - "path": "tests/test_data/good_catalog_v052.json", - } - ] - - -@pytest.mark.catalog -def test_nested_catalog_v052_follow(): - stac = _run_validate( - url="tests/test_data/nested_catalogs/parent_catalog.json", - version="v0.5.2", - follow=True, - ) - assert stac.status == { - "catalogs": {"valid": 4, "invalid": 1}, - "collections": {"valid": 0, "invalid": 0}, - "items": {"valid": 6, "invalid": 1}, - "unknown": 0, - } - - -@pytest.mark.catalog -def test_nested_catalog_v052(): - stac = _run_validate( - url="tests/test_data/nested_catalogs/parent_catalog.json", version="v0.5.2" - ) - assert stac.status == { - "catalogs": {"valid": 1, "invalid": 0}, - "collections": {"valid": 0, "invalid": 0}, - "items": {"valid": 0, "invalid": 0}, - "unknown": 0, - } - - -@pytest.mark.local_schema -@pytest.mark.catalog -def test_local_schema_catalog(): - stac = _run_validate( - url="tests/test_data/good_catalog_v061.json", - version="v0.6.1", - stac_spec_dirs="tests/test_data/local_schema/catalog_v061/json-schema", - ) - assert stac.message == [ - { - "asset_type": "catalog", - "valid_stac": True, - "error_message": None, - "path": "tests/test_data/good_catalog_v061.json", - } - ] - -@pytest.mark.local_schema -@pytest.mark.catalog -def test_local_schema_catalog_schema_fail(): - stac = _run_validate( - url="tests/test_data/good_catalog_v052.json", - version="v0.6.1", - stac_spec_dirs="tests/test_data/local_schema/catalog_v061/json-schema", - ) - assert stac.message == [ - { - "asset_type": "catalog", - "valid_stac": False, - "error_message": "'stac_version' is a required property of []", - "path": "tests/test_data/good_catalog_v052.json", - } - ] - - # -------------------- COLLECTION -------------------- @@ -194,107 +103,3 @@ def test_collection_master(): "items": {"valid": 0, "invalid": 0}, "unknown": 0, } - - -@pytest.mark.local_schema -@pytest.mark.catalog -def test_local_schema_collection(): - stac = _run_validate( - url="tests/test_data/good_collection_v061.json", - version="v0.6.1", - stac_spec_dirs="tests/test_data/local_schema/collection_v061/json-schema", - ) - assert stac.message == [ - { - "asset_type": "collection", - "valid_stac": True, - "error_message": None, - "path": "tests/test_data/good_collection_v061.json", - } - ] - - -# -------------------- VALIDATOR -------------------- - - -@pytest.mark.validator -def test_bad_url(): - stac = _run_validate( - url="https://s3.amazonaws.com/spacenet-stac/spacenet-dataset/AOI_4_Shanghai_MUL-PanSharpen_Cloud", - version="v0.5.2", - ) - assert stac.status == { - "catalogs": {"valid": 0, "invalid": 0}, - "collections": {"valid": 0, "invalid": 0}, - "items": {"valid": 0, "invalid": 0}, - "unknown": 1, - } - - assert stac.message == [ - { - "valid_stac": False, - "error_type": "InvalidJSON", - "error_message": "https://s3.amazonaws.com/spacenet-stac/spacenet-dataset/AOI_4_Shanghai_MUL-PanSharpen_Cloud is not Valid JSON", - } - ] - -@pytest.mark.validator -@pytest.mark.local_schema -@pytest.mark.catalog -def test_local_schema_catalog_wrong_schema(): - with pytest.raises(SystemExit) as e: - stac = _run_validate( - url="tests/test_data/good_catalog_v052.json", - version="v0.6.1", - stac_spec_dirs="tests/test_data/local_schema/item_v061/json-schema", - ) - assert e.value.code == 1 - -@pytest.mark.validator -@pytest.mark.local_schema -@pytest.mark.multiple_dirs -@pytest.mark.catalog -def test_multiple_local_schema_catalog_wrong_schema(): - with pytest.raises(SystemExit) as e: - stac = _run_validate( - url="tests/test_data/good_catalog_v061.json", - stac_spec_dirs="tests/test_data/local_schema/item_v061/json-schem,tests/test_data/local_schema/catalog_v061/json-schem", - version="v0.6.1", - ) - assert e.value.code == 1 - -@pytest.mark.validator -@pytest.mark.local_schema -@pytest.mark.multiple_dirs -@pytest.mark.catalog -def test_multiple_local_schemas_catalog(): - stac = _run_validate( - url="tests/test_data/good_catalog_v061.json", - stac_spec_dirs="tests/test_data/local_schema/item_v061/json-schema,tests/test_data/local_schema/catalog_v061/json-schema", - version="v0.6.1", - ) - assert stac.status == { - "catalogs": {"valid": 1, "invalid": 0}, - "collections": {"valid": 0, "invalid": 0}, - "items": {"valid": 0, "invalid": 0}, - "unknown": 0, - } - -@pytest.mark.validator -@pytest.mark.local_schema -@pytest.mark.multiple_dirs -@pytest.mark.catalog -def test_multiple_local_schemas_catalog(): - - stac = _run_validate( - url="tests/test_data/nested_catalogs/parent_catalog.json", - stac_spec_dirs="tests/test_data/local_schema/item_v052,tests/test_data/local_schema/catalog_v052", - version="v0.5.2", - follow=True, - ) - assert stac.status == { - "catalogs": {"valid": 4, "invalid": 1}, - "collections": {"valid": 0, "invalid": 0}, - "items": {"valid": 6, "invalid": 1}, - "unknown": 0, - } \ No newline at end of file