@click.command()
@click.option(
    '--schema',
    '-s',
    type=str,
    default=validate_ping.PING_SCHEMA_DEFAULT_URL,
    nargs=1,
    required=False,
    help=(
        "HTTP url or file path to glean ping schema. "
        "If remote, will cache to disk."
    )
)
def check(schema):
    """
    Validate the contents of a glean ping.

    The ping contents are read from stdin, and the validation errors are
    written to stdout.
    """
    # NOTE: the docstring above is what click shows as the command's help
    # text, so it is deliberately left untouched.
    #
    # stdin/stdout are re-wrapped so that the ping is always decoded, and the
    # errors always encoded, as UTF-8 regardless of the locale settings.
    ins = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    outs = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    try:
        exit_code = validate_ping.validate_ping(ins, outs, schema_url=schema)
    finally:
        # Push any reported errors out now rather than relying on the
        # wrapper being finalized, then detach both wrappers so that
        # finalizing them does not close the process-wide stdin/stdout
        # buffers.
        outs.flush()
        outs.detach()
        ins.detach()

    # The validation result (1 if any errors were found, otherwise 0) becomes
    # the process exit status.
    sys.exit(exit_code)
def fetch_remote_url(url, cache=True):
    """
    Fetches the contents from an HTTP url or local file path, and optionally
    caches it to disk.

    :param url: An ``http://`` or ``https://`` URL, or a local file path.
    :param cache: When true, the contents of a remote URL are looked up in,
        and stored to, an on-disk cache in the user's cache directory.
        Local files are never cached.
    :returns: The text of a local file (decoded as UTF-8), or the raw bytes
        of the HTTP response body for a remote URL.
    """
    # Match on the full scheme prefix (case-insensitively) so that a local
    # file whose name merely starts with "http" is still read from disk.
    is_http = url.lower().startswith(('http://', 'https://'))

    if not is_http:
        with open(url, 'r', encoding='utf-8') as fd:
            return fd.read()

    def download():
        # Use the response as a context manager so the connection is closed
        # as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            return response.read()

    if not cache:
        return download()

    cache_dir = appdirs.user_cache_dir('glean_parser', 'mozilla')
    # Open the cache once for both the lookup and the store.
    with diskcache.Cache(cache_dir) as dc:
        if url in dc:
            return dc[url]

        contents = download()
        dc[url] = contents

    return contents
@functools.lru_cache(maxsize=1)
def _get_ping_schema(schema_url):
    """
    Fetches and parses the JSON ping schema at `schema_url`.

    The most recently requested schema is memoized in-process.
    """
    # The default schema URL is always fetched fresh; any other URL goes
    # through the on-disk cache.
    # NOTE(review): presumably because the default URL points at the moving
    # `dev` branch -- confirm.
    contents = util.fetch_remote_url(
        schema_url,
        cache=(schema_url != PING_SCHEMA_DEFAULT_URL)
    )
    return json.loads(contents)


def _validate_ping(ins, outs, schema_url):
    """
    Validates the JSON document read from the stream `ins` against the schema
    at `schema_url`, writing every validation error to the stream `outs`.

    :rtype: int 1 if any errors occurred, otherwise 0.
    """
    schema = _get_ping_schema(schema_url)

    resolver = util.get_null_resolver(schema)

    document = json.load(ins)

    validator_class = jsonschema.validators.validator_for(schema)
    validator = validator_class(schema, resolver=resolver)

    has_error = 0
    for error in validator.iter_errors(document):
        outs.write(str(error))
        # Terminate each report so consecutive errors do not run together.
        outs.write('\n')
        has_error = 1

    return has_error


def validate_ping(ins, outs=None, schema_url=PING_SCHEMA_DEFAULT_URL):
    """
    Validates the contents of a glean ping.

    :param ins: Input stream or file path to the ping contents to validate
    :param outs: Output stream to write errors to. (Defaults to stdout)
    :param schema_url: HTTP URL or local filesystem path to glean ping schema.
        Defaults to the current version of the schema in
        mozilla-pipeline-schemas.
    :rtype: int 1 if any errors occurred, otherwise 0.
    """
    # Only a wrapper created here is flushed and detached on the way out; a
    # stream supplied by the caller is left entirely under their control.
    owns_outs = outs is None
    if owns_outs:
        outs = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    try:
        if isinstance(ins, (str, bytes, Path)):
            # Decode ping files as UTF-8, matching how the command line tool
            # decodes stdin, instead of using the locale's encoding.
            with open(ins, 'r', encoding='utf-8') as fd:
                return _validate_ping(fd, outs, schema_url=schema_url)
        return _validate_ping(ins, outs, schema_url=schema_url)
    finally:
        if owns_outs:
            # Flush the errors, then detach so that finalizing the temporary
            # wrapper does not close the process-wide stdout buffer.
            outs.flush()
            outs.detach()