Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@ Requirements
The following library requirements are installed automatically when glean_parser
is installed by `pip`.

- appdirs
- Click
- PyYAML
- jsonschema
- diskcache
- inflection
- isodate
- Jinja2
- jsonschema
- PyYAML

Usage
-----
Expand All @@ -37,6 +40,12 @@ Read in `metrics.yaml`, translate to kotlin format, and output to `output_dir`:

$ glean_parser translate -o output_dir -f kotlin metrics.yaml

Check a glean ping against the ping schema:

.. code-block:: console

$ glean_parser check < ping.json

Credits
-------

Expand Down
32 changes: 32 additions & 0 deletions glean_parser/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@

"""Console script for glean_parser."""

import io
from pathlib import Path
import sys

import click

from . import translate as mod_translate
from . import validate_ping


@click.command()
Expand Down Expand Up @@ -55,13 +57,43 @@ def translate(input, format, output):
)


@click.command()
@click.option(
    '--schema',
    '-s',
    type=str,
    default=validate_ping.PING_SCHEMA_DEFAULT_URL,
    nargs=1,
    required=False,
    help=(
        "HTTP url or file path to glean ping schema. "
        "If remote, will cache to disk."
    )
)
def check(schema):
    """
    Validate the contents of a glean ping.

    The ping contents are read from stdin, and the validation errors are
    written to stdout.
    """
    # Wrap the raw byte streams so that both the ping that is read and the
    # errors that are written are handled as UTF-8 text.
    exit_status = validate_ping.validate_ping(
        io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'),
        io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'),
        schema_url=schema
    )

    # The validation result (0 or 1) becomes the process exit status.
    sys.exit(exit_status)


@click.group()
def main(args=None):
    """Command line utility for glean_parser."""
    # Intentionally empty: the group only dispatches to its subcommands.


# Attach the subcommands to the top-level command group.
main.add_command(translate)
main.add_command(check)


if __name__ == "__main__":
Expand Down
10 changes: 1 addition & 9 deletions glean_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,7 @@ def _pprint_validation_error(error):
@functools.lru_cache(maxsize=1)
def _get_metrics_schema():
schema = util.load_yaml_or_json(SCHEMAS_DIR / 'metrics.1-0-0.schema.yaml')

class NullResolver(jsonschema.RefResolver):
def resolve_remote(self, uri):
if uri in self.store:
return self.store[uri]
if uri == '':
return self.referrer

resolver = NullResolver.from_schema(schema)
resolver = util.get_null_resolver(schema)

validator_class = jsonschema.validators.validator_for(schema)
validator_class.check_schema(schema)
Expand Down
47 changes: 47 additions & 0 deletions glean_parser/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
import functools
import json
import sys
import urllib.request

import appdirs
import diskcache
import inflection
import jinja2
import jsonschema
import yaml


Expand Down Expand Up @@ -96,3 +100,46 @@ def g(*args, **kwargs):
return ValueKeepingGenerator(f(*args, **kwargs))

return g


def get_null_resolver(schema):
    """
    Build a reference resolver for `schema` whose remote lookups do nothing.

    Remote references are only answered from what the resolver already
    holds, which lets us handle the moz: URLs in our schemas.

    :param schema: The loaded schema the resolver is created from.
    :return: A `jsonschema.RefResolver` subclass instance for `schema`.
    """
    class NullResolver(jsonschema.RefResolver):
        def resolve_remote(self, uri):
            # Anything already registered in the store wins.
            try:
                return self.store[uri]
            except KeyError:
                pass

            # An empty URI refers back to the schema itself; every other
            # unknown URI resolves to nothing.
            return self.referrer if uri == '' else None

    return NullResolver.from_schema(schema)


def fetch_remote_url(url, cache=True):
    """
    Fetches the contents from an HTTP(S) url or local file path, and
    optionally caches it to disk.

    :param url: An ``http://`` or ``https://`` URL, or a local filesystem
        path. Anything without one of those two prefixes is opened as a
        local file.
    :param cache: When true, remote contents are looked up in, and stored
        to, an on-disk cache keyed by `url`. Ignored for local files.
    :return: For local files, the contents as `str` (decoded as UTF-8).
        For remote URLs, whatever the HTTP response body read returns,
        i.e. `bytes` on a fresh download.
    """
    # Require a full scheme prefix: a bare 'http' check would also match
    # local files such as 'http_schema.json'.
    is_http = url.startswith(('http://', 'https://'))

    if not is_http:
        with open(url, 'r', encoding='utf-8') as fd:
            return fd.read()

    if cache:
        cache_dir = appdirs.user_cache_dir('glean_parser', 'mozilla')
        with diskcache.Cache(cache_dir) as dc:
            if url in dc:
                return dc[url]

    # Use the response as a context manager so the connection is closed
    # deterministically instead of being left to the garbage collector.
    with urllib.request.urlopen(url) as response:
        contents = response.read()

    if cache:
        with diskcache.Cache(cache_dir) as dc:
            dc[url] = contents

    return contents
78 changes: 78 additions & 0 deletions glean_parser/validate_ping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Validates the contents of a glean ping against the schema.
"""

import functools
import io
import json
from pathlib import Path
import sys

import jsonschema

from . import util


# Directory that contains this module.
ROOT_DIR = Path(__file__).parent
# The 'schemas' directory located next to this module.
SCHEMAS_DIR = ROOT_DIR / 'schemas'


# Schema used when the caller does not supply one: the glean baseline ping
# schema on the 'dev' branch of mozilla-pipeline-schemas.
PING_SCHEMA_DEFAULT_URL = (
    'https://raw.githubusercontent.com/mozilla-services/'
    'mozilla-pipeline-schemas/dev/schemas/glean/baseline/'
    'baseline.1.schema.json'
)


@functools.lru_cache(maxsize=1)
def _get_ping_schema(schema_url):
    """
    Load and parse the JSON ping schema found at `schema_url`.

    The most recently requested schema is memoized in-process. On-disk
    caching is requested for every URL except the default one.

    :param schema_url: HTTP URL or local file path of the schema.
    :return: The schema parsed from JSON.
    """
    # NOTE(review): the default URL is presumably excluded from the disk
    # cache because it tracks a moving branch -- confirm.
    use_disk_cache = schema_url != PING_SCHEMA_DEFAULT_URL
    raw_schema = util.fetch_remote_url(schema_url, cache=use_disk_cache)
    return json.loads(raw_schema)


def _validate_ping(ins, outs, schema_url):
    """
    Validate one ping document against the schema at `schema_url`.

    :param ins: Readable stream containing the ping as JSON.
    :param outs: Writable text stream. Each validation error is written to
        it followed by a newline.
    :param schema_url: HTTP URL or local filesystem path of the schema.
    :return: 1 if at least one validation error was reported, otherwise 0.
    """
    schema = _get_ping_schema(schema_url)

    resolver = util.get_null_resolver(schema)

    document = json.load(ins)

    # Pick the validator implementation that matches the schema itself.
    validator_class = jsonschema.validators.validator_for(schema)
    validator = validator_class(schema, resolver=resolver)

    has_error = 0
    for error in validator.iter_errors(document):
        # Terminate every report with a newline so that consecutive errors
        # do not run into each other in the output.
        outs.write(str(error))
        outs.write('\n')
        has_error = 1

    return has_error


def validate_ping(ins, outs=None, schema_url=PING_SCHEMA_DEFAULT_URL):
    """
    Validates the contents of a glean ping.

    :param ins: Input stream or file path to the ping contents to validate.
        A `str`, `bytes` or `Path` value is treated as a path and opened
        as UTF-8 text; anything else is used as an already-open stream.
    :param outs: Output stream to write errors to. (Defaults to stdout)
    :param schema_url: HTTP URL or local filesystem path to glean ping schema.
        Defaults to the current version of the schema in
        mozilla-pipeline-schemas.
    :rtype: int 1 if any errors occurred, otherwise 0.
    """
    if outs is None:
        outs = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    if isinstance(ins, (str, bytes, Path)):
        # Read the ping as UTF-8, matching the encoding used for the output
        # stream above, rather than relying on the locale's default.
        with open(ins, 'r', encoding='utf-8') as fd:
            return _validate_ping(fd, outs, schema_url=schema_url)

    return _validate_ping(ins, outs, schema_url=schema_url)
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@
'jsonschema>=3.0.0a3',
'inflection>=0.3.1',
'Jinja2>=2.10',
'isodate>=0.6.0'
'isodate>=0.6.0',
'diskcache>=3.1.0',
'appdirs>=1.4.3'
]

setup_requirements = ['pytest-runner', ]
Expand Down
49 changes: 49 additions & 0 deletions tests/test_validate_ping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-

# Any copyright is dedicated to the Public Domain.
# http://creativecommons.org/publicdomain/zero/1.0/

import io
import json

from glean_parser import validate_ping


def test_validate_ping():
content = {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're moving the schema off this repo, let's keep this shorter: let's use a small JSON and provide a sample schema stub.

"experiments": {
"experiment2": {
"branch": "branch_b",
"extra": {
"key": "value"
}
}
},
"metrics": {
"string": {
"telemetry.string_metric": "foo"
}
},
"ping_info": {
"ping_type": "metrics",
"telemetry_sdk_build": "0.32.0",
"seq": 0,
"app_build": "test-placeholder",
"client_id": "900b6d8c-34d2-44d4-926d-83bde790474f",
"start_time": "2018-11-19T16:19-05:00",
"end_time": "2018-11-19T16:19-05:00"
}
}

input = io.StringIO(json.dumps(content))
output = io.StringIO()

schema_url = (
'https://raw.githubusercontent.com/mozilla-services/'
'mozilla-pipeline-schemas/3a15121c582ef0cffe430da024a5bf11b7c48740/'
'schemas/glean/baseline/baseline.1.schema.json'
)

assert validate_ping.validate_ping(
input, output, schema_url=schema_url
) == 0