Skip to content
Merged
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ keywords = [
requires-python = ">=3.12"
dependencies = [
"jsonschema>=4.25.1",
"python-jsonpath>=2.0.1",
"types-jsonschema>=4.25.1.20250822",
]

Expand Down
3 changes: 3 additions & 0 deletions src/check_datapackage/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)
from check_datapackage.issue import Issue
from check_datapackage.read_json import read_json
from check_datapackage.rule import apply_rules


def check(
Expand Down Expand Up @@ -38,5 +39,7 @@ class for more details, especially about the default values.
_add_resource_recommendations(schema)

issues = _check_object_against_json_schema(descriptor, schema)
issues += apply_rules(config.rules, descriptor)
issues = exclude(issues, config.exclude)

return sorted(set(issues))
36 changes: 36 additions & 0 deletions src/check_datapackage/internals.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import re
from dataclasses import dataclass
from itertools import chain
from typing import Any, Callable, Iterable, Iterator, TypeVar

from jsonpath import JSONPathMatch, finditer
from jsonschema import Draft7Validator, FormatChecker, ValidationError

from check_datapackage.constants import (
Expand Down Expand Up @@ -135,6 +138,34 @@ def _get_full_json_path_from_error(error: ValidationError) -> str:
return error.json_path


@dataclass
class DescriptorField:
    """A field in the Data Package descriptor.

    Attributes:
        jsonpath (str): The direct JSON path to the field.
        value (Any): The value contained in the field. This is whatever
            object sits at `jsonpath` in the descriptor, so it is not
            restricted to strings.
    """

    jsonpath: str
    value: Any


def _get_fields_at_jsonpath(
    jsonpath: str, descriptor: dict[str, Any]
) -> list[DescriptorField]:
    """Collect every descriptor field selected by a JSON path.

    Args:
        jsonpath: The JSON path expression to evaluate against the descriptor.
        descriptor: The descriptor to search.

    Returns:
        One `DescriptorField` per match found by `finditer`, in the order the
        matches are produced.
    """
    return _map(finditer(jsonpath, descriptor), _create_descriptor_field)


def _create_descriptor_field(match: JSONPathMatch) -> DescriptorField:
    """Convert a JSON path match into a `DescriptorField`.

    The match's path is rewritten from quoted-bracket notation to dot
    notation, e.g. "$['resources'][0]['name']" becomes "$.resources[0].name".
    Numeric index segments such as "[0]" are left untouched.

    Args:
        match: The match to convert.

    Returns:
        A `DescriptorField` holding the rewritten path and the matched object.
    """
    # NOTE(review): plain string replacement; a property name that itself
    # contains "['" or "']" would be rewritten as well - confirm that such
    # names cannot occur in the descriptors being checked.
    dotted_path = match.path.replace("['", ".")
    dotted_path = dotted_path.replace("']", "")
    return DescriptorField(jsonpath=dotted_path, value=match.obj)


# Generic element types for the functional helpers in this module:
# `In` is the type of the items a helper consumes, `Out` the type of the
# items it produces.
In = TypeVar("In")
Out = TypeVar("Out")

Expand All @@ -145,3 +176,8 @@ def _map(x: Iterable[In], fn: Callable[[In], Out]) -> list[Out]:

def _filter(x: Iterable[In], fn: Callable[[In], bool]) -> list[In]:
return list(filter(fn, x))


def _flat_map(items: Iterable[In], fn: Callable[[In], Iterable[Out]]) -> list[Out]:
"""Maps and flattens the items by one level."""
return list(chain.from_iterable(map(fn, items)))
44 changes: 44 additions & 0 deletions src/check_datapackage/rule.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from dataclasses import dataclass
from typing import Any, Callable

from check_datapackage.internals import (
_filter,
_flat_map,
_get_fields_at_jsonpath,
_map,
)
from check_datapackage.issue import Issue


@dataclass
class Rule:
Expand Down Expand Up @@ -34,3 +42,39 @@ class Rule:
message: str
check: Callable[[Any], bool]
type: str = "custom"


def apply_rules(rules: list[Rule], descriptor: dict[str, Any]) -> list[Issue]:
    """Run every rule against the descriptor and gather the resulting issues.

    Args:
        rules: The rules to apply to the descriptor.
        descriptor: The descriptor to check.

    Returns:
        A flat list with the `Issue`s of all rules, grouped in the order the
        rules were given.
    """

    def issues_for(rule: Rule) -> list[Issue]:
        return _apply_rule(rule, descriptor)

    return _flat_map(rules, issues_for)


def _apply_rule(rule: Rule, descriptor: dict[str, Any]) -> list[Issue]:
    """Evaluate a single rule and report the fields that do not satisfy it.

    Args:
        rule: The rule to apply to the descriptor.
        descriptor: The descriptor to check.

    Returns:
        One `Issue` for each field matched by the rule's JSON path whose value
        makes `rule.check` return a falsy result. Each issue carries the
        field's own JSON path together with the rule's type and message.
    """

    def fails_check(field: Any) -> bool:
        return not rule.check(field.value)

    def to_issue(field: Any) -> Issue:
        return Issue(jsonpath=field.jsonpath, type=rule.type, message=rule.message)

    candidates = _get_fields_at_jsonpath(rule.jsonpath, descriptor)
    return _map(_filter(candidates, fails_check), to_issue)
27 changes: 27 additions & 0 deletions tests/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from check_datapackage.check import check
from check_datapackage.config import Config
from check_datapackage.examples import example_package_descriptor
from check_datapackage.exclude import Exclude
from tests.test_rule import lowercase_rule

# Without recommendations

Expand Down Expand Up @@ -168,3 +171,27 @@ def test_fails_descriptor_violating_recommendations():
"$.sources[0].title",
"$.resources[0].name",
}


def test_exclude_not_excluding_rule():
    """Excluding "required" issues must leave the custom rule's issue in place."""
    descriptor = example_package_descriptor()
    descriptor["name"] = "ALLCAPS"
    del descriptor["resources"]
    config = Config(
        rules=[lowercase_rule],
        exclude=[Exclude(type="required")],
    )

    issues = check(descriptor, config=config)

    # Exactly one issue is left, and it comes from the custom rule.
    assert [issue.type for issue in issues] == ["lowercase"]


def test_exclude_excluding_rule():
    """Excluding the custom rule's own type must suppress the issue it raises."""
    descriptor = example_package_descriptor()
    descriptor["name"] = "ALLCAPS"
    config = Config(
        rules=[lowercase_rule],
        exclude=[Exclude(type=lowercase_rule.type)],
    )

    remaining = check(descriptor, config=config)

    assert remaining == []
99 changes: 99 additions & 0 deletions tests/test_rule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from check_datapackage.check import check
from check_datapackage.config import Config
from check_datapackage.examples import (
example_package_descriptor,
example_resource_descriptor,
)
from check_datapackage.issue import Issue
from check_datapackage.rule import Rule

# Shared test rule: the package-level `name` must be lowercase.
lowercase_rule = Rule(
    type="lowercase",
    jsonpath="$.name",
    message="Name must be lowercase.",
    check=lambda value: value.islower(),
)
# Shared test rule: the wildcard path selects the `name` of every resource,
# and each one must start with "woolly".
resource_name_rule = Rule(
    type="resource-name",
    jsonpath="$.resources[*].name",
    message="Resource name must start with 'woolly'.",
    check=lambda value: value.startswith("woolly"),
)


def test_direct_jsonpath():
    """A rule targeting one specific field reports an issue at that same path."""
    descriptor = example_package_descriptor()
    descriptor["name"] = "ALLCAPS"

    issues = check(descriptor, config=Config(rules=[lowercase_rule]))

    expected_issue = Issue(
        jsonpath=lowercase_rule.jsonpath,
        type=lowercase_rule.type,
        message=lowercase_rule.message,
    )
    assert issues == [expected_issue]


def test_indirect_jsonpath():
    """A wildcard rule reports the concrete path of the field that failed."""
    descriptor = example_package_descriptor()
    resources = descriptor["resources"]
    resources.append(example_resource_descriptor())
    resources[1]["name"] = "not starting with woolly"

    issues = check(descriptor, config=Config(rules=[resource_name_rule]))

    # Only the renamed resource at index 1 is reported, by its direct path.
    expected_issue = Issue(
        jsonpath="$.resources[1].name",
        type=resource_name_rule.type,
        message=resource_name_rule.message,
    )
    assert issues == [expected_issue]


def test_multiple_rules():
    """Several rules can be applied at once; each contributes its own issue."""
    descriptor = example_package_descriptor()
    descriptor["name"] = "ALLCAPS"
    descriptor["resources"][0]["name"] = "not starting with woolly"
    both_rules = [lowercase_rule, resource_name_rule]

    issues = check(descriptor, config=Config(rules=both_rules))

    package_name_issue = Issue(
        jsonpath=lowercase_rule.jsonpath,
        type=lowercase_rule.type,
        message=lowercase_rule.message,
    )
    resource_name_issue = Issue(
        jsonpath="$.resources[0].name",
        type=resource_name_rule.type,
        message=resource_name_rule.message,
    )
    assert issues == [package_name_issue, resource_name_issue]


def test_rules_and_default_checks():
    """Custom rule issues are reported alongside those of the default checks."""
    descriptor = example_package_descriptor()
    descriptor["name"] = "ALLCAPS"
    del descriptor["resources"]

    issues = check(descriptor, config=Config(rules=[lowercase_rule]))

    issue_types = [issue.type for issue in issues]
    assert issue_types == ["lowercase", "required"]


def test_no_matching_jsonpath():
    """A rule whose JSON path matches nothing produces no issues.

    The rule's check fails for any value it is given, so an empty result shows
    that the check was never applied to a field.
    """
    always_failing_rule = Rule(
        type="always-fail",
        jsonpath="$.missing",
        message="This check always fails.",
        check=lambda value: False,
    )
    descriptor = example_package_descriptor()

    issues = check(descriptor, config=Config(rules=[always_failing_rule]))

    assert issues == []
1 change: 0 additions & 1 deletion tools/vulture-allowlist.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# ruff: noqa
# mypy: ignore-errors
rules # unused variable (src/check_datapackage/config.py:38)
version # unused variable (src/check_datapackage/config.py:40)
Loading