Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6045597
feat: :sparkles: add exclusion by JSON path target
lwjohnst86 Sep 29, 2025
712252b
docs: :memo: small clarifications to docstring
lwjohnst86 Sep 29, 2025
aa5f972
build: :arrow_up: upgrade packages in lock file
lwjohnst86 Sep 29, 2025
a51df83
refactor: :recycle: compare literal strings, no regex used
lwjohnst86 Sep 29, 2025
f9d36a9
build: :heavy_plus_sign: add python-jsonpath dependency
lwjohnst86 Oct 1, 2025
6d7f387
build: :bento: rebuild lock file
lwjohnst86 Oct 1, 2025
8ad51cd
feat: :sparkles: incorporate JSONPath querying for exclusions
lwjohnst86 Oct 1, 2025
b505018
test: :white_check_mark: update tests with JSONPath queries
lwjohnst86 Oct 1, 2025
60ec45a
docs: :memo: add back docstring
lwjohnst86 Oct 1, 2025
0995e77
Merge branch 'main' of https://github.com/seedcase-project/check-data…
lwjohnst86 Oct 1, 2025
b5f1e49
fix: :pencil2: fix naming to `jsonpath`
lwjohnst86 Oct 1, 2025
09ddea6
refactor: :recycle: move maps/flat_map into internals
lwjohnst86 Oct 1, 2025
050cc51
revert: :rewind: use previous code for exclude types
lwjohnst86 Oct 1, 2025
7ae6733
refactor: :recycle: rename to match other function
lwjohnst86 Oct 1, 2025
e8c6d20
Merge branch 'main' of https://github.com/seedcase-project/check-data…
lwjohnst86 Oct 6, 2025
fce93f7
refactor: simplify with `replace()`
lwjohnst86 Oct 6, 2025
54756dc
refactor: :construction: trying to do both jsonpath and type
lwjohnst86 Oct 6, 2025
216fa85
Merge branch 'feat/add-target-exclusion' of https://github.com/seedca…
lwjohnst86 Oct 6, 2025
6885395
Merge branch 'main' of https://github.com/seedcase-project/check-data…
lwjohnst86 Oct 9, 2025
ad14f7a
refactor: :recycle: implement AND for excludes
lwjohnst86 Oct 9, 2025
4f10227
fix: :fire: remove unused functions
lwjohnst86 Oct 9, 2025
4af1d8e
build: :arrow_up: updated lock file
lwjohnst86 Oct 9, 2025
02c62ee
Merge branch 'main' of https://github.com/seedcase-project/check-data…
lwjohnst86 Oct 9, 2025
a14cdf8
test: :pencil2: not sure why there were 4 issues before, but its only 3
lwjohnst86 Oct 9, 2025
3f99030
docs: :memo: update `Exclude` docstrings
lwjohnst86 Oct 10, 2025
1d56636
refactor: :pencil2: improvements from review
lwjohnst86 Oct 14, 2025
f63ba41
Merge branch 'main' of https://github.com/seedcase-project/check-data…
lwjohnst86 Oct 14, 2025
a3fccfc
test: :white_check_mark: correctly trigger AND exclude logic
lwjohnst86 Oct 14, 2025
235b7c2
docs: :memo: comment about fixing mypy error
lwjohnst86 Oct 14, 2025
73f03b3
build: :arrow_up: updated lock file
lwjohnst86 Oct 14, 2025
6c36fae
refactor: :recycle: remove `if None` condition
lwjohnst86 Oct 14, 2025
968d722
refactor: :recycle: match `_same_type` with `_same_jsonpath`
lwjohnst86 Oct 14, 2025
b49014f
refactor: :recycle: use `==`, not `in`
lwjohnst86 Oct 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/check_datapackage/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class for more details, especially about the default values.

issues = _check_object_against_json_schema(descriptor, schema)
issues += apply_rules(config.rules, descriptor)
issues = exclude(issues, config.exclude)
issues = exclude(issues, config.exclude, descriptor)

return sorted(set(issues))

Expand Down
98 changes: 57 additions & 41 deletions src/check_datapackage/exclude.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,30 @@
from dataclasses import dataclass

from check_datapackage.internals import _filter, _map
from typing import Any, Optional

from check_datapackage.internals import (
DescriptorField,
_filter,
_get_fields_at_jsonpath,
_map,
)
from check_datapackage.issue import Issue


@dataclass
class Exclude:
"""Exclude issues when checking a Data Package descriptor.
r"""Exclude issues when checking a Data Package descriptor.

When both `jsonpath` and `type` are provided, an issue has to match both to be
excluded.
When you use both `jsonpath` and `type` in the same `Exclude`, only issues that
match *both* will be excluded, meaning it is an `AND` logic. If you want `OR` logic,
use multiple `Exclude` objects in the `Config`.

Attributes:
jsonpath (str | None): [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
to the field or fields in the input object where issues should be ignored,
e.g., `$.resources[*].name`. Needs to point to the location in the
descriptor of the issue to ignore. If not provided, issues of the given
`type` will be excluded for all fields.
type (str | None): The type of the issue to ignore (e.g., "required",
"pattern", or "format"). If not provided, all types of issues will be
ignored for the given `jsonpath`.
jsonpath (Optional[str]): [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
to the field or fields in the input object where issues should be ignored.
Uses JSON path syntax for queries, e.g., `$.resources[0].name`, to ignore
issues related to that path.
type (Optional[str]): The type of the issue to ignore (e.g., "required", "type",
"pattern", or "format").

Examples:
```{python}
Expand All @@ -28,45 +33,56 @@ class Exclude:
exclude_required = cdp.Exclude(type="required")
exclude_name = cdp.Exclude(jsonpath="$.name")
exclude_desc_required = cdp.Exclude(
type="required", jsonpath="$.resources[*].description"
type="required",
jsonpath="$.resources[*].description"
)
```
"""

jsonpath: str | None = None
type: str | None = None
jsonpath: Optional[str] = None
type: Optional[str] = None


def exclude(issues: list[Issue], excludes: list[Exclude]) -> list[Issue]:
"""Keep only issues that don't match an exclusion rule.
def exclude(
    issues: list[Issue], excludes: list[Exclude], descriptor: dict[str, Any]
) -> list[Issue]:
    """Drop every issue that is matched by at least one exclusion rule.

    Args:
        issues: The issues to filter.
        excludes: The exclusion rules each issue is tested against.
        descriptor: The descriptor that was checked; it is handed on to the
            matching helper together with each issue and the rules.

    Returns:
        The issues for which no exclusion rule matched.
    """

    def _is_kept(issue: Issue) -> bool:
        return not _get_any_matches(issue, excludes, descriptor)

    return _filter(issues, _is_kept)

Args:
issues: The issues to filter.
excludes: The exclusion rules to apply to the issues.

Returns:
The issues that are kept after applying the exclusion rules.
"""
# kept_issues = _filter(
# issues,
# lambda issue: _drop_any_jsonpath(issue, excludes)
# )
kept_issues: list[Issue] = _drop_any_matching_types(issues, excludes)
return kept_issues
def _get_any_matches(
    issue: Issue, excludes: list[Exclude], descriptor: dict[str, Any]
) -> bool:
    """Report whether at least one of the exclusion rules matches the issue."""

    def _matches_rule(rule: Exclude) -> bool:
        return _get_matches(issue, rule, descriptor)

    return any(_map(excludes, _matches_rule))


def _drop_any_matching_types(
issues: list[Issue], excludes: list[Exclude]
) -> list[Issue]:
return _filter(issues, lambda issue: not _any_matching_types(issue, excludes))
def _get_matches(issue: Issue, exclude: Exclude, descriptor: dict[str, Any]) -> bool:
    """Check whether an issue satisfies every criterion set on one exclusion rule.

    Criteria combine with AND logic: when both `jsonpath` and `type` are set on
    the rule, the issue has to match both.

    Args:
        issue: The issue to test.
        exclude: The exclusion rule holding the optional `jsonpath` and `type`
            criteria.
        descriptor: The descriptor the rule's JSON path is resolved against.

    Returns:
        `True` if the rule sets at least one criterion and the issue matches
        all criteria that are set, otherwise `False`.
    """
    matches: list[bool] = []

    if exclude.jsonpath:
        matches.append(_same_jsonpath(issue, exclude.jsonpath, descriptor))

    if exclude.type:
        matches.append(_same_type(issue, exclude.type))

    # `all([])` is `True`, so a rule without any usable criterion (`None` or an
    # empty string for both fields) has to be rejected explicitly; otherwise it
    # would match, and therefore exclude, every single issue.
    return bool(matches) and all(matches)


def _same_jsonpath(issue: Issue, jsonpath: str, descriptor: dict[str, Any]) -> bool:
    """Check whether the issue sits at a location selected by the JSON path.

    Args:
        issue: The issue whose `jsonpath` attribute is compared.
        jsonpath: The JSON path query taken from the exclusion rule.
        descriptor: The descriptor the query is resolved against.

    Returns:
        `True` if `issue.jsonpath` equals the `jsonpath` of any field returned
        by `_get_fields_at_jsonpath` for the query, otherwise `False`.
    """
    fields: list[DescriptorField] = _get_fields_at_jsonpath(jsonpath, descriptor)
    jsonpaths: list[str] = _map(fields, lambda field: field.jsonpath)
    return issue.jsonpath in jsonpaths


def _same_type(issue: Issue, exclude: Exclude) -> bool:
return exclude.type == issue.type
def _same_type(issue: Issue, type: str) -> bool:
    """Report whether the issue's type equals the given type string."""
    return issue.type == type
100 changes: 100 additions & 0 deletions tests/test_exclude.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import Any

from pytest import mark

from check_datapackage.check import check
from check_datapackage.config import Config
from check_datapackage.examples import example_package_descriptor
from check_datapackage.exclude import Exclude


Expand Down Expand Up @@ -75,3 +78,100 @@ def test_exclude_multiple_types():
issues = check(descriptor, config=config)

assert len(issues) == 0


@mark.parametrize(
    "jsonpath, num_issues",
    [
        ("$", 3),
        ("..*", 0),
        ("$.created", 2),
        ("created", 2),
        ("$.contributors[*].path", 2),
        ("$.contributors[0].path", 2),
        ("$..path", 1),
        ("contributors[0].path", 2),
        ("contributors[*].path", 2),
        ("..resources[*]", 3),
        ("..resources", 3),
        ("$.resources[*]", 3),
        ("$.resources[0]", 3),
        ("$.resources[*].path", 2),
        ("$.resources[0].*", 2),
        ("$.resources[0].path", 2),
    ],
)
def test_exclude_jsonpath(jsonpath: str, num_issues: int) -> None:
    """Excluding by JSON path alone leaves the expected number of issues."""
    descriptor = example_package_descriptor()
    # Three fields are given bad values. The expected counts above imply one
    # issue per field (3 in total when the path matches none of them).
    descriptor["created"] = "20240614"
    descriptor["resources"][0]["path"] = "/a/bad/path"
    descriptor.update({"contributors": [{"path": "/a/bad/path"}]})

    config = Config(exclude=[Exclude(jsonpath=jsonpath)])
    issues = check(descriptor, config=config)

    assert len(issues) == num_issues


def test_exclude_jsonpath_multiple() -> None:
    """Several JSON-path-only rules each exclude the issues at their own path."""
    descriptor = example_package_descriptor()
    descriptor["created"] = "20240614"
    descriptor.update({"contributors": [{"path": "/a/bad/path"}]})

    exclude = [
        Exclude(jsonpath="$.contributors[0].path"),
        Exclude(jsonpath="$.created"),
    ]
    config = Config(exclude=exclude)
    issues = check(descriptor, config=config)

    assert len(issues) == 0


def test_exclude_jsonpath_and_type() -> None:
    """A rule with both `jsonpath` and `type` only excludes issues matching both."""
    descriptor = example_package_descriptor()
    descriptor["contributors"] = [{"path": "/a/bad/path"}, {"path": "/a/bad/path"}]

    exclude = [
        Exclude(jsonpath="$.contributors[0].path", type="pattern"),
    ]
    config = Config(exclude=exclude)
    issues = check(descriptor, config=config)

    # Only the first contributor's path is targeted by the rule.
    assert len(issues) == 1


def test_exclude_jsonpath_and_type_non_overlapping() -> None:
    """Separate `jsonpath` and `type` rules are applied independently (OR)."""
    descriptor = example_package_descriptor()
    # There should be two issues
    descriptor["created"] = "20240614"
    descriptor.update({"contributors": [{"path": "/a/bad/path"}]})

    exclude = [
        Exclude(jsonpath="$.contributors[0].path"),
        Exclude(type="pattern"),
    ]
    config = Config(exclude=exclude)
    issues = check(descriptor, config=config)

    # For the created field
    assert len(issues) == 1


def test_exclude_jsonpath_resources() -> None:
    """Exclude by jsonpath for resources."""
    properties: dict[str, Any] = {
        "name": "woolly-dormice",
        "title": "Hibernation Physiology of the Woolly Dormouse: A Scoping Review.",
        "description": "",
        "id": "123-abc-123",
        "created": "2014-05-14T05:00:01+00:00",
        "version": "1.0.0",
        "licenses": [{"name": "odc-pddl"}],
        "resources": "this is a string",  # should be an array
    }
    config = Config(exclude=[Exclude(jsonpath="$.resources")])
    issues = check(properties, config=config)

    assert len(issues) == 0
Loading