Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Cache reference lookups for subschemas #894

Merged
merged 4 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ v4.3.0

* Fix undesired fallback to brute force container uniqueness check on
certain input types (#893)
* Cache reference lookups for subschemas (#893)
* Use cached lookups for resolving fragments if the referent document is known (#893)
* Replace the ``Validator.evolve`` method with an equivalent class attribute
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an implementation detail; I think we can skip mentioning it.

And for the other two, I'd maybe just say something simpler like "resolving refs has had performance improvements" and have that cover the whole change; someone can look at the PR for details.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! :)

* Implement a PEP544 Protocol for validator classes (#890)

v4.2.1
Expand Down
46 changes: 34 additions & 12 deletions jsonschema/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ class Validator:
schema = attr.ib(repr=reprlib.repr)
resolver = attr.ib(default=None, repr=False)
format_checker = attr.ib(default=None)
evolve = attr.evolve

def __attrs_post_init__(self):
if self.resolver is None:
Expand All @@ -182,9 +183,6 @@ def check_schema(cls, schema):
for error in cls(cls.META_SCHEMA).iter_errors(schema):
raise exceptions.SchemaError.create_from(error)

# Thin wrapper: forwards this instance and the keyword arguments unchanged
# to ``attr.evolve`` and returns whatever it returns.
# NOTE(review): this page is a diff view with the +/- markers stripped; this
# method looks like the removed side of the change, superseded by the
# ``evolve = attr.evolve`` class attribute -- confirm against the merged file.
def evolve(self, **kwargs):
return attr.evolve(self, **kwargs)

def iter_errors(self, instance, _schema=None):
if _schema is not None:
warnings.warn(
Expand Down Expand Up @@ -757,6 +755,10 @@ def resolving(self, ref):
finally:
self.pop_scope()

# Collect everything ``self._finditem`` yields for ``key`` when walking
# ``self.referrer`` and return it as a list (materialized so the result can
# be iterated more than once).
# The call is memoized by ``lru_cache`` (presumably ``functools.lru_cache``;
# the import is not visible here), keyed on the ``(self, key)`` arguments.
# NOTE(review): a cache on an instance method keeps ``self`` referenced for
# as long as the entry lives, and a cached list will not reflect later
# changes to ``self.referrer`` -- confirm both are acceptable.
@lru_cache()
def _find_in_referrer(self, key):
return list(self._finditem(self.referrer, key))

def _finditem(self, schema, key):
values = deque([schema])
while values:
Expand All @@ -767,22 +769,35 @@ def _finditem(self, schema, key):
yield each
values.extendleft(each.values())

def resolve(self, ref):
"""
Resolve the given reference.
"""
url = self._urljoin_cache(self.resolution_scope, ref).rstrip("/")
# Return, as a list, everything ``self._finditem`` yields for the ``"$id"``
# key when walking ``self.referrer``.
# Memoized by ``lru_cache``; the only argument is ``self``, so the walk is
# done once per cached instance.
# NOTE(review): the cached list will not reflect later changes to
# ``self.referrer`` -- confirm the referrer is not mutated after first use.
@lru_cache()
def _find_subschemas(self):
return list(self._finditem(self.referrer, "$id"))

# Look for a subschema of the referrer whose ``$id`` identifies ``url``.
#
# Returns ``(url, subschema)`` for the first subschema whose ``$id``, joined
# against ``self.resolution_scope``, equals the non-fragment part of ``url``
# (trailing slashes ignored on both sides). When ``url`` carries a fragment,
# the returned subschema is the result of ``self.resolve_fragment`` on the
# match. Returns ``None`` when there are no ``$id``-bearing subschemas or none
# of them match.
#
# NOTE(review): the result is memoized on ``(self, url)`` only, yet it also
# reads ``self.resolution_scope``; if the scope differs between two calls
# with the same ``url``, a stale entry could be returned -- confirm.
@lru_cache()
def _find_in_subschemas(self, url):
subschemas = self._find_subschemas()
# Cheap exit: nothing in the referrer declares an ``$id``.
if not subschemas:
return None
# Split off the fragment (presumably ``urllib.parse.urldefrag``; the import
# is not visible here) so only the document part is compared below.
uri, fragment = urldefrag(url)
# NOTE(review): the two ``for`` headers below look like the removed/added
# pair of a diff whose +/- markers were lost; the second (iterating the
# cached ``subschemas`` list) appears to be the live one -- confirm.
for subschema in self._finditem(self.referrer, "$id"):
for subschema in subschemas:
target_uri = self._urljoin_cache(
self.resolution_scope, subschema["$id"],
)
if target_uri.rstrip("/") == uri.rstrip("/"):
if fragment:
subschema = self.resolve_fragment(subschema, fragment)
return url, subschema
return None

# Returns a 2-tuple: the URL the reference was resolved to, and the resolved
# value. A match from ``self._find_in_subschemas`` is preferred; otherwise the
# value comes from ``self._remote_cache``.
def resolve(self, ref):
"""
Resolve the given reference.
"""
# Join ``ref`` against the current resolution scope and drop any trailing
# slashes to get the URL used for both lookups below.
url = self._urljoin_cache(self.resolution_scope, ref).rstrip("/")

# First try the subschemas found in the referrer itself.
match = self._find_in_subschemas(url)
if match is not None:
return match

# No local match: fall back to ``self._remote_cache`` (presumably a cached
# remote-document lookup; its definition is not visible here).
return url, self._remote_cache(url)

Expand Down Expand Up @@ -821,12 +836,19 @@ def resolve_fragment(self, document, fragment):
if not fragment:
return document

if document is self.referrer:
find = self._find_in_referrer
else:

def find(key):
return self._finditem(document, key)

for keyword in ["$anchor", "$dynamicAnchor"]:
for subschema in self._finditem(document, keyword):
for subschema in find(keyword):
if fragment == subschema[keyword]:
return subschema
for keyword in ["id", "$id"]:
for subschema in self._finditem(document, keyword):
for subschema in find(keyword):
if "#" + fragment == subschema[keyword]:
return subschema

Expand Down