Skip to content

Commit

Permalink
perf: Avoid over-populating operation cache
Browse files Browse the repository at this point in the history
  • Loading branch information
Stranger6667 committed May 20, 2024
1 parent 7c21f46 commit 02276c6
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 28 deletions.
30 changes: 17 additions & 13 deletions src/schemathesis/specs/openapi/_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,21 @@ def get_definition_by_id(self, operation_id: str) -> OperationCacheEntry:
# TODO: Avoid KeyError in the future
return self._id_to_definition[operation_id]

def insert_operation_by_id(self, operation_id: str, operation: APIOperation) -> None:
"""Insert a new operation into cache by ID."""
self._id_to_operation[operation_id] = self._append_operation(operation)

def insert_operation_by_reference(self, reference: str, operation: APIOperation) -> None:
"""Insert a new operation into cache by reference."""
self._reference_to_operation[reference] = self._append_operation(operation)

def insert_operation_by_traversal_key(self, scope: str, path: str, method: str, operation: APIOperation) -> None:
"""Insert a new operation into cache by traversal key."""
self._traversal_key_to_operation[(scope, path, method)] = self._append_operation(operation)
def insert_operation(
self,
operation: APIOperation,
*,
traversal_key: TraversalKey,
operation_id: str | None = None,
reference: str | None = None,
) -> None:
"""Insert a new operation into cache by one or multiple keys."""
idx = self._append_operation(operation)
self._traversal_key_to_operation[traversal_key] = idx
if operation_id is not None:
self._id_to_operation[operation_id] = idx
if reference is not None:
self._reference_to_operation[reference] = idx

def get_operation_by_id(self, operation_id: str) -> APIOperation | None:
"""Get an operation by its ID."""
Expand All @@ -105,9 +109,9 @@ def get_operation_by_reference(self, reference: str) -> APIOperation | None:
return self._operations[idx]
return None

def get_operation_by_traversal_key(self, scope: str, path: str, method: str) -> APIOperation | None:
def get_operation_by_traversal_key(self, key: TraversalKey) -> APIOperation | None:
"""Get an operation by its traverse key."""
idx = self._traversal_key_to_operation.get((scope, path, method))
idx = self._traversal_key_to_operation.get(key)
if idx is not None:
return self._operations[idx]
return None
Expand Down
18 changes: 9 additions & 9 deletions src/schemathesis/specs/openapi/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,15 +425,15 @@ def get_operation_by_id(self, operation_id: str) -> APIOperation:
except KeyError as exc:
matches = get_close_matches(operation_id, cache.known_operation_ids)
self._on_missing_operation(operation_id, exc, matches)
# It could'be been already accessed in a different place
instance = cache.get_operation_by_traversal_key(entry.scope, entry.path, entry.method)
# It could've been already accessed in a different place
traversal_key = (entry.scope, entry.path, entry.method)
instance = cache.get_operation_by_traversal_key(traversal_key)
if instance is not None:
return instance
resolved = self._resolve_operation(entry.operation)
parameters = self._collect_operation_parameters(entry.path_item, resolved)
initialized = self.make_operation(entry.path, entry.method, parameters, entry.operation, resolved, entry.scope)
cache.insert_operation_by_traversal_key(entry.scope, entry.path, entry.method, initialized)
cache.insert_operation_by_id(operation_id, initialized)
cache.insert_operation(initialized, traversal_key=traversal_key, operation_id=operation_id)
return initialized

def _populate_operation_id_cache(self, cache: OperationCache) -> None:
Expand Down Expand Up @@ -474,16 +474,15 @@ def get_operation_by_reference(self, reference: str) -> APIOperation:
path = path.replace("~1", "/").replace("~0", "~")
# Check the traversal cache as it could've been populated in other places
traversal_key = (self.resolver.resolution_scope, path, method)
cached = cache.get_operation_by_traversal_key(*traversal_key)
cached = cache.get_operation_by_traversal_key(traversal_key)
if cached is not None:
return cached
resolved = self._resolve_operation(operation)
parent_ref, _ = reference.rsplit("/", maxsplit=1)
_, path_item = self.resolver.resolve(parent_ref)
parameters = self._collect_operation_parameters(path_item, resolved)
initialized = self.make_operation(path, method, parameters, operation, resolved, scope)
cache.insert_operation_by_traversal_key(*traversal_key, initialized)
cache.insert_operation_by_reference(reference, initialized)
cache.insert_operation(initialized, traversal_key=traversal_key, reference=reference)
return initialized

def get_case_strategy(
Expand Down Expand Up @@ -834,7 +833,8 @@ def _init_operation(self, method: str) -> APIOperation:
cache = schema._operation_cache
path = self._path
scope = self._scope
cached = cache.get_operation_by_traversal_key(scope, path, method)
traversal_key = (scope, path, method)
cached = cache.get_operation_by_traversal_key(traversal_key)
if cached is not None:
return cached
schema.resolver.push_scope(scope)
Expand All @@ -844,7 +844,7 @@ def _init_operation(self, method: str) -> APIOperation:
schema.resolver.pop_scope()
parameters = schema._collect_operation_parameters(self._path_item, resolved)
initialized = schema.make_operation(path, method, parameters, operation, resolved, scope)
cache.insert_operation_by_traversal_key(scope, path, method, initialized)
cache.insert_operation(initialized, traversal_key=traversal_key, operation_id=resolved.get("operationId"))
return initialized

def __getitem__(self, item: str) -> APIOperation:
Expand Down
19 changes: 13 additions & 6 deletions test/specs/openapi/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,31 +33,38 @@ def test_get_operation_via_remote_reference(openapi_version, schema_url):
}


@pytest.mark.openapi_version("3.0")
@pytest.mark.operations("get_user", "update_user")
def test_operation_cache_sharing(mocker, schema_url):
# When the same operation is accessed via different methods
# The second access should use cache

def setup_mock(schema):
mocker.patch.object(
schema._operation_cache, "insert_operation_by_traversal_key", side_effect=ValueError("Not cached")
)
def setup_mock(schema, key):
return mocker.patch.object(schema._operation_cache, key, side_effect=ValueError("Not cached"))

reference = f"{schema_url}#/paths/~1users~1{{user_id}}/patch"
operation_id = "updateUser"

schema = schemathesis.from_uri(schema_url)
first = schema.get_operation_by_reference(reference)
# After accessing by reference, there should not be an attempt to insert it again
setup_mock(schema)
setup_mock(schema, "insert_operation")
second = schema.get_operation_by_id(operation_id)
assert first is second
# And the other way around
schema = schemathesis.from_uri(schema_url)
first = schema.get_operation_by_id(operation_id)
setup_mock(schema)
second = schema.get_operation_by_reference(reference)
assert first is second
# And the cache has just a single entry
assert len(schema._operation_cache._operations) == 1
# Direct access should also add an entry to the "by-id" cache
first = schema["/users/{user_id}"]["GET"]
# It should be taken from the "by-id" cache
setup_mock(schema, "get_operation_by_traversal_key")
second = schema.get_operation_by_id("getUser")
assert first is second
assert len(schema._operation_cache._operations) == 2


SINGLE_METHOD_PATHS = {
Expand Down

0 comments on commit 02276c6

Please sign in to comment.