From 44444bb17c7183dc66529fd9cca9c997fd2acfe5 Mon Sep 17 00:00:00 2001 From: p1c2u Date: Tue, 16 Dec 2025 10:30:36 +0000 Subject: [PATCH] Cache compiled path parser --- Makefile | 3 + openapi_core/templating/paths/iterators.py | 22 ++- openapi_core/templating/paths/parsers.py | 33 ++++ openapi_core/templating/util.py | 38 ----- tests/benchmarks/bench_paths.py | 151 ++++++++++++++++++ ...mplating_util.py => test_paths_parsers.py} | 14 +- 6 files changed, 213 insertions(+), 48 deletions(-) create mode 100644 openapi_core/templating/paths/parsers.py delete mode 100644 openapi_core/templating/util.py create mode 100644 tests/benchmarks/bench_paths.py rename tests/unit/templating/{test_templating_util.py => test_paths_parsers.py} (71%) diff --git a/Makefile b/Makefile index 9386ec8f..56da44fa 100644 --- a/Makefile +++ b/Makefile @@ -38,3 +38,6 @@ docs-cleanup: @rm -rf docs_build cleanup: dist-cleanup test-cleanup + +bench-paths: + @PYTHONHASHSEED=0 python tests/benchmarks/bench_paths.py --paths 500 --templates-ratio 0.7 --lookups 2000 --output bench-paths.json \ No newline at end of file diff --git a/openapi_core/templating/paths/iterators.py b/openapi_core/templating/paths/iterators.py index f78d3342..66ee94ff 100644 --- a/openapi_core/templating/paths/iterators.py +++ b/openapi_core/templating/paths/iterators.py @@ -1,3 +1,4 @@ +from functools import lru_cache from typing import Iterator from typing import List from typing import Optional @@ -12,9 +13,8 @@ from openapi_core.templating.paths.datatypes import PathOperation from openapi_core.templating.paths.datatypes import PathOperationServer from openapi_core.templating.paths.exceptions import PathsNotFound +from openapi_core.templating.paths.parsers import PathParser from openapi_core.templating.paths.util import template_path_len -from openapi_core.templating.util import parse -from openapi_core.templating.util import search class SimplePathsIterator: @@ -52,7 +52,8 @@ def __call__( yield Path(path, path_result) # 
template path else: - result = search(path_pattern, name) + path_parser = self._get_path_parser(path_pattern) + result = path_parser.search(name) if result: path_result = TemplateResult(path_pattern, result.named) template_paths.append(Path(path, path_result)) @@ -60,6 +61,10 @@ def __call__( # Fewer variables -> more concrete path yield from sorted(template_paths, key=template_path_len) + @lru_cache(maxsize=4096) + def _get_path_parser(self, path_pattern: str) -> PathParser: + return PathParser(path_pattern, post_expression="$") + class SimpleOperationsIterator: def __call__( @@ -156,7 +161,10 @@ def __call__( ) # template path else: - result = parse(server["url"], server_url_pattern) + server_url_parser = self._get_server_url_parser( + server["url"] + ) + result = server_url_parser.parse(server_url_pattern) if result: server_result = TemplateResult( server["url"], result.named @@ -171,7 +179,7 @@ def __call__( # servers should'n end with tailing slash # but let's search for this too server_url_pattern += "/" - result = parse(server["url"], server_url_pattern) + result = server_url_parser.parse(server_url_pattern) if result: server_result = TemplateResult( server["url"], result.named @@ -183,3 +191,7 @@ def __call__( path_result, server_result, ) + + @lru_cache(maxsize=1024) + def _get_server_url_parser(self, server_url: str) -> PathParser: + return PathParser(server_url, pre_expression="^") diff --git a/openapi_core/templating/paths/parsers.py b/openapi_core/templating/paths/parsers.py new file mode 100644 index 00000000..c6234ab1 --- /dev/null +++ b/openapi_core/templating/paths/parsers.py @@ -0,0 +1,33 @@ +from typing import Any + +from parse import Parser + + +class PathParameter: + name = "PathParameter" + pattern = r"[^\/]*" + + def __call__(self, text: str) -> str: + return text + + +class PathParser(Parser): # type: ignore + + parse_path_parameter = PathParameter() + + def __init__( + self, pattern: str, pre_expression: str = "", post_expression: str = "" 
+ ) -> None: + extra_types = { + self.parse_path_parameter.name: self.parse_path_parameter + } + super().__init__(pattern, extra_types) + self._expression: str = ( + pre_expression + self._expression + post_expression + ) + + def _handle_field(self, field: str) -> Any: + # handle as path parameter field + field = field[1:-1] + path_parameter_field = "{%s:PathParameter}" % field + return super()._handle_field(path_parameter_field) diff --git a/openapi_core/templating/util.py b/openapi_core/templating/util.py deleted file mode 100644 index ef5dfa71..00000000 --- a/openapi_core/templating/util.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Any -from typing import Optional - -from parse import Match -from parse import Parser - - -class ExtendedParser(Parser): # type: ignore - def _handle_field(self, field: str) -> Any: - # handle as path parameter field - field = field[1:-1] - path_parameter_field = "{%s:PathParameter}" % field - return super()._handle_field(path_parameter_field) - - -class PathParameter: - name = "PathParameter" - pattern = r"[^\/]*" - - def __call__(self, text: str) -> str: - return text - - -parse_path_parameter = PathParameter() - - -def search(path_pattern: str, full_url_pattern: str) -> Optional[Match]: - extra_types = {parse_path_parameter.name: parse_path_parameter} - p = ExtendedParser(path_pattern, extra_types) - p._expression = p._expression + "$" - return p.search(full_url_pattern) - - -def parse(server_url: str, server_url_pattern: str) -> Match: - extra_types = {parse_path_parameter.name: parse_path_parameter} - p = ExtendedParser(server_url, extra_types) - p._expression = "^" + p._expression - return p.parse(server_url_pattern) diff --git a/tests/benchmarks/bench_paths.py b/tests/benchmarks/bench_paths.py new file mode 100644 index 00000000..c567a237 --- /dev/null +++ b/tests/benchmarks/bench_paths.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +import argparse +import gc +import json +import random +import statistics +import time 
+from dataclasses import dataclass +from typing import Any +from typing import Dict +from typing import List + +from jsonschema_path import SchemaPath + +from openapi_core.templating.paths.finders import APICallPathFinder + + +@dataclass(frozen=True) +class Result: + paths: int + templates_ratio: float + lookups: int + repeats: int + warmup: int + seconds: List[float] + + def as_dict(self) -> Dict[str, Any]: + return { + "paths": self.paths, + "templates_ratio": self.templates_ratio, + "lookups": self.lookups, + "repeats": self.repeats, + "warmup": self.warmup, + "seconds": self.seconds, + "median_s": statistics.median(self.seconds), + "mean_s": statistics.mean(self.seconds), + "stdev_s": statistics.pstdev(self.seconds), + "ops_per_sec_median": self.lookups + / statistics.median(self.seconds), + } + + +def build_spec(paths: int, templates_ratio: float) -> SchemaPath: + # Mix of exact and templated paths. + # Keep it minimal so we measure finder cost, not schema complexity. + tmpl = int(paths * templates_ratio) + exact = paths - tmpl + + paths_obj: Dict[str, Any] = {} + + # Exact paths (fast case) + for i in range(exact): + p = f"/resource/{i}/sub" + paths_obj[p] = {"get": {"responses": {"200": {"description": "ok"}}}} + + # Template paths (slow case) + for i in range(tmpl): + p = f"/resource/{i}" + "/{item_id}/sub/{sub_id}" + paths_obj[p] = {"get": {"responses": {"200": {"description": "ok"}}}} + + spec_dict = { + "openapi": "3.0.0", + "info": {"title": "bench", "version": "0"}, + "servers": [{"url": "http://example.com"}], + "paths": paths_obj, + } + return SchemaPath.from_dict(spec_dict) + + +def build_urls( + paths: int, templates_ratio: float, lookups: int, seed: int +) -> List[str]: + rnd = random.Random(seed) + tmpl = int(paths * templates_ratio) + exact = paths - tmpl + + urls: List[str] = [] + for _ in range(lookups): + # Choose a population at random, weighted by how many paths exist + if tmpl > 0 and (exact == 0 or rnd.random() < (tmpl / paths)): + i = 
rnd.randrange(tmpl) # matches template bucket + item_id = rnd.randrange(1_000_000) + sub_id = rnd.randrange(1_000_000) + urls.append( + f"http://example.com/resource/{i}/{item_id}/sub/{sub_id}" + ) + else: + i = rnd.randrange(exact) if exact > 0 else 0 + urls.append(f"http://example.com/resource/{i}/sub") + return urls + + +def run_once(finder: APICallPathFinder, urls: List[str]) -> float: + t0 = time.perf_counter() + for u in urls: + finder.find("get", u) + return time.perf_counter() - t0 + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--paths", type=int, default=2000) + ap.add_argument("--templates-ratio", type=float, default=0.6) + ap.add_argument("--lookups", type=int, default=100_000) + ap.add_argument("--repeats", type=int, default=7) + ap.add_argument("--warmup", type=int, default=2) + ap.add_argument("--seed", type=int, default=1) + ap.add_argument("--output", type=str, default="") + ap.add_argument("--no-gc", action="store_true") + args = ap.parse_args() + + spec = build_spec(args.paths, args.templates_ratio) + finder = APICallPathFinder(spec) + + urls = build_urls( + args.paths, args.templates_ratio, args.lookups, args.seed + ) + + if args.no_gc: + gc.disable() + + # Warmup (JIT-less, but warms caches, alloc patterns, etc.) 
+ for _ in range(args.warmup): + run_once(finder, urls) + + seconds: List[float] = [] + for _ in range(args.repeats): + seconds.append(run_once(finder, urls)) + + if args.no_gc: + gc.enable() + + result = Result( + paths=args.paths, + templates_ratio=args.templates_ratio, + lookups=args.lookups, + repeats=args.repeats, + warmup=args.warmup, + seconds=seconds, + ) + + payload = result.as_dict() + print(json.dumps(payload, indent=2, sort_keys=True)) + + if args.output: + with open(args.output, "w", encoding="utf-8") as f: + json.dump(payload, f, indent=2, sort_keys=True) + + +if __name__ == "__main__": + main() diff --git a/tests/unit/templating/test_templating_util.py b/tests/unit/templating/test_paths_parsers.py similarity index 71% rename from tests/unit/templating/test_templating_util.py rename to tests/unit/templating/test_paths_parsers.py index 815f6cb0..0331301c 100644 --- a/tests/unit/templating/test_templating_util.py +++ b/tests/unit/templating/test_paths_parsers.py @@ -1,14 +1,15 @@ import pytest -from openapi_core.templating.util import search +from openapi_core.templating.paths.parsers import PathParser class TestSearch: def test_endswith(self): path_pattern = "/{test}/test" + parser = PathParser(path_pattern, post_expression="$") full_url_pattern = "/test1/test/test2/test" - result = search(path_pattern, full_url_pattern) + result = parser.search(full_url_pattern) assert result.named == { "test": "test2", @@ -16,9 +17,10 @@ def test_endswith(self): def test_exact(self): path_pattern = "/{test}/test" + parser = PathParser(path_pattern, post_expression="$") full_url_pattern = "/test/test" - result = search(path_pattern, full_url_pattern) + result = parser.search(full_url_pattern) assert result.named == { "test": "test", @@ -33,9 +35,10 @@ def test_exact(self): ], ) def test_chars_valid(self, path_pattern, expected): + parser = PathParser(path_pattern, post_expression="$") full_url_pattern = "/test/test" - result = search(path_pattern, full_url_pattern) + 
result = parser.search(full_url_pattern) assert result.named == expected @@ -53,8 +56,9 @@ def test_chars_valid(self, path_pattern, expected): ], ) def test_special_chars_valid(self, path_pattern, expected): + parser = PathParser(path_pattern, post_expression="$") full_url_pattern = "/test/test" - result = search(path_pattern, full_url_pattern) + result = parser.search(full_url_pattern) assert result.named == expected