From 0a9a56b4f5ec147692363557705b34c7b133d995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20M=C3=BCller?= Date: Tue, 31 Jan 2023 15:26:53 +0100 Subject: [PATCH] Revert "Remove filename pattern caches (#2153)" This introduced a performance regession. While it is true that fnmatch already uses functools.lru_cache, that cache is limited to 256 on python 3.10 and older and we have over 1000 matching patterns, which means the cache is evicted entirely on every iteration. This reverts commit 951c894ab23537be90fbf3973d0345591e682371. --- pygments/formatters/__init__.py | 17 ++++++++++++++--- pygments/lexers/__init__.py | 21 ++++++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/pygments/formatters/__init__.py b/pygments/formatters/__init__.py index 58de5fe3c0..887015c8b9 100644 --- a/pygments/formatters/__init__.py +++ b/pygments/formatters/__init__.py @@ -8,9 +8,10 @@ :license: BSD, see LICENSE for details. """ +import re import sys import types -from fnmatch import fnmatch +import fnmatch from os.path import basename from pygments.formatters._mapping import FORMATTERS @@ -21,6 +22,16 @@ 'get_all_formatters', 'load_formatter_from_file'] + list(FORMATTERS) _formatter_cache = {} # classes by name +_pattern_cache = {} + + +def _fn_matches(fn, glob): + """Return whether the supplied file name fn matches pattern filename.""" + if glob not in _pattern_cache: + pattern = _pattern_cache[glob] = re.compile(fnmatch.translate(glob)) + return pattern.match(fn) + return _pattern_cache[glob].match(fn) + def _load_formatters(module_name): """Load a formatter (and all others in the module too).""" @@ -111,13 +122,13 @@ def get_formatter_for_filename(fn, **options): fn = basename(fn) for modname, name, _, filenames, _ in FORMATTERS.values(): for filename in filenames: - if fnmatch(fn, filename): + if _fn_matches(fn, filename): if name not in _formatter_cache: _load_formatters(modname) return _formatter_cache[name](**options) for cls in find_plugin_formatters(): for filename in cls.filenames: - if fnmatch(fn, filename): + if _fn_matches(fn, filename): return cls(**options) raise ClassNotFound("no formatter found for file name %r" % fn) diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py index 83be0e4891..79c4e9bf35 100644 --- a/pygments/lexers/__init__.py +++ b/pygments/lexers/__init__.py @@ -8,9 +8,10 @@ :license: BSD, see LICENSE for details. """ +import re import sys import types -from fnmatch import fnmatch +import fnmatch from os.path import basename from pygments.lexers._mapping import LEXERS @@ -27,6 +28,16 @@ 'guess_lexer', 'load_lexer_from_file'] + list(LEXERS) + list(COMPAT) _lexer_cache = {} +_pattern_cache = {} + + +def _fn_matches(fn, glob): + """Return whether the supplied file name fn matches pattern filename.""" + if glob not in _pattern_cache: + pattern = _pattern_cache[glob] = re.compile(fnmatch.translate(glob)) + return pattern.match(fn) + return _pattern_cache[glob].match(fn) + def _load_lexers(module_name): """Load a lexer (and all others in the module too).""" @@ -158,13 +169,13 @@ def find_lexer_class_for_filename(_fn, code=None): fn = basename(_fn) for modname, name, _, filenames, _ in LEXERS.values(): for filename in filenames: - if fnmatch(fn, filename): + if _fn_matches(fn, filename): if name not in _lexer_cache: _load_lexers(modname) matches.append((_lexer_cache[name], filename)) for cls in find_plugin_lexers(): for filename in cls.filenames: - if fnmatch(fn, filename): + if _fn_matches(fn, filename): matches.append((cls, filename)) if isinstance(code, bytes): @@ -251,11 +262,11 @@ def guess_lexer_for_filename(_fn, _text, **options): matching_lexers = set() for lexer in _iter_lexerclasses(): for filename in lexer.filenames: - if fnmatch(fn, filename): + if _fn_matches(fn, filename): matching_lexers.add(lexer) primary[lexer] = True for filename in lexer.alias_filenames: - if fnmatch(fn, filename): + if _fn_matches(fn, filename): matching_lexers.add(lexer) primary[lexer] = False if not matching_lexers: