From ffc92626105bcb320947cd4c7fa10fc26b6f507e Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 11 Mar 2023 16:50:00 +0000 Subject: [PATCH 1/2] Fix link patterns extra matching against internal hashes --- lib/markdown2.py | 15 ++++++++++++++- .../link_patterns_hash_matching_issue287.html | 1 + .../link_patterns_hash_matching_issue287.opts | 7 +++++++ .../link_patterns_hash_matching_issue287.text | 1 + 4 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/link_patterns_hash_matching_issue287.html create mode 100644 test/tm-cases/link_patterns_hash_matching_issue287.opts create mode 100644 test/tm-cases/link_patterns_hash_matching_issue287.text diff --git a/lib/markdown2.py b/lib/markdown2.py index ddadb6b0..4583131a 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2241,7 +2241,7 @@ def _do_strike(self, text): def _do_underline(self, text): text = self._underline_re.sub(r"\1", text) return text - + _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S) def _do_tg_spoiler(self, text): text = self._tg_spoiler_re.sub(r"\1", text) @@ -2533,6 +2533,9 @@ def _do_link_patterns(self, text): for regex, repl in self.link_patterns: replacements = [] for match in regex.finditer(text): + if any(self._match_overlaps_substr(text, match, h) for h in link_from_hash): + continue + if hasattr(repl, "__call__"): href = repl(match) else: @@ -2637,6 +2640,16 @@ def _uniform_indent(self, text, indent, include_empty_lines=False): for line in text.splitlines(True) ) + @staticmethod + def _match_overlaps_substr(text, match, substr): + for instance in re.finditer(re.escape(substr), text): + start, end = instance.span() + if start <= match.start() <= end: + return True + if start <= match.end() <= end: + return True + return False + class MarkdownWithExtras(Markdown): """A markdowner class that enables most extras: diff --git a/test/tm-cases/link_patterns_hash_matching_issue287.html b/test/tm-cases/link_patterns_hash_matching_issue287.html new file mode 
100644 index 00000000..7cee86e2 --- /dev/null +++ b/test/tm-cases/link_patterns_hash_matching_issue287.html @@ -0,0 +1 @@ +

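<p>this is a test issue <a href="https://github.com/pyfa-org/Pyfa/issues/1234">#1234</a> with a test commit (<a href="https://github.com/pyfa-org/Pyfa/commit/addeddd">addeddd</a>) made by test <a href="https://github.com/username">@username</a> more text</p>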

diff --git a/test/tm-cases/link_patterns_hash_matching_issue287.opts b/test/tm-cases/link_patterns_hash_matching_issue287.opts new file mode 100644 index 00000000..21062d75 --- /dev/null +++ b/test/tm-cases/link_patterns_hash_matching_issue287.opts @@ -0,0 +1,7 @@ +{"extras": ["link-patterns"], + "link_patterns": [ + (re.compile("#(\d+)", re.I), r"https://github.com/pyfa-org/Pyfa/issues/\1"), + (re.compile("@(\w+)", re.I), r"https://github.com/\1"), + (re.compile("([0-9a-f]{6,40})", re.I), r"https://github.com/pyfa-org/Pyfa/commit/\1") + ] +} diff --git a/test/tm-cases/link_patterns_hash_matching_issue287.text b/test/tm-cases/link_patterns_hash_matching_issue287.text new file mode 100644 index 00000000..b0b5f4b9 --- /dev/null +++ b/test/tm-cases/link_patterns_hash_matching_issue287.text @@ -0,0 +1 @@ +this is a test issue #1234 with a test commit (addeddd) made by test @username more text From d7d6471ff578cfb21a88bb208f8b36202a1b54d7 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 11 Mar 2023 16:57:59 +0000 Subject: [PATCH 2/2] Update `CHANGES.md`. Also added docstring to `_match_overlaps_substr` method --- CHANGES.md | 1 + lib/markdown2.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 614d36fd..c16e8fef 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.9 (not yet released) - [pull #500] Add `` tag to html-classes extra +- [pull #501] Fix link patterns extra matching against internal hashes ## python-markdown2 2.4.8 diff --git a/lib/markdown2.py b/lib/markdown2.py index 4583131a..738f91e4 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2642,6 +2642,9 @@ def _uniform_indent(self, text, indent, include_empty_lines=False): @staticmethod def _match_overlaps_substr(text, match, substr): + ''' + Checks if a regex match overlaps with a substring in the given text. + ''' for instance in re.finditer(re.escape(substr), text): start, end = instance.span() if start <= match.start() <= end: