From fc53f9ed3a825e4961fbcdb98d1a1295550c88ce Mon Sep 17 00:00:00 2001 From: jurgenwigg Date: Sun, 6 Apr 2025 17:18:51 +0000 Subject: [PATCH 1/4] initial commit --- Lib/difflib.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 4bba9e7ea5cfa8..df68b836961174 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -361,10 +361,14 @@ def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None): # the unique 'b's and then matching the first two 'a's. a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__ - if ahi is None: - ahi = len(a) - if bhi is None: - bhi = len(b) + ahi = ahi if ahi is not None else len(a) + bhi = bhi if bhi is not None else len(b) + + # Check if both sequences are the same before executing rest of the + # method. + if a[alo:ahi] == b[blo:bhi]: + return Match(alo, blo, len(a[alo:ahi])) + besti, bestj, bestsize = alo, blo, 0 # find longest junk-free match # during an iteration of the loop, j2len[j] = length of longest From 6fce1a0a71505b767239c22b883080896634194b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sun, 6 Apr 2025 17:31:45 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst new file mode 100644 index 00000000000000..ac8626d5ed57b6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst @@ -0,0 +1 @@ +Add checking if sequences `a[alo:ahi]` and 
`b[blo:bhi]` are the same on the beginning of the method find_longest_match in SequenceMatcher. For identical sequences there is no reason to run whole logic when simple check can be done. It appears to fix issue when comparing two slightly different strings ends up with waiting forever for the result. From 132e9383ffe8b5dab62eb8a5fe774d8590fe867d Mon Sep 17 00:00:00 2001 From: jurgenwigg Date: Sun, 6 Apr 2025 17:47:37 +0000 Subject: [PATCH 3/4] fixed news lint fail --- .../2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst index ac8626d5ed57b6..23e3eaadcca61b 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-06-17-31-44.gh-issue-132166.tbo9yR.rst @@ -1 +1,5 @@ -Add checking if sequences `a[alo:ahi]` and `b[blo:bhi]` are the same on the beginning of the method find_longest_match in SequenceMatcher. For identical sequences there is no reason to run whole logic when simple check can be done. It appears to fix issue when comparing two slightly different strings ends up with waiting forever for the result. +Add a check at the beginning of ``SequenceMatcher.find_longest_match`` for +whether the sequences ``a[alo:ahi]`` and ``b[blo:bhi]`` are the same. For +identical sequences there is no reason to run the whole algorithm when a +simple check can be done. It appears to fix an issue where comparing two +slightly different strings ends up waiting forever for the result. 
From 7a2f27b27d7338fd2fadc95c3632b33475c99caf Mon Sep 17 00:00:00 2001 From: jurgenwigg <53076001+jurgenwigg@users.noreply.github.com> Date: Sun, 6 Apr 2025 21:32:03 +0200 Subject: [PATCH 4/4] Update difflib.py --- Lib/difflib.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index df68b836961174..e674bb510f4c69 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -361,8 +361,10 @@ def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None): # the unique 'b's and then matching the first two 'a's. a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__ - ahi = ahi if ahi is not None else len(a) - bhi = bhi if bhi is not None else len(b) + if ahi is None: + ahi = len(a) + if bhi is None: + bhi = len(b) # Check if both sequences are the same before executing rest of the # method.