From 4e99380162d1736050af82ea9c358efe26be32a8 Mon Sep 17 00:00:00 2001 From: rahul-nath Date: Thu, 15 Sep 2016 23:22:11 -0700 Subject: [PATCH 1/4] Optimized some lines for logic in fuzz.py --- fuzzywuzzy/fuzz.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/fuzzywuzzy/fuzz.py b/fuzzywuzzy/fuzz.py index 843c72bf..c9ccde26 100644 --- a/fuzzywuzzy/fuzz.py +++ b/fuzzywuzzy/fuzz.py @@ -146,9 +146,7 @@ def _token_set(s1, s2, partial=True, force_ascii=True, full_process=True): p1 = utils.full_process(s1, force_ascii=force_ascii) if full_process else s1 p2 = utils.full_process(s2, force_ascii=force_ascii) if full_process else s2 - if not utils.validate_string(p1): - return 0 - if not utils.validate_string(p2): + if not (utils.validate_string(p1) and utils.validate_string(p2)): return 0 # pull tokens @@ -159,17 +157,13 @@ def _token_set(s1, s2, partial=True, force_ascii=True, full_process=True): diff1to2 = tokens1.difference(tokens2) diff2to1 = tokens2.difference(tokens1) - sorted_sect = " ".join(sorted(intersection)) - sorted_1to2 = " ".join(sorted(diff1to2)) - sorted_2to1 = " ".join(sorted(diff2to1)) - - combined_1to2 = sorted_sect + " " + sorted_1to2 - combined_2to1 = sorted_sect + " " + sorted_2to1 + # sort, join, and strip + sorted_sect = (" ".join(sorted(intersection))).strip() + combined_1to2 = (" ".join([sorted_sect, sorted(diff1to2)])).strip() + combined_2to1 = (" ".join([sorted_sect, sorted(diff2to1)])).strip() - # strip - sorted_sect = sorted_sect.strip() - combined_1to2 = combined_1to2.strip() - combined_2to1 = combined_2to1.strip() + # = " ".join([sorted_sect, sorted_1to2]) + # = " ".join([sorted_sect, sorted_2to1]) if partial: ratio_func = partial_ratio @@ -202,9 +196,7 @@ def QRatio(s1, s2, force_ascii=True): p1 = utils.full_process(s1, force_ascii=force_ascii) p2 = utils.full_process(s2, force_ascii=force_ascii) - if not utils.validate_string(p1): - return 0 - if not utils.validate_string(p2): + if not (utils.validate_string(p1) and utils.validate_string(p2)): return 0 return ratio(p1, p2) @@ -223,9 +215,7 @@ def WRatio(s1, s2, force_ascii=True): p1 = utils.full_process(s1, force_ascii=force_ascii) p2 = utils.full_process(s2, force_ascii=force_ascii) - if not utils.validate_string(p1): - return 0 - if not utils.validate_string(p2): + if not (utils.validate_string(p1) and utils.validate_string(p2)): return 0 # should we look at partials? From 16253c24149639c30c6ebd8e7bddbb66d92d9a60 Mon Sep 17 00:00:00 2001 From: rahul-nath Date: Thu, 15 Sep 2016 23:36:33 -0700 Subject: [PATCH 2/4] Optimized some lines for logic in fuzz.py --- fuzzywuzzy/fuzz.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fuzzywuzzy/fuzz.py b/fuzzywuzzy/fuzz.py index c9ccde26..ca1de707 100644 --- a/fuzzywuzzy/fuzz.py +++ b/fuzzywuzzy/fuzz.py @@ -159,11 +159,8 @@ def _token_set(s1, s2, partial=True, force_ascii=True, full_process=True): # sort, join, and strip sorted_sect = (" ".join(sorted(intersection))).strip() - combined_1to2 = (" ".join([sorted_sect, sorted(diff1to2)])).strip() - combined_2to1 = (" ".join([sorted_sect, sorted(diff2to1)])).strip() - - # = " ".join([sorted_sect, sorted_1to2]) - # = " ".join([sorted_sect, sorted_2to1]) + combined_1to2 = (" ".join([sorted_sect, " ".join(sorted(diff1to2)])]).strip() + combined_2to1 = (" ".join([sorted_sect, " ".join(sorted(diff2to1)])]).strip() if partial: ratio_func = partial_ratio From b6ecd93402de65386340d1ca2f7a7a82af2d7fd5 Mon Sep 17 00:00:00 2001 From: rahul-nath Date: Fri, 16 Sep 2016 01:07:25 -0700 Subject: [PATCH 3/4] misplaced bracket, fixed syntax error in fuzz.py --- fuzzywuzzy/fuzz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzzywuzzy/fuzz.py b/fuzzywuzzy/fuzz.py index ca1de707..9cc00de3 100644 --- a/fuzzywuzzy/fuzz.py +++ b/fuzzywuzzy/fuzz.py @@ -159,7 +159,7 @@ def _token_set(s1, s2, partial=True, force_ascii=True, full_process=True): # sort, join, and strip sorted_sect = (" ".join(sorted(intersection))).strip() - combined_1to2 = (" ".join([sorted_sect, " ".join(sorted(diff1to2)])]).strip() + combined_1to2 = (" ".join([sorted_sect, " ".join(sorted(diff1to2))]).strip() combined_2to1 = (" ".join([sorted_sect, " ".join(sorted(diff2to1)])]).strip() if partial: From ea044711c2e96d1ead0f11ea02c5b2ee57790a29 Mon Sep 17 00:00:00 2001 From: rahul-nath Date: Fri, 16 Sep 2016 01:14:40 -0700 Subject: [PATCH 4/4] ...different misplaced bracket...fixed syntax error in fuzz.py --- fuzzywuzzy/fuzz.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fuzzywuzzy/fuzz.py b/fuzzywuzzy/fuzz.py index 9cc00de3..2df2d97f 100644 --- a/fuzzywuzzy/fuzz.py +++ b/fuzzywuzzy/fuzz.py @@ -159,8 +159,8 @@ def _token_set(s1, s2, partial=True, force_ascii=True, full_process=True): # sort, join, and strip sorted_sect = (" ".join(sorted(intersection))).strip() - combined_1to2 = (" ".join([sorted_sect, " ".join(sorted(diff1to2))]).strip() - combined_2to1 = (" ".join([sorted_sect, " ".join(sorted(diff2to1)])]).strip() + combined_1to2 = (" ".join([sorted_sect, " ".join(sorted(diff1to2))])).strip() + combined_2to1 = " ".join([sorted_sect, " ".join(sorted(diff2to1))]).strip() if partial: ratio_func = partial_ratio