Minor whitespace changes for PEP8

seatgeek · Aug 7, 2015 · d0c389d
1 parent 9a4bc22
commit d0c389d
Show file tree

Hide file tree

Showing 6 changed files with 28 additions and 25 deletions.
diff --git a/benchmarks.py b/benchmarks.py
@@ -60,7 +60,7 @@ def print_result_from_timeit(stmt='pass', setup='pass', number=1000000):
     print_result_from_timeit('utils.full_process(u\'%s\')' % s,
                              common_setup + basic_setup, number=iterations)
 
-### benchmarking the core matching methods...
+# benchmarking the core matching methods...
 
 for s in cirque_strings:
     print 'Test fuzz.ratio for string: "%s"' % s

diff --git a/fuzzywuzzy/StringMatcher.py b/fuzzywuzzy/StringMatcher.py
@@ -10,6 +10,7 @@
 from Levenshtein import *
 from warnings import warn
 
+
 class StringMatcher:
     """A SequenceMatcher-like class built on the top of Levenshtein"""
 
@@ -75,4 +76,4 @@ def real_quick_ratio(self):
     def distance(self):
         if not self._distance:
             self._distance = distance(self._str1, self._str2)
-        return self._distance
+        return self._distance
diff --git a/fuzzywuzzy/fuzz.py b/fuzzywuzzy/fuzz.py
@@ -113,6 +113,7 @@ def _process_and_sort(s, force_ascii):
     sorted_string = u" ".join(sorted(tokens))
     return sorted_string.strip()
 
+
 # Sorted Token
 #   find all alphanumeric tokens in the string
 #   sort those tokens and take ratio of resulting joined strings
@@ -132,6 +133,7 @@ def _token_sort(s1, s2, partial=True, force_ascii=True):
     else:
         return ratio(sorted1, sorted2)
 
+
 def token_sort_ratio(s1, s2, force_ascii=True):
     """Return a measure of the sequences' similarity between 0 and 100
     but sorting the token before comparing.

diff --git a/fuzzywuzzy/process.py b/fuzzywuzzy/process.py
@@ -163,20 +163,21 @@ def extractOne(query, choices, processor=None, scorer=None, score_cutoff=0):
         return best_list[0]
     return None
 
-def dedupe (contains_dupes, threshold=70, scorer=fuzz.token_set_ratio):
-    """This convenience function takes a list of strings containing duplicates and uses fuzzy matching to identify 
-    and remove duplicates. Specifically, it uses the process.extract to identify duplicates that 
+
+def dedupe(contains_dupes, threshold=70, scorer=fuzz.token_set_ratio):
+    """This convenience function takes a list of strings containing duplicates and uses fuzzy matching to identify
+    and remove duplicates. Specifically, it uses the process.extract to identify duplicates that
     score greater than a user defined threshold. Then, it looks for the longest item in the duplicate list
-    since we assume this item contains the most entity information and returns that. It breaks string 
+    since we assume this item contains the most entity information and returns that. It breaks string
     length ties on an alphabetical sort.
-    
-    Note: as the threshold DECREASES the number of duplicates that are found INCREASES. This means that the 
-        returned deduplicated list will likely be shorter. Raise the threshold for fuzzy_dedupe to be less 
+
+    Note: as the threshold DECREASES the number of duplicates that are found INCREASES. This means that the
+        returned deduplicated list will likely be shorter. Raise the threshold for fuzzy_dedupe to be less
         sensitive.
-    
+
     Args:
         contains_dupes: A list of strings that we would like to dedupe.
-        threshold: the numerical value (0,100) point at which we expect to find duplicates. 
+        threshold: the numerical value (0,100) point at which we expect to find duplicates.
             Defaults to 70 out of 100
         scorer: Optional function for scoring matches between the query and
             an individual processed choice. This should be a function
@@ -193,22 +194,22 @@ def dedupe (contains_dupes, threshold=70, scorer=fuzz.token_set_ratio):
         """
 
     extractor = []
-    
+
     # iterate over items in *contains_dupes*
     for item in contains_dupes:
         # return all duplicate matches found
         matches = extract(item, contains_dupes, limit=None, scorer=scorer)
-        # filter matches based on the threshold 
+        # filter matches based on the threshold
         filtered = [x for x in matches if x[1] > threshold]
         # if there is only 1 item in *filtered*, no duplicates were found so append to *extracted*
         if len(filtered) == 1:
             extractor.append(filtered[0][0])
 
         else:
             # alpha sort
-            filtered = sorted(filtered, key = lambda x: x[0])
+            filtered = sorted(filtered, key=lambda x: x[0])
             # length sort
-            filter_sort = sorted(filtered, key = lambda x: len(x[0]), reverse=True)
+            filter_sort = sorted(filtered, key=lambda x: len(x[0]), reverse=True)
             # take first item as our 'canonical example'
             extractor.append(filter_sort[0][0])
 
@@ -217,7 +218,7 @@ def dedupe (contains_dupes, threshold=70, scorer=fuzz.token_set_ratio):
     for e in extractor:
         keys[e] = 1
     extractor = keys.keys()
-    
+
     # check that extractor differs from contain_dupes (e.g. duplicates were found)
     # if not, then return the original list
     if len(extractor) == len(contains_dupes):

diff --git a/fuzzywuzzy/utils.py b/fuzzywuzzy/utils.py
@@ -7,7 +7,6 @@
 PY3 = sys.version_info[0] == 3
 
 
-
 def validate_string(s):
     try:
         return len(s) > 0

diff --git a/test_fuzzywuzzy.py b/test_fuzzywuzzy.py
@@ -77,6 +77,7 @@ def test_fullProcessForceAscii(self):
         for s in self.mixed_strings:
             utils.full_process(s, force_ascii=True)
 
+
 class RatioTest(unittest.TestCase):
 
     def setUp(self):
@@ -371,12 +372,12 @@ def testWithCutoff(self):
 
         best = process.extractOne(query, choices, score_cutoff=50)
         self.assertTrue(best is None)
-        #self.assertIsNone(best) # unittest.TestCase did not have assertIsNone until Python 2.7
+        # self.assertIsNone(best) # unittest.TestCase did not have assertIsNone until Python 2.7
 
         # however if we had no cutoff, something would get returned
 
-        #best = process.extractOne(query, choices)
-        #self.assertIsNotNone(best)
+        # best = process.extractOne(query, choices)
+        # self.assertIsNotNone(best)
 
     def testWithCutoff2(self):
         choices = [
@@ -450,18 +451,17 @@ def test_dict_like_extract(self):
     def test_dedupe(self):
         """We should be able to use a list-like object for contains_dupes
         """
-        ## Test 1
+        # Test 1
         contains_dupes = ['Frodo Baggins', 'Tom Sawyer', 'Bilbo Baggin', 'Samuel L. Jackson', 'F. Baggins', 'Frody Baggins', 'Bilbo Baggins']
 
         result = process.dedupe(contains_dupes)
         self.assertTrue(len(result) < len(contains_dupes))
 
-
-        ## Test 2
+        # Test 2
         contains_dupes = ['Tom', 'Dick', 'Harry']
-        
+
         # we should end up with the same list since no duplicates are contained in the list (e.g. original list is returned)
-        deduped_list = ['Tom','Dick','Harry']
+        deduped_list = ['Tom', 'Dick', 'Harry']
 
         result = process.dedupe(contains_dupes)
         self.assertEqual(result, deduped_list)