Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

faster remove_punctuation

  • Loading branch information...
commit 7b6590c5833c949bf0fb78bada0735a6552b19db 1 parent c33992a
Pedro Rodrigues medecau authored
Showing with 14 additions and 1 deletion.
  1. +14 −1 fuzzywuzzy/utils.py
15 fuzzywuzzy/utils.py
View
@@ -1,3 +1,5 @@
+import string
+
# encode as string, decode as unicode bytes
def asciidammit(x):
if type(x) is str:
@@ -16,11 +18,22 @@ def asciidammit(x):
x = unicode(x)
return asciidammit(x)
-def remove_punctuation(s):
+def remove_punctuationold(s):  # former remove_punctuation, kept under a new name
if s is None: return s  # None passes through unchanged
s = s.replace(","," ").replace("."," ").replace("-"," ").replace(":"," ")  # swap each of , . - : for a space
return s
+table_pattern = ',.-:'  # characters that will be replaced by a space
+# Add chars with ordinals over 127 to the table
+for i in range(128, 256):
+ table_pattern+=chr(i)
+# maketrans requires both of its arguments to have the same length
+table_spaces = ' '*len(table_pattern)
+punctuation_table = string.maketrans(table_pattern, table_spaces)  # maps every char in table_pattern to ' '
+
+def remove_punctuation(s):  # return s with every char listed in punctuation_table replaced by a space
+ return s if s is None else string.translate(s, punctuation_table)  # None passes through, as in remove_punctuationold
+
def validate_string(s):
if s is None: return False
try:
Please sign in to comment.
Something went wrong with that request. Please try again.