Permalink
Browse files

faster remove_punctuation

  • Loading branch information...
medecau committed Nov 3, 2011
1 parent c33992a commit 7b6590c5833c949bf0fb78bada0735a6552b19db
Showing with 14 additions and 1 deletion.
  1. +14 −1 fuzzywuzzy/utils.py
View
@@ -1,3 +1,5 @@
import string
# encode as string, decode as unicode bytes
def asciidammit(x):
if type(x) is str:
@@ -16,11 +18,22 @@ def asciidammit(x):
x = unicode(x)
return asciidammit(x)
def remove_punctuation(s):
def remove_punctuationold(s):
    """Replace each ',', '.', '-' and ':' in *s* with a single space.

    This is the chained-replace variant of ``remove_punctuation``.
    ``None`` is passed straight through; any other value must provide a
    ``replace`` method (i.e. be a string).
    """
    if s is None:
        return s
    # One pass per mark; the marks never overlap, so order is irrelevant.
    for mark in (",", ".", "-", ":"):
        s = s.replace(mark, " ")
    return s
# Characters that remove_punctuation turns into spaces.
table_pattern = ',.-:'
# Add chars with ordinals over 127 to the table (built with one join
# instead of repeated string concatenation).
table_pattern += ''.join(chr(i) for i in range(128, 256))
# translation table must have the same length
table_spaces = ' ' * len(table_pattern)
try:
    # Python 2: string.maketrans builds the translation table.
    punctuation_table = string.maketrans(table_pattern, table_spaces)
except AttributeError:
    # Python 3 has no string.maketrans; str.maketrans is the replacement.
    punctuation_table = str.maketrans(table_pattern, table_spaces)


def remove_punctuation(s):
    """Return *s* with every character of ``table_pattern`` replaced by a space.

    The characters replaced are ',', '.', '-', ':' and every character
    with an ordinal from 128 to 255. The result always has the same
    length as the input.

    ``None`` is returned unchanged, matching the replace-based
    implementation this function superseded.
    """
    if s is None:
        return s
    # NOTE(review): on Python 2 the table is a byte string, so this is
    # only expected to work for byte-string input -- confirm callers
    # normalise unicode first.
    return s.translate(punctuation_table)
def validate_string(s):
if s is None: return False
try:

0 comments on commit 7b6590c

Please sign in to comment.