Permalink
Browse files

cleaned up utils.py

  • Loading branch information...
1 parent 3a8b462 commit def73d61f61edd4a7371353d32d9eabf244b66f0 @medecau medecau committed Nov 15, 2011
Showing with 6 additions and 49 deletions.
  1. +2 −8 fuzzywuzzy/benchmarks.py
  2. +4 −41 fuzzywuzzy/utils.py
View
@@ -3,7 +3,7 @@
from timeit import timeit
import utils
-iterations=100000
+iterations=100000*10
cirque_strings = [
"cirque du soleil - zarkana - las vegas",
@@ -33,12 +33,6 @@
u"a\xac\u1234\u20ac\U00008000"
]
-for s in cirque_strings:
- print 'Test for string: "%s"' % s
- # print 'Old: %f' % round(timeit('utils.remove_punctuationold(\'%s\')' % s, "import utils",number=iterations),4)
- print 'New: %f' % round(timeit('utils.remove_punctuation(\'%s\')' % s, "import utils",number=iterations),4)
-
-print
for s in choices:
print 'Test for string: "%s"' % s
@@ -56,5 +50,5 @@
for s in mixed_strings+cirque_strings+choices:
print 'Test for string: "%s"' % s
- print 'Old: %f' % round(timeit('utils.full_processold(\'%s\')' % s, "import utils",number=iterations),4)
+ #print 'Old: %f' % round(timeit('utils.full_processold(\'%s\')' % s, "import utils",number=iterations),4)
print 'New: %f' % round(timeit('utils.full_process(\'%s\')' % s, "import utils",number=iterations),4)
View
@@ -1,43 +1,16 @@
import string
-table_pattern = ',.-:'
-# Add chars whit ordinals over 127 to the table
-for i in range(128, 256):
- table_pattern+=chr(i)
-# translation table must have the same length
-table_spaces = ' '*len(table_pattern)
-punctuation_table = string.maketrans(table_pattern, table_spaces)
-
bad_chars=''
for i in range(128,256):
bad_chars+=chr(i)
-
-pro_badchars=bad_chars
-pro_table_from=string.punctuation+string.ascii_uppercase
-pro_table_to=' '*len(string.punctuation)+string.ascii_lowercase
-pro_table=string.maketrans(pro_table_from, pro_table_to)
+table_from=string.punctuation+string.ascii_uppercase
+table_to=' '*len(string.punctuation)+string.ascii_lowercase
+trans_table=string.maketrans(table_from, table_to)
def asciionly(s):
return s.translate(None, bad_chars)
-# encode as string, decode as unicode bytes
-'''
-def asciidammitold(x):
- if type(x) is str:
- try:
- return x.decode('ascii')
- except:
- return x.decode('ascii', 'ignore')
- elif type(x) is unicode:
- try:
- return x.encode('ascii').decode('ascii')
- except:
- return x.encode('ascii', 'ignore').decode('ascii')
- else:
- return asciidammit(unicode(x))
-'''
-
# remove non-ASCII characters from strings
def asciidammit(s):
if type(s) is str:
@@ -47,9 +20,6 @@ def asciidammit(s):
else:
return asciidammit(unicode(s))
-def remove_punctuation(s):
- return string.translate(s, punctuation_table)
-
def validate_string(s):
try:
if len(s)>0:
@@ -59,15 +29,8 @@ def validate_string(s):
except:
return False
-def full_processold(s):
- s = s.lower()
- s = s.strip()
- s = remove_punctuation(s)
- x = asciidammit(s)
- return x
-
def full_process(s):
- return s.translate(pro_table, bad_chars).strip()
+ return s.translate(trans_table, bad_chars).strip()

0 comments on commit def73d6

Please sign in to comment.