Skip to content

Commit

Permalink
Modernize with a class
Browse files Browse the repository at this point in the history
  • Loading branch information
offby1 committed Dec 29, 2013
1 parent 6c59992 commit 5b212f1
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 123 deletions.
93 changes: 48 additions & 45 deletions python/anagrams.py
@@ -1,54 +1,57 @@
#!/usr/bin/env python

from bag import bag, bag_empty, bags_equal, subtract_bags
from __future__ import print_function

from bag import Bag
import dict
from optparse import OptionParser
from types import *
import os
import profile
import sys

# psyco is optional: report on stderr whether it loaded and carry on
# either way.  Later code checks for the name "psyco" in globals().
try:
    import psyco
    psyco.full()
    print("Psyco loaded OK",
          file=sys.stderr)
except ImportError as e:
    print("Psyco didn't load: {}".format(e),
          file=sys.stderr)


def combine(words, anagrams):
    """Prefix every anagram in *anagrams* with every word in *words*.

    words    -- iterable of words.
    anagrams -- iterable of lists of words.

    Returns a new list containing ``[w] + a`` for each word ``w`` and
    each anagram ``a``, grouped by word in iteration order.
    """
    return [[w] + a for w in words for a in anagrams]


def anagrams(bag, dict):
    """Return every list of words from *dict* that exactly uses up *bag*.

    bag  -- an object offering ``subtract(key)``, which returns a falsy
            value when *key* does not fit inside the bag and otherwise
            the remainder, which in turn offers ``empty()``.
    dict -- a sliceable sequence of ``[key, words]`` entries, where
            *words* is the list of words that *key* stands for.

    Returns a list of anagrams, each of which is a list of words.
    """
    rv = []

    for words_processed, entry in enumerate(dict):
        key = entry[0]
        words = entry[1]

        smaller_bag = bag.subtract(key)
        if not smaller_bag:
            # This entry needs letters the bag does not have.
            continue

        if smaller_bag.empty():
            # The entry consumes the bag exactly: each word is an anagram.
            rv.extend([w] for w in words)
            continue

        # Recurse from the current entry onwards: the same entry may be
        # used again, but earlier entries are never revisited, so each
        # combination of entries is produced in one order only.
        from_smaller_bag = anagrams(smaller_bag,
                                    dict[words_processed:])
        if from_smaller_bag:
            rv.extend(combine(words, from_smaller_bag))

    return rv

Expand All @@ -66,14 +69,15 @@ def anagrams (bag, dict):

(options, args) = parser.parse_args()

# Without a phrase there is nothing to do: show usage and stop.
if not args:
    parser.print_help()
    sys.exit(0)

# NOTE: "dict" here is this project's dict module, not the builtin type.
dict_hash_table = dict.snarf_dictionary(options.dict_fn)

the_phrase = Bag(args[0])
# end='' keeps the cursor on this line; the " After: ..." message
# printed once pruning is done completes it.
print("Pruning dictionary. Before: {} bags ...".format(len(dict_hash_table)),
      file=sys.stderr, end='')

# Now convert the hash table to a list, longest entries first. (This
# isn't necessary, but it makes the more interesting anagrams appear
Expand All @@ -82,39 +86,38 @@ def anagrams (bag, dict):
# While we're at it, prune the list, too. That _is_ necessary for the
# program to finish before you grow old and die.


# Keep only the entries whose bag fits inside the phrase; subtract()
# returns a falsy value for the ones that do not.
the_dict_list = [[k, dict_hash_table[k]]
                 for k in dict_hash_table
                 if the_phrase.subtract(k)]

# Note that sorting entries "alphabetically" only makes partial sense,
# since each entry is (at least potentially) more than one word (all
# the words in an entry are anagrams of each other).
def biggest_first_then_alphabetically(a, b):
    """cmp-style comparator for ``[key, words]`` dictionary entries.

    Only the first word of each entry is examined.  Entries whose first
    word is longer sort earlier; ties are broken by ordinary string
    comparison of those first words.

    Returns a negative number, zero or a positive number, as a
    comparison function for ``list.sort`` is expected to.
    """
    first_a = a[1][0]
    first_b = b[1][0]
    # Lengths are compared with the operands swapped so that the longer
    # word wins.  (x > y) - (x < y) is a three-way comparison that does
    # not need the Python-2-only cmp() builtin.
    result = (len(first_b) > len(first_a)) - (len(first_b) < len(first_a))
    if not result:
        result = (first_a > first_b) - (first_a < first_b)
    return result

the_dict_list.sort(biggest_first_then_alphabetically)

print(" After: {} bags.".format(len(the_dict_list)),
      file=sys.stderr)
profile.Profile.bias = 8e-06  # measured on dell optiplex, Ubuntu 8.04 ("Hoary Hedgehog")
if "psyco" in globals():
    result = anagrams(the_phrase, the_dict_list)
else:
    # profile.run() executes the statement in the __main__ namespace,
    # which is where "result" ends up when this file runs as a script.
    profile.run("result = anagrams(the_phrase, the_dict_list)")

print("{} anagrams of {}".format(len(result), args[0]),
      file=sys.stderr)
# One anagram per line on stdout, as a parenthesised, space-separated
# list of words.
for a in result:
    print("({})".format(" ".join(a)))
95 changes: 51 additions & 44 deletions python/bag.py
@@ -1,57 +1,59 @@
#!/usr/bin/env python

import string
import sys
from __future__ import print_function

import collections
import functools
import unittest

@functools.total_ordering
class Bag(object):
    """A multiset of the lower-case letters a-z found in a string.

    Case is ignored and every character outside a-z (spaces,
    punctuation, digits) is dropped, so two strings produce equal bags
    exactly when they are anagrams of each other.  Bags are hashable
    and can be used as dictionary keys.
    """

    def __init__(self, str):
        """Build a bag from *str*, any iterable of strings or characters."""
        # Non-letters must be dropped: a stray space in a phrase could
        # otherwise never be subtracted away by dictionary words, and
        # the bag would never become empty.
        self.c = collections.Counter(
            c for chunk in str for c in chunk.lower() if 'a' <= c <= 'z')

    def empty(self):
        """Return True when the bag holds no letters."""
        return not self.c

    def __eq__(self, other):
        if not isinstance(other, Bag):
            return NotImplemented
        return self.c == other.c

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __lt__(self, other):
        if not isinstance(other, Bag):
            return NotImplemented
        # Ordering the canonical strings gives a total order that agrees
        # with __eq__; comparing the Counters directly does not.
        return str(self) < str(other)

    def subtract(self, other):
        """Return this bag minus *other*, or False if *other* does not fit.

        *other* must be a Bag.  The result is a new Bag (possibly empty)
        when every letter of *other* is available here, and False when
        *other* needs a letter, or more copies of one, than this bag has.
        """
        top = collections.Counter(self.c)
        top.subtract(other.c)
        if any(x < 0 for x in top.values()):
            return False
        # elements() skips letters whose count dropped to zero.
        return Bag(top.elements())

    def __hash__(self):
        return hash(str(self))

    def __str__(self):
        # Sorted, so that equal bags always give the same string (and
        # therefore the same hash) however their letters were inserted.
        return ''.join(sorted(self.c.elements()))

    def __repr__(self):
        return "Bag({!r})".format(str(self))

class WhatchaMaDingy (unittest.TestCase):
def __init__ (self, methodName='runTest'):
class WhatchaMaDingy(unittest.TestCase):
def __init__(self, methodName='runTest'):
self.done = False
unittest.TestCase.__init__ (self, methodName)
unittest.TestCase.__init__(self, methodName)

def testAlWholeLottaStuff (self):
self.assert_ (bag_empty (bag ("")))
def testAlWholeLottaStuff(self):
self.assert_(Bag("").empty())

self.assert_ (not (bag_empty (bag ("a"))))
self.assert_(not(Bag("a").empty()))

self.assert_ (bags_equal (bag ("abc"),
bag ("cba")))
self.assert_(Bag("abc") == Bag("cba"))

self.assert_ (not (bags_equal (bag ("abc"),
bag ("bc"))))
self.assert_(Bag("abc") != Bag("bc"))

self.assert_ (bags_equal (bag ("a"),
subtract_bags (bag("ab"),
bag ("b"))))
self.assert_ (not (subtract_bags (bag ("a"),
bag ("b"))))
self.assert_(Bag("a") == Bag("ab").subtract(Bag("b")))

self.assert_ (not (subtract_bags (bag ("a"),
bag ("aa"))))
self.assert_(not(Bag("a").subtract(Bag("b"))))

self.assert_(not(Bag("a").subtract(Bag("aa"))))

silly_long_string = "When first I was a wee, wee lad\n\
Eating on my horse\n\
Expand All @@ -63,14 +65,19 @@ def testAlWholeLottaStuff (self):
With dimples on your tie."

ever_so_slightly_longer_string = silly_long_string + "x"
self.assert_ (bags_equal (bag ("x"),
subtract_bags (bag (ever_so_slightly_longer_string),
bag (silly_long_string))))
self.assert_(Bag("x") == Bag(ever_so_slightly_longer_string).subtract(Bag(silly_long_string)))

self.assert_(Bag("abc") == Bag("ABC"))

self.done = True

self.assert_ (bags_equal (bag ("abc"),
bag ("ABC")))

self.done = True;
class Hashable(unittest.TestCase):
    """Checks that a Bag can be used as a dictionary key."""

    def testIt(self):
        h = {}
        h[Bag('hello')] = 3
        # A separately constructed but equal Bag must find the entry.
        self.assertIn(Bag('hello'), h)
        self.assertEqual(h[Bag('hello')], 3)

if __name__ == "__main__":
    # unittest.main() runs the tests and exits the interpreter itself,
    # so no exit() wrapper is needed.
    unittest.main()
70 changes: 36 additions & 34 deletions python/dict.py
Expand Up @@ -7,38 +7,39 @@
import cPickle
import os
import unittest
from stat import *
from bag import bag, bag_empty, bags_equal, subtract_bags
from bag import Bag

has_a_vowel_re = re.compile(r'[aeiouy]')
long_enough_re = re.compile(r'^i$|^a$|^..')
non_letter_re = re.compile(r'[^a-z]')


def word_acceptable(w):
    """Return True if *w* is worth keeping in the dictionary.

    A word is accepted when it consists solely of the lower-case
    letters a-z, is either the single letter "a" or "i" or at least two
    characters long, and contains at least one of a, e, i, o, u, y.
    """
    return bool(not non_letter_re.search(w)
                and long_enough_re.match(w)
                and has_a_vowel_re.search(w))

# Default word list: "words.utf8" one directory above this module.
default_dict_name = os.path.join(os.path.dirname(__file__), "../words.utf8")


def snarf_dictionary_from_IO (I):
def snarf_dictionary_from_IO(I):
print >> sys.stderr, "Snarfing", I
hash_table = {}
for w in re.findall (r'.+', I.read ()):
w = string.lower (w)
for w in re.findall(r'.+', I.read()):
w = string.lower(w)

if not word_acceptable(w):
continue

key = bag(w)
if hash_table.has_key (key):
if (0 == hash_table[key].count (w)): # avoid duplicates
hash_table[key].append (w)
key = Bag(w)
if key in hash_table:
if(0 == hash_table[key].count(w)): # avoid duplicates
hash_table[key].append(w)
else:
hash_table[key] = [w]

Expand All @@ -47,42 +48,43 @@ def snarf_dictionary_from_IO (I):

hash_cache = os.path.join(os.path.dirname(__file__), "hash.cache")


def snarf_dictionary(fn):
    """Return the dictionary hash table, using a pickle cache when possible.

    fn -- path of the word list to read when the cache cannot be used.

    The cache file ``hash_cache`` is tried first; if it loads, its
    contents are returned and *fn* is not opened at all.  Otherwise the
    table is built from *fn* with snarf_dictionary_from_IO() and written
    to ``hash_cache`` for next time.

    NOTE: the cache is not tied to *fn*, so a cache built from one word
    list is returned even when a different *fn* is requested.
    """
    try:
        with open(hash_cache, "rb") as fh:
            # NOTE(security): unpickling can execute arbitrary code, so
            # hash_cache must only ever be a file this program wrote.
            rv = cPickle.load(fh)
    except Exception:
        # The cache is best-effort: if it is missing, unreadable or
        # corrupt, rebuild it from the word list.
        with open(fn, "r") as fh:
            rv = snarf_dictionary_from_IO(fh)
        with open(hash_cache, "wb") as fh:
            cPickle.dump(rv, fh, 2)
    else:
        sys.stderr.write("Reading cache %s instead of dictionary %s\n"
                         % (hash_cache, fn))

    return rv


if __name__ == "__main__":
    class TestStuff(unittest.TestCase):
        """Tests for word filtering and dictionary loading."""

        def setUp(self):
            self.fake_input = "cat\ntac\nfred\n"
            self.fake_dict = snarf_dictionary_from_IO(StringIO.StringIO(self.fake_input))

        def test_word_acceptable(self):
            self.assertTrue(word_acceptable("dog"))
            self.assertFalse(word_acceptable("C3PO"))
            # These counts are tied to the contents of the default word
            # list (and of any existing cache file).
            d = snarf_dictionary(default_dict_name)
            self.assertEqual(66965, len(d))
            self.assertEqual(72794, sum(len(words) for words in d.values()))

        def test_this_and_that(self):
            # "cat" and "tac" share one bag; "fred" has its own.
            self.assertEqual(2, len(self.fake_dict))
            cat_hits = self.fake_dict[Bag("cat")]
            self.assertEqual(2, len(cat_hits))
            self.assertEqual("cat", cat_hits[0])
            self.assertEqual("tac", cat_hits[1])
            self.assertEqual(1, len(self.fake_dict[Bag("fred")]))
            self.assertEqual("fred", self.fake_dict[Bag("fred")][0])

    unittest.main()

0 comments on commit 5b212f1

Please sign in to comment.