# 8.0 Tries

## 8.1 Implement autocomplete

### Problem Statement
Implement an autocomplete feature for a search engine. In other words, given a string prefix, return all of the words in the index that start with the prefix.

#### Examples
Index: [`the`, `their`, `there`, `was`, `waste`]

Search Term: `the` <br/>
Returns: `the`, `their`, `there`

Search Term: `wa` <br/>
Returns: `was`, `waste`

Search Term: `wasp` <br/>
Returns: *(no matches)*

In [1]:
from collections import defaultdict, deque, namedtuple
import unittest


class Trie(object):
    """Prefix tree of words implemented using nested python dicts.

    Every node is a dict mapping a single character to its child node.  A
    node at which a stored word ends additionally carries the ``EOW``
    marker key.
    """

    # Marker key used to identify end of word.  It is deliberately longer
    # than one character: child keys are always single characters, so no
    # character of a stored word or of a search prefix (not even '.') can
    # be mistaken for the marker.
    EOW = '.eow'

    def __init__(self):
        # Only the root is a defaultdict; the methods below reach children
        # through ``in`` / ``setdefault`` / ``items``, never through a bare
        # missing-key lookup, so they do not rely on the default factory.
        self.index = defaultdict(dict)

    def add_word(self, word):
        """Add word to the index.  An empty word leaves the index unchanged."""
        node = self.index
        for c in word:
            # Reuse the existing child for c, or create it on first sight.
            node = node.setdefault(c, {})
        if len(word) > 0:
            node[self.EOW] = ''

    def search(self, prefix):
        """Search for words starting with prefix.

        Args:
            prefix: non-empty string that returned words must start with.

        Returns:
            List of the stored words that start with prefix, in traversal
            order (callers should not rely on it); empty when nothing
            matches.

        Raises:
            AssertionError: if prefix is empty.
        """
        # Raised explicitly instead of through an ``assert`` statement so
        # the check is still performed when Python runs with -O.
        if len(prefix) < 1:
            raise AssertionError('invalid: len(word) < 1')

        matches, node = [], self.index

        # Find the furthest matching node in the tree.
        for c in prefix:
            if c not in node:
                return matches  # No matching prefix.
            node = node[c]

        # Perform a depth first traversal of the subtree, carrying along
        # the word spelled out so far and collecting it at every marker.
        stack = [(node, prefix)]
        while stack:
            node, word = stack.pop()
            for key, child in node.items():
                if key == self.EOW:
                    matches.append(word)
                else:
                    stack.append((child, word + key))

        return matches


class AutocompleteTest(unittest.TestCase):
    """Table-driven checks of Trie.search against a small word list."""

    def setUp(self):
        self.words1 = ['t', 'the', 'their', 'there', 'was', 'waste']

    def test_autocomplete(self):
        # Each row pairs a search prefix with the words expected back.
        table = [
            ('t', ['t', 'the', 'their', 'there']),
            ('th', ['the', 'their', 'there']),
            ('the', ['the', 'their', 'there']),
            ('them', []),
            ('thei', ['their']),
            ('their', ['their']),
            ('theirs', []),
            ('ther', ['there']),
            ('there', ['there']),
            ('therein', []),
            ('w', ['was', 'waste']),
            ('wb', []),
            ('wa', ['was', 'waste']),
            ('was', ['was', 'waste']),
            ('wae', []),
            ('wast', ['waste']),
            ('waste', ['waste']),
            ('wastes', []),
            ('x', []),
        ]
        for prefix, expected in table:
            # A fresh trie is built for every row.
            trie = Trie()
            for word in self.words1:
                trie.add_word(word)
            found = trie.search(prefix)
            # Compare sorted copies: the order of results is not checked.
            self.assertEqual(sorted(found), sorted(expected))


# Run only AutocompleteTest.  The first parameter of unittest.main is
# ``module``, so the test class is selected by name through ``defaultTest``
# rather than by passing a TestCase instance in that slot.  ``argv=['']``
# gives unittest an argument list of its own instead of the host process's
# command line, and ``exit=False`` keeps it from calling sys.exit() after
# the run.
unittest.main(module=__name__, defaultTest='AutocompleteTest', argv=[''],
              verbosity=2, exit=False)

test_autocomplete (__main__.AutocompleteTest) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.005s

OK


<unittest.main.TestProgram at 0x7f5f54594320>

## 8.2 PrefixMapSum

### Problem Statement
Implement a PrefixMapSum class with the following interface:
* `insert(key, value)` inserts a new key or updates an existing key with the given value
  * This operation should complete in time proportional to the size of the key.
* `sum(prefix)` returns the sum of the values in the map for all keys that start with the prefix
  * This operation should complete in time proportional to the size of the prefix.

In [2]:
from collections import defaultdict, namedtuple
import unittest


class PrefixMapSum(object):
    """Map of string keys to values with fast prefix-sum queries.

    Keys are stored in a trie of nested dicts.  Every node, the root
    included, stores under ``SUM`` the sum of the values of all keys that
    pass through it, so a query only has to walk the prefix.
    """

    # Dictionary entry used to store sum at a particular level.  Child keys
    # are single characters, so this longer key cannot clash with them.
    SUM = '.sum'

    def __init__(self):
        self.index = defaultdict(dict)
        # Last value inserted for each key; needed to turn an update into
        # a difference against the previous value.
        self.words = {}

    def _accumulate(self, node, incr):
        """Add incr to the running sum stored on node."""
        if self.SUM in node:
            node[self.SUM] += incr
        else:
            node[self.SUM] = incr

    def insert(self, key, value):
        """Insert or update a string with some value.

        Args:
            key: string to insert or update.
            value: value to associate with key; replaces any earlier value.
        """
        # On an update only the difference to the previous value is
        # propagated through the trie.
        incr = value - self.words[key] if key in self.words else value
        self.words[key] = value

        # The root counts every key, which is what makes the empty prefix
        # sum to the total of all values.
        node = self.index
        self._accumulate(node, incr)

        # Update the value for each character from key.
        for c in key:
            node = node.setdefault(c, {})
            self._accumulate(node, incr)

    def sum(self, prefix):
        """Return sum of all words from index beginning with prefix.

        Args:
            prefix: string that matching keys must start with.  The empty
                string matches every key.

        Returns:
            Sum of the values of all matching keys, or 0 if there are none.
        """
        node = self.index
        for c in prefix:
            if c not in node:
                return 0  # No matching prefix.
            node = node[c]
        # Since sums are accumulated during insert, the answer is the sum
        # stored at the last node of the prefix.  Only a root that has
        # never seen an insert lacks the entry.
        return node.get(self.SUM, 0)


class PrefixMapSumTest(unittest.TestCase):
    """Walks one PrefixMapSum through inserts and updates, checking sums."""

    def test_prefix_map_sum(self):
        sums = PrefixMapSum()

        def expect(pairs):
            # Assert, in order, that each prefix currently sums to total.
            for prefix, total in pairs:
                self.assertEqual(sums.sum(prefix), total)

        # Nothing inserted yet.
        expect([('t', 0)])

        # First word.
        sums.insert('the', 3)
        expect([
            ('t', 3),
            ('th', 3),
            ('the', 3),
            ('ther', 0),  # No matching prefix.
        ])

        # Second word.
        sums.insert('they', 4)
        expect([
            ('t', 7),
            ('th', 7),
            ('the', 7),
            ('they', 4),
            ('theys', 0),  # No matching prefix.
        ])

        # Third word.
        sums.insert('them', 5)
        expect([
            ('t', 12),
            ('th', 12),
            ('the', 12),
            ('them', 5),
            ('thems', 0),  # No matching prefix.
        ])

        # Raise the value of the first word.
        sums.insert('the', 6)
        expect([
            ('t', 15),
            ('th', 15),
            ('the', 15),
            ('they', 4),  # Unchanged.
            ('them', 5),  # Unchanged.
        ])

        # Lower the value of the first word.
        sums.insert('the', 1)
        expect([
            ('t', 10),
            ('th', 10),
            ('the', 10),
            ('they', 4),  # Unchanged.
            ('them', 5),  # Unchanged.
        ])


# Run only PrefixMapSumTest.  The first parameter of unittest.main is
# ``module``, so the test class is selected by name through ``defaultTest``
# rather than by passing a TestCase instance in that slot.  ``argv=['']``
# gives unittest an argument list of its own instead of the host process's
# command line, and ``exit=False`` keeps it from calling sys.exit() after
# the run.
unittest.main(module=__name__, defaultTest='PrefixMapSumTest', argv=[''],
              verbosity=2, exit=False)

test_prefix_map_sum (__main__.PrefixMapSumTest) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.003s

OK


<unittest.main.TestProgram at 0x7f5f54517320>