-
Notifications
You must be signed in to change notification settings - Fork 1
/
trie.py
129 lines (117 loc) · 4.59 KB
/
trie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import pickle
from functools import reduce
class TrieNode(object):
"""
Implementation of a trie data-structure
"""
def __init__(self, char):
self.char = char
self.children = []
# Is it the last character of the word.
self.word_finished = False
# How many times this character appeared in the addition process
self.counter = 1
def add(self, word):
'''
Adding a word in the trie structure
'''
node = self
for char in word:
found_in_child = False
# Search for the character in the children of the present `node`
for child in node.children:
if child.char == char:
# We found it, increase the counter by 1 to keep track that another
# word has it as well
child.counter += 1
# And point the node to the child that contains this char
node = child
found_in_child = True
break
# We did not find it so add a new chlid
if not found_in_child:
new_node = TrieNode(char)
node.children.append(new_node)
# And then point node to the new child
node = new_node
# Everything finished. Mark it as the end of a word.
node.word_finished = True
def find_prefix(self, prefix):
'''
Check and return
1. If the prefix exists in any of the words we added so far
2. If yes then how may words actually have the prefix
'''
node = self
# If the root node has no children, then return False.
# Because it means we are trying to search in an empty trie
if not self.children:
return False, 0
for char in prefix:
char_not_found = True
# Search through all the children of the present `node`
for child in node.children:
if child.char == char:
# We found the char existing in the child.
char_not_found = False
# Assign node as the child containing the char and break
node = child
break
# Return False anyway when we did not find a char.
if char_not_found:
return False, 0
# Well, we are here means we have found the prefix. Return true to indicate that
# And also the counter of the last node. This indicates how many words have this
# prefix
return True, node.counter
def all_suffixes(self, prefix):
'''
Finds all the suffixes possible of a given input string from any given node
'''
results = set()
if self.word_finished:
results.add(prefix)
if not self.children:
return results
return results | reduce(lambda a, b: a | b, [node.all_suffixes(prefix + node.char) for node in self.children])
def autocomplete(self, prefix):
'''
Finds all the possible autocomplete options of a given string, by making use of the all_suffixes function.
First we traverse the trie for the letters in the prefix we have then call all_suffixes
We refine the list to make sure that the returned results have only letters of the alphabet
'''
node = self
for char in prefix:
if char not in [child.char for child in node.children]:
return None
for child in node.children:
if child.char == char:
node = child
auto_list = list(node.all_suffixes(prefix))
refined_auto_list = list()
for word in auto_list:
flag = 0
for char in word:
if not (('a' <= char <= 'z') or ('A' <= char <= 'Z') or (char == ' ')):
flag = 1
if flag == 0:
refined_auto_list.append(word)
return refined_auto_list
def main():
'''
Building the trie on our vocabulary and storing it.
'''
print('Building Trie Object...', end=' ')
import json
fileNames = ['ingredients', 'directions', 'dish_names', 'recipe_links']
trie = TrieNode('*')
for fileName in fileNames[:-1]:
with open("vocabulary/{}_autocomplete.json".format(fileName), 'r') as fp:
vocab_list = json.load(fp)
for word in vocab_list:
trie.add(word)
with open("my_trie.pickle", "wb") as fp:
pickle.dump(trie, fp)
print('Done.')
if __name__ == '__main__':
main()