In [3]:
class TrieNode:
    """A single node of the trie.

    Attributes:
        children: dict mapping an edge key (one character when populated
            through ``Trie.insert``) to the child ``TrieNode``.
        is_end_of_word: ``True`` once ``Trie.insert`` has finished a word
            at this node; ``False`` on a freshly created node.
    """

    def __init__(self):
        # A new node terminates no word and has no outgoing edges yet.
        self.is_end_of_word = False
        self.children = dict()

class Trie:
    """Prefix tree with a parenthesised depth-first serialisation.

    ``dfs_with_parentheses`` flattens the tree into a list of tokens: an
    opening token ``"(<label>"`` when a node is entered and ``")"`` when it
    is left. ``size_of_subtree`` counts nodes inside one such bracketed span.
    """

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Insert ``word``, creating a node for every character not yet present.

        The node reached after the last character is flagged with
        ``is_end_of_word = True``. An empty ``word`` flags the root itself.
        """
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.is_end_of_word = True

    def dfs_with_parentheses(self, node=None, label='', output=None):
        """Return the pre-order parenthesised token list of a subtree.

        Args:
            node: Node to start from; ``None`` means the trie root.
            label: Label of ``node``. When truthy, the result is wrapped in
                ``"(<label>"`` ... ``")"``; the default ``''`` emits no
                tokens for the start node itself.
            output: Optional list to append to; a new list is created when
                ``None``.

        Returns:
            The ``output`` list. Children are visited in the iteration order
            of each node's ``children`` dict.

        The traversal uses an explicit stack rather than recursion, so keys
        longer than the interpreter's recursion limit do not raise
        ``RecursionError``.
        """
        if output is None:
            output = []

        if node is None:
            node = self.root

        if label:
            output.append(f"({label}")

        # Each frame pairs a node's label with an iterator over the children
        # that still have to be visited for that node.
        stack = [(label, iter(node.children.items()))]
        while stack:
            current_label, pending = stack[-1]
            entry = next(pending, None)
            if entry is None:
                # All children handled: leave this node.
                stack.pop()
                if current_label:
                    output.append(")")
                continue
            child_label, child = entry
            if child_label:
                output.append(f"({child_label}")
            stack.append((child_label, iter(child.children.items())))
        return output

    def size_of_subtree(self, output, node_label):
        """Count the nodes in the subtree of the first node labelled ``node_label``.

        Args:
            output: Token list as produced by ``dfs_with_parentheses``.
            node_label: Label to look for. Only the first token equal to
                ``"(<node_label>"`` in ``output`` is considered, even if
                several nodes carry that label.

        Returns:
            The number of opening tokens from the matched one up to its
            matching ``")"`` (the matched node counts as one), or ``0``
            when no token matches.
        """
        try:
            open_idx = output.index(f"({node_label}")
        except ValueError:
            return 0

        count = 0
        depth = 0
        for i in range(open_idx, len(output)):
            token = output[i]
            if "(" in token:
                depth += 1
                count += 1
            elif ")" in token:
                depth -= 1
                if depth == 0:
                    # Back at the level where the matched node was opened.
                    break
        return count

In [4]:
def load_words(file_path):
    """Read a text file and return its lines as a list of strings.

    The file is opened in text mode with the platform's default encoding.
    Lines are split with ``str.splitlines``, so line terminators are
    dropped and a blank line yields an empty string.
    """
    with open(file_path, 'r') as handle:
        contents = handle.read()
    return contents.splitlines()

In [9]:
# Build the trie from the word list on disk.
words = load_words('words.txt')

trie = Trie()
for word in words:
    trie.insert(word)

# Flatten the trie into its parenthesised DFS token list.
parenthesis_output = trie.dfs_with_parentheses()

# Every node contributes exactly one opening token, so counting the tokens
# that contain "(" gives the number of nodes.
num_nodes = sum('(' in token for token in parenthesis_output)

# Size estimate counted in parentheses only: one "(" and one ")" per node.
# The letter carried by each opening token is not included in this figure.
estimated_size = 2 * num_nodes

print(f"Number of nodes in the Trie: {num_nodes}")
print(f"Estimated size of the compact representation (in terms of parentheses): {estimated_size}")


Number of nodes in the Trie: 1420646
Estimated size of the compact representation (in terms of parentheses): 2841292
