In [1]:
from queue import PriorityQueue
from collections import defaultdict
import heapq
import pandas as pd
import numpy as np

In [44]:
class TreeNode:
    """Binary-tree node that can render the subtree below it as ASCII art.

    Attributes:
        val: payload of the node; ``str(val)`` is what gets drawn.
        left, right: child nodes, ``None`` until assigned by the caller.
        parent: node this one hangs from (``None`` when not set).
        child_type: free-form tag stored as given (``None`` by default).
        print_step: flag stored on the node; not read by this class.
    """

    def __init__(self, val, parent = None, child_type = None, print_step = False):
        self.val = val
        self.parent = parent
        self.child_type = child_type
        self.print_step = print_step
        # Children are always attached after construction.
        self.left = self.right = None

    def print_tree(self):
        """Print the rendering produced by :meth:`display`, one row per line."""
        rows = self.display()[0]
        print(*rows, sep='\n')

    def display(self):
        """Lay out the subtree rooted here as a block of text.

        Returns:
            A 4-tuple ``(lines, width, height, middle)`` where ``lines`` is the
            list of text rows, ``width``/``height`` are the block dimensions and
            ``middle`` is the column the parent uses to anchor its connector.
        """
        label = str(self.val)
        label_w = len(label)
        has_left = bool(self.left)
        has_right = bool(self.right)

        # Leaf: the block is just the label itself.
        if not (has_left or has_right):
            return [label], label_w, 1, label_w // 2

        # Left child only: label sits to the right of the child's block.
        if has_left and not has_right:
            rows, w, h, mid = self.left.display()
            header = ' ' * (mid + 1) + '_' * (w - mid - 1) + label
            branch = ' ' * mid + '/' + ' ' * (w - mid - 1 + label_w)
            body = [row + ' ' * label_w for row in rows]
            return [header, branch] + body, w + label_w, h + 2, w + label_w // 2

        # Right child only: label sits to the left of the child's block.
        if has_right and not has_left:
            rows, w, h, mid = self.right.display()
            header = label + '_' * mid + ' ' * (w - mid)
            branch = ' ' * (label_w + mid) + '\\' + ' ' * (w - mid - 1)
            body = [' ' * label_w + row for row in rows]
            return [header, branch] + body, w + label_w, h + 2, label_w // 2

        # Both children: label sits between the two child blocks.
        l_rows, lw, lh, lmid = self.left.display()
        r_rows, rw, rh, rmid = self.right.display()
        header = (
            ' ' * (lmid + 1) + '_' * (lw - lmid - 1)
            + label
            + '_' * rmid + ' ' * (rw - rmid)
        )
        branch = (
            ' ' * lmid + '/'
            + ' ' * (lw - lmid - 1 + label_w + rmid)
            + '\\' + ' ' * (rw - rmid - 1)
        )
        # Pad the shorter block with blank rows so both sides line up.
        depth = max(lh, rh)
        l_rows = l_rows + [' ' * lw] * (depth - lh)
        r_rows = r_rows + [' ' * rw] * (depth - rh)
        gap = ' ' * label_w
        body = [l_row + gap + r_row for l_row, r_row in zip(l_rows, r_rows)]
        return [header, branch] + body, lw + rw + label_w, depth + 2, lw + label_w // 2
        
        
class Huffman:
    """Build a Huffman code for a set of symbols and display it.

    Symbols are supplied either explicitly (``characters`` with a parallel
    ``frequency`` list) or derived from ``input_string`` by counting its
    characters (a space is reported as the symbol ``'SP'``).

    Attributes populated by the methods:
        characters, frequency: parallel sequences of symbols and weights.
        char_map: symbol -> count (only when built from ``input_string``).
        object_map: weight -> list of symbols carrying that weight.
        root: root ``TreeNode`` of the tree (after ``construct_tree``).
        leaf_dict: weight -> list of leaf nodes with that weight.
        code_dict: weight -> sorted list of the codes of those leaves.
        codes: symbol -> code assigned to it.
        code_table: the ``DataFrame`` that gets displayed.
    """

    def __init__(self, characters = None, frequency = None, input_string = None):
        """Store the inputs; a non-empty ``input_string`` overrides the lists."""
        self.characters = characters
        self.frequency = frequency
        self.input_string = input_string
        if self.input_string:
            self.process_string()
        self.prepare_object_map()

    def process_string(self):
        """Count the characters of ``input_string`` into symbols/frequencies.

        Symbols keep first-occurrence order; a space is stored as ``'SP'``.
        """
        self.char_map = {}
        for c in self.input_string:
            symbol = 'SP' if c == ' ' else c
            self.char_map[symbol] = self.char_map.get(symbol, 0) + 1
        self.characters = list(self.char_map)
        self.frequency = list(self.char_map.values())

    def prepare_object_map(self):
        """Group the symbols by weight into ``object_map``.

        Missing inputs (``None``) yield an empty map instead of a ``TypeError``
        so that the problem is reported by ``construct_tree``.
        """
        self.object_map = defaultdict(list)
        if self.characters is None or self.frequency is None:
            return
        for i, symbol in enumerate(self.characters):
            self.object_map[self.frequency[i]].append(symbol)

    def construct_tree(self):
        """Build the Huffman tree, print it, then display the code table.

        The two lightest nodes are merged repeatedly; the first one popped
        becomes the left child. Ties on weight are broken by insertion order
        (leaves in input order, then merged nodes in creation order).

        Raises:
            ValueError: if there are no frequencies to build a tree from.
        """
        weights = [] if self.frequency is None else list(self.frequency)
        if not weights:
            raise ValueError('Huffman needs at least one symbol to build a tree')

        self.root = None
        self.leaf_dict = defaultdict(list)

        # Heap entries are (weight, insertion order, node). The unique order
        # value keeps ties FIFO and prevents nodes from ever being compared.
        heap = []
        for order, weight in enumerate(weights):
            leaf = TreeNode(weight)
            self.leaf_dict[weight].append(leaf)
            heap.append((weight, order, leaf))
        heapq.heapify(heap)
        order = len(heap)

        while len(heap) > 1:
            w1, _, first = heapq.heappop(heap)
            w2, _, second = heapq.heappop(heap)
            # No int() cast here: truncating would corrupt fractional weights.
            merged = TreeNode(w1 + w2)
            merged.left, merged.right = first, second
            first.parent = second.parent = merged
            first.child_type = 'left'
            second.child_type = 'right'
            heapq.heappush(heap, (w1 + w2, order, merged))
            order += 1

        # With a single symbol the only leaf is the whole tree.
        self.root = heap[0][2]

        print('Huffman Tree:\n')
        self.root.print_tree()
        self.get_huffman_codes()

    def get_huffman_codes(self):
        """Derive the code of every leaf and display the resulting table.

        Each leaf is walked up to the root, collecting '0' for a left edge and
        '1' for a right edge. Symbols that share a weight receive the codes of
        that weight's leaves in sorted order, in the order the symbols appear.
        A lone symbol (leaf is the root) gets the one-bit code '0'.
        """
        print('\n\nAssigning weight 0 to the left edges and weight 1 to the right edges')

        self.code_dict = defaultdict(list)
        for weight, leaves in self.leaf_dict.items():
            for node in leaves:
                bits = []
                while node:
                    if node.child_type == 'left':
                        bits.append('0')
                    elif node.child_type == 'right':
                        bits.append('1')
                    node = node.parent
                # Bits were collected leaf-to-root; the code reads root-to-leaf.
                self.code_dict[weight].append(''.join(reversed(bits)) or '0')
            self.code_dict[weight].sort()

        symbols = list(self.characters)
        weights = list(self.frequency)
        # Look codes up by the real weight value (no str/int round trip), so a
        # missing weight raises KeyError instead of blocking on an empty queue.
        pending = {weight: iter(codes) for weight, codes in self.code_dict.items()}
        assigned = [next(pending[weight]) for weight in weights]

        df_huffman = pd.DataFrame({'Characters': symbols, 'Frequency': weights})
        df_huffman['Huffman Code'] = assigned
        df_huffman['No of bits'] = [len(code) for code in assigned]

        self.codes = dict(zip(symbols, assigned))
        self.code_table = df_huffman

        # `display` is only defined inside IPython/Jupyter; fall back to print.
        try:
            show = display
        except NameError:
            show = print
        show(df_huffman)

In [45]:
# Example 1: explicit symbols with a parallel list of their frequencies.
h = Huffman(characters = ['I','II','III','VI','X','XVI'], frequency=[121, 389, 127, 645, 237,198])

In [46]:
# Build the tree, print it, and display the table of codes per symbol.
h.construct_tree()

Huffman Tree:

  _1717___________                 
 /                \                
645         ____1072__________     
           /                  \    
         _435_           ____637_  
        /     \         /        \ 
       198   237      _248_     389
                     /     \       
                    121   127      


Assigning weight 0 to the left edges and weight 1 to the right edges


Unnamed: 0,Characters,Frequency,Huffman Code,No of bits
0,I,121,1100,4
1,II,389,111,3
2,III,127,1101,4
3,VI,645,0,1
4,X,237,101,3
5,XVI,198,100,3


In [47]:
# Example 2: symbols and frequencies are counted from the string (space is shown as 'SP').
h = Huffman(input_string='THE CAT SAT ON THE MAT')

In [48]:
# Build the tree, print it, and display the table of codes per symbol.
h.construct_tree()

Huffman Tree:

   _22______          
  /         \         
 10       _12___      
/  \     /      \     
5  5    _5     _7_    
       /  \   /   \   
       2  3   3   4_  
      / \    / \ /  \ 
      1 1    1 2 2  2 
                   / \
                   1 1


Assigning weight 0 to the left edges and weight 1 to the right edges


Unnamed: 0,Characters,Frequency,Huffman Code,No of bits
0,T,5,0,2
1,H,2,1101,4
2,E,2,1110,4
3,SP,5,1,2
4,C,1,1000,4
5,A,3,101,3
6,S,1,1001,4
7,O,1,1100,4
8,N,1,11110,5
9,M,1,11111,5
