In [1]:
class Node:
    """A single node of a Huffman coding tree.

    Attributes:
        char: the symbol stored at this node (``None`` for the internal
            nodes that ``new_binary_tree`` creates).
        frequency: weight of the node.
        left: left child node, or ``None``.
        right: right child node, or ``None``.
    """

    def __init__(self, char, frequency, left=None, right=None):
        # Payload first, then the links to the children.
        self.char, self.frequency = char, frequency
        self.left, self.right = left, right

In [2]:
# define class binary tree
class BinaryTree:
    """Thin wrapper around the root ``Node`` of a tree.

    Attributes:
        root: the root node handed to the constructor.
        frequency: copy of ``root.frequency`` taken at construction time.
    """

    def __init__(self, root):
        # Keep a handle on the root node of the tree.
        self.root = root
        # Mirror the root's frequency on the tree itself so that a heap can
        # compare trees directly through ``tree.frequency``.
        self.frequency = root.frequency

In [3]:
# Combine two binary trees into one
def new_binary_tree(T1, T2):
    """Merge two trees under a fresh internal root.

    The new root carries no character (``char`` is ``None``); its frequency
    is the sum of the two root frequencies. ``T1``'s root becomes the left
    child and ``T2``'s root the right child.

    Args:
        T1: tree whose root becomes the left subtree.
        T2: tree whose root becomes the right subtree.

    Returns:
        A new ``BinaryTree`` wrapping the combined root.
    """
    left_root, right_root = T1.root, T2.root
    combined_frequency = left_root.frequency + right_root.frequency
    return BinaryTree(Node(None, combined_frequency, left_root, right_root))

In [4]:
# Tree traversal algorithm to present final results
def in_order(node, code):
    """Print the code of every leaf below ``node``, visiting leaves left to right.

    Args:
        node: subtree root; must expose ``char``, ``frequency``, ``left``
            and ``right``.
        code: string of "0"/"1" characters describing the path from the
            tree root down to ``node`` ("0" = left edge, "1" = right edge).
    """
    if node.left is None and node.right is None:
        # Leaf reached: the accumulated path is this character's code.
        print("Char {} with code {} and frequency {}".format(node.char, code, node.frequency))
        return
    if node.left is not None:
        in_order(node.left, code + "0")
    if node.right is not None:
        in_order(node.right, code + "1")


def in_order_traversal(T):
    """Print the code of every character stored in tree ``T``.

    Args:
        T: tree object exposing a ``root`` node, or ``None`` (an empty
            tree), in which case nothing is printed.
    """
    if T is None:
        return
    root = T.root
    # A tree that is a single leaf (text with only one distinct character)
    # has no edges, which would give its character an empty, unusable code.
    # Give it the one-bit code "0" instead.
    root_is_leaf = root.left is None and root.right is None
    in_order(root, "0" if root_is_leaf else "")
    

In [5]:
class PriorityQueue:
    """Array-backed binary min-heap ordered by each item's ``frequency``.

    Items are stored in ``self.queue`` starting at index 1, so the children
    of the item at index ``i`` sit at ``2*i`` and ``2*i + 1`` and its parent
    at ``i // 2``. ``self.size`` is the number of stored items.
    """

    def __init__(self):
        # Index 0 is a permanent placeholder; real items start at index 1.
        self.queue = [None]
        self.size = 0

    def insert(self, tree):
        """Add ``tree`` to the heap and restore the heap order."""
        self.queue.append(tree)
        self.size += 1
        self.swift_up(self.size)

    def remove_min(self):
        """Remove and return the item with the smallest frequency.

        When the heap is empty a message is printed and ``None`` is returned.
        """
        if self.size == 0:
            print("The queue is already empty")
            return
        # Detach the last slot; it either is the answer (single item) or
        # becomes the new root that is then pushed down into place.
        last = self.queue.pop()
        self.size -= 1
        if self.size == 0:
            return last
        smallest = self.queue[1]
        self.queue[1] = last
        self.swift_down(1)
        return smallest

    def swift_up(self, index):
        """Move the item at ``index`` towards the root while it is strictly
        smaller than its parent."""
        heap = self.queue
        while index != 1:
            parent_index = index // 2
            if not heap[index].frequency < heap[parent_index].frequency:
                break
            heap[index], heap[parent_index] = heap[parent_index], heap[index]
            index = parent_index

    def swift_down(self, index):
        """Move the item at ``index`` towards the leaves until neither child
        is strictly smaller than it.

        On ties the item stays put; between two equal children the left one
        is preferred.
        """
        heap = self.queue
        while True:
            current = heap[index]
            left_index = 2 * index
            right_index = left_index + 1
            # No children at all: already in place.
            if left_index > self.size:
                return
            smallest_index = index
            if heap[left_index].frequency < current.frequency:
                smallest_index = left_index
            # The right child may not exist; it wins only when strictly smaller.
            if (right_index <= self.size
                    and heap[right_index].frequency < heap[smallest_index].frequency):
                smallest_index = right_index
            if smallest_index == index:
                return
            heap[index], heap[smallest_index] = heap[smallest_index], current
            index = smallest_index

In [6]:
def huffman(S):
    """Build a Huffman coding tree for the characters of ``S``.

    Every distinct character becomes a one-node tree weighted by its number
    of occurrences; the two lightest trees are then merged repeatedly until
    a single tree remains.

    Args:
        S: iterable of characters (typically a string).

    Returns:
        The final ``BinaryTree``, or ``None`` when ``S`` contains no
        characters.
    """
    # Count how often each character occurs in S.
    char_to_count = {}
    for c in S:
        char_to_count[c] = char_to_count.get(c, 0) + 1

    # Nothing to encode: return early instead of asking an empty queue for
    # its minimum.
    if not char_to_count:
        return None

    # Seed the queue with one single-leaf tree per distinct character.
    Q = PriorityQueue()
    for char, count in char_to_count.items():
        Q.insert(BinaryTree(Node(char, count)))

    # Merge the two lightest trees until only one tree is left.
    while Q.size > 1:
        left = Q.remove_min()
        right = Q.remove_min()
        Q.insert(new_binary_tree(left, right))

    return Q.remove_min()

In [7]:
# Demo: build the Huffman tree for a sample string, then print the code and
# frequency of every character it contains.
S = "LJFSOPDJFLSDFOPOJFODFJSOPJWOENROPFSHJNFWOIPEHNFOPNSFOJKSNDPOFIHSLHNLHFDOSUJLKSFJSLFJFSDOLFJLS"
T = huffman(S)
in_order_traversal(T)

Char L with code 000 and frequency 8
Char E with code 00100 and frequency 2
Char K with code 00101 and frequency 2
Char H with code 0011 and frequency 5
Char J with code 010 and frequency 11
Char D with code 0110 and frequency 6
Char N with code 0111 and frequency 6
Char S with code 100 and frequency 12
Char I with code 10100 and frequency 2
Char W with code 101010 and frequency 2
Char R with code 1010110 and frequency 1
Char U with code 1010111 and frequency 1
Char P with code 1011 and frequency 7
Char O with code 110 and frequency 13
Char F with code 111 and frequency 15
