# Huffman Coding


In [1]:
from heapq import heappush, heapify, heappop
from bitarray import bitarray

In [2]:
# create Huffman Tree from input array

class Node:
    
    def __init__(self, data, freq):
        self.data = data
        self.freq = freq
        self.huff = ''
        self.left = None
        self.right = None

    # override heapq comparisonoperator to deal with error 'can't compare type 'node' and 'node'
    def __lt__(self, f):
        return self.freq < f.freq
        
        
class Huffman:
    
    def __init__(self):
        self.ip = input()
        self.d = {}
        for i in self.ip:
            self.d[i] = 1 + self.d.get(i, 0)
        
        self.chars = []
        self.freq  = []
        for key, val in self.d.items():
            self.chars.append(key)
            self.freq.append(val)
    
        
    def generateTree(self):
        
        minHeap = []
        for i in range(len(self.chars)):
            node = Node(self.chars[i], self.freq[i])
            minHeap.append((self.freq[i], node))
        heapify(minHeap)
        
        while len(minHeap)>1:
            
            left  = heappop(minHeap)
            right = heappop(minHeap)
            node = Node('dummy', left[0]+right[0])
            node.left  = left[1]
            node.right = right[1]
            heappush(minHeap, (node.freq, node))
        root = heappop(minHeap)
        return root[1]


    def generateTable(self, root):
        '''
        input:  root of Huffman tree
        output: Huffman prefix dictionary
        '''
        
        s = ''
        tree = {}
        
        def recursiveHelper(root, s):
            
            if not root.left and not root.right:
                tree[root.data] = s
                return
            if root.left:
                recursiveHelper(root.left, s+'0')
            if root.right:
                recursiveHelper(root.right, s[:-1]+'1')
        
        recursiveHelper(root, s)
        return tree

In [3]:
huff = Huffman()

happy hour


In [4]:
# generate Huffman tree from input
root = huff.generateTree()

In [5]:
# generate Huffman table from root
tree = huff.generateTable(root)
print(tree,'\n')

# convert Huffman tree to bitarray form for using for encoding
tree = {key : bitarray(str(val)) for key, val in tree.items()}
print(tree)

{'a': '000', 'r': '01', 'p': '1', 'u': '100', 'o': '11', 'h': '10', 'y': '10', ' ': '1'} 

{'a': bitarray('000'), 'r': bitarray('01'), 'p': bitarray('1'), 'u': bitarray('100'), 'o': bitarray('11'), 'h': bitarray('10'), 'y': bitarray('10'), ' ': bitarray('1')}


In [6]:
# encode message according to generated Huffman Tree
text = input('enter message : ')

enter message : happy hour


In [7]:
# encode input text according to Huffman Code

encoded_text = bitarray()
encoded_text.encode(tree, text)
print(encoded_text)

bitarray('1000011101101110001')


In [8]:
# decode input text according to Huffman Code

decoded_text = bitarray()

with open('compressed_file.bin', 'rb') as r:
    decoded_text.fromfile(r)
    
decoded_text = decoded_text.decode(huffman_dict) 
decoded_text = ''.join(decoded_text)

print(decoded_text)

NameError: name 'huffman_dict' is not defined

### References

- https://www.youtube.com/watch?v=co4_ahEDCho<br>
- https://www.youtube.com/watch?v=HHIc5JZyenI&list=PLDN4rrl48XKpZkf03iYFl-O29szjTrs_O&index=42 <br>
- https://www.geeksforgeeks.org/huffman-coding-greedy-algo-3/ <br>
- https://github.com/TiongSun/DataCompression/blob/master/Huffman_Coding.ipynb <br>