## **Huffman Coding**

In [105]:
from time import time
from random import randint
import heapq

In [106]:
class Node:
    """One node of a Huffman tree.

    Attributes:
        data:  the symbol stored in the node.
        freq:  the weight used to order nodes.
        leaf:  flag telling tree walkers whether this node carries a symbol.
        left:  left child, ``None`` until a builder attaches one.
        right: right child, ``None`` until a builder attaches one.
    """

    def __init__(self,data,freq,leaf):
        self.data, self.freq, self.leaf = data, freq, leaf
        # A fresh node has no children; they are assigned from outside.
        self.left = self.right = None

    def __lt__(self,other):
        """Compare by frequency only, which is what heapq needs to rank nodes."""
        return self.freq < other.freq

In [107]:
def create_huffman_tree(string):
    """Build a Huffman tree for the symbols in ``string``.

    Every distinct symbol becomes a leaf weighted by its number of
    occurrences. The two lightest nodes are then repeatedly merged under a
    new internal node (labelled '#') until a single root remains.

    Args:
        string: iterable of hashable symbols, typically a ``str``.

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``string`` is empty.
        When ``string`` has only one distinct symbol, the root is an internal
        node whose left child is that symbol's leaf, so the symbol is reached
        with the one-bit path "0" rather than an empty path.
    """
    # Count occurrences of each symbol (first-seen order is preserved).
    frequencies = {}
    for symbol in string:
        frequencies[symbol] = frequencies.get(symbol, 0) + 1

    # Nothing to encode: there is no tree to build.
    if not frequencies:
        return None

    # Push leaves one at a time (not heapify) so the heap layout, and hence
    # the tie-breaking between equal frequencies, matches earlier behaviour.
    heap = []
    for symbol, freq in frequencies.items():
        heapq.heappush(heap, Node(symbol, freq, True))

    # A lone leaf used as the root would have an empty code, which makes the
    # encoded text empty and impossible to decode. Give it a parent instead.
    if len(heap) == 1:
        only_leaf = heap[0]
        root = Node('#', only_leaf.freq, False)
        root.left = only_leaf
        return root

    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)

        internal = Node('#', left.freq + right.freq, False)
        internal.left = left
        internal.right = right

        heapq.heappush(heap, internal)

    return heapq.heappop(heap)


def huffman_codes(root,code="",codes=None):
    """Collect the bit string for every leaf reachable from ``root``.

    Walks the tree depth-first, appending "0" when descending to a left
    child and "1" when descending to a right child.

    Args:
        root:  tree node with ``leaf``, ``data``, ``left`` and ``right``
               attributes, or ``None`` for an empty tree.
        code:  bit-string prefix accumulated so far (defaults to "").
        codes: dict to fill in place; a new dict is created when omitted.

    Returns:
        The ``codes`` dict mapping each leaf's ``data`` to its bit string.
        The dict is returned even when ``root`` is ``None``, so callers
        always get a mapping back.
    """
    if codes is None:
        codes = {}
    if root is None:
        return codes
    if root.leaf:
        codes[root.data] = code
        # A leaf ends the path; there is nothing below it to visit.
        return codes
    huffman_codes(root.left, code + "0", codes)
    huffman_codes(root.right, code + "1", codes)
    return codes

def encode_huffman(codes,string):
    """Encode ``string`` by concatenating the code of each of its symbols.

    Args:
        codes:  mapping from symbol to its bit string.
        string: iterable of symbols to encode.

    Returns:
        A single ``str`` made of the per-symbol codes in input order
        ("" when ``string`` is empty).

    Raises:
        KeyError: if a symbol of ``string`` has no entry in ``codes``.
    """
    # One join instead of repeated ``+=`` avoids re-copying the growing result.
    return "".join(codes[symbol] for symbol in string)

def decode_huffman(tree, encoding):
    """Decode a string of "0"/"1" characters by walking ``tree``.

    Starting at the root, "0" moves to the left child and "1" to the right
    child. Each time a leaf is reached its ``data`` is emitted and the walk
    restarts from the root.

    Args:
        tree:     root node with ``leaf``, ``data``, ``left`` and ``right``
                  attributes (``None`` is accepted only with an empty
                  ``encoding``).
        encoding: iterable of the characters "0" and "1".

    Returns:
        The decoded ``str``. Trailing bits that stop short of a leaf are
        ignored.

    Raises:
        ValueError: if ``encoding`` contains a character other than "0" or
            "1", or if the bits lead to a missing child of the tree.
    """
    if tree is None:
        if encoding:
            raise ValueError("cannot decode a non-empty encoding without a tree")
        return ""

    symbols = []
    node = tree
    for position, bit in enumerate(encoding):
        if bit == "0":
            node = node.left
        elif bit == "1":
            node = node.right
        else:
            # Previously any non-"0" character was silently treated as "1".
            raise ValueError(
                "invalid bit %r at position %d" % (bit, position)
            )

        if node is None:
            # The bits describe a path the tree does not have.
            raise ValueError(
                "encoding leaves the tree at position %d" % position
            )

        if node.leaf:
            symbols.append(node.data)
            node = tree

    # Join once at the end instead of growing a string with ``+=``.
    return "".join(symbols)
        



### Encoding + Decoding Tests

In [111]:
# Round trip: build the tree, derive the code table, encode, then decode.
string = "huffman coding example"
tree = create_huffman_tree(string)
codes = huffman_codes(tree,"",{})
encoding = encode_huffman(codes,string)
print(encoding)
decoding = decode_huffman(tree,encoding)
print(decoding)
# Make the check explicit instead of relying on comparing the printed text by eye.
assert decoding == string, "decoded text does not match the original input"

1100001100010011011000010101011101111100111111000101100110101001101000010111101111111100
huffman coding example
