# Problem Set 4: Huffman Coding
Kim Merchant

Here are some tools you'll need to implement the Huffman Coding algorithm.

In [13]:
from heapq import heappush, heappop

In [14]:
class Tree:
    """A binary tree node.

    Attributes:
        data: The value stored at this node.
        left: The left child node, or None when there is none.
        right: The right child node, or None when there is none.
    """

    def __init__(self, data, left, right):
        # Store the payload and both children in one step.
        self.data, self.left, self.right = data, left, right

### 1) Huffman tree

Fill in the function below to build and return a Huffman tree given a string of text.

In [15]:
def huffman_tree(text):
    """Build and return the Huffman tree for the symbols in ``text``.

    Every leaf is ``Tree(symbol, None, None)``. Every internal node stores the
    concatenation of the symbols found beneath it in ``data``, so a membership
    test such as ``char in node.data`` tells which subtree contains a symbol.
    When two subtrees are merged, the heavier one becomes the left child; on
    a tie the subtree taken from the heap first becomes the left child.

    Args:
        text: A non-empty string (or other iterable of one-character strings)
            whose symbol frequencies determine the shape of the tree.

    Returns:
        The root ``Tree``. If ``text`` contains only one distinct symbol, the
        root is an internal node whose left child is that symbol's leaf and
        whose right child is None, so the symbol still receives a one-bit
        code ("0") instead of an empty one.

    Raises:
        ValueError: If ``text`` is empty.
    """
    # Find the frequency of each letter
    freq = {}
    for c in text:
        freq[c] = freq.get(c, 0) + 1

    if not freq:
        raise ValueError("cannot build a Huffman tree from empty text")

    # Heap entries are (frequency, sequence number, node). The sequence number
    # breaks frequency ties, so two Tree objects are never compared with each
    # other (Tree defines no ordering and the comparison would raise TypeError).
    heap = []
    sequence = 0
    for symbol, count in freq.items():
        heappush(heap, (count, sequence, Tree(symbol, None, None)))
        sequence += 1

    # A lone symbol gets a parent node so that its code is "0" rather than "".
    if len(heap) == 1:
        only_leaf = heap[0][2]
        return Tree(only_leaf.data, only_leaf, None)

    # Repeatedly merge the two lightest subtrees and put the merged subtree
    # back on the heap so it competes with the remaining ones by total weight.
    while len(heap) > 1:
        light_freq, _, light = heappop(heap)
        heavy_freq, _, heavy = heappop(heap)
        label = light.data + heavy.data

        # Determine which subtree should be the left branch, and which the right
        if light_freq < heavy_freq:
            parent = Tree(label, heavy, light)
        else:
            parent = Tree(label, light, heavy)

        heappush(heap, (light_freq + heavy_freq, sequence, parent))
        sequence += 1

    return heap[0][2]

In [16]:
# Testing: a sample text in which each letter is half as frequent as the one
# before it (a=8, b=4, c=2, d=1), and the Huffman tree built from that text.
text = "a"*8 + "b"*4 + "c"*2 + "d"*1
tree = huffman_tree(text)

### 2) Get code

Fill in the function below to return the code for a character according to a Huffman tree.

In [17]:
def get_code(char, tree):
    """Return the bit string that encodes ``char`` according to ``tree``.

    The tree is walked from the root: each step into a left child contributes
    "0" and each step into a right child contributes "1". A node whose
    ``left`` is None is treated as a leaf.

    Args:
        char: The character to look up.
        tree: Root node of the tree. Each node's ``data`` must contain every
            character stored beneath it, and each leaf's ``data`` is its
            character.

    Returns:
        The code as a string of "0" and "1" characters. It is empty when
        ``tree`` itself is a leaf holding ``char``.

    Raises:
        ValueError: If ``char`` is not found in the tree.
    """
    bits = []
    node = tree

    # Descend until a leaf is reached, recording the branch taken at each step
    while node.left is not None:
        if char in node.left.data:
            bits.append("0")
            node = node.left
        elif node.right is not None and char in node.right.data:
            bits.append("1")
            node = node.right
        else:
            # Neither subtree holds the character; looping again would never end
            raise ValueError(
                "character {!r} is not in the Huffman tree".format(char))

    # Guard against ending on a leaf that holds a different character
    if node.data != char:
        raise ValueError(
            "character {!r} is not in the Huffman tree".format(char))

    return "".join(bits)

In [18]:
# Testing: print each letter of the sample text next to its code
for char in "abcd":
    print(char, get_code(char, tree))

a 0
b 10
c 110
d 111


### 3) Encode

Fill in the function below to use a Huffman tree to encode a string of text.

In [19]:
def encode(text, tree):
    """Encode ``text`` as a string of "0"/"1" characters using ``tree``.

    Args:
        text: The string to encode.
        tree: Root node of the Huffman tree, passed on to ``get_code``.

    Returns:
        The codes of all characters of ``text`` concatenated in order; an
        empty string when ``text`` is empty.
    """
    # Look each distinct character up only once; repeated characters reuse
    # the stored code instead of walking the tree again.
    codes = {}
    pieces = []
    for char in text:
        if char not in codes:
            codes[char] = get_code(char, tree)
        pieces.append(codes[char])

    # A single join avoids rebuilding the growing result for every character
    return "".join(pieces)

In [20]:
# Testing: encode the sample text and show the resulting bit string
binary = encode(text, tree)
print(binary)

0000000010101010110110111


### 4) Get character

Fill in the function below to return the character whose code begins at a given index of an encoded string.

In [21]:
def get_char(binary, index, tree):
    """Return the character whose code starts at ``index`` in ``binary``.

    Starting from ``tree``, the bits of ``binary`` are followed one at a time
    from position ``index``: "0" moves to the left child and anything else
    moves to the right child. The walk stops at the first node whose ``left``
    is None, and that node's ``data`` is returned.
    """
    node = tree
    position = index
    while node.left is not None:
        # Pick the branch named by the current bit, then advance to the next bit
        node = node.left if binary[position] == "0" else node.right
        position += 1
    return node.data

In [22]:
# Testing: show the character whose code starts at index 8 of the bit string
print(get_char(binary, 8, tree))

b


### 5) Decode

Fill in the function below to use a Huffman tree to decode an encoded string.

In [23]:
def decode(binary, tree):
    """Decode a string of "0"/"1" characters back into text using ``tree``.

    The bits are consumed in a single pass: "0" moves to the left child and
    "1" to the right child. Whenever a leaf (a node whose ``left`` is None)
    is reached, its ``data`` is emitted and the walk restarts at the root.

    Args:
        binary: The encoded string, made only of "0" and "1" characters.
        tree: Root node of the Huffman tree that produced ``binary``.

    Returns:
        The decoded text; an empty string when ``binary`` is empty.

    Raises:
        ValueError: If ``binary`` is non-empty but ``tree`` is a single leaf
            (no bit could ever be consumed), if ``binary`` contains a
            character other than "0" or "1", if a bit leads to a missing
            child, or if ``binary`` ends in the middle of a code.
    """
    if len(binary) == 0:
        return ""

    # A leaf-only tree assigns the empty code, so no bit could be consumed
    # and decoding could never make progress.
    if tree.left is None:
        raise ValueError("cannot decode bits with a tree that is a single leaf")

    decoded = []
    node = tree
    for bit in binary:
        if bit == "0":
            node = node.left
        elif bit == "1":
            node = node.right
        else:
            raise ValueError("invalid bit {!r} in encoded string".format(bit))

        if node is None:
            raise ValueError("encoded string does not match the Huffman tree")

        # Reached a leaf: emit its character and start the next code at the root
        if node.left is None:
            decoded.append(node.data)
            node = tree

    # Stopping anywhere but the root means the last code was cut short
    if node is not tree:
        raise ValueError("encoded string ends in the middle of a code")

    return "".join(decoded)

In [24]:
# Testing: decoding the encoded sample must give back the original text
decoded = decode(binary, tree)
print(decoded)
# Check the round trip explicitly instead of relying on reading the output
assert decoded == text, "decoding the encoded text did not reproduce the input"

aaaaaaaabbbbccd
