## Huffman Compression Algorithm

## Character Frequency

In [1]:
def char_freq_table(input_string):
    table = {}
    for char in input_string:
        if char in table.keys():
            table[char] += 1
        else:
            table[char] = 1

    return table

In [2]:
table1 = char_freq_table("Mississipi")

In [3]:
table1

{'M': 1, 'i': 4, 's': 4, 'p': 1}

In [4]:
table2 = char_freq_table("APPLES AND BANANAS")

In [5]:
table2

{'A': 5, 'P': 2, 'L': 1, 'E': 1, 'S': 2, ' ': 2, 'N': 3, 'D': 1, 'B': 1}

In [6]:
import queue

In [96]:
class LeafNode():

    def __init__(self, frequency=0, character=''):
        self.frequency = frequency
        self.character = character

    def __str__(self,):
        return f"({self.character}, {self.frequency})"

    def __eq__(self,other):
        if isinstance(other, LeafNode):
            return self.frequency == other.frequency
        return False
    
    def __lt__(self,other):
        if isinstance(other, LeafNode):
            return self.frequency < other.frequency
        return False
    
    def __gt__(self,other):
        if isinstance(other, LeafNode):
            return self.frequency > other.frequency
        return False

    def set_frequency(self,num):
        self.frequency = num

    def set_character(self, char):
        self.character = char

    def set_node(self, char, freq):
        self.frequency = freq
        self.character = char

In [129]:
class InternalNode():

    def __init__(self, frequency_sum=0, leftNode=None, rightNode=None):
        self.frequency = frequency_sum
        self.left_child = leftNode
        self.right_child = rightNode

    def __str__(self,):
        return f"{self.frequency}: [{str(self.left_child)} , {str(self.right_child)}]"

    def __eq__(self,other):
        if isinstance(other, InternalNode):
            return self.frequency == other.frequency
        return False
    
    def __lt__(self,other):
        if isinstance(other, InternalNode):
            return self.frequency < other.frequency
        return False
    
    def __gt__(self,other):
        if isinstance(other, InternalNode):
            return self.frequency > other.frequency
        return False

In [130]:
def HuffmanBuildTree(input_string):
    table = char_freq_table(input_string)
    
    nodes = queue.PriorityQueue()

    for char, freq in table.items():
        newLeaf = LeafNode(freq, char)
        nodes.put(newLeaf)

    # Make parent nodes up to the root
    while nodes.qsize() > 1:
        # Dequeue lowest prio nodes
        left = nodes.get()
        right = nodes.get()

        # Create parent with sum of two frequencies
        freq_sum = right.frequency + left.frequency
        parent = InternalNode(freq_sum, left, right)

        nodes.put(parent)

    return nodes.get()

In [136]:
treeRoot = HuffmanBuildTree("BANANAS AND APPLES")

In [137]:
treeRoot

<__main__.InternalNode at 0x291a8c714f0>

In [138]:
print(treeRoot)

18: [7: [2: [(L, 1) , (E, 1)] , 5: [( , 2) , (N, 3)]] , 11: [(A, 5) , 6: [2: [(B, 1) , (D, 1)] , 4: [(P, 2) , (S, 2)]]]]


In [139]:
print(treeRoot.left_child)

7: [2: [(L, 1) , (E, 1)] , 5: [( , 2) , (N, 3)]]


In [140]:
print(treeRoot.right_child)

11: [(A, 5) , 6: [2: [(B, 1) , (D, 1)] , 4: [(P, 2) , (S, 2)]]]


In [141]:
test = LeafNode()
test2=InternalNode()

In [143]:
type(LeafNode())

__main__.LeafNode

In [152]:
def HuffmanGetCodes(node, prefix, output):
    if type(node) is type(LeafNode()):
        output[node.character] = prefix
    else:
        HuffmanGetCodes(node.left_child, prefix + "0", output)
        HuffmanGetCodes(node.right_child, prefix + "1", output)

    return output

In [153]:
root = HuffmanBuildTree("BANANAS")

In [154]:
codes = HuffmanGetCodes(root, "", dict())

In [155]:
codes

{'B': '00', 'S': '01', 'N': '10', 'A': '11'}

In [156]:
codesTreeRoot = HuffmanGetCodes(treeRoot, "", dict())

In [157]:
codesTreeRoot

{'L': '000',
 'E': '001',
 ' ': '010',
 'N': '011',
 'A': '10',
 'B': '1100',
 'D': '1101',
 'P': '1110',
 'S': '1111'}