In [413]:
#Binary trees implementation using node representation.
#1 Node
class Node:
    """One node of the Huffman tree (also the element type stored in the heap).

    Attributes:
        char:     the symbol carried by a leaf; merged "sum" nodes are created
                  with -1 by the tree-building code.
        value:    the weight the heap orders on (frequency, or summed frequency).
        codeword: bit string assigned after the tree is built; None until then.
        left, right: child nodes; False means "no child", so a node whose two
                  children are both False is a leaf.
    """

    def __init__(self, char, value):
        self.char, self.value = char, value
        self.codeword = None
        # False (not None) is the "no child" sentinel used across the notebook.
        self.left = self.right = False

    def display_node(self):
        """Print a separator line followed by char, value and codeword."""
        separator = "-------------------"
        fields = ("char:", self.char, "value:", self.value, "codeword:", self.codeword)
        print(separator)
        print(*fields)
        

## Build the frequency table (a list of `Node` objects)

In [414]:
img = [99,2,3,3,44,1,2,3,44,2]

In [415]:
# Frequency table: one leaf Node per distinct symbol of `img`, in order of
# first appearance.
#   node.char     -> the symbol being encoded
#   node.value    -> how many times the symbol occurs in `img`
#   node.codeword -> stays None here; filled in once codewords are assigned
frequency = []

# symbol -> its Node, so each symbol is found with one dict lookup instead of
# scanning the whole `frequency` list for every element of `img`.
node_by_symbol = {}

for e in img:
    node = node_by_symbol.get(e)
    if node is None:
        # First time this symbol is seen: start its count at zero and register it.
        node = Node(e, 0)
        node_by_symbol[e] = node
        frequency.append(node)
    node.value += 1
            

In [416]:
# Print every entry of the frequency table (symbol, count, codeword).
for e in frequency:
    e.display_node()

-------------------
char: 99 value: 1 codeword: None
-------------------
char: 2 value: 3 codeword: None
-------------------
char: 3 value: 3 codeword: None
-------------------
char: 44 value: 2 codeword: None
-------------------
char: 1 value: 1 codeword: None


# Build Huffman Tree

## Create a MinHeap for use in huffman tree building

In [417]:
import numpy as np
import math
# [-1,n] is the sum node
class huffmanTree:
    """Fixed-capacity binary min-heap of nodes, ordered by ``node.value``.

    Despite its name, this class is the priority queue used while building a
    Huffman tree, not the Huffman tree itself. Nodes are stored in a 1-based
    array: slot 0 is never used, the root is at index 1, and the children of
    index ``i`` are at ``2*i`` and ``2*i + 1``.

    Attributes:
        size: length of the backing array; at most ``size - 1`` nodes fit.
        tree: the backing object array (empty slots hold None).
        end:  index of the next free slot, so the heap holds ``end - 1`` nodes
              and ``end == 1`` means it is empty.
    """

    def __init__(self, size):
        self.size = size
        # np.empty initialises object arrays to None, which marks an empty slot.
        self.tree = np.empty(size, dtype=object)
        self.end = 1

    def insert_node(self, node):
        """Add ``node`` to the heap and bubble it up to its position.

        Passing None is a no-op.

        Raises:
            IndexError: if the heap already holds ``size - 1`` nodes.
        """
        if node is None:
            return
        if self.end >= self.size:
            raise IndexError("Heap is full, cannot insert")

        # Place the node in the first free slot.
        currentNodeIndex = self.end
        self.tree[currentNodeIndex] = node
        self.end += 1

        # Bubble up until the root is reached or the parent is strictly smaller.
        # The comparison is deliberately `<=`: on ties the newcomer moves above
        # its parent, which fixes the order in which equal weights are popped.
        while currentNodeIndex != 1:
            parentNode, parentIdx = self.giveParent(currentNodeIndex)
            if node.value <= parentNode.value:
                self.tree[currentNodeIndex] = parentNode
                self.tree[parentIdx] = node
                currentNodeIndex = parentIdx
            else:
                break

    def delete_node(self):
        """Remove and return the node with the smallest value.

        Returns:
            The former root node, or None when the heap is empty.
        """
        if self.end == 1:
            return None

        rootnode = self.tree[1]

        # Detach the last node and clear its slot so no stale reference remains.
        self.end -= 1
        lastElement = self.tree[self.end]
        self.tree[self.end] = None
        if self.end == 1:
            # The root was the only node; the heap is now empty.
            return rootnode

        # Move the last node to the root and let it sink to its position.
        self.tree[1] = lastElement
        currentNodeIndex = 1
        while not self.isLeafNode(currentNodeIndex):
            leftChildNode, leftChildIndex = self.giveLeftChild(currentNodeIndex)
            rightChildNode, rightChildIndex = self.giveRightChild(currentNodeIndex)

            # A missing child compares as +inf so it is never chosen.
            leftChildNodeValue = math.inf if leftChildNode is None else leftChildNode.value
            rightChildNodeValue = math.inf if rightChildNode is None else rightChildNode.value

            if lastElement.value <= leftChildNodeValue and lastElement.value <= rightChildNodeValue:
                # Not larger than either child: the heap property holds again.
                break

            # Swap with the smaller child (the left one on ties).
            if leftChildNodeValue <= rightChildNodeValue:
                childNode, childIndex = leftChildNode, leftChildIndex
            else:
                childNode, childIndex = rightChildNode, rightChildIndex
            self.tree[currentNodeIndex] = childNode
            self.tree[childIndex] = lastElement
            currentNodeIndex = childIndex

        return rootnode

    def display_tree(self):
        """Print every stored node in array (level) order, or a notice if empty."""
        if self.end == 1:
            print("Tree is empty")
        else:
            for i in range(1, self.end):
                self.tree[i].display_node()

    def _childAt(self, childIndex):
        """Return ``(node, childIndex)`` if that slot is inside the heap, else ``(None, None)``."""
        # Valid heap slots are 1 .. end-1; also never index past the array.
        if childIndex >= self.end or childIndex >= self.size:
            return None, None
        childNode = self.tree[childIndex]
        if childNode is None:
            return None, None
        return childNode, childIndex

    def giveLeftChild(self, idx):
        """Return ``(node, index)`` of the left child of ``idx``, or ``(None, None)``."""
        if idx is None:
            return None, None
        return self._childAt(2 * idx)

    def giveRightChild(self, idx):
        """Return ``(node, index)`` of the right child of ``idx``, or ``(None, None)``."""
        if idx is None:
            return None, None
        return self._childAt(2 * idx + 1)

    def giveParent(self, node):
        """Return ``(parentNode, parentIndex)`` for the array index ``node``.

        Note that ``node`` is an index, not a Node object. For the root
        (index 1) this returns ``(self.tree[0], 0)``, i.e. the unused slot.
        """
        parentIndex = node // 2
        parentNode = self.tree[parentIndex]

        return parentNode, parentIndex

    def isLeafNode(self, idx):
        """Return True when the node at ``idx`` has no child inside the heap."""
        leftchildNode, leftchildIndex = self.giveLeftChild(idx)
        rightchildNode, rightchildIndex = self.giveRightChild(idx)

        return leftchildIndex is None and rightchildIndex is None

        
    

In [418]:
# Length of the heap's backing array. Index 0 is unused (the heap starts
# inserting at index 1), so fewer than MAX_SIZE nodes fit.
MAX_SIZE = 30
tree = huffmanTree(MAX_SIZE)

In [419]:
# Nothing has been inserted yet, so this reports that the heap is empty.
tree.display_tree()

Tree is empty


## Insert all the element in frequency table

In [420]:

# Push every frequency-table node into the heap; the heap orders on node.value.
for e in frequency:
    tree.insert_node(e)

In [421]:
# Show the heap in array (level) order: the smallest value comes first.
tree.display_tree()


-------------------
char: 1 value: 1 codeword: None
-------------------
char: 99 value: 1 codeword: None
-------------------
char: 3 value: 3 codeword: None
-------------------
char: 2 value: 3 codeword: None
-------------------
char: 44 value: 2 codeword: None


In [422]:
# Manual check of the child/parent helpers on the heap filled above.
# Each give* helper prints a (node, index) tuple, or (None, None) when the
# requested child does not exist.

# Index 3: its children would be at indices 6 and 7.
print(tree.giveLeftChild(3))
print(tree.giveRightChild(3))
print(tree.giveParent(3))
print(tree.isLeafNode(3))

# Index 2: its children are at indices 4 and 5, its parent is the root (index 1).
print(tree.giveLeftChild(2))
print(tree.giveRightChild(2))
print(tree.giveParent(2))

tree.display_tree()

# Index 4: its children would be at indices 8 and 9.
print(tree.giveLeftChild(4))
print(tree.giveRightChild(4))
print(tree.isLeafNode(4))

(None, None)
(None, None)
(<__main__.Node object at 0x000001700823EBB0>, 1)
True
(<__main__.Node object at 0x000001700823E820>, 4)
(<__main__.Node object at 0x000001700823E340>, 5)
(<__main__.Node object at 0x000001700823EBB0>, 1)
-------------------
char: 1 value: 1 codeword: None
-------------------
char: 99 value: 1 codeword: None
-------------------
char: 3 value: 3 codeword: None
-------------------
char: 2 value: 3 codeword: None
-------------------
char: 44 value: 2 codeword: None
(None, None)
(None, None)
True


In [423]:
# The helper calls above only read the heap, so this prints the same contents again.
tree.display_tree()

-------------------
char: 1 value: 1 codeword: None
-------------------
char: 99 value: 1 codeword: None
-------------------
char: 3 value: 3 codeword: None
-------------------
char: 2 value: 3 codeword: None
-------------------
char: 44 value: 2 codeword: None


### Random testing

In [424]:
import random

# Fixed seed so the randomly generated nodes are the same on every run.
random.seed(42)

NUM_ELEMENT = 200
MAX_SIZE    = 1000

tree_test = huffmanTree(MAX_SIZE)

# Fill a separate heap with NUM_ELEMENT nodes whose symbol and weight are both
# drawn uniformly from 1..300 (symbol first, then weight).
inserted = 0
while inserted < NUM_ELEMENT:
    symbol = random.randint(1, 300)
    weight = random.randint(1, 300)
    tree_test.insert_node(Node(symbol, weight))
    inserted += 1


In [425]:
# Print the randomly filled heap in array (level) order.
tree_test.display_tree()

-------------------
char: 288 value: 5 codeword: None
-------------------
char: 251 value: 10 codeword: None
-------------------
char: 212 value: 13 codeword: None
-------------------
char: 118 value: 17 codeword: None
-------------------
char: 58 value: 13 codeword: None
-------------------
char: 17 value: 16 codeword: None
-------------------
char: 206 value: 31 codeword: None
-------------------
char: 161 value: 30 codeword: None
-------------------
char: 167 value: 29 codeword: None
-------------------
char: 136 value: 23 codeword: None
-------------------
char: 66 value: 22 codeword: None
-------------------
char: 35 value: 17 codeword: None
-------------------
char: 240 value: 28 codeword: None
-------------------
char: 282 value: 51 codeword: None
-------------------
char: 217 value: 33 codeword: None
-------------------
char: 278 value: 32 codeword: None
-------------------
char: 36 value: 35 codeword: None
-------------------
char: 21 value: 42 codeword: None
-----------------

In [426]:
# Drain the heap completely. `end` is one past the last stored node, so looping
# `range(tree_test.end)` would call delete_node() once more than there are
# nodes; stop exactly when the heap reports empty instead.
drained_values = []
while tree_test.end != 1:
    drained_values.append(tree_test.delete_node().value)

# A min-heap must hand its nodes back in non-decreasing order of value.
assert drained_values == sorted(drained_values), "heap returned nodes out of order"

In [427]:
# After draining, the heap should report that it is empty.
tree_test.display_tree()

Tree is empty


### Huffman Coding
0. Modify the min heap to handle the data structure of newly defined node.
1. Make a frequency table of the read in pattern
2. Build a Huffman tree. The Huffman tree itself is NOT a heap; however, a min heap is used during construction to repeatedly extract the two lowest-frequency nodes, which are then merged into a new sum node.
3. Building the Huffman tree with a linked (node-based) representation is a much easier task, even in HW, since we don't need to delete anything from the tree once it has been constructed.
4. After constructing the huffman tree, start assigning codeword 0/1 onto the branches and final leaf nodes.

### Design
1. First predefine the nodes, and think through the boundary conditions before coding.
2. Work through 3 examples: 1 base case, 1 extreme case and 1 general case.

### Inserted all nodes into the heap

In [428]:
def dfs_traversal(x):
    """Print the subtree rooted at ``x`` in pre-order, right child before left.

    Args:
        x: the subtree root. False (the "no child" marker used by Node) and
           None both denote an empty subtree.

    Returns:
        False when ``x`` is an empty subtree, otherwise None.
    """
    # `x == False` would let None through and crash on display_node().
    if x is None or x is False:
        return False

    x.display_node()

    # Right subtree first, then left; empty children are handled by the guard.
    for child in (x.right, x.left):
        dfs_traversal(child)
    


In [429]:
# Build the Huffman tree by repeatedly merging the two lowest-weight nodes.
# This cell consumes `tree`, so re-running it without refilling the heap would
# start from an empty heap; fail with a clear message instead of crashing on
# `None.value` inside the loop.
if tree.end == 1:
    raise RuntimeError(
        "Heap is empty: re-run the cell that inserts the frequency table "
        "before building the Huffman tree"
    )

# `end` is one past the last stored node, so `end > 2` means at least two
# nodes are left to merge.
while tree.end > 2:
    print("-----------------")
    print(tree.end)
    tree.display_tree()
    # Pop the two smallest nodes ...
    node1 = tree.delete_node()
    node2 = tree.delete_node()
    # ... and join them under a sum node (char -1) weighted with their total.
    sumNode = Node(-1,node1.value+node2.value)
    sumNode.right = node1
    sumNode.left = node2
    # Separator line only; no traversal is performed at this point.
    print("-----------DFS Traversal--------------")
    tree.insert_node(sumNode)

# The single node left in the heap is the root of the Huffman tree.
huffmanTreeRoot = tree.delete_node()
print("huffmanTreeRoot")
huffmanTreeRoot.display_node()

-----------------
6
-------------------
char: 1 value: 1 codeword: None
-------------------
char: 99 value: 1 codeword: None
-------------------
char: 3 value: 3 codeword: None
-------------------
char: 2 value: 3 codeword: None
-------------------
char: 44 value: 2 codeword: None
-----------DFS Traversal--------------
-----------------
5
-------------------
char: -1 value: 2 codeword: None
-------------------
char: 44 value: 2 codeword: None
-------------------
char: 3 value: 3 codeword: None
-------------------
char: 2 value: 3 codeword: None
-----------DFS Traversal--------------
-----------------
4
-------------------
char: 3 value: 3 codeword: None
-------------------
char: 2 value: 3 codeword: None
-------------------
char: -1 value: 4 codeword: None
-----------DFS Traversal--------------
-----------------
3
-------------------
char: -1 value: 4 codeword: None
-------------------
char: -1 value: 6 codeword: None
-----------DFS Traversal--------------
huffmanTreeRoot
-------------

## Traverse tree using dfs

In [430]:
# Print the finished Huffman tree in pre-order; each node's right child is visited before its left.
dfs_traversal(huffmanTreeRoot)

-------------------
char: -1 value: 10 codeword: None
-------------------
char: -1 value: 4 codeword: None
-------------------
char: -1 value: 2 codeword: None
-------------------
char: 1 value: 1 codeword: None
-------------------
char: 99 value: 1 codeword: None
-------------------
char: 44 value: 2 codeword: None
-------------------
char: -1 value: 6 codeword: None
-------------------
char: 3 value: 3 codeword: None
-------------------
char: 2 value: 3 codeword: None


## Assign CodeWord

In [431]:
def dfs_assign_codeWord(x,codeWord,leaf):
    """Assign a codeword to every node below ``x`` and collect the leaf codes.

    Each node's codeword is its parent's codeword plus '1' for a right branch
    or '0' for a left branch. Every visited node is printed with
    ``display_node()``.

    Args:
        x:        subtree root; False (Node's "no child" marker) or None means
                  an empty subtree.
        codeWord: codeword to give ``x`` (pass '' for the tree root).
        leaf:     dict filled in place with ``leaf char -> codeword``; a char
                  that is already present is left untouched.

    Returns:
        False when ``x`` is an empty subtree, otherwise None.
    """
    # The tree marks missing children with False, so the guard must cover it
    # as well as None.
    if x is None or x is False:
        return False

    hasRight = x.right is not None and x.right is not False
    hasLeft = x.left is not None and x.left is not False
    isLeaf = not hasRight and not hasLeft

    # A tree made of a single leaf would otherwise get the empty codeword and
    # the whole message would encode to nothing; give it one bit instead.
    if isLeaf and codeWord == '':
        codeWord = '0'

    x.codeword = codeWord
    x.display_node()

    if isLeaf:
        if x.char not in leaf:
            leaf[x.char] = codeWord
        return

    # Right branch first, then left, matching the traversal order used elsewhere.
    if hasRight:
        dfs_assign_codeWord(x.right,codeWord + '1',leaf)
    if hasLeft:
        dfs_assign_codeWord(x.left,codeWord + '0',leaf)

In [432]:
# Walk the Huffman tree from the root with an empty prefix; `leafnodes` is
# filled in place with the codeword of every leaf symbol.
leafnodes = {}
dfs_assign_codeWord(huffmanTreeRoot,codeWord='',leaf = leafnodes)
print("------------------------------------------")
print("Leafs nodes and their codewords")
print(leafnodes)


-------------------
char: -1 value: 10 codeword: 
-------------------
char: -1 value: 4 codeword: 1
-------------------
char: -1 value: 2 codeword: 11
-------------------
char: 1 value: 1 codeword: 111
-------------------
char: 99 value: 1 codeword: 110
-------------------
char: 44 value: 2 codeword: 10
-------------------
char: -1 value: 6 codeword: 0
-------------------
char: 3 value: 3 codeword: 01
-------------------
char: 2 value: 3 codeword: 00
------------------------------------------
Leafs nodes and their codewords
{1: '111', 99: '110', 44: '10', 3: '01', 2: '00'}


### Encode the message

In [433]:
# Translate the image symbol by symbol into its Huffman codewords; a symbol
# without an entry in `leafnodes` raises KeyError.
encoded_img = [leafnodes[symbol] for symbol in img]

print(encoded_img)

['110', '00', '01', '01', '10', '111', '00', '01', '10', '00']
