In [1]:
import heapq as hq

class Node:
    """
        A binary-tree node: a payload plus references to a left and a right child.
        Both children start out as None (i.e. a freshly created node is a leaf).
    """
    def __init__(self, value):
        self.left = self.right = None
        self.value = value

    def __str__(self):
        # Same formatting protocol as an f-string placeholder with no format spec
        return format(self.value)


class Huffman:
    """
        Builds a binary prefix-free (Huffman) encoding tree from a heap of weighted symbols.

        `data` is a heap (as maintained by heapq) of tuples
        (weight, [symbol ids], subtree root); symbols are identified by the integers
        1..node_count. Symbols that are merged early take part in more merges and therefore
        end up deeper in the tree, with longer codewords.
    """
    def __init__(self, data, node_count):
        self.data = data     # heap of (weight, [symbol ids], subtree root) still to be merged
        self.merge_count = {i: 0 for i in range(1, node_count + 1)}    # merges each symbol took part in
        self.codes = {i: "" for i in range(1, node_count + 1)}    # Huffman code built so far per symbol
        self.tree = None     # root of the finished tree; filled in by create_tree()

    def merge(self):
        """
            Pop the two lightest entries from the heap and join them under a new "parent" node.

            The merged entry (sum of both weights, concatenated symbol lists, new subtree)
            is pushed back onto the heap. Every symbol involved gets its merge_count
            incremented and one more bit prepended to its code: '0' for symbols in the
            left subtree, '1' for symbols in the right subtree.
        """
        # weight1 <= weight2 because of heap ordering
        weight1, node_list1, node1 = hq.heappop(self.data)
        weight2, node_list2, node2 = hq.heappop(self.data)

        # every symbol in either group takes part in this merge
        for node in node_list1:
            self.merge_count[node] += 1
        for node in node_list2:
            self.merge_count[node] += 1

        # Normally the first popped (lighter) group goes right and the second goes left.
        # The one exception: a single symbol popped first next to an already merged
        # group goes left instead.
        if len(node_list1) == 1 and len(node_list2) != 1:
            left_ids, left_tree = node_list1, node1
            right_ids, right_tree = node_list2, node2
        else:
            left_ids, left_tree = node_list2, node2
            right_ids, right_tree = node_list1, node1

        new_node = Node("parent")
        new_node.left = left_tree
        new_node.right = right_tree

        # push the combined entry back; left symbols are listed before right symbols
        hq.heappush(self.data, (weight1 + weight2, left_ids + right_ids, new_node))

        # Codes are built leaf-to-root, so the newest bit goes in front.
        for node in right_ids:
            self.codes[node] = '1' + self.codes[node]
        for node in left_ids:
            self.codes[node] = '0' + self.codes[node]

    def create_tree(self):
        """
            Store the finished tree in self.tree.

            Only possible once the heap holds exactly one entry; otherwise an error
            message is printed and self.tree is left untouched. Returns nothing.
        """
        if len(self.data) != 1:
            print("Something went wrong! Not ready for creating tree yet")
        else:
            self.tree = self.data[0][2]

    def huffman_algorithm(self):
        """
            The complete Huffman's Algorithm: merge until a single entry remains,
            then record the resulting tree.
        """
        while len(self.data) > 1:
            self.merge()

        self.create_tree()

    def max_len(self):
        # Each merge adds exactly one bit to a symbol's code, so the largest
        # merge count is the maximum codeword length.
        return max(self.merge_count.values())

    def min_len(self):
        # Smallest merge count == minimum codeword length (see max_len).
        return min(self.merge_count.values())

    def decode(self, string):
        """
            Decode a string of '0'/'1' characters using the tree from get_tree().

            '0' moves to the left child, '1' to the right child. Whenever a node whose
            value is not "parent" is reached, its value is appended to the result and the
            walk restarts at the root. Bits of an incomplete trailing codeword are dropped.

            Returns the decoded string, or an error message string if `string`
            contains any character other than '0' or '1'.
        """
        # Use one root for both the start and every restart, so decoding does not
        # depend on create_tree() having populated self.tree.
        root = self.get_tree()
        current = root
        decoded = []
        for bit in string:
            if bit == '0':
                current = current.left
            elif bit == '1':
                current = current.right
            else:
                return "The string you entered must only have 1 or 0!!"

            if current.value != "parent":
                decoded.append(str(current.value))
                current = root

        return "".join(decoded)

    def get_tree(self):
        """
            Return the subtree stored in the first heap entry; after huffman_algorithm()
            has run this is the complete Huffman tree.
        """
        return self.data[0][2]
         
    
def load(filename):
    """
        Read a Huffman problem file and build the initial heap.

        File format: the first number is the symbol count, every following number is
        the weight of the next symbol (symbols are numbered 1, 2, 3, ... in file order).
        Blank or whitespace-only lines are ignored.

        Returns (node_count, heap) where each heap entry is the tuple
        (weight, [symbol id], Node(symbol id)).
        Raises IndexError if the file contains no numbers and ValueError if a
        non-blank line is not an integer.
    """
    with open(filename) as file:
        # Skip empty lines so stray blank lines do not crash int()
        values = [int(line) for line in map(str.strip, file) if line]

    # first number is the declared symbol count
    node_count = values[0]

    # remaining numbers are the weights; push one single-symbol entry per weight
    data = []
    for symbol, weight in enumerate(values[1:], start=1):
        hq.heappush(data, (weight, [symbol], Node(symbol)))

    return node_count, data
        
        
        
# Demo run on the small sample file "test.txt": build the code, report codeword
# lengths and the code table, then decode a sample bit string.
if __name__ == "__main__":
    node_count, data = load("test.txt")
    # printed before any merging, so this shows the initial single-symbol heap
    print("The original heap of data is: \n", data, "\n")
    
    huffman = Huffman(data, node_count)
    huffman.huffman_algorithm()    # merges in place; `data` is consumed by this call
    print("The maximum length of a codeword is: ", huffman.max_len())
    print("The minimum length of a codeword is: ", huffman.min_len())
    print()
    
    # mapping symbol id -> bit string
    print("The Huffman Codes for all words are: \n:", huffman.codes)
    print()
    
    # sample bit string to run through the decoder
    string = '000110100010011001110'
    print("The Huffman codes is: ", string)
    print("The decoded string is: ", huffman.decode(string))
    

        

The original heap of data is: 
 [(121, [2], <__main__.Node object at 0x0000018BA9A13910>), (144, [9], <__main__.Node object at 0x0000018BA9AD4310>), (153, [7], <__main__.Node object at 0x0000018BA9AD4250>), (378, [5], <__main__.Node object at 0x0000018BA9AAED60>), (589, [11], <__main__.Node object at 0x0000018BA9AD43D0>), (301, [12], <__main__.Node object at 0x0000018BA9AD4430>), (188, [3], <__main__.Node object at 0x0000018BA9AAE910>), (953, [4], <__main__.Node object at 0x0000018BA9AAEEE0>), (579, [8], <__main__.Node object at 0x0000018BA9AD42B0>), (895, [1], <__main__.Node object at 0x0000018BA9A9E820>), (727, [10], <__main__.Node object at 0x0000018BA9AD4370>), (849, [6], <__main__.Node object at 0x0000018BA9AAE070>), (442, [13], <__main__.Node object at 0x0000018BA9AD4490>), (327, [14], <__main__.Node object at 0x0000018BA9AD44F0>), (930, [15], <__main__.Node object at 0x0000018BA9AD4550>)] 

The maximum length of a codeword is:  6
The minimum length of a codeword is:  3

The Huff

Question 1 & 2

In this programming problem and the next you'll code up the greedy algorithm from the lectures on Huffman coding using huffman.txt

This file describes an instance of the problem. It has the following format:

[number_of_symbols]

[weight of symbol #1]

[weight of symbol #2]

...

For example, the third line of the file is "6852892," indicating that the weight of the second symbol of the alphabet is 6852892.  (We're using weights instead of frequencies, like in the "A More Complex Example" video.)

Your task in this problem is to run the Huffman coding algorithm from lecture on this data set. What is the maximum length of a codeword in the resulting Huffman code?

In [2]:
import time
    
# Time the full pipeline (file load + Huffman construction + reporting) on huffman.txt
start = time.time()

node_count, data = load("huffman.txt")
huffman = Huffman(data, node_count)
huffman.huffman_algorithm()    
print("The maximum length of a codeword is: ", huffman.max_len())
print("The minimum length of a codeword is: ", huffman.min_len())

end = time.time()
# wall-clock seconds between the two time.time() samples
print(f"The run time of Huffman's Algorithm is {end-start} second(s). ")
    

The maximum length of a codeword is:  19
The minimum length of a codeword is:  9
The run time of Huffman's Algorithm is 0.007946968078613281 second(s). 


Question 3

In this programming problem you'll code up the dynamic programming algorithm for computing a maximum-weight independent set of a path graph using mwis.txt

This file describes the weights of the vertices in a path graph (with the weights listed in the order in which vertices appear in the path). It has the following format:

[number_of_vertices]

[weight of first vertex]

[weight of second vertex]

...

For example, the third line of the file is "6395702," indicating that the weight of the second vertex of the graph is 6395702. 

Your task in this problem is to run the dynamic programming algorithm (and the reconstruction procedure) from lecture on this data set.  The question is: of the vertices 1, 2, 3, 4, 17, 117, 517, and 997, which ones belong to the maximum-weight independent set?  (By "vertex 1" we mean the first vertex of the graph---there is no vertex 0.)   In the box below, enter a 8-bit string, where the ith bit should be 1 if the ith of these 8 vertices is in the maximum-weight independent set, and 0 otherwise. For example, if you think that the vertices 1, 4, 17, and 517 are in the maximum-weight independent set and the other four vertices are not, then you should enter the string 10011010 in the box below.



In [3]:
def load1(filename):
    """
        Read a file holding one integer per line and return those integers as a list,
        in file order.

        For the path-graph input this means element 0 is the vertex count and
        element i (i >= 1) is the weight of vertex i.
    """
    with open(filename) as file:
        return [int(line.strip()) for line in file]


def maximum_weight(lis):
    """
        Compute the total weight of a max-weight independent set of a path graph.

        `lis` holds the vertex count at index 0 and the weight of vertex i at index i.

        Dynamic programming: A[i] = max{ A[i-1] , A[i-2] + weight of i }
                i.e. for each vertex i there are only two cases:
                    Case 1 - vertex i is left out, the optimum of the first i-1 vertices stands
                    Case 2 - vertex i is taken on top of the optimum of the first i-2 vertices
                with A[0] = 0 and A[1] = weight of vertex 1.

        Returns (A, A[-1]): the whole table and the optimal total weight.
        A graph without vertices (only the count, or an empty list) yields ([0], 0).
    """
    weights = lis[1:]    # drop the leading vertex count
    max_value = [0]      # A[0]: empty prefix

    if not weights:
        # no vertices at all: the empty set is optimal
        return max_value, 0

    max_value.append(weights[0])    # A[1]
    for weight in weights[1:]:
        max_value.append(max(max_value[-1], max_value[-2] + weight))

    return max_value, max_value[-1]


def max_wis(lis, max_list):
    """
        Reconstruct the vertices of a max-weight independent set.

        `lis` holds the vertex count at index 0 and the weight of vertex i at index i;
        `max_list` is the DP table A for the same data (A[i] = optimum of the first i
        vertices, A[0] = 0).

        Walks backwards from the last vertex: vertex i is skipped when A[i-1] is at
        least as good as A[i-2] + weight of i, otherwise it is taken and vertex i-1 is
        skipped as well.

        Returns the chosen vertex numbers in descending order.
    """
    vertices = []
    i = len(lis) - 1
    while i >= 1:
        # For i == 1 the "two vertices back" optimum is the empty prefix (weight 0).
        # Index it explicitly: max_list[-1] would wrap around to the overall optimum.
        two_back = max_list[i - 2] if i >= 2 else 0
        if max_list[i - 1] >= two_back + lis[i]:
            i -= 1
        else:
            vertices.append(i)
            i -= 2
    return vertices
        
    
if __name__ == "__main__":
    import time
    start = time.time()

    # Load the path graph, fill the DP table, then reconstruct the chosen vertices
    data = load1("mwis.txt")
    max_list, max_value = maximum_weight(data)
    print("The maximum total weight of a max-weight IS of the data set is: \n", max_value, "\n")
    max_weight_is = max_wis(data, max_list)
    print("The vertices in the max-weight IS of the data set is: \n", max_weight_is, "\n")

    end = time.time()
    print(f"The run time of WIS in Path Graphs Algorithm using Dynamic Programming is: {end-start} seconds. \n")

    # Membership report for the eight vertices the assignment asks about
    for vertex in (1, 2, 3, 4, 17, 117, 517, 997):
        print(f"Vertex {vertex} is in maximum-weight independent set?  {vertex in max_weight_is}")

The maximum total weight of a max-weight IS of the data set is: 
 2955353732 

The vertices in the max-weight IS of the data set is: 
 [1000, 998, 995, 993, 991, 989, 987, 985, 983, 981, 979, 977, 975, 973, 971, 969, 966, 964, 962, 960, 958, 956, 954, 952, 950, 948, 946, 944, 942, 940, 938, 936, 934, 932, 929, 927, 924, 921, 919, 917, 915, 913, 911, 909, 907, 905, 903, 900, 898, 896, 894, 892, 890, 888, 886, 884, 882, 880, 878, 876, 873, 871, 868, 866, 864, 861, 859, 857, 855, 853, 851, 849, 846, 843, 841, 839, 837, 834, 832, 829, 827, 825, 823, 821, 819, 816, 814, 812, 809, 806, 803, 801, 799, 797, 795, 793, 790, 788, 786, 784, 781, 778, 776, 774, 772, 770, 768, 766, 763, 760, 757, 755, 752, 750, 748, 745, 743, 741, 739, 737, 735, 733, 730, 728, 726, 723, 721, 719, 717, 715, 713, 710, 708, 706, 704, 702, 699, 697, 695, 692, 690, 688, 686, 684, 682, 679, 676, 673, 671, 669, 666, 664, 662, 660, 658, 656, 654, 652, 649, 647, 645, 643, 641, 639, 637, 634, 632, 630, 628, 625, 623, 621, 619