In [25]:
import heapq
import os
class BinaryTree:
    """Node of a Huffman tree that compares by frequency.

    Ordering (``<``) and equality (``==``) look only at ``frequ``, which is
    what ``heapq`` needs to keep the least frequent node at the front.

    Attributes:
        value: The symbol stored in the node, or None for a node that only
            joins two children.
        frequ: Occurrence count of the symbol (or the sum of the children's
            counts for a joining node).
        left: Left child node, or None.
        right: Right child node, or None.
    """

    def __init__(self, value, frequ):
        self.value = value
        self.frequ = frequ
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Return True when this node is less frequent than ``other``."""
        return self.frequ < other.frequ

    def __eq__(self, other):
        """Return True when both nodes have the same frequency.

        Non-node operands (for example None) yield NotImplemented so Python
        falls back to its default comparison instead of raising.
        """
        if not isinstance(other, BinaryTree):
            return NotImplemented
        # The attribute is ``frequ``; reading ``self.freq`` raised
        # AttributeError on every comparison.
        return self.frequ == other.frequ
    
class Huffmancode:
    """Huffman compressor for a text file.

    ``compression()`` reads the file at ``path``, builds a Huffman code from
    the character frequencies and writes the encoded bits to a ``.bin`` file
    next to the input.

    Attributes:
        path: Path of the text file to compress.
        heap: Min-heap of BinaryTree nodes used while building the tree.
        code: Mapping from character to its bit string.
        reversecode: Mapping from bit string back to its character.
    """

    def __init__(self, path):
        self.path = path
        self.heap = []
        self.code = {}
        self.reversecode = {}

    def frequency(self, text):
        """Return a dict mapping each character of ``text`` to its count."""
        frequ_dict = {}
        for char in text:
            frequ_dict[char] = frequ_dict.get(char, 0) + 1
        return frequ_dict

    def Build_Heap(self, frequency_dict):
        """Push one leaf node per (character, count) pair onto ``self.heap``."""
        for key, count in frequency_dict.items():
            heapq.heappush(self.heap, BinaryTree(key, count))

    def Huffman_Tree(self):
        """Merge the two least frequent nodes until at most one node remains.

        Each merge creates a parent with ``value`` None whose frequency is the
        sum of its children; the surviving heap entry is the tree root.
        """
        while len(self.heap) > 1:
            node1 = heapq.heappop(self.heap)
            node2 = heapq.heappop(self.heap)
            newnode = BinaryTree(None, node1.frequ + node2.frequ)
            newnode.left = node1
            newnode.right = node2
            heapq.heappush(self.heap, newnode)

    def Mapping(self, root, bits):
        """Walk the tree below ``root`` and record a bit string per leaf.

        ``bits`` is the path taken so far: '0' is appended for a left branch
        and '1' for a right branch. Results are stored in ``self.code`` and
        ``self.reversecode``.
        """
        if root is None:
            return
        if root.value is not None:
            # A tree made of a single leaf has an empty path; give that lone
            # symbol the one-bit code '0' so the encoded text is not empty.
            leaf_bits = bits or '0'
            self.code[root.value] = leaf_bits
            self.reversecode[leaf_bits] = root.value
            return
        self.Mapping(root.left, bits + '0')
        self.Mapping(root.right, bits + '1')

    def Help_Mapping(self):
        """Pop the tree root off the heap and build the code tables from it.

        Does nothing when the heap is empty (empty input text).
        """
        if not self.heap:
            return
        root = heapq.heappop(self.heap)
        self.Mapping(root, '')

    def Encoded(self, text):
        """Return ``text`` with every character replaced by its bit string.

        Raises:
            KeyError: If a character has no entry in ``self.code``.
        """
        return ''.join(self.code[char] for char in text)

    def padded(self, encoded_text):
        """Pad the bit string to a whole number of bytes and prefix the pad size.

        Between 1 and 8 '0' bits are appended (a full 8 when the length is
        already a multiple of 8), and the pad count is written in front as an
        8-bit binary number.
        """
        padding_value = 8 - (len(encoded_text) % 8)
        padding_info = "{0:08b}".format(padding_value)
        return padding_info + encoded_text + '0' * padding_value

    def Byte_Array(self, padded_text):
        """Convert a bit string into a list of ints, one per 8-bit group."""
        return [
            int(padded_text[i:i + 8], 2)
            for i in range(0, len(padded_text), 8)
        ]

    def compression(self):
        """Compress the file at ``self.path`` and return the output path.

        The output is written next to the input with the extension replaced
        by ``.bin``. Trailing whitespace of the input text is stripped before
        encoding.

        Returns:
            The path of the written ``.bin`` file.
        """
        filename, _ = os.path.splitext(self.path)
        output_path = filename + '.bin'

        # The input is only read, so plain read mode is enough.
        with open(self.path, 'r') as file:
            text = file.read().rstrip()

        # Start from a clean state so a repeated call does not mix in nodes
        # or codes left over from an earlier run.
        self.heap = []
        self.code = {}
        self.reversecode = {}

        # Count characters and seed the min-heap with one leaf per character.
        self.Build_Heap(self.frequency(text))

        # Construct the binary tree from the heap.
        self.Huffman_Tree()

        # Derive the code of every character from the tree.
        self.Help_Mapping()

        # Encode, pad to whole bytes and pack into bytes.
        padded_text = self.padded(self.Encoded(text))
        final_bytes = bytes(self.Byte_Array(padded_text))

        # Write only after encoding succeeded, so a failure leaves no
        # truncated output file behind.
        with open(output_path, 'wb') as output:
            output.write(final_bytes)
        print("Compressed")
        return output_path
# Driver: prompt for the path of the text file to compress.
path = input("Enter Path of File")
h = Huffmancode(path)
# compression() is written to produce '<input name>.bin' and return that path.
h.compression()

Enter Path of File hufman.txt


{'a': 224, 's': 1152, 'h': 1088, 'f': 2560, 'g': 352, 'd': 832, '\n': 64, 'v': 32, 'k': 448, 'j': 960, 'e': 160, ' ': 32, 'y': 32, 'q': 448, 'u': 128, 'i': 160, 'n': 96, 'l': 128, 'o': 96, 'z': 32}
Compressed


'hufman.bin'