In [1]:
import os
import heapq

class BinaryTree:
    """Node of a Huffman tree, ordered and compared by frequency only.

    Attributes:
        value: The symbol stored in this node.
        frequ: The weight used for ordering (``<``) and equality (``==``).
        left: Left child, ``None`` until assigned by the caller.
        right: Right child, ``None`` until assigned by the caller.

    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable (standard Python data-model behavior).
    """

    def __init__(self, value, frequ):
        self.value = value
        self.frequ = frequ
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Return True when this node's frequency is smaller than *other*'s."""
        # Returning NotImplemented lets Python raise a clean TypeError for
        # foreign types instead of an AttributeError on ``other.frequ``.
        if not isinstance(other, BinaryTree):
            return NotImplemented
        return self.frequ < other.frequ

    def __eq__(self, other):
        """Return True when both nodes carry the same frequency."""
        # ``node == None`` (or any non-node) must evaluate to False rather
        # than crash; NotImplemented makes Python fall back to identity.
        if not isinstance(other, BinaryTree):
            return NotImplemented
        return self.frequ == other.frequ
    


class HuffmanCode:
    """Huffman-encode a text file into a ``.bin`` file next to it.

    Only the encoded bit stream (preceded by one padding-length byte) is
    written to disk; the symbol-to-code table lives in ``self._code`` and is
    not stored in the output file.

    Args:
        path: Path of the text file that ``compression()`` reads.
    """

    def __init__(self, path):
        self.path = path
        # Min-heap of tree nodes (ordered by the nodes' own comparison).
        self._heap = []
        # Mapping of symbol -> bit string ('0'/'1' characters).
        self._code = {}

    def _frequency_from_text(self, text):
        """Return a dict mapping each character of *text* to its count."""
        frequ_dict = {}
        for char in text:
            frequ_dict[char] = frequ_dict.get(char, 0) + 1
        return frequ_dict

    def _Build_heap(self, frequ_dict):
        """Push one leaf node per (symbol, frequency) pair onto the heap."""
        for key, frequency in frequ_dict.items():
            heapq.heappush(self._heap, BinaryTree(key, frequency))

    def _Build_Binary_Tree(self):
        """Merge the two lightest nodes repeatedly until one root remains."""
        while len(self._heap) > 1:
            lighter = heapq.heappop(self._heap)
            heavier = heapq.heappop(self._heap)
            # Internal nodes carry value None; leaves carry the symbol.
            parent = BinaryTree(None, lighter.frequ + heavier.frequ)
            parent.left = lighter
            parent.right = heavier
            heapq.heappush(self._heap, parent)

    def _Build_Tree_Code_Helper(self, root, curr_bits):
        """Record the code of every leaf below *root*.

        Args:
            root: Current node, or ``None``.
            curr_bits: Bit string accumulated on the way down
                ('0' for a left edge, '1' for a right edge).
        """
        if root is None:
            return
        if root.value is not None:
            self._code[root.value] = curr_bits
            return
        self._Build_Tree_Code_Helper(root.left, curr_bits + '0')
        self._Build_Tree_Code_Helper(root.right, curr_bits + '1')

    def _Build_Tree_Code(self):
        """Pop the root from the heap and fill ``self._code`` from it.

        An empty heap (empty input) leaves ``self._code`` untouched instead
        of raising ``IndexError``.
        """
        if not self._heap:
            return
        root = heapq.heappop(self._heap)
        if root.value is not None:
            # The whole tree is a single leaf (only one distinct symbol).
            # An empty code would encode the text to zero bits and lose it,
            # so give the lone symbol a one-bit code.
            self._code[root.value] = '0'
            return
        self._Build_Tree_Code_Helper(root, '')

    def _Build_Encoded_Text(self, text):
        """Return *text* translated to a bit string via ``self._code``.

        Raises:
            KeyError: If a character of *text* has no entry in the table.
        """
        return ''.join(self._code[char] for char in text)

    def _Build_Padded_text(self, encoded_text):
        """Pad the bit string to a whole number of bytes and prefix a header.

        The result is an 8-bit binary count of the padding bits, followed by
        *encoded_text*, followed by that many '0' bits.

        The padding count is always in 1..8: an already byte-aligned stream
        still receives a full byte of padding.
        NOTE(review): kept deliberately; a decoder that slices off the last
        ``padding`` bits would presumably mishandle a count of 0. Confirm
        against the decoder before changing this.
        """
        padding_value = 8 - len(encoded_text) % 8
        padded_info = "{0:08b}".format(padding_value)
        return padded_info + encoded_text + '0' * padding_value

    def _Build_Bite_Array(self, padded_text):
        """Split the bit string into 8-bit chunks and return their int values."""
        return [
            int(padded_text[i:i + 8], 2)
            for i in range(0, len(padded_text), 8)
        ]

    def compression(self):
        """Compress ``self.path`` and return the path of the ``.bin`` output.

        The output path is ``self.path`` with its extension replaced by
        ``.bin``. Trailing whitespace of the input is stripped before
        encoding, so it is not represented in the output.

        Returns:
            The output file path as a string.

        Raises:
            OSError: If the input cannot be read or the output cannot be
                written.
        """
        print("compression starts")
        filename, _ = os.path.splitext(self.path)
        output_path = filename + '.bin'

        # Read-only mode: the input is never modified, so write permission
        # on it must not be required. Reading before the output is opened
        # also avoids leaving an empty .bin behind when reading fails.
        with open(self.path, 'r') as file:
            text = file.read()
        text = text.rstrip()

        # Start from a clean state so repeated calls do not reuse stale
        # nodes or codes from an earlier run.
        self._heap = []
        self._code = {}

        frequency_dict = self._frequency_from_text(text)
        self._Build_heap(frequency_dict)
        self._Build_Binary_Tree()
        self._Build_Tree_Code()
        encoded_text = self._Build_Encoded_Text(text)
        padded_text = self._Build_Padded_text(encoded_text)
        bytes_array = self._Build_Bite_Array(padded_text)

        with open(output_path, 'wb') as output:
            output.write(bytes(bytes_array))
        print("compression SUCESSFULL")
        return output_path

    

# Demo run: compress the sample text file; the call's return value is the
# path of the generated .bin file.
path = 'sample.txt'
h = HuffmanCode(path=path)
h.compression()

compression starts
compression SUCESSFULL


'sample.bin'