# In [50]:
import heapq
import os
class BinaryTreeNode:
    """Node of a Huffman tree that is ordered and compared by frequency.

    Ordering by ``frequency`` is what lets ``heapq`` keep these nodes in a
    min-heap. Leaves hold a symbol in ``value``; the tree builder in this
    file creates merged (internal) nodes with ``value=None``.
    """

    def __init__(self, value, frequency):
        """Create a childless node for ``value`` occurring ``frequency`` times."""
        self.value = value
        self.frequency = frequency
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Return True when this node is less frequent than ``other``."""
        # Let Python raise its normal TypeError for foreign operands instead
        # of an AttributeError from the missing ``frequency`` attribute.
        if not isinstance(other, BinaryTreeNode):
            return NotImplemented
        return self.frequency < other.frequency

    def __eq__(self, other):
        """Return True when both nodes have the same frequency."""
        # Comparing with a non-node (e.g. ``node == None``) is simply False.
        if not isinstance(other, BinaryTreeNode):
            return NotImplemented
        return self.frequency == other.frequency

    # Defining __eq__ already makes the class unhashable; state it explicitly
    # because equality depends on the mutable ``frequency`` attribute.
    __hash__ = None

    def __repr__(self):
        return f"BinaryTreeNode(value={self.value!r}, frequency={self.frequency!r})"
        
class HuffmanCoding:
    """Huffman-compress a text file and restore it again.

    ``compress`` reads the file at ``path`` and writes ``<name>.bin``;
    ``decompress`` decodes such a file into ``<name>_decompressed.txt``.

    The code table is kept only in memory on this instance (it is not
    stored in the ``.bin`` file), so ``decompress`` can only decode data
    produced by an earlier ``compress`` call on the same object.
    """

    def __init__(self, path):
        """Remember the path of the text file to compress."""
        self.path = path
        self.__minHeap = []
        self.__BinaryCodesDic = {}
        self.__reverseBinaryCodesDic = {}

    def __construct_freqDict(self, text):
        """Return a dict mapping each character of ``text`` to its count."""
        freq_dic = {}
        for char in text:
            freq_dic[char] = freq_dic.get(char, 0) + 1
        return freq_dic

    def __buildHeap(self, freq_dic):
        """Push one leaf node per character onto the min-heap."""
        for key, frequency in freq_dic.items():
            heapq.heappush(self.__minHeap, BinaryTreeNode(key, frequency))

    def __buildTree(self):
        """Merge the two least frequent nodes until a single root remains."""
        while len(self.__minHeap) > 1:
            BTnode1 = heapq.heappop(self.__minHeap)
            BTnode2 = heapq.heappop(self.__minHeap)
            newBTnode = BinaryTreeNode(None, BTnode1.frequency + BTnode2.frequency)
            newBTnode.left = BTnode1
            newBTnode.right = BTnode2
            heapq.heappush(self.__minHeap, newBTnode)

    def __buildCodesHelper(self, root, current_code):
        """Record the bit string of every leaf below ``root``.

        Going left appends '0' and going right appends '1' to
        ``current_code``. The walk uses an explicit stack so a very skewed
        tree cannot hit the recursion limit.
        """
        stack = [(root, current_code)]
        while stack:
            node, code = stack.pop()
            if node is None:
                continue
            if node.value is not None:
                # A tree made of a single leaf would otherwise get the empty
                # code, which encodes every character as nothing at all.
                code = code or '0'
                self.__BinaryCodesDic[node.value] = code
                self.__reverseBinaryCodesDic[code] = node.value
                continue
            # Push right first so the left subtree is visited first.
            stack.append((node.right, code + '1'))
            stack.append((node.left, code + '0'))

    def __buildCodes(self):
        """Pop the tree root off the heap and fill both code tables."""
        if not self.__minHeap:
            # Empty input text: there is no tree and nothing to encode.
            return
        root = heapq.heappop(self.__minHeap)
        self.__buildCodesHelper(root, '')

    def __getEncodedText(self, text):
        """Return ``text`` as one string of '0'/'1' characters."""
        codes = self.__BinaryCodesDic
        return ''.join(codes[char] for char in text)

    def __getPaddedEncodedText(self, encodedText):
        """Pad ``encodedText`` to a multiple of 8 bits and prepend a header.

        The header is one byte (8 bits) holding the number of '0' bits that
        were appended, so the decoder can strip them again.
        """
        # ``-n % 8`` is 0 when the length is already aligned, which avoids
        # appending a useless full byte of padding.
        padding_amount = -len(encodedText) % 8
        padded_info = format(padding_amount, '08b')
        return padded_info + encodedText + '0' * padding_amount

    def __getBytesArray(self, padded_encoded_text):
        """Convert each group of 8 bits into its integer value (0-255)."""
        return [
            int(padded_encoded_text[i:i + 8], 2)
            for i in range(0, len(padded_encoded_text), 8)
        ]

    def compress(self):
        """Compress the file at ``self.path`` and return the output path.

        The output is written next to the input with a ``.bin`` extension.
        Its first byte stores the number of padding bits at the end of the
        last byte; the remaining bytes are the Huffman-encoded text.
        """
        file_name, _ = os.path.splitext(self.path)
        output_path = file_name + '.bin'

        # The input is only read, so plain 'r' is enough. newline='' keeps
        # line endings untouched so the round trip is exact. Trailing
        # whitespace is kept as well: stripping it would lose data.
        with open(self.path, 'r', newline='') as file:
            text = file.read()

        # Start from a clean state so the instance can be reused.
        self.__minHeap = []
        self.__BinaryCodesDic = {}
        self.__reverseBinaryCodesDic = {}

        # Character frequencies -> min-heap -> Huffman tree -> code tables.
        freq_dic = self.__construct_freqDict(text)
        self.__buildHeap(freq_dic)
        self.__buildTree()
        self.__buildCodes()

        # Bit string -> padded bit string with header -> list of byte values.
        encodedText = self.__getEncodedText(text)
        padded_encoded_text = self.__getPaddedEncodedText(encodedText)
        bytes_array = self.__getBytesArray(padded_encoded_text)

        with open(output_path, 'wb') as output:
            output.write(bytes(bytes_array))
        print('Compressed')
        return output_path

    def decompress(self, input_path):
        """Decode the compressed file ``input_path`` and return the output path.

        The text is written to ``<name>_decompressed.txt`` where ``<name>``
        is ``self.path`` without its extension. Decoding uses the code table
        built by ``compress`` on this instance.

        Raises:
            ValueError: if the file is empty, its padding header is
                inconsistent, or its bits do not match the code table.
        """
        # os.path.splitext() splits the path into (root, '.extension').
        file_name, _ = os.path.splitext(self.path)
        output_path = file_name + '_decompressed' + '.txt'

        with open(input_path, 'rb') as file:
            data = file.read()
        if not data:
            raise ValueError('compressed file is empty: missing padding header')

        bit_string = ''.join(format(byte, '08b') for byte in data)
        # First byte is the padding count; the rest is the encoded payload.
        padding_amount = int(bit_string[:8], 2)
        payload = bit_string[8:]
        if padding_amount > len(payload):
            raise ValueError('padding header is larger than the payload')
        # Slice by explicit length: payload[:-0] would drop everything.
        payload = payload[:len(payload) - padding_amount]

        reverse_codes = self.__reverseBinaryCodesDic
        decoded_chars = []
        current_code = ''
        for bit in payload:
            current_code += bit
            # Huffman codes are prefix-free, so the first match is the symbol.
            if current_code in reverse_codes:
                decoded_chars.append(reverse_codes[current_code])
                current_code = ''
        if current_code:
            raise ValueError(
                'payload does not match the code table; '
                'call compress() on this instance first'
            )

        # Write the decoded text to the output file.
        with open(output_path, 'w', newline='') as output:
            output.write(''.join(decoded_chars))
        print('Decompressed')
        return output_path
        
        
        
# Demo driver: compress a file, then decompress the result.
# Guarded so importing this module does not touch the filesystem; it still
# runs when executed as a script or as a notebook cell.
if __name__ == "__main__":
    # put the path of the file to compress it
    path = r"C:\Users\HP\Downloads\sample3.txt"
    h = HuffmanCoding(path)
    output_path = h.compress()
    h.decompress(output_path)

# Output:
# Compressed
# Decompressed
