# In [3]:
import os
import heapq

class binarytreenode:
    """Node of a Huffman tree, ordered by frequency so it can sit in a heap.

    Attributes:
        value: the symbol stored in a leaf; internal (merged) nodes are
            created with ``None``.
        freq: the weight of the node, used for every comparison.
        left, right: child nodes, ``None`` until assigned by the tree builder.
    """

    def __init__(self, value, freq):
        self.value = value
        self.freq = freq
        self.left = None
        self.right = None

    # heapq orders its items with "<", so comparisons look at freq only.
    def __lt__(self, other):
        """Return True when this node's frequency is lower than *other*'s."""
        try:
            return self.freq < other.freq
        except AttributeError:
            # Not a node-like object: let Python try the reflected operation
            # (and finally raise TypeError) instead of leaking AttributeError.
            return NotImplemented

    def __eq__(self, other):
        """Return True when both nodes have the same frequency."""
        try:
            return self.freq == other.freq
        except AttributeError:
            # Comparing with None, a string, etc. must yield False rather
            # than crash.
            return NotImplemented

    # Equality is based on a mutable field, so nodes stay unhashable. This is
    # what defining __eq__ alone already implied; it is spelled out here.
    __hash__ = None

class Huffmancoding:
    """Huffman-compress the text file at ``path`` into a sibling ``.bin`` file.

    The code table is kept on the instance only. The output file holds one
    byte with the number of padding bits followed by the padded bit stream;
    no tree or table is written to it.
    """

    def __init__(self, path):
        self.path = path
        self.__heap = []
        self.__codes = {}

    def __buildheap(self, freq_dict):
        """Push one leaf node per symbol of *freq_dict* onto the min-heap."""
        for key, frequency in freq_dict.items():
            heapq.heappush(self.__heap, binarytreenode(key, frequency))

    def __make_frequency_dictionary(self, text):
        """Return a dict mapping each character of *text* to its count."""
        freq_dict = {}
        for char in text:
            # Start from 0 so the first occurrence counts exactly once.
            freq_dict[char] = freq_dict.get(char, 0) + 1
        return freq_dict

    def __buildtree(self):
        """Merge the two lightest nodes until a single root is left."""
        while len(self.__heap) > 1:
            binary_tree_node_1 = heapq.heappop(self.__heap)
            binary_tree_node_2 = heapq.heappop(self.__heap)
            freq_sum = binary_tree_node_1.freq + binary_tree_node_2.freq
            # Internal nodes carry no symbol (value=None).
            newnode = binarytreenode(None, freq_sum)
            newnode.left = binary_tree_node_1
            newnode.right = binary_tree_node_2
            heapq.heappush(self.__heap, newnode)

    def __buildcodehelper(self, root, curr_bits):
        """Record the bit string of every leaf below *root*.

        Going left appends "0", going right appends "1", starting from
        *curr_bits*. The walk uses an explicit stack so a very deep tree
        cannot exhaust the recursion limit.
        """
        stack = [(root, curr_bits)]
        while stack:
            node, bits = stack.pop()
            if node is None:
                continue
            if node.value is not None:
                # A tree made of a single leaf would get the empty code and
                # the encoded text would vanish; give it one bit instead.
                self.__codes[node.value] = bits or "0"
                continue
            # Push right first so the left subtree is visited first.
            stack.append((node.right, bits + "1"))
            stack.append((node.left, bits + "0"))

    def __buildcodes(self):
        """Pop the root off the heap and fill the code table from it."""
        if not self.__heap:
            # Empty input: there is no tree and nothing to encode.
            return
        root = heapq.heappop(self.__heap)
        self.__buildcodehelper(root, "")

    def __getencodedtext(self, text):
        """Return *text* as one string of "0"/"1" using the code table."""
        return "".join(self.__codes[char] for char in text)

    def __getpaddedencodedtext(self, encodedtext):
        """Pad *encodedtext* to whole bytes and prefix the padding length.

        The padding is 1..8 zero bits: input that is already byte-aligned
        still receives a full byte. This is kept as is because it is part of
        the file layout a decoder has to undo. The count is stored as an
        8-bit binary prefix.
        """
        padded_amount = 8 - (len(encodedtext) % 8)
        padded_info = "{0:08b}".format(padded_amount)
        return padded_info + encodedtext + "0" * padded_amount

    def __getsbytesarray(self, padded_encoded_text):
        """Convert a bit string into a list of byte values, 8 bits each."""
        return [
            int(padded_encoded_text[i:i + 8], 2)
            for i in range(0, len(padded_encoded_text), 8)
        ]

    def compress(self):
        """Compress the file at ``self.path`` and return the output path.

        The output is written next to the input with the extension replaced
        by ``.bin``. Trailing whitespace of the text is stripped before
        encoding, so it is not represented in the output. An empty text
        produces just the padding header and one padding byte.

        Returns:
            The path of the written ``.bin`` file.
        """
        filename, _ = os.path.splitext(self.path)
        outputpath = filename + ".bin"

        # Read the whole input before touching the output: the input needs
        # read access only, and it is not truncated if both paths coincide.
        with open(self.path, "r") as file:
            text = file.read()
        text = text.rstrip()

        # Start from a clean state so an earlier run cannot leak into this one.
        self.__heap = []
        self.__codes = {}

        # frequency table -> heap -> tree -> code table
        freq_dict = self.__make_frequency_dictionary(text)
        self.__buildheap(freq_dict)
        self.__buildtree()
        self.__buildcodes()

        # text -> bit string -> byte-aligned bit string -> bytes
        encodedtext = self.__getencodedtext(text)
        padded_encoded_text = self.__getpaddedencodedtext(encodedtext)
        bytes_array = self.__getsbytesarray(padded_encoded_text)
        finalbytes = bytes(bytes_array)

        with open(outputpath, "wb") as output:
            output.write(finalbytes)
        print("compressed")
        return outputpath


# Example run: compress a sample text file. The path is a hard-coded local
# Windows path; point it at an existing text file before running.
path = r"C:\Users\ssart\Downloads\samplez.txt"
h = Huffmancoding(path)
# compress() writes the .bin file next to the input and returns its path.
outputpath=h.compress()

# Output: compressed
