In [None]:
# rahul = "{0:08b}".format(6)
# final_bytes = bytes(array)

In [13]:
import heapq
import os
class BinaryTreeNode:
    """Node of a Huffman tree, ordered by frequency so it can live in a heap.

    Attributes are plain and mutable:
      value -- the symbol stored in the node (HuffmanCoding uses ``None``
               for the internal nodes it creates)
      freq  -- the weight used for every comparison
      left, right -- child nodes, ``None`` until assigned

    Because ``__eq__`` is overridden without ``__hash__``, instances are
    unhashable (they cannot be dict keys or set members).
    """

    def __init__(self, value, freq):
        self.value = value
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Return True when this node's frequency is smaller than ``other``'s."""
        if not isinstance(other, BinaryTreeNode):
            # Let Python try the reflected operation / raise TypeError itself
            # instead of failing with AttributeError on ``other.freq``.
            return NotImplemented
        return self.freq < other.freq

    def __eq__(self, other):
        """Return True when both nodes have the same frequency.

        Equality deliberately ignores ``value`` and the children: it only
        exists to complement ``__lt__`` for heap ordering.
        """
        if not isinstance(other, BinaryTreeNode):
            # e.g. ``node == None`` now evaluates to False instead of raising.
            return NotImplemented
        return self.freq == other.freq
class HuffmanCoding:
    """Compress a text file into a Huffman-coded ``.bin`` file.

    Output layout written by ``compress``:
      * first byte: the number of padding bits, as an 8-bit binary number;
      * remaining bytes: the encoded bit stream, right-padded with '0' bits
        to a whole number of bytes.
    """

    def __init__(self, path):
        """Remember the input file ``path``; no I/O happens here."""
        self.path = path
        self.__heap = list()
        self.__codes = dict()

    def __make_freq_dict(self, text):
        """Return a dict mapping every character of ``text`` to its count."""
        freq = dict()
        for char in text:
            freq[char] = freq.get(char, 0) + 1

        return freq

    def __buildheap(self, freq_dict):
        """Push one leaf node per (character, frequency) pair onto the heap."""
        # Pushed one by one, in dict order, so tie-breaking between equal
        # frequencies (and therefore the generated codes) stays stable.
        for key, frequency in freq_dict.items():
            heapq.heappush(self.__heap, BinaryTreeNode(key, frequency))

    def __buildtree(self):
        """Merge the two lightest nodes until a single root remains."""
        while len(self.__heap) > 1:
            lighter = heapq.heappop(self.__heap)
            heavier = heapq.heappop(self.__heap)
            # Internal nodes carry no symbol (value None), only the summed weight.
            parent = BinaryTreeNode(None, lighter.freq + heavier.freq)
            parent.left = lighter
            parent.right = heavier
            heapq.heappush(self.__heap, parent)

    def __buildCodesHelper(self, root, curr_bits):
        """Walk the tree, recording ``curr_bits`` as the code of each leaf.

        Going left appends '0', going right appends '1'.
        """
        if root is None:
            return

        if root.value is not None:
            # Leaf: the path taken from the root is this symbol's code.
            self.__codes[root.value] = curr_bits
            return

        self.__buildCodesHelper(root.left, curr_bits + '0')
        self.__buildCodesHelper(root.right, curr_bits + '1')

    def __buildCodes(self):
        """Pop the tree root off the heap and fill ``self.__codes`` from it.

        An empty heap (empty input) leaves the code table empty. A tree that
        consists of a single leaf (input with one distinct character) gets
        the one-bit code '0'; with an empty code the encoded stream would
        carry no information about how often the character occurs.
        """
        if not self.__heap:
            return

        root = heapq.heappop(self.__heap)
        if root.value is not None:
            self.__codes[root.value] = '0'
            return

        self.__buildCodesHelper(root, '')

    def __getencodedtext(self, text):
        """Return ``text`` as one string of '0'/'1' characters.

        Raises KeyError for a character that has no code.
        """
        return ''.join(self.__codes[char] for char in text)

    def __getPaddedEncodedText(self, encoded_text):
        """Pad ``encoded_text`` to a multiple of 8 bits and prefix the pad size.

        The pad size is always in 1..8: a stream that is already byte-aligned
        still receives a full byte of padding. That rule is kept unchanged so
        the on-disk format stays the same.
        """
        padded_amount = 8 - (len(encoded_text) % 8)
        padded_info = "{0:08b}".format(padded_amount)

        return padded_info + encoded_text + '0' * padded_amount

    def __getBytesArray(self, padded_encoded_text):
        """Split the bit string into 8-bit chunks and return them as ints."""
        return [
            int(padded_encoded_text[i:i + 8], 2)
            for i in range(0, len(padded_encoded_text), 8)
        ]

    def compress(self):
        """Compress the file at ``self.path`` and return the output path.

        The output is written next to the input, with the extension replaced
        by ``.bin``. Trailing whitespace of the input text is stripped before
        encoding, so it is not part of the compressed data. Prints
        'Compressed' when done.

        Raises OSError (e.g. FileNotFoundError) if the input cannot be read
        or the output cannot be written.
        """
        # splitext splits at the last '.', e.g. 'dir/a.txt' -> ('dir/a', '.txt')
        file_name, _ = os.path.splitext(self.path)
        output_path = file_name + '.bin'

        # Start from a clean state so the instance can be reused.
        self.__heap = list()
        self.__codes = dict()

        # Read-only access is enough, and reading completes before the output
        # file is created/truncated.
        with open(self.path, 'r') as file:
            text = file.read()
        text = text.rstrip()

        # frequency table -> min-heap -> Huffman tree -> code table
        freq_dict = self.__make_freq_dict(text)
        self.__buildheap(freq_dict)
        self.__buildtree()
        self.__buildCodes()

        # text -> bit string -> padded bit string -> byte values
        encoded_text = self.__getencodedtext(text)
        padded_encoded_text = self.__getPaddedEncodedText(encoded_text)
        bytes_array = self.__getBytesArray(padded_encoded_text)

        with open(output_path, 'wb') as output:
            output.write(bytes(bytes_array))

        print('Compressed')

        return output_path

        

        


In [14]:
# Demo: compress a sample text file. compress() writes the result next to the
# input with a '.bin' extension, prints 'Compressed' and returns that path.
path = r'C:\Users\Gehna Ohlan\Desktop\coding_ninja\data structures in python\_20 huffman coding\huffman.txt'
h = HuffmanCoding(path)
output_path = h.compress()


Compressed


In [3]:
# Scratch check of the "{0:08b}" format spec (the one HuffmanCoding uses for
# its padding-info byte): an int rendered as 8 zero-padded binary digits.
rahul = "{0:08b}".format(5)
print(rahul)  # prints 00000101

00000101
