In [39]:
import heapq,os

class tree:
    """Node of a Huffman tree, ordered by frequency so it can live in a heap.

    Attributes:
        val: the symbol stored at a leaf, or ``None`` for an internal node.
        freq: occurrence count of the symbol (or the sum of both children).
        left, right: child nodes, ``None`` until they are attached.
    """

    def __init__(self, val, freq):
        self.val = val
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Order nodes by frequency; this is the comparison ``heapq`` relies on."""
        if not isinstance(other, tree):
            # Let Python report an unsupported comparison instead of failing
            # with an AttributeError on ``other.freq``.
            return NotImplemented
        return self.freq < other.freq

    def __eq__(self, other):
        """Two nodes compare equal when their frequencies match.

        Comparing with a non-node (for example ``node == None``) yields
        ``False`` instead of raising. Defining ``__eq__`` without ``__hash__``
        leaves instances unhashable, exactly as before.
        """
        if not isinstance(other, tree):
            return NotImplemented
        return self.freq == other.freq
    
        
class Huffmancode:
    """Huffman-code a text file into a ``.bin`` file and decode it back.

    Layout of the compressed file: the first byte holds the number of filler
    ``0`` bits appended to the payload, followed by the payload bits packed
    eight per byte.

    The code tables are kept only on the instance, so ``decompression`` must
    be called on the same object that ran ``compression``.

    Args:
        filepath: path of the text file to compress.
        verbose: when true (the default) the intermediate bit strings and
            byte values are printed while compressing and decompressing.
    """

    def __init__(self, filepath, verbose=True):
        self.filepath = filepath
        self.verbose = verbose
        self.__heap = []
        self.__code = {}
        self.__reverse_code = {}

    def _trace(self, *parts):
        """Print one diagnostic record framed by blank lines when ``verbose`` is set."""
        if self.verbose:
            print()
            print(*parts)
            print()

    def text_frequency(self, text):
        """Return a dict mapping each character of ``text`` to its occurrence count."""
        frequency = {}
        for char in text:
            frequency[char] = frequency.get(char, 0) + 1
        return frequency

    def buildHeap(self, freq_dict):
        """Push one leaf node per ``freq_dict`` entry onto the internal min-heap."""
        for key, freq in freq_dict.items():
            heapq.heappush(self.__heap, tree(key, freq))

    def buildTree(self):
        """Merge the two least frequent nodes until a single root is left on the heap."""
        while len(self.__heap) > 1:
            node1 = heapq.heappop(self.__heap)
            node2 = heapq.heappop(self.__heap)
            new_node = tree(None, node1.freq + node2.freq)
            new_node.left = node1
            new_node.right = node2
            heapq.heappush(self.__heap, new_node)

    def tree_traversal(self, root, string_bits):
        """Walk the tree and record the bit string of every leaf in both code tables.

        ``string_bits`` is the path taken so far: ``'0'`` for each left edge
        and ``'1'`` for each right edge.
        """
        if root is None:
            return
        if root.val is not None:
            # A tree made of a single leaf is reached with an empty path; an
            # empty code would encode the whole text to nothing, so that lone
            # symbol is given the one-bit code '0'.
            bits = string_bits or '0'
            self.__code[root.val] = bits
            self.__reverse_code[bits] = root.val

        self.tree_traversal(root.left, string_bits + '0')
        self.tree_traversal(root.right, string_bits + '1')

    def build_treecode(self):
        """Pop the root from the heap and fill the code tables from it.

        Does nothing when the heap is empty (empty input text).
        """
        if not self.__heap:
            return
        root = heapq.heappop(self.__heap)
        self.tree_traversal(root, '')

    def build_encode(self, text):
        """Return ``text`` as one string of ``'0'``/``'1'`` characters.

        Raises:
            KeyError: if a character has no entry in the code table.
        """
        return ''.join(self.__code[char] for char in text)

    def build_padding(self, text):
        """Pad the bit string to a whole number of bytes and prefix the pad size.

        The returned string starts with the filler count as 8 binary digits,
        followed by ``text`` and that many ``'0'`` filler bits. A payload that
        is already byte-aligned gets no filler.
        """
        valuetopad = (8 - len(text) % 8) % 8
        front_padding = "{0:08b}".format(valuetopad)
        return front_padding + text + '0' * valuetopad

    def build_bytesarray(self, padded_text):
        """Convert each 8-character slice of ``padded_text`` to its integer value."""
        array = []
        for start in range(0, len(padded_text), 8):
            chunk = padded_text[start:start + 8]
            array.append(int(chunk, 2))
        return array

    def compression(self):
        """Compress ``self.filepath`` into ``<name>.bin`` and return that path.

        Raises:
            ValueError: if the output path would be the input file itself.
        """
        # The output keeps the input's name and swaps the extension for '.bin'.
        filename, file_extension = os.path.splitext(self.filepath)
        output_path = filename + '.bin'
        same_file = (os.path.normcase(os.path.abspath(output_path))
                     == os.path.normcase(os.path.abspath(self.filepath)))
        if same_file:
            # Opening the output for writing would truncate the input first.
            raise ValueError("input file already has the '.bin' extension: "
                             + str(self.filepath))

        with open(self.filepath, 'r') as file:
            text = file.read()
        # NOTE: trailing whitespace is dropped here, so it does not survive a
        # compress/decompress round trip.
        text = text.rstrip()

        # Start from clean tables so codes from an earlier run cannot leak in.
        self.__heap = []
        self.__code = {}
        self.__reverse_code = {}

        dist_freq = self.text_frequency(text)   # character -> count
        self.buildHeap(dist_freq)               # min-heap of leaf nodes
        self.buildTree()                        # heap collapses to the root
        self.build_treecode()                   # fills both code tables

        encoded_text = self.build_encode(text)
        self._trace(encoded_text)

        padded_text = self.build_padding(encoded_text)
        self._trace(padded_text)

        bytes_array = self.build_bytesarray(padded_text)
        self._trace('bytes_array', bytes_array)

        # The output is created only after encoding succeeded.
        with open(output_path, 'wb') as output:
            output.write(bytes(bytes_array))

        print('File is compressed succesfully')
        return output_path

    def remove_padding(self, text):
        """Strip the 8-bit pad-size header and the filler bits it announces.

        Raises:
            ValueError: if ``text`` is shorter than the 8-bit header.
        """
        if len(text) < 8:
            raise ValueError("bit string is shorter than the 8-bit padding header")
        padded_value = int(text[:8], 2)
        payload = text[8:]
        if padded_value:
            # A plain ``payload[:-0]`` would return an empty string, so the
            # slice is applied only when there is filler to remove.
            payload = payload[:-padded_value]
        return payload

    def decode_text(self, text_data):
        """Turn a bit string back into text using the reverse code table.

        Bits are accumulated until they match a known code; bits left over at
        the end that match no code are ignored.
        """
        current_bits = ''
        decoded = []
        for bit in text_data:
            current_bits += bit
            if current_bits in self.__reverse_code:
                decoded.append(self.__reverse_code[current_bits])
                current_bits = ''
        return ''.join(decoded)

    def decompression(self, input_path):
        """Decode ``input_path`` into ``<name>_decompressed.txt`` and return that path.

        Uses the code table built by the last ``compression`` call on this
        instance.

        Raises:
            ValueError: if the input file holds no padding header (it is empty).
        """
        filename, file_extension = os.path.splitext(input_path)
        output_path = filename + '_decompressed' + '.txt'

        with open(input_path, 'rb') as file:
            raw = file.read()

        # Expand every byte into its 8-digit binary form.
        chunks = []
        for byte in raw:
            self._trace("byte", byte)
            bits = format(byte, '08b')
            self._trace("bits", bits)
            chunks.append(bits)
        bit_string = ''.join(chunks)
        self._trace('bit_string', bit_string)

        text_after_removedpadding = self.remove_padding(bit_string)
        self._trace('text_after_removedpadding', text_after_removedpadding)

        actual_text = self.decode_text(text_after_removedpadding)
        self._trace('actual_text', actual_text)

        # The output is created only after decoding succeeded.
        with open(output_path, 'w') as output:
            output.write(actual_text)
        return output_path
        
        
# Ask for the file to compress, then compress it and immediately decode the result.
filepath =  input("Please put here the path of file that you want to compress")
# `h` stays a notebook global: the cells below read its code tables.
h = Huffmancode(filepath)
# compression() returns the path of the .bin file it wrote.
compressed_file = h.compression()
# Decoding goes through the same instance because decode_text looks bits up in
# the table stored on the object; the return value is the decompressed file's path.
h.decompression(compressed_file)

        
        

Please put here the path of file that you want to compressp.txt

10110


0000001110110000


bytes_array [3, 176]

File is compressed succesfully

byte 3


bits 00000011


byte 176


bits 10110000


bit_string 0000001110110000


text_after_removedpadding 10110


actual_text abc



'p_decompressed.txt'

In [36]:
# Inspect the private (name-mangled) table that maps each bit string to its character.
print(h._Huffmancode__reverse_code)

{'0': 'c', '10': 'a', '11': 'b'}


In [37]:
# Inspect the private (name-mangled) table that maps each character to its bit string.
print(h._Huffmancode__code)

{'c': '0', 'a': '10', 'b': '11'}
