In [11]:
import heapq
import os

class BinaryTree:
    def __init__(self, value, frequ):
        self.value = value
        self.frequ = frequ
        self.left = None
        self.right = None

    def __lt__(self, other):
        return self.frequ < other.frequ

    def __eq__(self, other):
        return self.frequ == other.frequ

class Huffmancode:
    def __init__(self, path):
        self.path = path
        self.__heap = []
        self.__code = {}

    def _frequency_from_text(self, text):
        frequ_dict = {}
        for char in text:
            frequ_dict[char] = frequ_dict.get(char, 0) + 1
        return frequ_dict

    def __Build_heap(self, frequency_dict):
        for key, frequency in frequency_dict.items():
            binary_tree_node = BinaryTree(key, frequency)
            heapq.heappush(self.__heap, binary_tree_node)

    def __Build_Binary_Tree(self):
        while len(self.__heap) > 1:
            node1 = heapq.heappop(self.__heap)
            node2 = heapq.heappop(self.__heap)
            sum_of_freq = node1.frequ + node2.frequ
            newnode = BinaryTree(None, sum_of_freq)
            newnode.left = node1
            newnode.right = node2
            heapq.heappush(self.__heap, newnode)

    def __Build_Tree_Code_Helper(self, root, curr_bits):
        if root is None:
            return
        if root.value is not None:
            self.__code[root.value] = curr_bits
            return
        self.__Build_Tree_Code_Helper(root.left, curr_bits + '0')
        self.__Build_Tree_Code_Helper(root.right, curr_bits + '1')

    def __Build_Tree_Code(self):
        root = heapq.heappop(self.__heap)
        self.__Build_Tree_Code_Helper(root, '')

    def __Build_Encoded_Text(self, text):
        encoded_text = ''.join(self.__code[char] for char in text)
        return encoded_text

    def __Build_Padded_Text(self, encoded_text):
        padding_value = 8 - (len(encoded_text) % 8)
        padded_text = encoded_text + '0' * padding_value
        padded_info = f"{padding_value:08b}"  # 8-bit representation of padding length
        return padded_info + padded_text

    def __Build_Byte_Array(self, padded_text):
        array = [int(padded_text[i:i+8], 2) for i in range(0, len(padded_text), 8)]
        return array

    def compression(self):
        """
        This function reads a file, computes the Huffman encoding, and compresses the file.
        Steps:
        1. Read the file contents.
        2. Calculate character frequencies.
        3. Construct a min heap.
        4. Build the Huffman Tree.
        5. Generate Huffman codes.
        6. Encode the text.
        7. Pad the encoded text.
        8. Convert to byte format and write to a binary file.
        """
        print('Compression for your file starts....')

        filename, file_extension = os.path.splitext(self.path)
        output_path = filename + '.bin'

        with open(self.path, 'r') as file, open(output_path, 'wb') as output:
            text = file.read().rstrip()

            frequency_dict = self._frequency_from_text(text)
            self.__Build_heap(frequency_dict)
            self.__Build_Binary_Tree()
            self.__Build_Tree_Code()

            encoded_text = self.__Build_Encoded_Text(text)
            padded_text = self.__Build_Padded_Text(encoded_text)
            byte_array = self.__Build_Byte_Array(padded_text)
            final_bytes = bytes(byte_array)

            output.write(final_bytes)

        print('Compression successfully completed.')
        return output_path

# Input file path from user and compress the file
path = input('Enter the path of the file you need to compress: ')
h = Huffmancode(path)
compressed_file = h.compression()
print(f"Compressed file saved at: {compressed_file}")

Enter the path of the file you need to compress:  dummy.txt


Compression for your file starts....
Compression successfully completed.
Compressed file saved at: dummy.bin


In [27]:
import heapq, os

class BinaryTree:
    def __init__(self, value, frequ):
        self.value = value
        self.frequ = frequ
        self.left = None
        self.right = None

    def __lt__(self, other):
        return self.frequ < other.frequ

    def __eq__(self, other):
        return self.frequ == other.frequ

class Huffmancode:
    def __init__(self, path):
        self.path = path
        self.__heap = []
        self.__code = {}
        self.__reversecode = {}

    def _frequency_from_text(self, text):
        frequ_dict = {}
        for char in text:
            frequ_dict[char] = frequ_dict.get(char, 0) + 1
        return frequ_dict

    def __Build_heap(self, frequency_dict):
        for key, frequency in frequency_dict.items():
            binary_tree_node = BinaryTree(key, frequency)
            heapq.heappush(self.__heap, binary_tree_node)

    def __Build_Binary_Tree(self):
        while len(self.__heap) > 1:
            node1 = heapq.heappop(self.__heap)
            node2 = heapq.heappop(self.__heap)
            sum_of_freq = node1.frequ + node2.frequ
            newnode = BinaryTree(None, sum_of_freq)
            newnode.left = node1
            newnode.right = node2
            heapq.heappush(self.__heap, newnode)

    def __Build_Tree_Code_Helper(self, root, curr_bits):
        if root is None:
            return
        if root.value is not None:
            self.__code[root.value] = curr_bits
            self.__reversecode[curr_bits] = root.value
            return
        self.__Build_Tree_Code_Helper(root.left, curr_bits + '0')
        self.__Build_Tree_Code_Helper(root.right, curr_bits + '1')

    def __Build_Tree_Code(self):
        root = heapq.heappop(self.__heap)
        self.__Build_Tree_Code_Helper(root, '')

    def __Build_Encoded_Text(self, text):
        encoded_text = ''.join(self.__code[char] for char in text)
        return encoded_text

    def __Build_Padded_Text(self, encoded_text):
        padding_value = 8 - len(encoded_text) % 8
        padded_info = "{:08b}".format(padding_value)
        padded_text = padded_info + encoded_text + '0' * padding_value
        return padded_text

    def __Build_Bite_Array(self, padded_text):
        array = []
        for i in range(0, len(padded_text), 8):
            byte = padded_text[i:i+8]
            array.append(int(byte, 2))
        return array

    def compression(self):
        print('Compression for your file starts....')   
        filename, file_extension = os.path.splitext(self.path)
        output_path = filename + '.bin'

        with open(self.path, 'r', encoding="utf-8") as file, open(output_path, 'wb') as output:
            text = file.read().rstrip()
            frequency_dict = self._frequency_from_text(text)

            self.__Build_heap(frequency_dict)
            self.__Build_Binary_Tree()
            self.__Build_Tree_Code()

            encoded_text = self.__Build_Encoded_Text(text)
            padded_text = self.__Build_Padded_Text(encoded_text)
            bytes_array = self.__Build_Bite_Array(padded_text)

            output.write(bytes(bytes_array))

        print('Compression successful! File saved as:', output_path)
        return output_path

        def __Remove_padding(self, text):
            padded_info = text[:8]
            padding_value = int(padded_info, 2)
            text = text[:8]
            text = text[-1*padding_value]
            return text

        def __Decoded_Text(self, text):
            current_bits = ''
            decoded_text = ''
            for char in text:
                current_bits +=char
                if current_bits in self.__reversecode:
                    decoded_text += self.__reversecode[current_bits]
                    current_bits = ''
            return decoded_text
        
        def decompression(self, input_path):
            filename, file_extension = os.path.slicetext(input_path)
            output_path = filename + '_decompressed' + '.txt'
            with open(input_path, 'rb') as file, open(output_path, 'w') as output:
                bit_string = ''
                byte = file.read(1)
                while byte:
                    byte = ord(byte)
                    bits = bin(byte)[2:].rjust(8, '0')
                    bit_string += bits
                    byte = file.read(1)

                text_after_removing_padding = self.__Remove_Padding(bit_string)
                actual_text = self.__Decoded_Text(text_after_removing_padding)
                output.write(actual_text)
            return output_path
                
path = input('Enter the path of the file which you need to compress: ')
h = Huffmancode(path)
compressed_file = h.compression()
h.decompression(compressed_file)

Enter the path of the file which you need to compress:  dummy.txt


Compression for your file starts....
Compression successful! File saved as: dummy.bin


AttributeError: 'Huffmancode' object has no attribute 'decompress'

In [29]:
import heapq, os

class BinaryTree:
    def __init__(self, value, frequ):
        self.value = value
        self.frequ = frequ
        self.left = None
        self.right = None

    def __lt__(self, other):
        return self.frequ < other.frequ

    def __eq__(self, other):
        return self.frequ == other.frequ

class Huffmancode:
    def __init__(self, path):
        self.path = path
        self.__heap = []
        self.__code = {}
        self.__reversecode = {}

    def _frequency_from_text(self, text):
        frequ_dict = {}
        for char in text:
            frequ_dict[char] = frequ_dict.get(char, 0) + 1
        return frequ_dict

    def __Build_heap(self, frequency_dict):
        for key, frequency in frequency_dict.items():
            binary_tree_node = BinaryTree(key, frequency)
            heapq.heappush(self.__heap, binary_tree_node)

    def __Build_Binary_Tree(self):
        while len(self.__heap) > 1:
            node1 = heapq.heappop(self.__heap)
            node2 = heapq.heappop(self.__heap)
            sum_of_freq = node1.frequ + node2.frequ
            newnode = BinaryTree(None, sum_of_freq)
            newnode.left = node1
            newnode.right = node2
            heapq.heappush(self.__heap, newnode)

    def __Build_Tree_Code_Helper(self, root, curr_bits):
        if root is None:
            return
        if root.value is not None:
            self.__code[root.value] = curr_bits
            self.__reversecode[curr_bits] = root.value
            return
        self.__Build_Tree_Code_Helper(root.left, curr_bits + '0')
        self.__Build_Tree_Code_Helper(root.right, curr_bits + '1')

    def __Build_Tree_Code(self):
        root = heapq.heappop(self.__heap)
        self.__Build_Tree_Code_Helper(root, '')

    def __Build_Encoded_Text(self, text):
        encoded_text = ''.join(self.__code[char] for char in text)
        return encoded_text

    def __Build_Padded_Text(self, encoded_text):
        padding_value = 8 - len(encoded_text) % 8
        padded_info = "{:08b}".format(padding_value)
        padded_text = padded_info + encoded_text + '0' * padding_value
        return padded_text

    def __Build_Bite_Array(self, padded_text):
        array = []
        for i in range(0, len(padded_text), 8):
            byte = padded_text[i:i+8]
            array.append(int(byte, 2))
        return array

    def compression(self):
        print('Compression for your file starts....')   
        filename, file_extension = os.path.splitext(self.path)
        output_path = filename + '.bin'

        with open(self.path, 'r', encoding="utf-8") as file, open(output_path, 'wb') as output:
            text = file.read().rstrip()
            frequency_dict = self._frequency_from_text(text)

            self.__Build_heap(frequency_dict)
            self.__Build_Binary_Tree()
            self.__Build_Tree_Code()

            encoded_text = self.__Build_Encoded_Text(text)
            padded_text = self.__Build_Padded_Text(encoded_text)
            bytes_array = self.__Build_Bite_Array(padded_text)

            output.write(bytes(bytes_array))

        print('Compression successful! File saved as:', output_path)
        return output_path

    def __Remove_padding(self, text):
        padded_info = text[:8]
        padding_value = int(padded_info, 2)
        text = text[8:]  # Remove first 8 bits containing padding info
        text = text[:-padding_value]  # Remove padded zeros
        return text

    def __Decoded_Text(self, text):
        current_bits = ''
        decoded_text = ''
        for char in text:
            current_bits += char
            if current_bits in self.__reversecode:
                decoded_text += self.__reversecode[current_bits]
                current_bits = ''
        return decoded_text

    def decompress(self, input_path):
        filename, file_extension = os.path.splitext(input_path)
        output_path = filename + '_decompressed.txt'

        with open(input_path, 'rb') as file, open(output_path, 'w', encoding="utf-8") as output:
            bit_string = ''
            byte = file.read(1)
            while byte:
                byte = int.from_bytes(byte, 'big')  # Fix: Convert byte to int correctly
                bits = bin(byte)[2:].rjust(8, '0')
                bit_string += bits
                byte = file.read(1)

            text_after_removing_padding = self.__Remove_padding(bit_string)
            actual_text = self.__Decoded_Text(text_after_removing_padding)
            output.write(actual_text)

        print('Decompression successful! File saved as:', output_path)
        return output_path

# Run the program
path = input('Enter the path of the file which you need to compress: ')
h = Huffmancode(path)
compressed_file = h.compression()
h.decompress(compressed_file)


Enter the path of the file which you need to compress:  dummy.txt


Compression for your file starts....
Compression successful! File saved as: dummy.bin
Decompression successful! File saved as: dummy_decompressed.txt


'dummy_decompressed.txt'