# In [19]:
import heapq
import os
class TreeNode:
    """Node of a Huffman tree, ordered by frequency.

    Attributes are ``char`` (the symbol stored in the node), ``freq`` (its
    weight) and the ``left`` / ``right`` children, which start as ``None``.
    Only ``<`` and ``>`` are defined, and both compare ``freq`` alone.
    """

    def __init__(self, char, freq):
        """Store the symbol and its weight; the node starts without children."""
        self.char, self.freq = char, freq
        self.left = self.right = None

    def __lt__(self, other):
        """Return True when this node's frequency is lower than ``other``'s."""
        return other.freq > self.freq

    def __gt__(self, other):
        """Return True when this node's frequency is higher than ``other``'s."""
        return other.freq < self.freq
    
class huffman:
    """Huffman coder that compresses a text file and restores it again.

    ``compress`` builds the code table from the file at ``path`` and keeps it
    only in memory; the table is not written into the ``.bin`` output.
    ``decompress`` therefore has to be called on the same instance that
    produced the compressed file.
    """

    def __init__(self, path):
        """Remember the input file path and start with empty coding state."""
        self.path = path
        self.__heap = []
        self.__codes = {}
        self.__revcodes = {}

    def __getfreqdict(self, text):
        """Return a dict mapping every character of ``text`` to its count."""
        freq_dict = {}
        for ch in text:
            freq_dict[ch] = freq_dict.get(ch, 0) + 1
        return freq_dict

    def __createheap(self, freq_dict):
        """Push one leaf node per (character, frequency) pair onto the heap."""
        for ch, freq in freq_dict.items():
            heapq.heappush(self.__heap, TreeNode(ch, freq))

    def __createtree(self):
        """Merge the two lightest nodes repeatedly until one root remains."""
        while len(self.__heap) > 1:
            lighter = heapq.heappop(self.__heap)
            heavier = heapq.heappop(self.__heap)
            parent = TreeNode('', lighter.freq + heavier.freq)
            parent.left = lighter
            parent.right = heavier
            heapq.heappush(self.__heap, parent)

    def __generatecodes(self, root, code):
        """Walk the tree and record the bit string of every leaf.

        ``code`` is the path from the tree root to ``root`` ('0' = left,
        '1' = right). Both the forward and the reverse tables are filled.
        """
        if root is None:
            return
        # Detect leaves structurally instead of comparing the character to
        # '' by identity, which depends on string interning.
        if root.left is None and root.right is None:
            # A tree made of a single leaf would yield the empty code, which
            # cannot be decoded; give that lone symbol a one-bit code.
            code = code or '0'
            self.__codes[root.char] = code
            self.__revcodes[code] = root.char
            return
        self.__generatecodes(root.left, code + '0')
        self.__generatecodes(root.right, code + '1')

    def __converttexttocodes(self, text):
        """Return the concatenated bit strings for every character of ``text``."""
        return ''.join(self.__codes[ch] for ch in text)

    def __padcodedtext(self, codedtext):
        """Pad ``codedtext`` to whole bytes and prefix the padding length.

        Between 1 and 8 zero bits are appended (a full byte when the length is
        already a multiple of 8), and the count is stored in the first 8 bits.
        """
        paddingamt = 8 - (len(codedtext) % 8)
        paddinginfo = "{0:08b}".format(paddingamt)
        return paddinginfo + codedtext + '0' * paddingamt

    def __bytesarray(self, paddedtext):
        """Convert a bit string into a list of byte values, 8 bits at a time."""
        return [int(paddedtext[i:i + 8], 2)
                for i in range(0, len(paddedtext), 8)]

    def compress(self):
        """Compress the file at ``self.path`` and return the output path.

        Steps: read the text, count character frequencies, build the heap and
        the Huffman tree, derive the codes, encode and pad the text, then
        write the bytes to ``<path without extension>.bin``.

        NOTE: trailing whitespace of the input is stripped before encoding,
        so it is not restored by ``decompress``.
        """
        output_path = os.path.splitext(self.path)[0] + ".bin"

        # Read-only access is enough; 'r+' needlessly required write rights.
        with open(self.path, 'r') as file:
            text = file.read()
        text = text.rstrip()

        # Start from a clean state so a repeated call does not mix the new
        # tree with nodes and codes left over from a previous run.
        self.__heap = []
        self.__codes = {}
        self.__revcodes = {}

        self.__createheap(self.__getfreqdict(text))
        self.__createtree()
        # Empty input leaves the heap empty: there is no tree to walk.
        if self.__heap:
            self.__generatecodes(self.__heap[0], '')

        codedtext = self.__converttexttocodes(text)
        paddedcodetext = self.__padcodedtext(codedtext)
        finalbytes = bytes(self.__bytesarray(paddedcodetext))

        with open(output_path, 'wb') as output:
            output.write(finalbytes)
        return output_path

    def __removepadding(self, bitstr):
        """Strip the 8-bit padding header and the padding bits it announces.

        Raises:
            ValueError: if ``bitstr`` is empty, i.e. has no padding header.
        """
        if not bitstr:
            raise ValueError("compressed data is empty: padding header missing")
        padded = int(bitstr[:8], 2)
        payload = bitstr[8:]
        # Slice by explicit end index: ``payload[:-0]`` would drop everything.
        return payload[:max(len(payload) - padded, 0)]

    def __decodetext(self, text):
        """Translate a bit string back to characters using the reverse table."""
        decoded = []
        bits = ''
        for bit in text:
            bits += bit
            if bits in self.__revcodes:
                decoded.append(self.__revcodes[bits])
                bits = ''
        return ''.join(decoded)

    def decompress(self, input_path):
        """Decode ``input_path`` into ``<path without extension>(decompressed).txt``.

        The output name is derived from ``self.path``. The reverse code table
        built by ``compress`` on this instance is used for decoding.
        """
        filename = os.path.splitext(self.path)[0]
        output_path = filename + '(decompressed)' + ".txt"

        with open(input_path, 'rb') as file:
            data = file.read()
        # Iterating over bytes yields ints; render each as 8 binary digits.
        bitstr = ''.join(format(byte, '08b') for byte in data)

        nonpaddedbitstr = self.__removepadding(bitstr)
        decodedtext = self.__decodetext(nonpaddedbitstr)

        with open(output_path, 'w') as output:
            output.write(decodedtext)
        return

# Driver: read the path of the text file to compress from standard input.
filepath = input()
h = huffman(filepath)
# compress() writes "<path without extension>.bin" and returns that path.
output_address = h.compress()
# Decompress with the same instance: decoding uses the code table that
# compress() stored on it.
h.decompress(output_address)

# Sample input typed at the prompt: /Users/Riya Agarwal/Desktop/sample.txt
