In [17]:
from scipy.io import loadmat
import heapq
import string

In [22]:
# The 26 lowercase ASCII letters, in order, as a list of one-character strings.
alphabet = [letter for letter in string.ascii_lowercase]
print("The alphabet is:", alphabet)

The alphabet is: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [20]:
# Read the MATLAB file 'freq.mat' and keep its 'freq' entry as the frequency table.
table = loadmat('freq.mat')
# The printed output shows 26 single-value rows -- presumably one relative
# frequency per letter a-z, in alphabetical order; verify against the data source.
frequencies = table['freq']
print("Frequencies: \n", frequencies)

Frequencies: 
 [[0.08167]
 [0.01492]
 [0.02782]
 [0.04253]
 [0.12702]
 [0.02228]
 [0.02015]
 [0.06094]
 [0.06966]
 [0.00153]
 [0.00772]
 [0.04025]
 [0.02406]
 [0.06749]
 [0.07507]
 [0.01929]
 [0.00095]
 [0.05987]
 [0.06327]
 [0.09056]
 [0.02758]
 [0.00978]
 [0.0236 ]
 [0.0015 ]
 [0.01947]
 [0.00102]]


In [15]:
class HeapNode:
    """A Huffman-tree node that is ordered by its frequency.

    Attributes:
        char: the symbol stored in the node (internal nodes created while
            merging are given ``None``).
        freq: the weight used for ordering nodes.
        left, right: child nodes; both start as ``None``.
    """

    def __init__(self, char, freq):
        self.char = char
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self, other):
        # heapq orders its items with "<". Defining it explicitly avoids
        # relying on Python falling back to the reflected __gt__.
        return self.freq < other.freq

    def __gt__(self, other):
        return self.freq > other.freq

In [33]:
class HuffmanEncoder:
    """Build a Huffman code for the lowercase letters and encode a text with it.

    Args:
        input_text: the text that ``encode()`` encodes. Every character must
            be a symbol that received a code, otherwise ``get_encoded``
            raises ``KeyError``.
        frequencies: an iterable of weights, paired positionally with the
            letters in ``ALPHABET`` ('a' first). Pairing uses ``zip``, so
            surplus entries on either side are ignored.

    Attributes:
        heap: the ``heapq`` list of ``HeapNode`` objects used while building.
        codes: mapping symbol -> bit string, filled by ``make_codes``.
        reverse_mapping: mapping bit string -> symbol, filled by ``make_codes``.
    """

    # Symbols the frequency table refers to, in table order.
    ALPHABET = string.ascii_lowercase

    def __init__(self, input_text, frequencies):
        self.input_text = input_text
        self.frequencies = frequencies
        self.heap = []
        self.codes = {}
        self.reverse_mapping = {}

    def encode(self):
        """Build the code table and print the encoded ``input_text``.

        Returns ``None``; call ``get_encoded`` afterwards to obtain the bit
        string as a value.
        """
        freq_dict = self.make_dict()
        self.make_heap(freq_dict)
        self.merge_nodes()
        self.make_codes()
        print(self.get_encoded(self.input_text))

    def make_dict(self):
        """Return a dict mapping each letter to its frequency.

        Uses the frequencies given to the constructor rather than a
        module-level variable, so each encoder works on its own table.
        """
        return dict(zip(self.ALPHABET, self.frequencies))

    def make_heap(self, freq_dict):
        """Push one leaf ``HeapNode`` per letter of ``freq_dict`` onto the heap.

        Letters are pushed in alphabet order; letters absent from
        ``freq_dict`` are skipped.
        """
        for symbol in self.ALPHABET:
            if symbol in freq_dict:
                heapq.heappush(self.heap, HeapNode(symbol, freq_dict[symbol]))

    def merge_nodes(self):
        """Repeatedly merge the two lightest nodes until one root remains."""
        while len(self.heap) > 1:
            lighter = heapq.heappop(self.heap)
            heavier = heapq.heappop(self.heap)

            # Internal nodes carry no symbol, only the combined weight.
            parent = HeapNode(None, lighter.freq + heavier.freq)
            parent.left = lighter
            parent.right = heavier

            heapq.heappush(self.heap, parent)

    def recursive_make_codes(self, root, current_code):
        """Walk the tree below ``root``, recording the code of every leaf.

        ``current_code`` is the bit string accumulated on the path so far;
        a step to the left child appends "0", to the right child "1".
        """
        if root is None:
            return

        if root.char is not None:
            self.codes[root.char] = current_code
            self.reverse_mapping[current_code] = root.char
            return

        self.recursive_make_codes(root.left, current_code + "0")
        self.recursive_make_codes(root.right, current_code + "1")

    def make_codes(self):
        """Pop the root off the heap and fill ``codes``/``reverse_mapping``.

        Raises ``IndexError`` (from ``heapq.heappop``) if the heap is empty.
        """
        root = heapq.heappop(self.heap)
        # A tree made of a single leaf would otherwise get the empty string
        # as its code, making the encoded text carry no bits at all.
        initial_code = "0" if root.char is not None else ""
        self.recursive_make_codes(root, initial_code)

    def get_encoded(self, text):
        """Return the concatenated codes of the characters of ``text``.

        Raises ``KeyError`` for a character that has no code.
        """
        return "".join(self.codes[character] for character in text)

Reference: Huffman coding Python implementation — https://bhrigu.me/blog/2017/01/17/huffman-coding-python-implementation/

In [34]:
# Encode a sample lowercase string using the frequency table loaded above;
# encode() builds the code table and prints the resulting bit string.
huffman_encoder = HuffmanEncoder("mahsaeskandari", frequencies)
huffman_encoder.encode()

001111110011001111110100011100101111110101011111111001011011
