In [1]:
import heapq
from collections import defaultdict, Counter

In [2]:
class HuffmanNode:
    """A node of a Huffman tree.

    Attributes are set in ``__init__``:
      char  -- the symbol stored in the node (``None`` by default)
      freq  -- the weight used to order nodes
      left  -- left child, initially ``None``
      right -- right child, initially ``None``
    """

    def __init__(self, char=None, freq=0):
        self.char, self.freq = char, freq
        # Children are attached later by whoever assembles the tree.
        self.left = self.right = None

    def __lt__(self, other):
        """Order nodes by ``freq`` only, so they can live in a ``heapq`` heap."""
        return other.freq > self.freq

In [3]:
def build_huffman_tree(frequencies):
    """Build a Huffman tree from a symbol -> frequency mapping.

    Args:
        frequencies: mapping whose ``items()`` yield ``(symbol, frequency)``
            pairs; one leaf ``HuffmanNode`` is created per pair.

    Returns:
        The root ``HuffmanNode`` of the tree, or ``None`` when
        ``frequencies`` is empty (there is nothing to build a tree from).
    """
    heap = [HuffmanNode(char, freq) for char, freq in frequencies.items()]
    if not heap:
        # No symbols at all: indexing heap[0] below would raise IndexError.
        return None
    heapq.heapify(heap)

    # Repeatedly merge the two lowest-frequency nodes until one root remains.
    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        merged = HuffmanNode(freq=left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(heap, merged)

    return heap[0]

def generate_huffman_codes(root):
    """Derive the symbol -> bit-string code table from a Huffman tree.

    Going to a left child appends "0" to the code, going to a right child
    appends "1". A node contributes an entry when its ``char`` is not
    ``None``. Entries are inserted in preorder (node, left, right).

    Args:
        root: tree root exposing ``char``, ``left`` and ``right``
            attributes, or ``None`` for an empty tree.

    Returns:
        dict mapping each symbol to its code string. An empty tree gives
        ``{}``. A tree that consists of a single leaf gives that symbol the
        code "0" so that every symbol has a non-empty, decodable code.
    """
    codes = {}
    if root is None:
        return codes

    # Single-symbol alphabet: the path from the root to itself is empty,
    # which would yield the code "" and make encoded data undecodable.
    if root.char is not None and root.left is None and root.right is None:
        codes[root.char] = "0"
        return codes

    # Explicit stack instead of recursion so very deep (skewed) trees do not
    # hit the interpreter's recursion limit.
    stack = [(root, "")]
    while stack:
        node, prefix = stack.pop()
        if node.char is not None:
            codes[node.char] = prefix
        # Push right first so the left subtree is visited first (preorder).
        if node.right is not None:
            stack.append((node.right, prefix + "1"))
        if node.left is not None:
            stack.append((node.left, prefix + "0"))

    return codes

def huffman_encode(data):
    """Huffman-encode ``data``.

    Args:
        data: iterable of hashable symbols (e.g. a ``str``); it is iterated
            once to count symbols and once to emit their codes.

    Returns:
        ``(encoded_data, codes)`` where ``encoded_data`` is the
        concatenation of the per-symbol code strings and ``codes`` is the
        symbol -> code table used. Empty input returns ``("", {})``.
    """
    frequencies = Counter(data)
    if not frequencies:
        # Nothing to encode; avoid building a tree from zero symbols.
        return "", {}
    root = build_huffman_tree(frequencies)
    codes = generate_huffman_codes(root)
    encoded_data = "".join(codes[char] for char in data)
    return encoded_data, codes

def huffman_decode(encoded_data, codes):
    """Decode a bit string produced with the given code table.

    Args:
        encoded_data: iterable of one-character strings (the bits).
        codes: mapping symbol -> code string.

    Returns:
        The decoded symbols joined into a single ``str``. Bits left over at
        the end that do not complete any code are ignored.
    """
    # Invert the table: code string -> symbol.
    symbol_for = dict(zip(codes.values(), codes.keys()))
    symbols = []
    pending = ""

    for bit in encoded_data:
        pending = pending + bit
        if pending not in symbol_for:
            continue  # keep accumulating bits until they match a code
        symbols.append(symbol_for[pending])
        pending = ""

    return "".join(symbols)

In [4]:
# Example usage of the Huffman coding functions.
# The printed labels are in Russian: "Original data", "Encoded data",
# "Symbol codes" and "Decoded data".
if __name__ == "__main__":
    # Sample text to round-trip through the encoder and decoder.
    data = "this is an example for huffman encoding"

    print("Оригинальные данные:", data)

    # huffman_encode returns the bit string and the symbol -> code table.
    encoded_data, codes = huffman_encode(data)
    print("Закодированные данные:", encoded_data)
    print("Коды символов:", codes)

    # Decode with the same table so the result can be compared with `data`.
    decoded_data = huffman_decode(encoded_data, codes)
    print("Декодированные данные:", decoded_data)

Оригинальные данные: this is an example for huffman encoding
Закодированные данные: 0101001001001001010110010010101111100010111100111011110011100000110111101011101110010110010101001100011011101001111110001011110000011111100101011100100010001
Коды символов: {'n': '000', 's': '0010', 'm': '0011', 'h': '0100', 't': '01010', 'd': '01011', 'r': '01100', 'l': '01101', 'x': '01110', 'c': '01111', 'p': '10000', 'g': '10001', 'i': '1001', ' ': '101', 'u': '11000', 'o': '11001', 'f': '1101', 'e': '1110', 'a': '1111'}
Декодированные данные: this is an example for huffman encoding
