In [1]:
import heapq
import math
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [5]:
from collections import Counter

class Node:
    """One vertex of a binary Huffman tree.

    A node carries an optional symbol (``char``), a weight (``freq``) and
    references to its two children, which both start out as ``None``.
    """

    def __init__(self, char=None, freq=0):
        # Payload supplied by the caller.
        self.char, self.freq = char, freq
        # Children are attached later by whoever assembles the tree.
        self.left = self.right = None

def build_huffman_tree(text):
    """Build a Huffman tree for ``text`` and return its root node.

    Each distinct symbol in ``text`` becomes a leaf ``Node`` weighted by its
    number of occurrences. The two lightest nodes are repeatedly merged under
    a new parent (whose ``char`` is ``None``) until one node remains.

    Args:
        text: Iterable of hashable symbols, typically a string.

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``text`` is empty.
        When ``text`` contains a single distinct symbol the root is itself
        a leaf.
    """
    frequency = Counter(text)
    if not frequency:
        # Nothing to encode: there is no tree to build.
        return None

    # Heap entries are (freq, order, node). ``order`` is a unique creation
    # index: it breaks frequency ties in favour of the node created first
    # (leaves in first-occurrence order, then merged nodes in the order they
    # were made) and guarantees the heap never has to compare Node objects.
    heap = [
        (freq, order, Node(char, freq))
        for order, (char, freq) in enumerate(frequency.items())
    ]
    heapq.heapify(heap)
    next_order = len(heap)

    while len(heap) > 1:
        # The first node popped becomes the left child, the second the right.
        _, _, left = heapq.heappop(heap)
        _, _, right = heapq.heappop(heap)
        merged = Node(freq=left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(heap, (merged.freq, next_order, merged))
        next_order += 1

    return heap[0][2]

def generate_codes(node, current_code='', code_table=None):
    """Map every symbol stored in a Huffman (sub)tree to its bit string.

    The tree is walked in pre-order, appending ``'0'`` when descending to a
    left child and ``'1'`` when descending to a right child. Every node whose
    ``char`` is not ``None`` gets an entry in the table.

    Args:
        node: Root of the (sub)tree to walk; ``None`` yields no entries.
        current_code: Bit-string prefix already accumulated for ``node``.
        code_table: Optional dict to fill in; a new dict is created when
            omitted.

    Returns:
        The dict mapping symbol -> bit string (the same object as
        ``code_table`` when one was passed in).
    """
    if code_table is None:
        code_table = {}

    # An explicit stack replaces recursion so that very deep (heavily skewed)
    # trees cannot hit Python's recursion limit.
    pending = [(node, current_code)]
    while pending:
        subtree, prefix = pending.pop()
        if subtree is None:
            continue
        if subtree.char is not None:
            is_leaf = subtree.left is None and subtree.right is None
            # A tree made of one lone leaf would otherwise receive the empty
            # code, and encoding would then emit no bits at all. Give that
            # single symbol a one-bit code instead.
            code_table[subtree.char] = '0' if (is_leaf and not prefix) else prefix
        # Push right first so the left subtree is processed first (pre-order),
        # which keeps the table's insertion order left-to-right.
        pending.append((subtree.right, prefix + '1'))
        pending.append((subtree.left, prefix + '0'))
    return code_table

def encode_text(text, code_table):
    """Encode ``text`` as one bit string using ``code_table``.

    Each symbol of ``text`` is looked up in ``code_table`` and the resulting
    codes are concatenated in order. A symbol missing from the table raises
    ``KeyError``.
    """
    bit_chunks = [code_table[symbol] for symbol in text]
    return ''.join(bit_chunks)

def decode_text(encoded_text, root):
    """Decode a bit string back into text by walking the Huffman tree.

    Starting at ``root``, each ``'0'`` moves to the left child and any other
    character moves to the right child. Reaching a node whose ``char`` is not
    ``None`` emits that symbol and restarts from ``root``. Trailing bits that
    do not complete a code are dropped.

    Args:
        encoded_text: Iterable of bit characters, typically a str of 0s/1s.
        root: Root ``Node`` of the tree the text was encoded with.

    Returns:
        The decoded string (empty when ``encoded_text`` is empty).

    Raises:
        ValueError: If there are bits to decode but ``root`` is ``None``.
    """
    if not encoded_text:
        return ''
    if root is None:
        raise ValueError("cannot decode a non-empty bit string without a tree")

    if root.left is None and root.right is None and root.char is not None:
        # Single-symbol tree: there are no branches to follow, so every bit
        # stands for one occurrence of the only symbol.
        return root.char * len(encoded_text)

    # Collect symbols and join once rather than growing a string per symbol.
    pieces = []
    current_node = root
    for bit in encoded_text:
        current_node = current_node.left if bit == '0' else current_node.right
        if current_node.char is not None:
            pieces.append(current_node.char)
            current_node = root
    return ''.join(pieces)

def average_bits_per_char(code_table, occurrences):
    """Return the mean code length, in bits, per encoded symbol.

    Args:
        code_table: Mapping symbol -> bit string.
        occurrences: Mapping symbol -> number of times it occurs.

    Returns:
        Total encoded bits divided by the total number of symbols, or ``0.0``
        when ``occurrences`` counts no symbols at all.

    Raises:
        KeyError: If a counted symbol has no entry in ``code_table``.
    """
    total_chars = sum(occurrences.values())
    if total_chars == 0:
        # Empty input: avoid dividing by zero; no symbols means no bits.
        return 0.0
    total_bits = sum(len(code_table[char]) * freq for char, freq in occurrences.items())
    return total_bits / total_chars

# Demo: Huffman-encode each sample sentence with a tree built from that
# sentence alone, decode it again, and report the mean code length.
sentences = [
    "peter piper picked a peck of pickled peppers",
    "she sells seashells by the seashore",
]

for sentence in sentences:
    # Symbol counts, tree and code table for this sentence.
    occurrences = Counter(sentence)
    root = build_huffman_tree(sentence)
    code_table = generate_codes(root)

    # Round trip through the code, plus the compression statistic.
    encoded = encode_text(sentence, code_table)
    decoded = decode_text(encoded, root)
    average_bits = average_bits_per_char(code_table, occurrences)

    # One line per field; the trailing "\n" leaves a blank line between
    # sentences.
    print(
        f"Original: {sentence}",
        f"Encoded: {encoded}",
        f"Decoded: {decoded}",
        f"Average Bits per Character: {average_bits}\n",
        sep="\n",
    )


Original: peter piper picked a peck of pickled peppers
Encoded: 01111000101111000110011001011111000110011001101010111110000110000111100111110101011110001000010111001100110101011001101110000110011110101111100000111
Decoded: peter piper picked a peck of pickled peppers
Average Bits per Character: 3.3863636363636362

Original: she sells seashells by the seashore
Encoded: 010011111100111110010001110011110001010011111001000111010100101011101011000111111001111000101001101110000111
Decoded: she sells seashells by the seashore
Average Bits per Character: 3.085714285714286

