In [None]:
%pip install spytial-diagramming
from spytial import *
from spytial.annotations import *

In [2]:
# Setup for performance metrics
import random
import string

# Performance tests are opt-in: flip this to True to run the benchmark cell.
RUN_PERF = False

# Common prefix shared by every performance-results file name.
perf_base = "spytial_perf"


def get_perf_path(structure, size):
    """Return the results file name for one benchmark configuration.

    Args:
        structure: Name of the data structure being measured (a str).
        size: Input size for the run; formatted into the name with an f-string.

    Returns:
        A string of the form "<perf_base>_<structure>_<size>.json".
    """
    file_name = f"{size}.json"
    return "_".join((perf_base, structure, file_name))


# Passed as ``perf_iterations`` to diagram() by the performance cell.
PI = 20
# Input sizes the performance cell iterates over.
SIZES = [5, 10, 25, 50]


# Huffman Codes



In [None]:
import heapq

# spytial annotations controlling how a _Node graph is drawn by diagram().
# NOTE(review): the intent notes below are read off the decorator names and
# selector text only -- confirm against the spytial documentation.
# Presumably: show sym/freq as node attributes and hide the raw value atoms.
@attribute(field="sym")
@attribute(field="freq")
@hideAtom(selector="NoneType+int+str")
# Presumably: descendants below ancestors, zero_ subtrees left, one_ subtrees right.
@orientation(selector="(_Node->_Node) & ^(zero_ + one_)", directions=["below"])
@orientation(selector="(zero_.*(zero_ + one_)) & (_Node->_Node)", directions=["left"])
@orientation(selector="(one_.*(zero_ + one_)) & (_Node->_Node)", directions=["right"])
# Presumably: keep the two children of the same parent on one horizontal line.
@align(selector="((~zero_).one_) & (_Node->_Node)", direction="horizontal")
class _Node:
    """One node of the Huffman tree.

    Attributes:
        sym: Symbol carried by the node; None unless one is supplied.
        freq: Frequency stored on the node (for merged nodes, _build_tree
            passes the sum of the two children's frequencies).
        zero_: Child followed on a "0" bit, or None.
        one_: Child followed on a "1" bit, or None.
    """

    __slots__ = ("sym", "freq", "zero_", "one_")

    def __init__(self, freq, sym=None, zero_=None, one_=None):
        self.sym = sym
        self.freq = freq
        self.zero_ = zero_
        self.one_ = one_

def _build_tree(freqs):
    """Build a Huffman tree from a symbol -> frequency mapping.

    Args:
        freqs: Mapping from each symbol to its frequency.

    Returns:
        The root _Node, or None when ``freqs`` is empty. When there is exactly
        one symbol, the root is a parent node whose ``zero_`` child is the
        single leaf and whose ``one_`` child is None.
    """
    # Heap entries are (freq, serial, node). The serial is unique, so tuple
    # comparison is always settled before the _Node objects are reached.
    forest = [
        (weight, order, _Node(weight, sym=symbol))
        for order, (symbol, weight) in enumerate(freqs.items())
    ]
    if not forest:
        return None

    next_serial = len(forest)
    heapq.heapify(forest)

    if len(forest) == 1:
        weight, _, only_leaf = forest[0]
        return _Node(weight, zero_=only_leaf, one_=None)

    # Repeatedly merge the two lightest trees until one tree remains.
    while len(forest) > 1:
        light_weight, _, light = heapq.heappop(forest)
        heavy_weight, _, heavy = heapq.heappop(forest)
        merged = _Node(light_weight + heavy_weight, zero_=light, one_=heavy)
        heapq.heappush(forest, (merged.freq, next_serial, merged))
        next_serial += 1

    return forest[0][2]

def _gen_codes(root):
    if root is None: return {}
    codes = {}
    def dfs(n, pref):
        leaf = (n.sym is not None) and (n.zero_ is None and n.one_ is None)
        if leaf:
            codes[n.sym] = pref or "0"
            return
        if n.zero_: dfs(n.zero_, pref + "0")
        if n.one_:  dfs(n.one_,  pref + "1")
    dfs(root, "")
    return codes

def huffman_codes(data):
    """Compute a Huffman code for the symbols occurring in ``data``.

    Args:
        data: Iterable of hashable symbols (for example a str).

    Returns:
        A ``(codes, root)`` pair: the symbol -> bit-string dict produced by
        _gen_codes and the tree root produced by _build_tree (None when
        ``data`` is empty).
    """
    # Count occurrences, keeping first-seen order of the symbols.
    tally = {}
    for symbol in data:
        if symbol in tally:
            tally[symbol] += 1
        else:
            tally[symbol] = 1

    tree = _build_tree(tally)
    code_table = _gen_codes(tree)
    return code_table, tree

def encode(data, codes):
    """Concatenate the code of every symbol in ``data`` into one bit string.

    Args:
        data: Iterable of symbols to encode.
        codes: Mapping from symbol to its bit-string code.

    Returns:
        The encoded bit string ("" for empty input).

    Raises:
        KeyError: If a symbol in ``data`` has no entry in ``codes``.
    """
    pieces = []
    for symbol in data:
        pieces.append(codes[symbol])
    return "".join(pieces)

def decode(bits, root):
    """Decode a bit string back into the list of symbols it represents.

    Starting at ``root``, a "0" follows ``zero_`` and any other character
    follows ``one_``. Whenever a leaf (a node with a symbol and no children)
    is reached, its symbol is emitted and the walk restarts at ``root``.

    Args:
        bits: Sequence of bit characters, typically the str returned by encode.
        root: Root of the Huffman tree, or None.

    Returns:
        List of decoded symbols. Empty when ``root`` is None or ``bits`` is
        empty. Trailing bits that stop short of a leaf are dropped.

    Raises:
        AttributeError: If ``bits`` leads to a missing child (the walk steps
            onto None), i.e. the bits do not match the tree.
    """
    if root is None:
        return []

    if root.zero_ is None and root.one_ is None:
        # The root is itself the only leaf. _gen_codes gives such a symbol the
        # one-bit code "0", so every bit stands for one occurrence and an empty
        # bit string decodes to nothing (previously it produced one phantom
        # symbol, breaking the round trip with encode).
        return [root.sym] * len(bits)

    out, n = [], root
    for b in bits:
        n = n.zero_ if b == "0" else n.one_
        if n.sym is not None and n.zero_ is None and n.one_ is None:
            out.append(n.sym)
            n = root
    return out







![img](img/huffman-codes.png)

In [4]:

# Demo on the CLRS Fig. 16.4 example: 45 a's, 13 b's, 12 c's, 16 d's, 9 e's and 5 f's.
s = "a" * 45 + "b" * 13 + "c" * 12 + "d" * 16 + "e" * 9 + "f" * 5
codes, root = huffman_codes(s)
enc = encode(s, codes)
dec = "".join(decode(enc, root))

# Sanity checks so the demo fails loudly if the coder regresses:
# decoding must reproduce the input, and the optimal encoding of these
# frequencies takes 45*1 + (13 + 12 + 16)*3 + (9 + 5)*4 = 224 bits.
assert dec == s, "Huffman round trip did not reproduce the input"
assert len(enc) == 224, f"expected 224 encoded bits, got {len(enc)}"

diagram(root, height=800)


## Performance

In [5]:
# Benchmark diagram rendering of Huffman trees of several sizes (opt-in via RUN_PERF).
if not RUN_PERF:
    print("Performance testing skipped. Set RUN_PERF = True to enable.")
else:
    STRUCTURE = "huffmantree"
    for size in SIZES:
        # Draw `size` distinct lowercase letters, capped at the 26 available.
        chars = random.sample(string.ascii_lowercase, min(size, 26))
        # Repeat each letter a random 1-100 times so the frequencies differ.
        s = "".join(char * random.randint(1, 100) for char in chars)

        codes, root = huffman_codes(s)

        diagram(
            root,
            method="browser",
            perf_path=get_perf_path(STRUCTURE, size),
            perf_iterations=PI,
            headless=True,
        )


Performance testing skipped. Set RUN_PERF = True to enable.
