<a href="https://colab.research.google.com/github/selcuk-yalcin/Information-Communication-Processing/blob/main/Entropy_and_Huffman_Coding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math
import heapq
from collections import defaultdict
import pandas as pd

# Raw counts extracted from the image (letter: frequency).
# These are the absolute frequencies of each character.
# NOTE(review): the entries are elided here (`...`). As written, `{...}` is a
# set containing Ellipsis rather than a dict, so the `.values()` / `.items()`
# calls that follow will fail until real `char: count` pairs are filled in.
frequencies = {
    ...
}

total_count = sum(frequencies.values())

# Step 1: Calculate probabilities
# Convert absolute frequencies into relative probabilities (p = count / total).
probabilities = {char: count / total_count for char, count in frequencies.items()}

# Step 2: Compute entropy H = -∑ p * log2(p)
# Shannon entropy measures the average uncertainty (information content).
# Symbols with a zero count are skipped: math.log2(0) raises ValueError, and
# by the usual convention 0 * log2(0) contributes nothing to the sum.
entropy = -sum(p * math.log2(p) for p in probabilities.values() if p > 0)

# Step 3: Huffman coding
# Define a binary tree node: leaves carry a symbol and its probability; internal nodes carry only the combined probability of their two children.
class Node:
    """A node of the Huffman tree.

    A node created with a ``symbol`` acts as a leaf; a node created without
    one (``symbol`` left as ``None``) acts as an internal node that links to
    its ``left`` and ``right`` children.
    """

    def __init__(self, symbol=None, prob=0, left=None, right=None):
        """Store the symbol, its probability weight, and the child links."""
        self.symbol, self.prob = symbol, prob
        self.left, self.right = left, right

    def __lt__(self, other):
        """Order nodes by probability so heapq can pick the lightest one."""
        return self.prob < other.prob

# Build the Huffman tree by combining the two lowest-probability nodes at each step.
def build_huffman_tree(probs):
    """Build a Huffman tree from a ``symbol -> probability`` mapping.

    Every symbol starts as its own leaf ``Node``. The two lightest nodes are
    repeatedly removed from a min-heap and replaced by a parent whose
    probability is their sum, until a single node remains.

    Returns the root ``Node``. An empty mapping raises ``IndexError`` because
    there is no root to return.
    """
    pending = []
    for symbol, prob in probs.items():
        pending.append(Node(symbol=symbol, prob=prob))
    heapq.heapify(pending)

    # Each merge removes two nodes and adds one, so n leaves need n - 1 merges.
    for _ in range(len(pending) - 1):
        lightest = heapq.heappop(pending)
        second_lightest = heapq.heappop(pending)
        parent = Node(
            prob=lightest.prob + second_lightest.prob,
            left=lightest,
            right=second_lightest,
        )
        heapq.heappush(pending, parent)

    return pending[0]

# Recursively assign binary codes to each symbol.
def build_code_table(node, prefix='', codebook=None):
    """Recursively assign a binary code string to every symbol in the tree.

    Args:
        node: Root of the (sub)tree to walk. A node whose ``symbol`` is not
            ``None`` is treated as a leaf; otherwise its ``left`` and
            ``right`` children are visited.
        prefix: Bits accumulated on the path from the root to ``node``.
        codebook: Dict to fill with ``symbol -> code`` entries. When omitted,
            a fresh dict is created for this call.

    Returns:
        The codebook dict (the one passed in, or the newly created one).

    Note:
        A tree consisting of a single leaf yields the empty string as that
        symbol's code, since no branch is ever taken.
    """
    # A `{}` default would be created once and shared by every call, leaking
    # codes from one tree into the next; create the dict per call instead.
    if codebook is None:
        codebook = {}

    if node.symbol is not None:
        codebook[node.symbol] = prefix
    else:
        # Left edges append '0', right edges append '1'.
        build_code_table(node.left, prefix + '0', codebook)
        build_code_table(node.right, prefix + '1', codebook)
    return codebook

# Derive the prefix codes: build the Huffman tree first, then walk it to get
# the symbol -> bit-string table.
tree = build_huffman_tree(probabilities)
huffman_codes = build_code_table(tree)

# Step 4: Calculate average codeword length
# Each code's bit count is weighted by the probability of its symbol.
average_code_length = sum(
    probabilities[ch] * len(code) for ch, code in huffman_codes.items()
)

# Report the summary figures
print(f"Total symbols: {total_count}")
print(f"Entropy (bits): {entropy:.4f}")
print(f"Average Huffman code length: {average_code_length:.4f}\n")

# Per-character table (sorted by character) of probability and code length
rows = [
    {
        "Character": ch,
        "Probability": probabilities[ch],
        "Code Length (bits)": len(huffman_codes[ch]),
    }
    for ch in sorted(huffman_codes)
]

df = pd.DataFrame(rows)
print(df)

In [None]:
#