Skip to content
Browse files

Huffman encoding trees

  • Loading branch information...
1 parent 9015a0b commit 8bcaf3ca8a99431f8b38dbc1856b5064c5c0a96a @sarabander committed
Showing with 374 additions and 0 deletions.
  1. +74 −0 2.3/2.67.scm
  2. +56 −0 2.3/2.68.scm
  3. +46 −0 2.3/2.69.scm
  4. +48 −0 2.3/2.70.scm
  5. +107 −0 2.3/2.71.scm
  6. +43 −0 2.3/2.72.scm
View
74 2.3/2.67.scm
@@ -0,0 +1,74 @@
+
+;; From the book
+(define (make-leaf symbol weight)
+ (list 'leaf symbol weight))
+
+(define (leaf? object)
+ (eq? (car object) 'leaf))
+
+(define (symbol-leaf x) (cadr x))
+
+(define (weight-leaf x) (caddr x))
+
+(define (make-code-tree left right)
+ (list left
+ right
+ (append (symbols left) (symbols right))
+ (+ (weight left) (weight right))))
+
+(define (left-branch tree) (car tree))
+
+(define (right-branch tree) (cadr tree))
+
+(define (symbols tree)
+ (if (leaf? tree)
+ (list (symbol-leaf tree))
+ (caddr tree)))
+
+(define (weight tree)
+ (if (leaf? tree)
+ (weight-leaf tree)
+ (cadddr tree)))
+
+(define (decode bits tree)
+ (define (decode-1 bits current-branch)
+ (if (null? bits)
+ '()
+ (let ((next-branch
+ (choose-branch (car bits) current-branch)))
+ (if (leaf? next-branch)
+ (cons (symbol-leaf next-branch)
+ (decode-1 (cdr bits) tree))
+ (decode-1 (cdr bits) next-branch)))))
+ (decode-1 bits tree))
+
+(define (choose-branch bit branch)
+ (cond ((= bit 0) (left-branch branch))
+ ((= bit 1) (right-branch branch))
+ (else (error "bad bit - CHOOSE-BRANCH" bit))))
+
+(define (adjoin-set x set)
+ (cond ((null? set) (list x))
+ ((< (weight x) (weight (car set))) (cons x set))
+ (else (cons (car set)
+ (adjoin-set x (cdr set))))))
+
+(define (make-leaf-set pairs)
+ (if (null? pairs)
+ '()
+ (let ((pair (car pairs)))
+ (adjoin-set (make-leaf (car pair) ; symbol
+ (cadr pair)) ; frequency
+ (make-leaf-set (cdr pairs))))))
+;; ------------------------
+
+(define sample-tree
+ (make-code-tree (make-leaf 'A 4)
+ (make-code-tree
+ (make-leaf 'B 2)
+ (make-code-tree (make-leaf 'D 1)
+ (make-leaf 'C 1)))))
+
+(define sample-message '(0 1 1 0 0 1 0 1 0 1 1 1 0))
+
+(decode sample-message sample-tree) ; '(A D A B B C A)
View
56 2.3/2.68.scm
@@ -0,0 +1,56 @@
+
+(define (encode message tree)
+ (if (null? message)
+ '()
+ (append (encode-symbol (car message) tree)
+ (encode (cdr message) tree))))
+
+(define (encode-symbol symbol tree)
+ (cond ((leaf? tree) empty)
+ ((contains? symbol (left-branch tree))
+ (cons 0 (encode-symbol symbol (left-branch tree))))
+ ((contains? symbol (right-branch tree))
+ (cons 1 (encode-symbol symbol (right-branch tree))))
+ (else (error "Unrecognized symbol:" symbol))))
+
+;; Naïve version (2n steps)
+(define (contains? symbol tree)
+ (if (leaf? tree)
+ (if (eq? symbol (symbol-leaf tree))
+ true
+ false)
+ (or (contains? symbol (left-branch tree))
+ (contains? symbol (right-branch tree)))))
+
+;; Better version (n steps)
+(define (contains? symbol tree)
+ (define (search symb lst)
+ (cond ((empty? lst) false)
+ ((eq? symb (car lst)) true)
+ (else (search symb (cdr lst)))))
+ (search symbol (symbols tree)))
+
+;; Unit tests
+(contains? 'A sample-tree) ; true
+(contains? 'B sample-tree) ; true
+(contains? 'C sample-tree) ; true
+(contains? 'D sample-tree) ; true
+
+(contains? 'E sample-tree) ; false
+(contains? 'R sample-tree) ; false
+(contains? 'a sample-tree) ; false (symbols are case-sensitive)
+
+(encode-symbol 'A sample-tree) ; '(0)
+(encode-symbol 'B sample-tree) ; '(1 0)
+(encode-symbol 'C sample-tree) ; '(1 1 1)
+(encode-symbol 'D sample-tree) ; '(1 1 0)
+
+(encode-symbol 'F sample-tree) ; => Unrecognized symbol: F
+(encode-symbol 'S sample-tree) ; => Unrecognized symbol: S
+
+;; Encoding the result of 2.67
+
+(encode '(A D A B B C A) sample-tree) ; '(0 1 1 0 0 1 0 1 0 1 1 1 0)
+(decode '(0 1 1 0 0 1 0 1 0 1 1 1 0) sample-tree) ; '(A D A B B C A)
+
+;; correct!
View
46 2.3/2.69.scm
@@ -0,0 +1,46 @@
+
+;; Uses these procedures from the book
+(define (adjoin-set x set)
+ (cond ((null? set) (list x))
+ ((< (weight x) (weight (car set))) (cons x set))
+ (else (cons (car set)
+ (adjoin-set x (cdr set))))))
+
+(define (make-code-tree left right)
+ (list left
+ right
+ (append (symbols left) (symbols right))
+ (+ (weight left) (weight right))))
+;; ----------------------
+
+(define (generate-huffman-tree pairs)
+ (successive-merge (make-leaf-set pairs)))
+
+(define (successive-merge leaf-set)
+ (if (= (length leaf-set) 1)
+ (car leaf-set)
+ (successive-merge
+ (adjoin-set (make-code-tree (car leaf-set)
+ (cadr leaf-set))
+ (cddr leaf-set)))))
+
+(generate-huffman-tree '()) ; invalid input
+(generate-huffman-tree '((A 2))) ; '(leaf A 2)
+(generate-huffman-tree '((A 3) (B 1))) ; '((leaf B 1) (leaf A 3) (B A) 4)
+(generate-huffman-tree '((B 2) (D 1) (A 4) (C 1)))
+;; '((leaf A 4)
+;; ((leaf B 2) ((leaf C 1) (leaf D 1) (C D) 2) (B C D) 4)
+;; (A B C D)
+;; 8)
+
+(generate-huffman-tree '((A 4) (B 2) (C 1) (D 1)))
+;; '((leaf A 4)
+;; ((leaf B 2) ((leaf D 1) (leaf C 1) (D C) 2) (B D C) 4)
+;; (A B D C)
+;; 8)
+
+(generate-huffman-tree
+ '((A 8) (B 3) (C 1) (D 1) (E 1) (F 1) (G 1) (H 1)))
+
+(generate-huffman-tree
+ (reverse '((A 8) (B 3) (C 1) (D 1) (E 1) (F 1) (G 1) (H 1))))
View
48 2.3/2.70.scm
@@ -0,0 +1,48 @@
+
+(define rock-vocabulary
+ (generate-huffman-tree
+ '((A 2) (NA 16) (BOOM 1) (SHA 3) (GET 2) (YIP 9) (JOB 2) (WAH 1))))
+
+;; Result:
+
+;; '((leaf NA 16)
+;; ((leaf YIP 9)
+;; (((leaf A 2) ((leaf WAH 1) (leaf BOOM 1) (WAH BOOM) 2) (A WAH BOOM) 4)
+;; ((leaf SHA 3) ((leaf JOB 2) (leaf GET 2) (JOB GET) 4) (SHA JOB GET) 7)
+;; (A WAH BOOM SHA JOB GET)
+;; 11)
+;; (YIP A WAH BOOM SHA JOB GET)
+;; 20)
+;; (NA YIP A WAH BOOM SHA JOB GET)
+;; 36)
+
+(define message
+ '(GET A JOB
+ SHA NA NA NA NA NA NA NA NA
+ GET A JOB
+ SHA NA NA NA NA NA NA NA NA
+ WAH YIP YIP YIP YIP YIP YIP YIP YIP YIP
+ SHA BOOM))
+
+(define encoded1
+ (encode message rock-vocabulary))
+;; '(1 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0
+;; 0 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 0 1 0 1 0 1 0
+;; 1 0 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1)
+
+;; Check
+(decode encoded1 rock-vocabulary) ; gives back original message
+
+;; Code efficiency
+
+(length encoded1) ; 84 bits required
+
+(length message) ; 36 symbols
+
+;; Smallest number of bits required to encode the message using fixed-length
+;; code, where each symbol needs three bits (to distinguish eight symbols):
+
+(* 3 (length message)) ; 108 bits
+
+;; Saving in space:
+(/ (- 108 84) 108.) ; 22%
View
107 2.3/2.71.scm
@@ -0,0 +1,107 @@
+
+;; With small modification
+(define (successive-merge leaf-set)
+ (if (= (length leaf-set) 1)
+ (car leaf-set)
+ (successive-merge
+ (adjoin-set (make-code-tree (cadr leaf-set) ; to place higher
+ (car leaf-set)) ; weight leaf left
+ (cddr leaf-set)))))
+
+;; n = 5:
+
+(define alphabet1 '((A 1) (B 2) (C 4) (D 8) (E 16)))
+
+;; Tree for this alphabet:
+
+;; {A B C D E} 31
+;; ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C D} 15
+;; 0 → E 16 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C} 7
+;; 10 → D 8 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B} 3
+;; 110 → C 4 ╱╲
+;; ╱ ╲
+;; ╱ ╲
+;; 1110 → B 2 A 1 ← 1111
+
+;; Generator makes equivalent tree:
+
+(generate-huffman-tree alphabet1)
+;; '((leaf E 16)
+;; ((leaf D 8)
+;; ((leaf C 4) ((leaf B 2) (leaf A 1) (B A) 3) (C B A) 7)
+;; (D C B A)
+;; 15)
+;; (E D C B A)
+;; 31)
+
+
+;; n = 10:
+
+(define alphabet2 '((A 1) (B 2) (C 4) (D 8) (E 16)
+ (F 32) (G 64) (H 128) (I 256) (J 512)))
+
+;; Tree for this alphabet:
+
+;; {A B C D E F G H I J} 1023
+;; ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C D E F G H I} 511
+;; 0 → J 512 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C D E F G H} 255
+;; 10 → I 256 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C D E F G} 127
+;; 110 → H 128 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C D E F} 63
+;; 1110 → G 64 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C D E} 31
+;; 11110 → F 32 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C D} 15
+;; 111110 → E 16 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B C} 7
+;; 1111110 → D 8 ╱╲
+;; ╱ ╲
+;; ╱ ╲ {A B} 3
+;; 11111110 → C 4 ╱╲
+;; ╱ ╲
+;; ╱ ╲
+;; 111111110 → B 2 A 1 ← 111111111
+
+;; Generator makes equivalent tree:
+
+(generate-huffman-tree alphabet2)
+;; '((leaf J 512)
+;; ((leaf I 256)
+;; ((leaf H 128)
+;; ((leaf G 64)
+;; ((leaf F 32)
+;; ((leaf E 16)
+;; ((leaf D 8)
+;; ((leaf C 4) ((leaf B 2) (leaf A 1) (B A) 3) (C B A) 7)
+;; (D C B A)
+;; 15)
+;; (E D C B A)
+;; 31)
+;; (F E D C B A)
+;; 63)
+;; (G F E D C B A)
+;; 127)
+;; (H G F E D C B A)
+;; 255)
+;; (I H G F E D C B A)
+;; 511)
+;; (J I H G F E D C B A)
+;; 1023)
+
+;; 1 bit for most frequent symbol, n-1 bits for least frequent symbol.
View
43 2.3/2.72.scm
@@ -0,0 +1,43 @@
+
+;; Look in 2.68 and 2.71 for dependencies
+
+(define huffman1
+ (generate-huffman-tree alphabet1))
+
+(define huffman2
+ (generate-huffman-tree alphabet2))
+
+(define counter 0)
+
+(define (encode-symbol symbol tree)
+ (set! counter (add1 counter))
+ (cond ((leaf? tree) empty)
+ ((contains? symbol (left-branch tree))
+ (cons 0 (encode-symbol symbol (left-branch tree))))
+ ((contains? symbol (right-branch tree))
+ (cons 1 (encode-symbol symbol (right-branch tree))))
+ (else (error "Unrecognized symbol:" symbol))))
+
+(define (contains? symbol tree)
+ (define (search symb lst)
+ (set! counter (add1 counter))
+ (cond ((empty? lst) false)
+ ((eq? symb (car lst)) true)
+ (else (search symb (cdr lst)))))
+ (search symbol (symbols tree)))
+
+(set! counter 0)
+
+(encode-symbol 'A huffman1) ; counter = 23
+(encode-symbol 'A huffman2) ; counter = 73
+
+(encode-symbol 'E huffman1) ; counter = 3
+(encode-symbol 'J huffman2) ; counter = 3
+
+;; We can encode the most frequent symbol in constant time.
+;; Encoding the least frequent symbol has order of O(n²) growth,
+;; because both 'encode-symbol' and 'contains?' has O(n) growth.
+
+;; In worst case scenario, 'encode-symbol' calls 'contains?' twice.
+;; Left branch takes constant time with this particular tree.
+;; Right branch takes at most O(n) time.

0 comments on commit 8bcaf3c

Please sign in to comment.
Something went wrong with that request. Please try again.