Skip to content

Commit

Permalink
Add Huffman tree encoding and generation
Browse files Browse the repository at this point in the history
  • Loading branch information
sjl committed Aug 23, 2016
1 parent 62a00c4 commit af244a1
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 7 deletions.
4 changes: 3 additions & 1 deletion Makefile
@@ -1,4 +1,6 @@
.PHONY:
.PHONY: vendor

vendor: vendor/quickutils.lisp

vendor/quickutils.lisp: vendor/make-quickutils.lisp
cd vendor && ros run -L sbcl --load make-quickutils.lisp --eval '(quit)'
86 changes: 85 additions & 1 deletion src/huffman-trees.lisp
Expand Up @@ -49,6 +49,8 @@
(symbols (required-argument) :type list)
(weight (required-argument) :type real))

(define-with-macro node left right)


(defun tree-symbols (tree)
(etypecase tree
Expand All @@ -69,6 +71,10 @@
:symbols (append (tree-symbols left)
(tree-symbols right))))

(defun length1p (list)
"Return whether `list` has length 1, without traversing it all the way."
(and (consp list) (null (cdr list))))


;;;; External Interface -------------------------------------------------------
(defun decode (bits tree)
Expand All @@ -85,6 +91,53 @@
(recur (rest bits) tree)))
(node (recur (rest bits) next-branch))))))))

(defun encode (message tree)
(labels
((fail (symbol)
(error "Unknown symbol ~S" symbol))
(encode-symbol (symbol tree)
(recursively ((tree tree))
(etypecase tree
(leaf
(if (eql symbol (leaf-symbol tree))
'()
(fail symbol)))
(node
(with-node (tree)
(cond
((member symbol (tree-symbols left)) (cons 0 (recur left)))
((member symbol (tree-symbols right)) (cons 1 (recur right)))
(t (fail symbol)))))))))
(if (null message)
'()
(append (encode-symbol (first message) tree)
(encode (rest message) tree)))))

(defun encode (message tree)
;; Alternate version
(flet ((encode-symbol (symbol tree)
(recursively ((tree tree))
(etypecase tree
(leaf (if (eql symbol (leaf-symbol tree))
'()
(error "Unknown symbol ~S" symbol)))
(node (with-node (tree)
;; If it's not in the left, assume it's in the right. If
;; it's not present at all we'll just recur all the way
;; down to the rightmost leaf and let that handle the
;; error.
;;
;; This saves a member check at each level, but doesn't
;; bail early on garbage data. One would hope garbage
;; data is rare.
(if (member symbol (tree-symbols left))
(cons 0 (recur left))
(cons 1 (recur right)))))))))
(if (null message)
'()
(append (encode-symbol (first message) tree)
(encode (rest message) tree)))))


(defun adjoin-set (tree set)
(cond
Expand All @@ -99,17 +152,48 @@
(defun make-leaf-set (pairs)
(if (null pairs)
'()
(destructuring-bind (symbol weight)
(destructuring-bind (symbol . weight)
(first pairs)
(adjoin-set (make-leaf symbol weight)
(make-leaf-set (rest pairs))))))


(defun generate-huffman-tree (data)
(check-type data cons)
(labels ((successive-merge (trees)
(if (length1p trees)
(first trees)
(destructuring-bind (a b . rest) trees
(successive-merge
(adjoin-set (make-node a b) rest))))))
(successive-merge (make-leaf-set (hash-table-alist (frequencies data))))))


;;;; Scratch ------------------------------------------------------------------
(defparameter *sample-tree*
(make-node (make-leaf 'a 4)
(make-node (make-leaf 'b 2)
(make-node (make-leaf 'D 1)
(make-leaf 'C 1)))))

(defparameter *song*
'(Well she was just seventeen
You know what I mean
And the way she looked was way beyond compare
So how could I dance with another
When I saw her standing there

Well she looked at me and I I could see
That before too long Id fall in love with her
She wouldnt dance with another
When I saw her standing there))

(defparameter *song-tree* (generate-huffman-tree *song*))



; (decode '(0 1 1 0 0 1 0 1 0 1 1 1 0) *sample-tree*)
; (encode '(A D A B B C A) *sample-tree*)
; (decode (encode '(d a b c a b) *sample-tree*) *sample-tree*)

; (decode (encode *song* *song-tree*) *song-tree*)
5 changes: 3 additions & 2 deletions src/markov.lisp
@@ -1,6 +1,7 @@
(in-package #:sand.markov)

(defparameter *text* (slurp "data/lightships-and-lighthouses.txt"))
(defparameter *text*
(read-file-into-string "data/lightships-and-lighthouses.txt"))

(defclass markov ()
((database :initarg :database :accessor markov-database)
Expand Down Expand Up @@ -47,7 +48,7 @@
(if-first-time (pushnew prefix beginnings :test 'equal))
(vector-push-extend
suffix
(gethash-or-init prefix database (make-vector))))))
(ensure-gethash prefix database (make-vector))))))
(make-instance 'markov
:database database
:beginnings (coerce beginnings 'vector))))
Expand Down
3 changes: 3 additions & 0 deletions vendor/make-quickutils.lisp
Expand Up @@ -14,6 +14,9 @@
:tree-collect
:ensure-gethash
:required-argument
:read-file-into-string
:hash-table-alist
:hash-table-plist
; :switch
; :while
; :ensure-boolean
Expand Down
82 changes: 79 additions & 3 deletions vendor/quickutils.lisp
Expand Up @@ -2,7 +2,7 @@
;;;; See http://quickutil.org for details.

;;;; To regenerate:
;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE :TREE-COLLECT :ENSURE-GETHASH :REQUIRED-ARGUMENT) :ensure-package T :package "SAND.QUICKUTILS")
;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE :TREE-COLLECT :ENSURE-GETHASH :REQUIRED-ARGUMENT :READ-FILE-INTO-STRING :HASH-TABLE-ALIST :HASH-TABLE-PLIST) :ensure-package T :package "SAND.QUICKUTILS")

(eval-when (:compile-toplevel :load-toplevel :execute)
(unless (find-package "SAND.QUICKUTILS")
Expand All @@ -18,7 +18,10 @@
:ENSURE-FUNCTION :COMPOSE :CURRY
:RCURRY :TAKE :N-GRAMS
:DEFINE-CONSTANT :RIFFLE :TREE-COLLECT
:ENSURE-GETHASH :REQUIRED-ARGUMENT))))
:ENSURE-GETHASH :REQUIRED-ARGUMENT
:WITH-OPEN-FILE* :WITH-INPUT-FROM-FILE
:READ-FILE-INTO-STRING
:HASH-TABLE-ALIST :HASH-TABLE-PLIST))))

(deftype string-designator ()
"A string designator type. A string designator is either a string, a symbol,
Expand Down Expand Up @@ -286,9 +289,82 @@ use as an initialization form for structure and class-slots, and
a default value for required keyword arguments."
(error "Required argument ~@[~S ~]missing." name))


(defmacro with-open-file* ((stream filespec &key direction element-type
if-exists if-does-not-exist external-format)
&body body)
"Just like `with-open-file`, but `nil` values in the keyword arguments mean to use
the default value specified for `open`."
(once-only (direction element-type if-exists if-does-not-exist external-format)
`(with-open-stream
(,stream (apply #'open ,filespec
(append
(when ,direction
(list :direction ,direction))
(when ,element-type
(list :element-type ,element-type))
(when ,if-exists
(list :if-exists ,if-exists))
(when ,if-does-not-exist
(list :if-does-not-exist ,if-does-not-exist))
(when ,external-format
(list :external-format ,external-format)))))
,@body)))


(defmacro with-input-from-file ((stream-name file-name &rest args
&key (direction nil direction-p)
&allow-other-keys)
&body body)
"Evaluate `body` with `stream-name` to an input stream on the file
`file-name`. `args` is sent as is to the call to `open` except `external-format`,
which is only sent to `with-open-file` when it's not `nil`."
(declare (ignore direction))
(when direction-p
(error "Can't specifiy :DIRECTION for WITH-INPUT-FROM-FILE."))
`(with-open-file* (,stream-name ,file-name :direction :input ,@args)
,@body))


(defun read-file-into-string (pathname &key (buffer-size 4096) external-format)
"Return the contents of the file denoted by `pathname` as a fresh string.
The `external-format` parameter will be passed directly to `with-open-file`
unless it's `nil`, which means the system default."
(with-input-from-file
(file-stream pathname :external-format external-format)
(let ((*print-pretty* nil))
(with-output-to-string (datum)
(let ((buffer (make-array buffer-size :element-type 'character)))
(loop
:for bytes-read = (read-sequence buffer file-stream)
:do (write-sequence buffer datum :start 0 :end bytes-read)
:while (= bytes-read buffer-size)))))))


(defun hash-table-alist (table)
"Returns an association list containing the keys and values of hash table
`table`."
(let ((alist nil))
(maphash (lambda (k v)
(push (cons k v) alist))
table)
alist))


(defun hash-table-plist (table)
"Returns a property list containing the keys and values of hash table
`table`."
(let ((plist nil))
(maphash (lambda (k v)
(setf plist (list* k v plist)))
table)
plist))

(eval-when (:compile-toplevel :load-toplevel :execute)
(export '(with-gensyms with-unique-names once-only compose curry rcurry
n-grams define-constant riffle tree-collect ensure-gethash
required-argument)))
required-argument read-file-into-string hash-table-alist
hash-table-plist)))

;;;; END OF quickutils.lisp ;;;;

0 comments on commit af244a1

Please sign in to comment.