From 137c70cb104b8ad606c8c1f857b1ff320635384a Mon Sep 17 00:00:00 2001 From: Takeshi Abe Date: Wed, 14 Oct 2009 21:33:16 +0900 Subject: [PATCH] initial import --- Makefile | 8 +++ README | 20 ++++++ lcs.scm | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ test.scm | 55 +++++++++++++++ 4 files changed, 293 insertions(+) create mode 100644 Makefile create mode 100644 README create mode 100644 lcs.scm create mode 100644 test.scm diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f8fc933 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +SCHEME = ypsilon --sitelib=. + +.PHONY: check test + +check: test + +test: + $(SCHEME) test.scm diff --git a/README b/README new file mode 100644 index 0000000..be86914 --- /dev/null +++ b/README @@ -0,0 +1,20 @@ +An R6RS library for Longest Common Subsequence (LCS) + +== License + +New BSD License + +== Acknowledgements + +Special thanks to the authors of Gauche[0]'s util.lcs. + +== Notes + +* Tests needs Ypsilon[1] with an external library (xunit)[2]; + Run `YPSILON_SITELIB="path of (xunit)" make test'. + +== References + +[0] http://practical-scheme.net/gauche/ +[1] http://code.google.com/p/ypsilon/ +[2] http://github.com/tabe/xunit/ diff --git a/lcs.scm b/lcs.scm new file mode 100644 index 0000000..f592cda --- /dev/null +++ b/lcs.scm @@ -0,0 +1,210 @@ +;; +;; Copyright (c) 2009 Takeshi Abe. All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions +;; are met: +;; +;; 1. Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; 2. Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; 3. Neither the name of the authors nor the names of its contributors +;; may be used to endorse or promote products derived from this +;; software without specific prior written permission. +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(library (lcs) + (export ses + lcs-fold + lcs + lcs-with-positions + lcs-edit-list) + (import (rnrs)) + + ;; cf. S. Wu, U. Manber, G. Myers, and W. Miller, "An O(NP) Sequence Comparison Algorithm" (1989) + (define (ses a b . opt-eq) + (let* ((eq-proc (if (null? opt-eq) equal? (car opt-eq))) + (m (length a)) + (n (length b)) + (u (if (<= m n) 1 -1)) + (M (min m n)) + (N (max m n)) + (A (list->vector (cons #f (if (<= m n) a b)))) + (B (list->vector (cons #f (if (<= m n) b a)))) + (M+1 (+ M 1)) + (D (- N M)) + (fp (make-vector (+ M N 3) -1)) + (path (make-vector (+ M N 3) '()))) + + (letrec-syntax ((fp-ref (syntax-rules () ((_ i) (vector-ref fp (+ i M+1))))) + (fp-set! (syntax-rules () ((_ i v) (vector-set! fp (+ i M+1) v)))) + (path-ref (syntax-rules () ((_ i) (vector-ref path (+ i M+1))))) + (path-set! (syntax-rules () ((_ i v) (vector-set! path (+ i M+1) v)))) + (path-push! (syntax-rules () ((_ i v) (path-set! i (cons v (path-ref i))))))) + + (define (snake! k) + (let* ((y1 (+ (fp-ref (- k 1)) 1)) + (y2 (fp-ref (+ k 1))) + (y (max y1 y2))) + (cond ((> y1 y2) + (path-set! k (map values (path-ref (- k 1)))) + (path-push! k u)) + (else + (path-set! k (map values (path-ref (+ k 1)))) + (path-push! k (- u)))) + (let loop ((x (- y k)) + (y y)) + (cond ((and (< x M) + (< y N) + (eq-proc (vector-ref A (+ x 1)) + (vector-ref B (+ y 1)))) + (path-push! k 0) + (loop (+ x 1) (+ y 1))) + (else (fp-set! k y)))))) + + (let p-loop ((p 0)) + (do ((k (- p) (+ k 1))) + ((= k D)) + (snake! k)) + (do ((k (+ D p) (- k 1))) + ((= k D)) + (snake! k)) + (snake! D) + (if (= (fp-ref D) N) + (values (+ D (* 2 p)) (cdr (reverse (path-ref D)))) + (p-loop (+ p 1))))))) + + (define (lcs-fold a-proc b-proc both-proc seed a b . opt-eq) + (call-with-values + (lambda () (apply ses a b opt-eq)) + (lambda (_ s) + (let loop ((a a) + (i 0) + (b b) + (j 0) + (s s) + (seed seed)) + (if (null? s) + seed + (case (car s) + ((0) + (loop (cdr a) + (+ i 1) + (cdr b) + (+ j 1) + (cdr s) + (both-proc (car a) seed))) + ((1) + (loop a + i + (cdr b) + (+ j 1) + (cdr s) + (b-proc (car b) seed))) + (else + (loop (cdr a) + (+ i 1) + b + j + (cdr s) + (a-proc (car a) seed))))))))) + + (define (lcs a b . opt-eq) + (let ((pass (lambda (x seed) seed))) + (reverse (apply lcs-fold pass pass cons '() a b opt-eq)))) + + (define (lcs-with-positions a b . opt-eq) + (call-with-values + (lambda () (apply ses a b opt-eq)) + (lambda (_ s) + (let loop ((a a) + (i 0) + (b b) + (j 0) + (s s) + (n 0) + (r '())) + (if (null? s) + (list n (reverse r)) + (case (car s) + ((0) + (loop (cdr a) + (+ i 1) + (cdr b) + (+ j 1) + (cdr s) + (+ n 1) + (cons (list (car a) i j) r))) + ((1) + (loop a + i + (cdr b) + (+ j 1) + (cdr s) + n + r)) + (else + (loop (cdr a) + (+ i 1) + b + j + (cdr s) + n + r)))))))) + + (define (lcs-edit-list a b . opt-eq) + (call-with-values + (lambda () (apply ses a b opt-eq)) + (lambda (_ s) + (let loop ((a a) + (i 0) + (b b) + (j 0) + (s s) + (hunk '()) + (hunks '())) + (if (null? s) + (reverse + (if (null? hunk) hunks (cons (reverse hunk) hunks))) + (case (car s) + ((0) + (loop (cdr a) + (+ i 1) + (cdr b) + (+ j 1) + (cdr s) + '() + (if (null? hunk) hunks (cons (reverse hunk) hunks)))) + ((1) + (loop a + i + (cdr b) + (+ j 1) + (cdr s) + `((+ ,j ,(car b)) ,@hunk) + hunks)) + (else + (loop (cdr a) + (+ i 1) + b + j + (cdr s) + `((- ,i ,(car a)) ,@hunk) + hunks)))))))) + +) diff --git a/test.scm b/test.scm new file mode 100644 index 0000000..2b616ae --- /dev/null +++ b/test.scm @@ -0,0 +1,55 @@ +#!r6rs + +(import (rnrs) (lcs) (xunit)) + +(define-syntax assert-ses + (syntax-rules () + ((_ expected-n expected-s a b opt ...) + (call-with-values + (lambda () (ses a b opt ...)) + (lambda (n s) + (assert-= expected-n n) + (assert-equal? expected-s s)))))) + +(define-syntax assert-lcs + (syntax-rules () + ((_ expected a b opt ...) + (assert-equal? expected (lcs a b opt ...))))) + +(define-syntax assert-lcs-with-positions + (syntax-rules () + ((_ expected a b opt ...) + (assert-equal? expected (lcs-with-positions a b opt ...))))) + +(define-syntax assert-lcs-edit-list + (syntax-rules () + ((_ expected a b opt ...) + (assert-equal? expected (lcs-edit-list a b opt ...))))) + +(assert-ses 0 '() '() '()) +(assert-ses 1 '(-1) '(a) '()) +(assert-ses 1 '(1) '() '(a)) +(assert-ses 0 '(0) '(a) '(a)) +(assert-ses 7 '(1 0 0 1 1 -1 -1 -1 0 1) '(a b d e 2 f) '(0 a b 1 c f g)) + +(assert-lcs '(a b) '(a b c) '(a b a)) +(assert-lcs '(a b a d) '(a b a c d) '(a e b a d)) +(assert-lcs '(#\a #\b) '(#\a #\b #\c) '(#\A #\B #\A) char-ci=?) + +(assert-lcs-with-positions '(1 ((a 0 0))) '(a) '(a)) +(assert-lcs-with-positions '(2 ((a 1 2) (b 2 3))) '(x a b y) '(p q a b)) +(assert-lcs-with-positions '(2 ((a 1 1) (b 2 3))) '(x a b y) '(p a q b)) +(assert-lcs-with-positions '(0 ()) '(x y) '(p q)) + +(assert-lcs-edit-list + '(((- 0 "A")) + ((+ 2 "D")) + ((+ 4 "F") (- 4 "H")) + ((+ 6 "K")) + ((+ 9 "R") (+ 10 "S") (+ 11 "T") (- 8 "N") (- 9 "P")) + ) + '("A" "B" "C" "E" "H" "J" "L" "M" "N" "P") + '("B" "C" "D" "E" "F" "J" "K" "L" "M" "R" "S" "T") + ) + +(report)