diff --git a/Code/Compiler/code-generation.lisp b/Code/Compiler/code-generation.lisp index 403536a..e21fa7b 100644 --- a/Code/Compiler/code-generation.lisp +++ b/Code/Compiler/code-generation.lisp @@ -162,7 +162,7 @@ for n from 0 collect (alexandria:format-symbol nil "TRANSITION-~d" n)))) `(tagbody - (isum-case value + (csum-case value ,(layout-less *layout*) ,(layout-equal *layout*) ,@(loop for transition in (state-transitions state) diff --git a/Code/DFA-construction/derivative-classes.lisp b/Code/DFA-construction/derivative-classes.lisp index 6d10bba..295d2bd 100644 --- a/Code/DFA-construction/derivative-classes.lisp +++ b/Code/DFA-construction/derivative-classes.lisp @@ -5,7 +5,7 @@ (let ((sets (make-hash-table :test 'equal))) (loop for set1 in sets1 do (loop for set2 in sets2 - for intersection = (set-intersection set1 set2) + for intersection = (csum-intersection set1 set2) do (setf (gethash intersection sets) t))) (alexandria:hash-table-keys sets))) @@ -16,7 +16,7 @@ "Produce a list of the 'classes' (sets) of characters that compiling the regular expression would have to dispatch on." (with-hash-consing (*derivative-classes* re) (trivia:ematch re - ((literal set) (list set (set-inverse set))) + ((literal set) (list set (csum-complement set))) ((or (empty-string) (tag-set _)) (list +universal-set+)) diff --git a/Code/DFA-construction/derivative.lisp b/Code/DFA-construction/derivative.lisp index bad4356..7489771 100644 --- a/Code/DFA-construction/derivative.lisp +++ b/Code/DFA-construction/derivative.lisp @@ -8,7 +8,7 @@ (trivia:ematch re ((or (empty-string) (empty-set) (tag-set _)) (empty-set)) ((literal matching-set) - (if (set-null (set-intersection matching-set set)) + (if (csum-null-p (csum-intersection matching-set set)) (empty-set) (empty-string))) ((join r s) @@ -69,7 +69,7 @@ do (setf (gethash target variables) value)))) (map 'nil (lambda (element) - (let* ((new-re (derivative re (symbol-set (char-code element)))) + (let* ((new-re (derivative re (singleton-set (char-code element)))) (effects (remove-if (lambda (x) (equal (car x) (cdr x))) (effects re)))) (format t "~&~a~& ~:c ~a" diff --git a/Code/DFA-construction/make-dfa.lisp b/Code/DFA-construction/make-dfa.lisp index ed8da7d..bd5b4ea 100644 --- a/Code/DFA-construction/make-dfa.lisp +++ b/Code/DFA-construction/make-dfa.lisp @@ -50,8 +50,8 @@ (state-transitions last-state))) (t (setf (transition-class same-transition) - (set-union (transition-class same-transition) - class)))))) + (csum-union (transition-class same-transition) + class)))))) (trivia:defun-match re-stopped-p (re) ((alpha (empty-set) _) t) @@ -104,7 +104,7 @@ (t (let ((classes (derivative-classes expression))) (dolist (class classes) - (unless (set-null class) + (unless (csum-null-p class) (let* ((next-expression (derivative expression class)) (tags-to-set (keep-used-assignments next-expression diff --git a/Code/DFA-construction/re-types.lisp b/Code/DFA-construction/re-types.lisp index 0a8a91c..4022ff5 100644 --- a/Code/DFA-construction/re-types.lisp +++ b/Code/DFA-construction/re-types.lisp @@ -14,19 +14,18 @@ (define-rewrites (literal set) :printer ((literal set) - (print-isum set stream))) + (print-csum set stream))) (defun kleene (r) (repeat r 0 nil nil)) (trivia:defpattern kleene (r) `(repeat ,r 0 nil nil)) -(defun empty-set () - (literal (symbol-set))) +(defun empty-set () (literal +empty-set+)) (trivia:defpattern empty-set () (alexandria:with-gensyms (set) `(trivia:guard (literal ,set) - (set-null ,set)))) + (csum-null-p ,set)))) (defun universal-set () (repeat (literal +universal-set+) 0 nil nil)) @@ -94,7 +93,7 @@ ((either (empty-set) r) r) ((either r (empty-set)) r) ((either (literal s1) (literal s2)) - (literal (set-union s1 s2))) + (literal (csum-union s1 s2))) ((either r (universal-set)) (if (has-tags-p r) (trivia.next:next) ; Preserve tags then @@ -144,7 +143,7 @@ ((both (empty-string) (tag-set s)) (tag-set s)) ((both (literal s1) (literal s2)) - (literal (set-intersection s1 s2)))) + (literal (csum-intersection s1 s2)))) :printer ((both r s) (format stream "(~a) ∩ (~a)" r s))) (define-rewrites (invert r) @@ -186,7 +185,7 @@ (defun text (vector) (reduce #'join (map 'vector (lambda (e) - (literal (symbol-set (char-code e)))) + (literal (singleton-set (char-code e)))) vector) :initial-value (empty-string) :from-end t)) diff --git a/Code/DFA-construction/sets.lisp b/Code/DFA-construction/sets.lisp index 3ad7fa7..ca646e5 100644 --- a/Code/DFA-construction/sets.lisp +++ b/Code/DFA-construction/sets.lisp @@ -1,198 +1,208 @@ (in-package :one-more-re-nightmare) -;;; Gilbert Baumann's isum.lisp - -;; MAKE-TEST-FORM - -;; To support large character sets, we need an implemention of a set of -;; characters. Traditional scanner generators would at some place just -;; enumerate the alphabet \Sigma, which is not feasible with large character -;; sets like Unicode. - -;; We handle all transitions in the automaton as a set of of the codes of -;; characters, expressed by an ISUM. The representation of such a set is -;; best defined by the ISUM-MEMBER function, but here is an overview to get -;; the idea: - -;; () is the empty set -;; (a b) is the set [a, b) -;; (a b c d) is the set [a, b) u [c, d) -;; (nil) is everything -;; (nil a b) is everything but [a, b) - -;; An ISUM is a sequence of stricly monotonic increasing integers. The idea -;; is that when you sweep a pointer over the list at each element found the -;; membership in the set changes. Like (1 10 12 15). You start outside the -;; set, find 1 and say "above or equal 1 is in the set" and then find 10 and -;; say "above or equal 10 is not in the set" and so on. This way it is very -;; easy to implement Boolean operations on sets. - -(alexandria:define-constant +empty-set+ '() :test 'equal) -(alexandria:define-constant +universal-set+ '(nil) :test 'equal) - -(defun singleton-set (x) - "Returns the ISUM, that contains only /x/." - (list x (1+ x))) - -(defun symbol-range (from below) - "Returns the ISUM, that contains every code point that is in [from, below)" - (list from below)) - -;;; Boolean operation on ISUMs - -(defmacro isum-op (op A B) - "Combine the sets A and B by the Boolean operator op, which should be a -valid argument to the BOOLE function. An integer x is member of the -resulting set iff - (logbitp 0 (boole op (if (isum-member x A) 1 0) (if (isum-member x B) 1 0))) - is non-NIL. That way e.g. boole-ior denotes the union." - `(let ((A ,A) - (B ,B)) - (let* ((Ain 0) - (Bin 0) - (Cin 0) - (s nil) - (res (cons nil nil)) - (resf res)) - ;; Get rid of an initial NIL, which indicates a complemented set. - (when (and A (null (car A))) - (pop A) (setq Ain (- 1 Ain))) - (when (and B (null (car B))) - (pop B) (setq Bin (- 1 Bin))) - ;; Now traverse A and B in parallel and generate the resulting sequence. - (loop - (when (/= Cin (ldb (byte 1 0) (boole ,op Ain Bin))) - (setf resf (setf (cdr resf) (cons s nil))) - (setf Cin (- 1 Cin))) - (cond ((null A) - (cond ((null B) - (return)) - (t - (setq s (pop B)) - (setq Bin (- 1 Bin))))) - ((null B) - (setq s (pop A)) (setq Ain (- 1 Ain))) - ((< (car A) (car B)) - (setq s (pop A)) (setq Ain (- 1 Ain))) - ((< (car B) (car A)) - (setq s (pop B)) (setq Bin (- 1 Bin))) +;;;; Character sums +;;; Character sums (csums) represent a set of characters as +;;; a combination of ranges and "symbolic" classes (such as +;;; [:alpha:], [:digit:], etc). Their implementation is somewhat +;;; similar to Gilbert Baumann's "isum" integer sums, used +;;; in clex2 and earlier versions of one-more-re-nightmare. + +;;; Class sets +;; A class set is an element of ℙ(ℙ(classes)) i.e. a set of sets +;; of character classes that are part of a range. + +;; This gets normalised to P on SBCL, but it looks pretty. +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun ℙ (x) (expt 2 x))) +(alexandria:define-constant +classes+ + '((:alpha alpha-char-p) (:digit digit-char-p) (:lower lower-case-p) (:upper upper-case-p)) + :test 'equal) +(defconstant +empty-class-set+ 0) +(defconstant +class-set-bits+ (ℙ (length +classes+))) +(defconstant +universal-class-set+ (1- (ℙ (ℙ (length +classes+))))) +(defun class-set-complement (c) (logxor #xFFFF c)) + +;;; Character sets +;; A character set is a union of intersections of character ranges +;; and class sets. A character set has the form ((class-set start end) ...) +;; with the first start fixed to 0 and the last end fixed to *CODE-LIMIT*. + +(defvar *code-limit* char-code-limit) +(define-symbol-macro +empty-set+ (list (list +empty-class-set+ 0 char-code-limit))) +(define-symbol-macro +universal-set+ (list (list +universal-class-set+ 0 char-code-limit))) +(defun range (start limit) + "The character set for [START, LIMIT)" + (list (list +empty-class-set+ 0 start) + (list +universal-class-set+ start limit) + (list +empty-class-set+ limit *code-limit*))) +(defun singleton-set (x) (range x (1+ x))) +(defun class-set (class) + (let ((p (position class +classes+ :key #'first))) + (when (null p) (error "No class named ~S" class)) + (loop for i below +class-set-bits+ + when (logbitp p i) + sum (ash 1 i) into class-set + finally (return (list (list class-set 0 *code-limit*)))))) +(defun remove-empty-ranges (csum) + (remove 0 csum :key #'first)) + +(defun print-csum (csum stream) + (cond + ((equal csum +empty-set+) (write-string "[]" stream)) + ((equal csum +universal-set+) (write-string "Σ" stream)) + (t (let ((csum (remove-empty-ranges csum))) + (if (and (alexandria:length= 1 csum) (= (first (first csum)) +universal-class-set+)) + (if (= (1+ (second (first csum))) (third (first csum))) + (write-char (code-char (second (first csum))) stream) + (format stream "[~c-~c]" + (code-char (second (first csum))) + (code-char (1- (third (first csum)))))) + (format stream "[~s]" csum)))))) + +;;; Operations on character sets + +(defun coalesce-csum (cset) + "Coalesce adjacent ranges with the same class set in a character set." + (loop until (null cset) + collect (let* ((f (first cset)) + (l (member (first f) (rest cset) :key #'first :test #'/=))) + (setf cset l) + (if (null l) + (list (first f) (second f) *code-limit*) + (list (first f) (second f) (second (first l))))))) + +;; A set table is a list of lists of values, and a list of ranges. +;; We ensure that ranges line up by only storing the ranges +;; once. +(defun align-csums (csets) + "Align the ranges in a list of character sets, returning a list of lists of values, and a list of ranges." + (labels ((align (csets values ranges start) + (if (null (first csets)) + (values (reverse values) (reverse ranges)) + (let ((end (reduce #'min csets :key (lambda (c) (third (first c)))))) + ;; Take a step. + (align + (loop for c in csets + collect (if (= (third (first c)) end) (rest c) c)) + (cons (loop for c in csets collect (first (first c))) values) + (cons (list start end) ranges) + end))))) + (align csets '() '() 0))) + +(defmacro define-csum-op (name class-op arguments) + `(defun ,name ,arguments + (multiple-value-bind (values ranges) + (align-csums (list ,@arguments)) + (coalesce-csum + (mapcar (lambda (v r) (cons (apply #',class-op v) r)) + values ranges))))) + +(define-csum-op csum-union logior (a b)) +(define-csum-op csum-intersection logand (a b)) +(define-csum-op csum-complement class-set-complement (a)) +(define-csum-op csum-difference logandc2 (a b)) +(defun csum-null-p (csum) (equal csum +empty-set+)) + +;;; Character set dispatch +;; This could have element type (UNSIGNED-BYTE 4) but that'd take +;; more effort to decode; so we go with bytes. +;; Rhetorical question: Are there any other ways to compress this +;; table? +(alexandria:define-constant +character-class-table+ + (let ((table + (make-array char-code-limit + :element-type '(unsigned-byte 8) + :initial-element 0))) + (dotimes (i char-code-limit table) + (let ((character (code-char i))) + (unless (null character) + (loop for (nil predicate) in +classes+ + for x = 1 then (ash x 1) + do (when (funcall predicate (code-char i)) + (setf (aref table i) (logior x (aref table i))))))))) + :test 'equalp) +(declaim (inline lookup-class)) +(defun lookup-class (code) + (aref +character-class-table+ code)) + +(defmacro csum-case (var less-than equal &body cases) + (labels ((dispatch-classes (values) + (if (alexandria:length= 1 values) + `(progn ,@(cdar values)) + (alexandria:with-gensyms (result) + `(let ((,result (lookup-class ,var))) + (cond + ,@(loop for (class-set . body) in values + collect `(,(if (= +universal-class-set+ class-set) + 't + `(logbitp ,result ,class-set)) + ,@body))))))) + (singleton-p (range) (= (1+ (first range)) (second range))) + (middle (list) (butlast (rest list))) + (dispatch-csums (values ranges) + (cond + ((alexandria:length= 1 ranges) + ;; There's only one more range, so dispatch on classes. + (dispatch-classes (first values))) + ;; Detect singleton sets to use = on, e.g. [^ab], a and b. + ((and (equal (first values) (first (last values))) + (every #'singleton-p (middle ranges))) + `(cond + ,@(loop for r in (middle ranges) + for v in (middle values) + collect `((,equal ,var ,(first r)) ,(dispatch-classes v))) + (t ,(dispatch-classes (first values))))) + ;; Bisect and continue dispatching. (t - (setq s (pop A)) (setq Ain (- 1 Ain)) - (pop B) (setq Bin (- 1 Bin))))) - (cdr res)))) - -;; Now we could define interesting set operations in terms of ISUM-OP. - -(defun set-union (a b) (isum-op boole-ior a b)) -(defun set-intersection (a b) (isum-op boole-and a b)) -(defun symbol-set-difference (a b) (isum-op boole-andc2 a b)) -(defun set-inverse (a) (isum-op boole-c1 a nil)) -(defun set-null (isum) (null isum)) - -(defun symbol-set (&rest symbols) - (reduce #'set-union symbols :key #'singleton-set :initial-value +empty-set+)) - -(trivia:defpattern single-isum-case (a next) - (alexandria:with-gensyms (succ) - `(trivia:guard (list* ,a ,succ ,next) - (= ,succ (1+ ,a))))) - -(defun fold-or (form next) - "Manually constant fold out (OR A NIL) to A. The compiler can do this, but generated code looks nicer with folding." - (if (eql next 'nil) - form - `(or ,form ,next))) - -(defun make-test-form (isum variable less-or-equal equal) - (trivia:ematch isum - ('() 'nil) - ((list* nil next) - (trivia:match (make-test-form next variable less-or-equal equal) - ('nil 't) - (form `(not ,form)))) - ((single-isum-case a next) - (fold-or `(,equal ,a ,variable) - (make-test-form next variable less-or-equal equal))) - ((list* low high next) - (fold-or `(,less-or-equal ,low ,variable ,(1- high)) - (make-test-form next variable less-or-equal equal))))) - -(defun print-isum (isum stream) - (labels ((print-union (rest) - (trivia:ematch rest - ('()) - ((single-isum-case a next) - (write-char (code-char a) stream) - (print-union next)) - ((list* a b next) - (format stream "~c-~c" (code-char a) (code-char (1- b))) - (print-union next))))) - (trivia:ematch isum - ('() (write-string "ø" stream)) - ((single-isum-case a 'nil) - (write-char (code-char a) stream)) - ((list* 'nil (single-isum-case a 'nil)) - (format stream "[¬~c]" (code-char a))) - ((list* nil rest) - (write-string "[¬" stream) - (print-union rest) - (write-string "]" stream)) - (_ - (write-string "[" stream) - (print-union isum) - (write-string "]" stream))))) - -(defmacro isum-case (var less-than equal &body clauses) - ;; A variation on the theme, actually this is of more general use, since - ;; Common Lisp implementations lack a jump table based implementation of - ;; CASE. - (let* ((last-out nil) - (res '()) - (default (find nil clauses :key #'caar)) - (clauses (remove default clauses)) - (clauses (mapcar (lambda (clause) - (cond ((integerp (car clause)) - (cons (list (car clause) (1+ (car clause))) - (cdr clause))) - (t clause))) - clauses))) - (assert (every #'evenp (mapcar #'length (mapcar #'car clauses))) - () - "Multiple negative ISUMs in dispatch?") - (loop - (when (every #'null (mapcar #'car clauses)) - (return)) - (let ((pivot (reduce #'min (remove nil (mapcar #'caar clauses))))) - (setf clauses (mapcar (lambda (y) - (if (eql (caar y) pivot) (cons (cdar y) (cdr y)) y)) - clauses)) - (let ((out (or (find-if (lambda (y) (oddp (length (car y)))) clauses) - default))) - (unless (equal (cdr out) last-out) - (push pivot res) - (push (if (null (cddr out)) (cadr out) `(progn ,@(cdr out))) - res) - (setf last-out (cdr out)))))) - (labels ((cons-if (cond cons alt) - (cond ((null cons) `(unless ,cond ,alt)) - ((null alt) `(when ,cond ,cons)) - (t `(if ,cond ,cons ,alt)))) - (cons-progn (x) - (if (null (cdr x)) - (car x) - `(progn ,@x))) - (foo (xs default) - (cond ((null xs) default) - ;; Check for a singleton set. - ((and (= 4 (length xs)) - (= (1+ (first xs)) (third xs)) - (eq (fourth xs) default)) - (cons-if `(,equal ,var ,(first xs)) (second xs) (fourth xs))) - ((= 2 (length xs)) - (cons-if `(,less-than ,var ,(first xs)) default (second xs))) - (t - (let ((p (* 2 (floor (length xs) 4)))) - (cons-if `(,less-than ,var ,(elt xs p)) - (foo (subseq xs 0 p) default) - (foo (subseq xs (+ 2 p)) (elt xs (1+ p))))))))) - (foo (reverse res) (cons-progn (cdr default)))))) + (let* ((mid (floor (length values) 2))) + `(if (,less-than ,var ,(first (nth mid ranges))) + ,(dispatch-csums (subseq values 0 mid) + (subseq ranges 0 mid)) + ,(dispatch-csums (subseq values mid) + (subseq ranges mid)))))))) + (multiple-value-bind (values ranges) + (align-csums + (loop for (csum . body) in cases + collect (loop for (cl s e) in csum + collect `((,cl . ,body) ,s ,e)))) + ;; Remove unreachable values from the set table. + (let ((values (mapcar #'remove-empty-ranges values))) + (dispatch-csums values ranges))))) + +(defun csum-has-classes-p (csum) + "Does a character sum use any non-trivial character classes?" + (loop for (c s e) in csum + thereis (and (/= c +empty-class-set+) (/= c +universal-class-set+)))) + +(defun make-test-form (csum variable) + "Compute a form which tests if VARIABLE is an element of CSUM, using OR, <= and =" + (cond + ((equal csum +empty-set+) 'nil) + ((equal csum +universal-set+) 't) + (t + `(or ,@(loop for (c s e) in csum + unless (= c +empty-class-set+) + do (assert (= c +universal-class-set+)) + and collect (if (= (1+ s) e) + `(= ,s ,variable) + `(<= ,s ,variable ,(1- e)))))))) + +;;; Named sets + +(defun named-range (name) + (labels ((∪ (&rest rest) (reduce #'csum-union rest)) + (d (a b) (csum-intersection a (csum-complement b))) + (s (&rest rest) (reduce #'csum-union rest :key (alexandria:compose #'singleton-set #'char-code)))) + (alexandria:eswitch (name :test 'string=) + ("alpha" (class-set :alpha)) + ("alnum" (∪ (class-set :alpha) (class-set :digit))) + ("blank" (s #\Space #\Tab)) + ("cntrl" (∪ (range 0 32) (singleton-set 127))) + ("digit" (class-set :digit)) + ("graph" (csum-complement (∪ (named-range "cntrl") (s #\Space)))) + ("lower" (class-set :lower)) + ("print" (∪ (named-range "graph") (s #\Space))) + ("punct" (d (named-range "graph") (∪ (class-set :alpha) (class-set :digit)))) + ("space" (∪ (singleton-set 11) (s #\Space #\Return #\Newline #\Tab))) + ("upper" (class-set :upper)) + ("xdigit" (∪ (class-set "digit") (s #\A #\B #\C #\D #\E #\F #\a #\b #\c #\d #\e #\f)))))) diff --git a/Code/Interface/code-cache.lisp b/Code/Interface/code-cache.lisp index 147a6f1..013465f 100644 --- a/Code/Interface/code-cache.lisp +++ b/Code/Interface/code-cache.lisp @@ -57,7 +57,6 @@ *code-cache*) (cons function groups))))) -(declaim (ftype (function (string) t) string-type-of)) (defun string-type-of (string) (loop for type in *string-types* when (typep string type) diff --git a/Code/Interface/syntax.lisp b/Code/Interface/syntax.lisp index f5b56d0..31ca97b 100644 --- a/Code/Interface/syntax.lisp +++ b/Code/Interface/syntax.lisp @@ -99,8 +99,8 @@ (declare (ignore comma)) (assert (or (null max) (null min) (> max min)) (max min) - "The maximum repetition count should not be less than the minimum ~ -number; the maximum of ~d is less than the minimum of ~d." max min) + "The maximum repetition count ~d cannot be less than the minimum count ~d." + max min) (cons (or min 0) max))) (esrap:defrule repetition @@ -129,29 +129,6 @@ number; the maximum of ~d is less than the minimum of ~d." max min) (:lambda (list) (parse-integer (format nil "~{~A~}" list)))) -(esrap:defrule character-range-character - (not (or (or "-" "]" "[" "\\")))) - -(esrap:defrule character-range-range - (and character-range-character "-" character-range-character) - (:destructure (low dash high) - (declare (ignore dash)) - (symbol-range (char-code low) (1+ (char-code high))))) - -(esrap:defrule character-range-single - (or character-range-character escaped-character) - (:lambda (character) - (singleton-set (char-code character)))) - -(esrap:defrule character-range - (and "[" (esrap:? "¬") - (* (or character-range-range character-range-single)) - "]") - (:destructure (left invert ranges right) - (declare (ignore left right)) - (let ((sum (reduce #'set-union ranges - :initial-value +empty-set+))) - (literal (if invert (set-inverse sum) sum))))) (esrap:defrule escaped-character (and #\\ character) @@ -164,8 +141,50 @@ number; the maximum of ~d is less than the minimum of ~d." max min) (esrap:defrule literal (or escaped-character (not special-character)) - (:lambda (character) (literal (symbol-set (char-code character))))) + (:lambda (character) (literal (singleton-set (char-code character))))) (esrap:defrule empty-string "" (:constant (empty-string))) + +;;; Character ranges +(esrap:defrule character-range-escaped-constituent + (and #\\ (or #\[ #\] #\- #\^ #\¬)) + (:destructure (backslash char) + (declare (ignore backslash)) + (char char 0))) + +(esrap:defrule character-range-name-constituent (not (or ":" "[" "]"))) + +(esrap:defrule character-range-named + (and "[:" (+ character-range-name-constituent) ":]") + (:destructure (open characters close) + (declare (ignore open close)) + (named-range (coerce characters 'string)))) + +(esrap:defrule character-range-character + (or character-range-escaped-constituent (not (or "-" "]" "[" "\\")))) + +(esrap:defrule character-range-single + character-range-character + (:lambda (character) + (singleton-set (char-code character)))) + +(esrap:defrule character-range-range + (and character-range-character "-" character-range-character) + (:destructure (low dash high) + (declare (ignore dash)) + (range (char-code low) (1+ (char-code high))))) + +(esrap:defrule character-range + (and "[" + (esrap:? (or "^" "¬")) + (* (or character-range-named + character-range-range + character-range-single)) + "]") + (:destructure (left invert ranges right) + (declare (ignore left right)) + (let ((sum (reduce #'csum-union ranges + :initial-value +empty-set+))) + (literal (if invert (csum-complement sum) sum))))) diff --git a/Code/SIMD/code-generation.lisp b/Code/SIMD/code-generation.lisp index e844420..48ead0d 100644 --- a/Code/SIMD/code-generation.lisp +++ b/Code/SIMD/code-generation.lisp @@ -8,7 +8,7 @@ (make-symbol (format nil "BROADCAST-~d" value))))) (defun test-from-isum (variable isum) - (translate-scalar-code variable (make-test-form isum variable '<= '=))) + (translate-scalar-code variable (make-test-form isum variable))) (defun code-from-prefix (prefix) (assert (not (null prefix)) () "Why /even bother/ with a zero-length prefix?") diff --git a/Code/SIMD/loop.lisp b/Code/SIMD/loop.lisp index 9f29d08..0f3aa21 100644 --- a/Code/SIMD/loop.lisp +++ b/Code/SIMD/loop.lisp @@ -29,13 +29,14 @@ (re-empty-p next-expression) (not (eq next-state previous-state)) (not (assignments-idempotent-p - (transition-tags-to-set transition)))) + (transition-tags-to-set transition))) + (csum-has-classes-p (transition-class transition))) (return-from transition-code (call-next-method))) ;; Try to skip to the first character after for which this transition doesn't apply. (let* ((vector-length (/ one-more-re-nightmare.vector-primops:+v-length+ *bits*))) (trivia:ematch (test-from-isum 'loaded - (set-inverse (transition-class transition))) + (csum-complement (transition-class transition))) (:never (call-next-method)) (:always (error "Found a transition that is never taken.")) (test diff --git a/Code/SIMD/new-sbcl-x86-64.lisp b/Code/SIMD/new-sbcl-x86-64.lisp deleted file mode 100644 index d0affe3..0000000 --- a/Code/SIMD/new-sbcl-x86-64.lisp +++ /dev/null @@ -1,264 +0,0 @@ -(in-package :one-more-re-nightmare) - -(defun find-op (name) - (or (find-symbol (format nil "V-~a~d" name *bits*) - ':one-more-re-nightmare.vector-primops) - (error "No primop named ~a" name))) - -;; AVX2 is a pile of stink and only has instructions for signed -;; comparisons. So, in order to fake an unsigned comparison, we -;; subtract #x80 from everything. - -(defun find-8-bit-broadcast (n) - (find-broadcast (mod (- n #x80) #x100))) -(defvar *swizzled-name*) -(defun swizzle-8-bits () - (or *swizzled-name* - (setf *swizzled-name* (make-symbol "SWIZZLE")))) - -(defun translate-scalar-code (variable code) - (let* ((*swizzled-name* nil) - (translated (%translate-scalar-code code))) - (if (null *swizzled-name*) - translated - `(let ((,*swizzled-name* - (one-more-re-nightmare.vector-primops:v8- ,variable ,(find-broadcast #x80)))) - ,translated)))) - -(trivia:defun-ematch %translate-scalar-code (code) - "Translate some 'scalar' code generated by MAKE-TEST-FORM into a vectorised computation." - ('t :always) - ('nil :never) - ;; All the Boolean operators just map over their arguments. - ((list 'not thing) - `(,(find-op "NOT") - ,(%translate-scalar-code thing))) - ((list* 'or things) - `(,(find-op "OR") - ,@(mapcar #'%translate-scalar-code things))) - ;; Ditto for = really. - ((list '= value variable) - ;; Note that = works the same if it's signed or not; it's only > - ;; that requires more effort - `(,(ecase *bits* - (32 'one-more-re-nightmare.vector-primops:v32=) - (8 'one-more-re-nightmare.vector-primops:v8=)) - ,(find-broadcast value) ,variable)) - ;; Generating good code for <= is tricky though. Whoever designed - ;; SSE2 and AVX2 decided that just having = and > were good enough, - ;; so we need an efficient implementation of ≤ from those. - ((list '<= 0 value high) - ;; No lower bounds here. Note that X ≤ N ⇔ N + 1 > X - (ecase *bits* - (32 `(one-more-re-nightmare.vector-primops:v32> ,(find-broadcast (1+ high)) ,value)) - (8 `(one-more-re-nightmare.vector-primops:v8> ,(find-8-bit-broadcast (1+ high)) ,value)))) - ((list '<= low value high) - ;; Similarly, N ≤ X ⇔ X > N - 1 - (ecase *bits* - (32 - `(one-more-re-nightmare.vector-primops:v-and32 - ;; Similarly, N ≤ X ⇔ X > N - 1 - (one-more-re-nightmare.vector-primops:v32> ,value ,(find-broadcast (1- low))) - (one-more-re-nightmare.vector-primops:v32> ,(find-broadcast (1+ high)) ,value))) - (8 - `(one-more-re-nightmare.vector-primops:v-and8 - (one-more-re-nightmare.vector-primops:v8> ,(swizzle-8-bits) - ,(find-8-bit-broadcast (1- low))) - (one-more-re-nightmare.vector-primops:v8> ,(find-8-bit-broadcast (1+ high)) - ,(swizzle-8-bits))))))) - -(defmacro define-boring-vop (name args result &body generator) - `(progn - (sb-vm::define-vop (,name) - (:translate ,name) - (:policy :fast-safe) - (:args ,@(loop for (name nil . rest) in args - collect (cons name rest))) - (:arg-types ,@(mapcar #'second args)) - (:results (,(first result) ,@(rest (rest result)))) - (:result-types ,(second result)) - (:generator 0 ,@generator)))) - -(defmacro define-op (name bits args instruction-name) - (let ((primitive-type (ecase bits - (8 'sb-vm::simd-pack-256-ub8) - (32 'sb-vm::simd-pack-256-ub32)))) - `(progn - (sb-c:defknown ,name - ,(loop for nil in args collect `(sb-ext:simd-pack-256 (unsigned-byte ,bits))) - (sb-ext:simd-pack-256 (unsigned-byte ,bits)) - (sb-c:foldable sb-c:flushable sb-c:movable) - :overwrite-fndb-silently t) - (define-boring-vop ,name - ,(loop for arg in args - collect `(,arg ,primitive-type :scs (sb-vm::int-avx2-reg))) - (result ,primitive-type :scs (sb-vm::int-avx2-reg)) - (sb-vm::inst ,instruction-name result ,@args))))) - -(defun one-more-re-nightmare.vector-primops:all-of (variables) - (reduce (lambda (a b) `(,(find-op "AND") ,a ,b)) - variables)) - -(defconstant one-more-re-nightmare.vector-primops:+v-length+ 256) - -(in-package :sb-vm) - -;;;; Boolean operations - -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v-and8 8 (a b) vpand) -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v-and32 32 (a b) vpand) - -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v-or8 8 (a b) vpor) -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v-or32 32 (a b) vpor) - -(macrolet ((frob (name bits arg-type) - `(progn - (defknown ,name - ((simd-pack-256 (unsigned-byte ,bits))) - (simd-pack-256 (unsigned-byte ,bits)) - (foldable flushable movable) - :overwrite-fndb-silently t) - (define-vop (,name) - (:translate ,name) - (:policy :fast-safe) - (:args (value :scs (int-avx2-reg))) - (:arg-types ,arg-type) - (:results (result :scs (int-avx2-reg))) - (:result-types ,arg-type) - (:temporary (:sc int-avx2-reg) ones) - (:generator 0 - (inst vpcmpeqd ones ones ones) ; get all 1s - (inst vpxor result ones value)))))) ; 1111... (+) A = ¬A - (frob one-more-re-nightmare.vector-primops:v-not8 8 simd-pack-256-ub8) - (frob one-more-re-nightmare.vector-primops:v-not32 32 simd-pack-256-ub32)) - -;;;; Comparisons - -;; This is a signed comparison, but as there are fewer than 2³¹ -;; Unicode characters, no one needs to know that. -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v32> 32 (a b) vpcmpgtd) - -;; We do need to know that this is a signed comparison, since we do -;; want to target (UNSIGNED-BYTE 8) too, and we handle it above. -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v8> 8 (a b) vpcmpgtb) - -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v32= 32 (a b) vpcmpeqd) - -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v8= 8 (a b) vpcmpeqb) - -(one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v8- 8 (a b) vpsubb) - -;;;; Broadcasts - -(defknown one-more-re-nightmare.vector-primops:v-broadcast32 - ((unsigned-byte 32)) - (simd-pack-256 (unsigned-byte 32)) - ;; Not constant folding, because loading a folded broadcast is - ;; slower than reproducing it again. - (flushable movable) - :overwrite-fndb-silently t) - -(one-more-re-nightmare::define-boring-vop - one-more-re-nightmare.vector-primops:v-broadcast32 - ((integer unsigned-num :scs (unsigned-reg))) - (result simd-pack-256-ub32 :scs (int-avx2-reg)) - (inst movq result integer) - (inst vpbroadcastd result result)) - -(defknown one-more-re-nightmare.vector-primops:v-broadcast8 - ((unsigned-byte 8)) - (simd-pack-256 (unsigned-byte 8)) - (flushable movable) - :overwrite-fndb-silently t) - -(one-more-re-nightmare::define-boring-vop - one-more-re-nightmare.vector-primops:v-broadcast8 - ((integer unsigned-num :scs (unsigned-reg))) - (result simd-pack-256-ub8 :scs (int-avx2-reg)) - (inst movq result integer) - (inst vpbroadcastb result result)) - -;;;; Move mask - -(defknown one-more-re-nightmare.vector-primops:v-movemask32 - ((simd-pack-256 (unsigned-byte 32))) - (unsigned-byte 8) - (flushable movable) - :overwrite-fndb-silently t) - -(one-more-re-nightmare::define-boring-vop - one-more-re-nightmare.vector-primops:v-movemask32 - ((pack simd-pack-256-ub32 :scs (int-avx2-reg))) - (result unsigned-num :scs (unsigned-reg)) - (inst vmovmskps result pack)) - -(defknown one-more-re-nightmare.vector-primops:v-movemask8 - ((simd-pack-256 (unsigned-byte 8))) - (unsigned-byte 32) - (flushable movable) - :overwrite-fndb-silently t) - -(one-more-re-nightmare::define-boring-vop - one-more-re-nightmare.vector-primops:v-movemask8 - ((pack simd-pack-256-ub8 :scs (int-avx2-reg))) - (result unsigned-num :scs (unsigned-reg)) - (inst vpmovmskb result pack)) - -;;;; Load - -(defknown one-more-re-nightmare.vector-primops:v-load32 - ((simple-array character 1) sb-int:index) - (simd-pack-256 (unsigned-byte 32)) - (foldable flushable movable) - :overwrite-fndb-silently t) - -(one-more-re-nightmare::define-boring-vop - one-more-re-nightmare.vector-primops:v-load32 - ((string simple-character-string :scs (descriptor-reg)) - (index tagged-num :scs (any-reg))) - (result simd-pack-256-ub32 :scs (int-avx2-reg)) - (inst vmovdqu result - (ea (- (* vector-data-offset n-word-bytes) - other-pointer-lowtag) - ;; Characters are 4 bytes, fixnums have a trailing 0 so - ;; just multiply by 2. - string index 2))) - -(defknown one-more-re-nightmare.vector-primops:v-load8 - ((simple-array base-char 1) sb-int:index) - (simd-pack-256 (unsigned-byte 8)) - (foldable flushable movable) - :overwrite-fndb-silently t) - -(one-more-re-nightmare::define-boring-vop - one-more-re-nightmare.vector-primops:v-load8 - ((string simple-base-string :scs (descriptor-reg)) - (index unsigned-num :scs (unsigned-reg))) - (result simd-pack-256-ub8 :scs (int-avx2-reg)) - (inst vmovdqu result - (ea (- (* vector-data-offset n-word-bytes) - other-pointer-lowtag) - string index 1))) - -;;;; Find first set - -(defknown one-more-re-nightmare.vector-primops:find-first-set - ((unsigned-byte 64)) - (mod 64) - (foldable flushable movable) - :overwrite-fndb-silently t) - -(one-more-re-nightmare::define-boring-vop - one-more-re-nightmare.vector-primops:find-first-set - ((integer unsigned-num :scs (unsigned-reg))) - (result unsigned-num :scs (unsigned-reg)) - (inst bsf result integer)) diff --git a/Code/SIMD/one-more-re-nightmare-simd.asd b/Code/SIMD/one-more-re-nightmare-simd.asd index 630619c..8562d84 100644 --- a/Code/SIMD/one-more-re-nightmare-simd.asd +++ b/Code/SIMD/one-more-re-nightmare-simd.asd @@ -6,10 +6,7 @@ :serial t :components ((:file "package") (:file "code-generation") - #+(and sbcl x86-64) - #.(if (find-symbol "SIMD-PACK-256-UB8" "SB-VM") - '(:file "new-sbcl-x86-64") - '(:file "sbcl-x86-64")) + (:file "sbcl-x86-64") #-(and sbcl x86-64) (:file "i-got-nothing") (:file "prefix") diff --git a/Code/SIMD/prefix.lisp b/Code/SIMD/prefix.lisp index 01237c4..8d9d475 100644 --- a/Code/SIMD/prefix.lisp +++ b/Code/SIMD/prefix.lisp @@ -5,7 +5,9 @@ ((empty-set) (values '() (empty-string))) ((literal set) - (values `((:literal ,set)) (empty-string))) + (if (csum-has-classes-p set) + (values '() (empty-string)) + (values `((:literal ,set)) (empty-string)))) ((tag-set tags) (values `((:tags ,tags)) (empty-string))) ((join r s) diff --git a/Code/SIMD/sbcl-x86-64.lisp b/Code/SIMD/sbcl-x86-64.lisp index 92f4773..d2ab61b 100644 --- a/Code/SIMD/sbcl-x86-64.lisp +++ b/Code/SIMD/sbcl-x86-64.lisp @@ -31,11 +31,11 @@ ('nil :never) ;; All the Boolean operators just map over their arguments. ((list 'not thing) - `(one-more-re-nightmare.vector-primops:v-not + `(,(find-op "NOT") ,(%translate-scalar-code thing))) ((list* 'or things) - `(one-more-re-nightmare.vector-primops:v-or - ,@(mapcar #'%translate-scalar-code things))) + (reduce (lambda (a b) `(,(find-op "OR") ,a ,b)) + (mapcar #'%translate-scalar-code things))) ;; Ditto for = really. ((list '= value variable) ;; Note that = works the same if it's signed or not; it's only > @@ -56,12 +56,12 @@ ;; Similarly, N ≤ X ⇔ X > N - 1 (ecase *bits* (32 - `(one-more-re-nightmare.vector-primops:v-and + `(one-more-re-nightmare.vector-primops:v-and32 ;; Similarly, N ≤ X ⇔ X > N - 1 (one-more-re-nightmare.vector-primops:v32> ,value ,(find-broadcast (1- low))) (one-more-re-nightmare.vector-primops:v32> ,(find-broadcast (1+ high)) ,value))) (8 - `(one-more-re-nightmare.vector-primops:v-and + `(one-more-re-nightmare.vector-primops:v-and8 (one-more-re-nightmare.vector-primops:v8> ,(swizzle-8-bits) ,(find-8-bit-broadcast (1- low))) (one-more-re-nightmare.vector-primops:v8> ,(find-8-bit-broadcast (1+ high)) @@ -79,21 +79,24 @@ (:result-types ,(second result)) (:generator 0 ,@generator)))) -(defmacro define-op (name args instruction-name) - `(progn - (sb-c:defknown ,name - ,(loop for nil in args collect '(sb-ext:simd-pack-256 integer)) - (sb-ext:simd-pack-256 integer) - (sb-c:foldable sb-c:flushable sb-c:movable) - :overwrite-fndb-silently t) - (define-boring-vop ,name - ,(loop for arg in args - collect `(,arg sb-vm::simd-pack-256-int :scs (sb-vm::int-avx2-reg))) - (result sb-vm::simd-pack-256-int :scs (sb-vm::int-avx2-reg)) - (sb-vm::inst ,instruction-name result ,@args)))) +(defmacro define-op (name bits args instruction-name) + (let ((primitive-type (ecase bits + (8 'sb-vm::simd-pack-256-ub8) + (32 'sb-vm::simd-pack-256-ub32)))) + `(progn + (sb-c:defknown ,name + ,(loop for nil in args collect `(sb-ext:simd-pack-256 (unsigned-byte ,bits))) + (sb-ext:simd-pack-256 (unsigned-byte ,bits)) + (sb-c:foldable sb-c:flushable sb-c:movable) + :overwrite-fndb-silently t) + (define-boring-vop ,name + ,(loop for arg in args + collect `(,arg ,primitive-type :scs (sb-vm::int-avx2-reg))) + (result ,primitive-type :scs (sb-vm::int-avx2-reg)) + (sb-vm::inst ,instruction-name result ,@args))))) (defun one-more-re-nightmare.vector-primops:all-of (variables) - (reduce (lambda (a b) `(one-more-re-nightmare.vector-primops:v-and ,a ,b)) + (reduce (lambda (a b) `(,(find-op "AND") ,a ,b)) variables)) (defconstant one-more-re-nightmare.vector-primops:+v-length+ 256) @@ -103,55 +106,62 @@ ;;;; Boolean operations (one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v-and (a b) vpand) - + one-more-re-nightmare.vector-primops:v-and8 8 (a b) vpand) (one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v-or (a b) vpor) + one-more-re-nightmare.vector-primops:v-and32 32 (a b) vpand) -(defknown one-more-re-nightmare.vector-primops:v-not - ((simd-pack-256 integer)) - (simd-pack-256 integer) - (foldable flushable movable) - :overwrite-fndb-silently t) - -(define-vop (one-more-re-nightmare.vector-primops:v-not) - (:translate one-more-re-nightmare.vector-primops:v-not) - (:policy :fast-safe) - (:args (value :scs (int-avx2-reg))) - (:arg-types simd-pack-256-int) - (:results (result :scs (int-avx2-reg))) - (:result-types simd-pack-256-int) - (:temporary (:sc int-avx2-reg) ones) - (:generator 0 - (inst vpcmpeqd ones ones ones) ; get all 1s - (inst vpxor result ones value))) ; 1111... (+) A = ¬A +(one-more-re-nightmare::define-op + one-more-re-nightmare.vector-primops:v-or8 8 (a b) vpor) +(one-more-re-nightmare::define-op + one-more-re-nightmare.vector-primops:v-or32 32 (a b) vpor) + +(macrolet ((frob (name bits arg-type) + `(progn + (defknown ,name + ((simd-pack-256 (unsigned-byte ,bits))) + (simd-pack-256 (unsigned-byte ,bits)) + (foldable flushable movable) + :overwrite-fndb-silently t) + (define-vop (,name) + (:translate ,name) + (:policy :fast-safe) + (:args (value :scs (int-avx2-reg))) + (:arg-types ,arg-type) + (:results (result :scs (int-avx2-reg))) + (:result-types ,arg-type) + (:temporary (:sc int-avx2-reg) ones) + (:generator 0 + (inst vpcmpeqd ones ones ones) ; get all 1s + (inst vpxor result ones value)))))) ; 1111... (+) A = ¬A + (frob one-more-re-nightmare.vector-primops:v-not8 8 simd-pack-256-ub8) + (frob one-more-re-nightmare.vector-primops:v-not32 32 simd-pack-256-ub32)) ;;;; Comparisons ;; This is a signed comparison, but as there are fewer than 2³¹ ;; Unicode characters, no one needs to know that. (one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v32> (a b) vpcmpgtd) + one-more-re-nightmare.vector-primops:v32> 32 (a b) vpcmpgtd) ;; We do need to know that this is a signed comparison, since we do ;; want to target (UNSIGNED-BYTE 8) too, and we handle it above. (one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v8> (a b) vpcmpgtb) + one-more-re-nightmare.vector-primops:v8> 8 (a b) vpcmpgtb) (one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v32= (a b) vpcmpeqd) + one-more-re-nightmare.vector-primops:v32= 32 (a b) vpcmpeqd) (one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v8= (a b) vpcmpeqb) + one-more-re-nightmare.vector-primops:v8= 8 (a b) vpcmpeqb) (one-more-re-nightmare::define-op - one-more-re-nightmare.vector-primops:v8- (a b) vpsubb) + one-more-re-nightmare.vector-primops:v8- 8 (a b) vpsubb) ;;;; Broadcasts (defknown one-more-re-nightmare.vector-primops:v-broadcast32 ((unsigned-byte 32)) - (simd-pack-256 integer) + (simd-pack-256 (unsigned-byte 32)) ;; Not constant folding, because loading a folded broadcast is ;; slower than reproducing it again. (flushable movable) @@ -160,46 +170,46 @@ (one-more-re-nightmare::define-boring-vop one-more-re-nightmare.vector-primops:v-broadcast32 ((integer unsigned-num :scs (unsigned-reg))) - (result simd-pack-256-int :scs (int-avx2-reg)) + (result simd-pack-256-ub32 :scs (int-avx2-reg)) (inst movq result integer) (inst vpbroadcastd result result)) (defknown one-more-re-nightmare.vector-primops:v-broadcast8 ((unsigned-byte 8)) - (simd-pack-256 integer) + (simd-pack-256 (unsigned-byte 8)) (flushable movable) :overwrite-fndb-silently t) (one-more-re-nightmare::define-boring-vop one-more-re-nightmare.vector-primops:v-broadcast8 ((integer unsigned-num :scs (unsigned-reg))) - (result simd-pack-256-int :scs (int-avx2-reg)) + (result simd-pack-256-ub8 :scs (int-avx2-reg)) (inst movq result integer) (inst vpbroadcastb result result)) ;;;; Move mask (defknown one-more-re-nightmare.vector-primops:v-movemask32 - ((simd-pack-256 integer)) + ((simd-pack-256 (unsigned-byte 32))) (unsigned-byte 8) (flushable movable) :overwrite-fndb-silently t) (one-more-re-nightmare::define-boring-vop one-more-re-nightmare.vector-primops:v-movemask32 - ((pack simd-pack-256-int :scs (int-avx2-reg))) + ((pack simd-pack-256-ub32 :scs (int-avx2-reg))) (result unsigned-num :scs (unsigned-reg)) (inst vmovmskps result pack)) (defknown one-more-re-nightmare.vector-primops:v-movemask8 - ((simd-pack-256 integer)) + ((simd-pack-256 (unsigned-byte 8))) (unsigned-byte 32) (flushable movable) :overwrite-fndb-silently t) (one-more-re-nightmare::define-boring-vop one-more-re-nightmare.vector-primops:v-movemask8 - ((pack simd-pack-256-int :scs (int-avx2-reg))) + ((pack simd-pack-256-ub8 :scs (int-avx2-reg))) (result unsigned-num :scs (unsigned-reg)) (inst vpmovmskb result pack)) @@ -207,7 +217,7 @@ (defknown one-more-re-nightmare.vector-primops:v-load32 ((simple-array character 1) sb-int:index) - (simd-pack-256 integer) + (simd-pack-256 (unsigned-byte 32)) (foldable flushable movable) :overwrite-fndb-silently t) @@ -215,7 +225,7 @@ one-more-re-nightmare.vector-primops:v-load32 ((string simple-character-string :scs (descriptor-reg)) (index tagged-num :scs (any-reg))) - (result simd-pack-256-int :scs (int-avx2-reg)) + (result simd-pack-256-ub32 :scs (int-avx2-reg)) (inst vmovdqu result (ea (- (* vector-data-offset n-word-bytes) other-pointer-lowtag) @@ -225,7 +235,7 @@ (defknown one-more-re-nightmare.vector-primops:v-load8 ((simple-array base-char 1) sb-int:index) - (simd-pack-256 integer) + (simd-pack-256 (unsigned-byte 8)) (foldable flushable movable) :overwrite-fndb-silently t) @@ -233,7 +243,7 @@ one-more-re-nightmare.vector-primops:v-load8 ((string simple-base-string :scs (descriptor-reg)) (index unsigned-num :scs (unsigned-reg))) - (result simd-pack-256-int :scs (int-avx2-reg)) + (result simd-pack-256-ub8 :scs (int-avx2-reg)) (inst vmovdqu result (ea (- (* vector-data-offset n-word-bytes) other-pointer-lowtag) diff --git a/Documentation/interface.scrbl b/Documentation/interface.scrbl index 2db7995..b73e83f 100644 --- a/Documentation/interface.scrbl +++ b/Documentation/interface.scrbl @@ -23,14 +23,16 @@ intersections of regular expressions. "E{j,j}" "repeat" "«E»" "submatch" "(E)" "change precedence" - "[r+]" "character range" + "[r]" "character range" "[¬r]" "complement ranges" + "[^r]" "complement ranges" "$" "every character" "c" "literal character" "" ""] -@rule["r" "c" "single character" - "c-c" "character range"] -@rule["i" "" ""] +@rule["r" "" "empty range" + "cr" "single character" + "c-cr" "character range" + "[::]r" "character class"] @rule["j" "" "bound" "" "no bound"] @rule["c" "" ""] diff --git a/Tests/tests.lisp b/Tests/tests.lisp index 3b49b85..701bf89 100644 --- a/Tests/tests.lisp +++ b/Tests/tests.lisp @@ -122,5 +122,15 @@ full-warning "A full warning should be generated by trying to address registers that don't exist.")) +(parachute:define-test character-classes + :parent one-more-re-nightmare + ;; Per + (all-string-matches "'HI EVERYBODY!!!!!!!!!!' 'try pressing the the Caps Lock key'" + "[[:upper:]][[:lower:]]+" '(#("Caps") #("Lock")) + "[[:punct:]]{3,}" '(#("!!!!!!!!!!'")) + "[[:digit:]]" '() + "[[:digit:]y]" '(#("y") #("y")) + "[¬[:punct:]]+" '(#("HI EVERYBODY") #(" ") #("try pressing the the Caps Lock key")))) + (defun run-tests () (parachute:test 'one-more-re-nightmare))