Permalink
Browse files

Add old ABNF-PPCRE code.

Signed-off-by: Stelian Ionescu <sionescu@common-lisp.net>
  • Loading branch information...
1 parent 8575e3f commit ecaddc3de6222db5e6d831988cae0b29d53fa29d @sionescu committed with Aug 20, 2008
Showing with 247 additions and 0 deletions.
  1. +111 −0 base/abnf-ppcre.lisp
  2. +12 −0 io.uri.asd
  3. +10 −0 io.uri/pkgdcl.lisp
  4. +114 −0 io.uri/uri.lisp
View
@@ -0,0 +1,111 @@
+;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; indent-tabs-mode: nil -*-
+
+(cl:defpackage :iolib-utils.abnf-pcre
+ (:use :cl :alexandria)
+ (:export #:define-abnf-syntax))
+
+(in-package :iolib-utils.abnf-pcre)
+
+(declaim (optimize (debug 3)))
+
+(defparameter *default-non-terminals*
+ (alexandria:alist-hash-table
+ '((:digit . (:range #\0 #\9))
+ (:hexdig . (:or (:range #\0 #\9) (:range #\a #\f) (:range #\A #\F)))
+ (:alpha . (:or (:range #\a #\z) (:range #\A #\Z))))
+ :test #'eq))
+
+(defvar *non-terminals* nil)
+(defvar *non-terminals-normalized* nil)
+
+(defgeneric normalize-expression* (op expr))
+(defgeneric encode-expression* (op args))
+
+;;;
+;;; Normalizing
+;;;
+
+(defun only-char-elems-p (expr)
+ (loop :for e :in expr
+ :always (or (and (stringp e) (= 1 (length e)))
+ (and (consp e) (eql :range (car e))))))
+
+(defun normalize-expression (expr)
+ (etypecase expr
+ (cons (normalize-expression* (car expr) expr))
+ (keyword (gethash expr *default-non-terminals*))
+ (symbol (cond ((gethash expr *non-terminals-normalized*)
+ (gethash expr *non-terminals*))
+ (t
+ (setf (gethash expr *non-terminals*)
+ (normalize-expression (gethash expr *non-terminals*)))
+ (setf (gethash expr *non-terminals-normalized*) t))))
+ ((or string character) expr)))
+
+(defmacro define-normalizer (operator (&rest args) &body body)
+ `(defmethod normalize-expression* ((op (eql ,operator)) ,@args)
+ ,@body))
+
+(define-normalizer :range (expr)
+ (destructuring-bind (from to) (cdr expr)
+ (check-type from character)
+ (check-type to character)
+ (assert (<= (char-code from) (char-code to)))
+ `(:range ,from ,to)))
+
+(define-normalizer :any (expr)
+ (destructuring-bind (arg) (cdr expr)
+ `(:repeat (0) ,(normalize-expression arg))))
+
+(define-normalizer :req (expr)
+ (destructuring-bind (arg) (cdr expr)
+ `(:repeat (1) ,(normalize-expression arg))))
+
+(define-normalizer :opt (expr)
+ (destructuring-bind (arg) (cdr expr)
+ `(:repeat (0 1) ,(normalize-expression arg))))
+
+(define-normalizer :repeat (expr)
+ (destructuring-bind (params args) (cdr expr)
+ (ecase (car params)
+ (:= (destructuring-bind (val) (cdr params)
+ (check-type val unsigned-byte)
+ `(:repeat (,val ,val) ,(normalize-expression args))))
+ (:from (destructuring-bind (from &optional to) (cdr params)
+ (check-type from unsigned-byte)
+ (check-type to (or null unsigned-byte))
+ `(:repeat (,from ,to) ,(normalize-expression args))))
+ (:to (destructuring-bind (val) (cdr params)
+ (check-type val unsigned-byte)
+ `(:repeat (0 ,val) ,(normalize-expression args)))))))
+
+(define-normalizer :or (expr)
+ expr)
+
+(define-normalizer :not (expr)
+ (normalize-expression `(:or :not ,@(cdr expr))))
+
+(define-normalizer :conc (expr)
+ (list* :conc (mapcar #'normalize-expression (cdr expr))))
+
+(defmethod normalize-expression* (op expr)
+ (normalize-expression (list* :conc expr)))
+
+;;;
+;;; Encoding
+;;;
+
+(defun encode-expression (expr)
+ (let ((e (normalize-expression expr)))
+ (encode-expression* (car e) (cdr e))))
+
+(defmacro define-encoder (operator (&rest args) &body body)
+ `(defmethod encode-expression* ((op (eql ,operator)) ,@args)
+ ,@body))
+
+(define-encoder :range (args)
+ (destructuring-bind (from to) args
+ (format t "[~A-~A]" from to)))
+
+(defmacro define-abnf-syntax (name &body clauses)
+ )
View
@@ -0,0 +1,12 @@
+;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; indent-tabs-mode: nil -*-
+
+(asdf:defsystem :io.uri
+ :description "URI library."
+ :author "Stelian Ionescu <sionescu@common-lisp.net>"
+ :maintainer "Stelian Ionescu <sionescu@common-lisp.net>"
+ :licence "MIT"
+ :depends-on (:iolib.base :babel :cl-ppcre)
+ :pathname (merge-pathnames #p"io.uri/" *load-truename*)
+ :components
+ ((:file "pkgdcl")
+ (:file "uri" :depends-on ("pkgdcl"))))
View
@@ -0,0 +1,10 @@
+;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; indent-tabs-mode: nil -*-
+;;;
+;;; --- Package definition.
+;;;
+
+(in-package :common-lisp-user)
+
+(defpackage :io.uri
+ (:use :iolib.base :babel :cl-ppcre)
+ (:export))
View
@@ -0,0 +1,114 @@
+;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; indent-tabs-mode: nil -*-
+;;;
+;;; --- Parsing URIs.
+;;;
+
+(in-package :io.uri)
+
+(defmacro register-bind ((&rest bindings) (regex target-string &key (start 0) end) &body body)
+ (flet ((compute-bindings (fun)
+ (loop :for index :from 0 :for b :in bindings
+ :if (string/= b "_") :collect `(,b (,fun ,index)))))
+ (with-gensyms (string match-start match-end registers-start registers-end register-fun)
+ (let ((bindings (compute-bindings register-fun)))
+ `(let ((,string ,target-string))
+ (multiple-value-bind (,match-start ,match-end ,registers-start ,registers-end)
+ (scan ,regex ,string :start ,start :end (or ,end (length ,string)))
+ (declare (ignore ,match-end))
+ (flet ((,register-fun (index)
+ (when ,match-start
+ (let ((start (aref ,registers-start index))
+ (end (aref ,registers-end index)))
+ (when start (subseq ,string start end))))))
+ (let ,bindings
+ (declare (ignorable ,@(mapcar #'car bindings)))
+ ,@body))))))))
+
+(define-constant +uri-regexp+
+ "^<?(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?>?$"
+ :test #'string=)
+
+(defun %parse-uri (uri)
+ (multiple-value-bind (start end svec evec)
+ (scan +uri-regexp+ uri)
+ (declare (ignore end))
+ (flet ((reg (index)
+ (let ((s (aref svec index))
+ (e (aref evec index)))
+ (when s (subseq uri s e)))))
+ (when start
+ (values (reg 1) (reg 3) (reg 4) (reg 6) (reg 8))))))
+
+(defparameter +userinfo-regexp+ "(((%[0-9a-fA-F]{2}|[-$!&'()*+,;=:a-zA-Z0-9._~])*)@)?")
+(defparameter +host-regexp+ "([-~a-zA-Z0-9]+|\\[.*:.*:.*\\])")
+(defparameter +port-regexp+ "(:([0-9]*))?")
+(defparameter +authority-regexp+
+ (concatenate 'string +userinfo-regexp+ +host-regexp+ +port-regexp+))
+
+(defun %parse-authority (authority)
+ (multiple-value-bind (start end svec evec)
+ (scan +authority-regexp+ authority)
+ (declare (ignore end))
+ (flet ((reg (index)
+ (let ((s (aref svec index))
+ (e (aref evec index)))
+ (when s (subseq authority s e)))))
+ (when start
+ (values (reg 1) (reg 3) (reg 5))))))
+
+(define-abnf-syntax :uri
+ (uri . (scheme ":" hier-part (:opt ("?" query)) (:opt ("#" fragment))))
+ (hier-part . (:or ("//" authority path-abempty) path-absolute path-rootless path-empty))
+ (uri-reference . (:or uri relative-ref))
+ (absolute-uri . (scheme ":" hier-part (:opt ("?" query))))
+ (relative-ref . (relative-part (:opt ("?" query)) (:opt ("#" fragment))))
+ (relative-part . (:or ("//" authority path-abempty) path-absolute path-noscheme path-empty))
+ (scheme . (:alpha (:any (:or :alpha :digit "+" "-" "."))))
+ (authority . ((:opt (userinfo "@")) host (:opt (":" port))))
+ (userinfo . (:any (:or unreserved pct-encoded sub-delims ":")))
+ (host . (:or ip-literal ipv4-address reg-name))
+ (port . (:any :digit))
+ (ip-literal . ("[" (:or ipv6address ipvfuture) "]"))
+ (ipvfuture . ("v" (:req :hexdig) "." (:req (:or unreserved sub-delims ":"))))
+ (ipv6address . (:or ( (:repeat (:= 6) (h16 ":")) ls32)
+ ( "::" (:repeat (:= 5) (h16 ":")) ls32)
+ ((:opt h16) "::" (:repeat (:= 4) (h16 ":")) ls32)
+ ((:opt ((:repeat (:to 1) (h16 ":")) h16)) "::" (:repeat (:= 3) (h16 ":")) ls32)
+ ((:opt ((:repeat (:to 2) (h16 ":")) h16)) "::" (:repeat (:= 2) (h16 ":")) ls32)
+ ((:opt ((:repeat (:to 3) (h16 ":")) h16)) "::" (h16 ":") ls32)
+ ((:opt ((:repeat (:to 4) (h16 ":")) h16)) "::" ls32)
+ ((:opt ((:repeat (:to 5) (h16 ":")) h16)) "::" h16)
+ ((:opt ((:repeat (:to 6) (h16 ":")) h16)) "::")))
+ (h16 . (:repeat (:from 1 4) :hexdig))
+ (ls32 . (:or (h16 ":" h16) ipv4address))
+ (ipv4address . (dec-octet "." dec-octet "." dec-octet "." dec-octet))
+ (dec-octet . (:or :digit
+ ((:range #\1 #\9) :digit)
+ ("1" (:repeat (:= 2) :digit))
+ ("2" (:range #\0 #\4) :digit)
+ ("25" (:range #\0 #\5))))
+ (reg-name . (:any (:or unreserved pct-encoded sub-delims)))
+ (path . (:or path-abempty path-absolute path-noscheme path-rootless path-empty))
+ (path-abempty . (:any ("/" segment)))
+ (path-absolute . ("/" (:opt (segment-nz (:any ("/" segment))))))
+ (path-noscheme . (segment-nz-nc (:any ("/" segment))))
+ (path-rootless . (segment-nz (:any ("/" segment))))
+ (path-empty . (:not pchar))
+ (segment . (:any pchar))
+ (segment-nz . (:req pchar))
+ (segment-nz-nc . (:req (:or unreserved pct-encoded sub-delims "@")))
+ (pchar . (:or unreserved pct-encoded sub-delims ":" "@"))
+ (query . (:any (:or pchar "/" "?")))
+ (fragment . (:any (:or pchar "/" "?")))
+ (pct-encoded . ("%" :hexdig :hexdig))
+ (unreserved . (:or :alpha :digit "-" "." "_" "~"))
+ (reserved . (:or gen-delims sub-delims))
+ (gen-delims . (:or ":" "/" "?" "#" "[" "]" "@"))
+ (sub-delims . (:or "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=")))
+
+(define-condition uri-parse-error (parse-error) ())
+
+(defun uri-parse-error (message &rest args)
+ (error 'uri-parse-error
+ :format-control message
+ :format-arguments args))

0 comments on commit ecaddc3

Please sign in to comment.