Permalink
Browse files

Code is cleaner, commented, and tests were expanded.

Added report of theoretical fastest time.

Debugging is drastically different and resulted in better performance.

The function gen-ps was renamed to wrap-string.

IPosition was renamed to IAmotoen and significantly cleaned up.

Two new 'provided' symbols were added, to signify the end of input or to
accept any character.

The Not Predicate made it back in.

The function with-fns and post-process were added to facilitate wrapping grammars in
post-processing functions.

Grammars were updated to fit the new syntax, with json seeing a small
increase in tests and csv seeing significant increases.

The csv grammar namespace also demonstrates dynamically defined grammars
as well as wrapping a grammar in post-processing functions.
  • Loading branch information...
1 parent 984fe40 commit ee603053248dbb32917e6aeca3c18012e8298100 @richard-lyman committed Jul 21, 2012
View
@@ -1,4 +1,4 @@
-(defproject com.lithinos/amotoen "0.1.0-SNAPSHOT"
+(defproject com.lithinos/amotoen "0.2.0-SNAPSHOT"
:description "Amotoen is a Clojure library that supports PEG style definitions of grammars that can produce parsers."
:url "http://www.lithinos.com/amotoen"
:license {:name "EPL-v1.0" :distribution :repo :comments "same as Clojure" :url "http://www.eclipse.org/legal/epl-v10.html"}

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -6,35 +6,70 @@
; the terms of this license.
; You must not remove this notice, or any other, from this software.
-(ns com.lithinos.amotoen.grammars.csv)
+(ns com.lithinos.amotoen.grammars.csv
+ (:use [clojure.pprint])
+ (:use [com.lithinos.amotoen.core]))
(def grammar {
- :Start :Document
- :_* #"^[ \t]*"
- :Document ['(+ :Line) :$]
- :Line [:_* :Value '(* :_* "," :_* :Value) :_*]
- :Value '(| ["\"" :DoubleQuotedValue "\""]
- ["'" :SingleQuotedValue "'"]
- :VanillaValue)
- :DoubleQuotedValue '(* [(! "\"") (| :DEscapedChar :AnyChar)])
- :SingleQuotedValue '(* [(! "'") (| :SEscapedChar :AnyChar)])
- :VanillaValue '(* [(! ",") (| :VEscapedChar :AnyChar)])
- :DEscapedChar ["\\" '(| "\\" "\"")]
- :SEscapedChar ["\\" '(| "\\" "\'")]
- :VEscapedChar ["\\" '(| "\\" "\,")]
- :AnyChar #"^."
+ :Document [:Line '(* :Line) :$]
+ :Line [:_* :Value '(* [:_* \, :_* :Value]) :_* '(* :EndOfLine)]
+ :Value '(| [\" (* :DoubleQuotedValue) \"]
+ [\' (* :SingleQuotedValue) \']
+ (* :VanillaValue))
+ :DoubleQuotedValue '(| [\\ \"] [\\ \\] (% \"))
+ :SingleQuotedValue '(| [\\ \'] [\\ \\] (% \'))
+ :VanillaValue ['(! :EndOfLine) '(% \,)]
+ :_* '(* :Whitespace)
+ :Whitespace '(| \space \tab)
+ :EndOfLine '(| \newline \return)
})
(defn specified
- ([] (specified "," "\"" "\""))
- ([separator] (specified separator "\"" "\""))
+ ([] (specified \,\ \" \"))
+ ([separator] (specified separator \" \"))
([separator, wrapper] (specified separator wrapper wrapper))
([separator, left-wrapper, right-wrapper] {
- :Start :Document
- :_* #"^[ \t]*"
- :Document ['(+ :Line) :$]
- :Line [:_* :Value '(* :_* separator :_* :Value) :_*]
- :Value [left-wrapper :WrappedValue right-wrapper]
- :WrappedValue '(* [(! right-wrapper) (| :EscapedChar :AnyChar)])
- :EscapedChar ["\\" '(| "\\" right-wrapper)]
- :AnyChar #"^."}))
+ :Document [:Line '(* :Line) :$]
+ :Line [:_* :Value (list '* [:_* separator :_* :Value]) :_* '(* :EndOfLine)]
+ :Value (list '| [left-wrapper '(* :WrappedValue) right-wrapper]
+ '(* :VanillaValue))
+ :WrappedValue (list '| [\\ right-wrapper] [\\ \\] (list '% right-wrapper))
+ :VanillaValue ['(! :EndOfLine) (list '% separator)]
+ :_* '(* :Whitespace)
+ :Whitespace '(| \space \tab)
+ :EndOfLine '(| \newline \return)
+}))
+
+(defn ignore [_] "")
+(defn value-as-string [l]
+ (let [l (if (vector? l) (second l) l)]
+ (cond
+ (map? l) (str (first (vals l)))
+ true (apply str
+ (map #(first (vals %))
+ l)))))
+(defn without-guard [v]
+ (second v))
+(defn line-as-vec [v]
+ (let [others (map last (first (rest (rest v))))]
+ (reduce (fn [r i] (conj r (first (vals i))))
+ []
+ (conj others (second v)))))
+(defn lines-as-vec [v]
+ (let [v (vec (flatten v))]
+ (reduce (fn [r i] (if (empty? i) r (conj r (first (vals i)))))
+ []
+ (butlast v))))
+
+(def #^{:private true} to-clj-fns { :Document lines-as-vec
+ :Line line-as-vec
+ :Value value-as-string
+ :VanillaValue without-guard
+ :_* ignore
+ ;:EndOfLine (fn [_] nil)
+ :Whitespace ignore })
+
+(defn to-clj
+ ([s] (to-clj s grammar))
+ ([s g] (post-process :Document g (wrap-string s) to-clj-fns)))
+
@@ -6,42 +6,50 @@
; the terms of this license.
; You must not remove this notice, or any other, from this software.
-(ns com.lithinos.amotoen.grammars.json)
+(ns com.lithinos.amotoen.grammars.json
+ (:use [com.lithinos.amotoen.core]))
+
+(defn json-control-character [g w]
+ (let [s #{ \u0000 \u0001 \u0002 \u0003 \u0004 \u0005 \u0006 \u0007 \u0008 \u0009 \u000A \u000B \u000C \u000D \u000E \u000F
+ \u0010 \u0011 \u0012 \u0013 \u0014 \u0015 \u0016 \u0017 \u0018 \u0019 \u001A \u001B \u001C \u001D \u001E \u001F}]
+ (if (contains? s (c w))
+ (m w)
+ nil)))
(def grammar {
- :Start :JSONRoot
- :_* #"^[\n\r\t ]*"
- :JSONRoot [ :_* '(* :Value) :_* :$]
- :Value '(| :JSONString :JSONNumber :JSONObject :Array "true" "false" "null")
+ :_* '(* (| \newline \return \tab \space))
+ :JSONText [:_* '(| :JSONObject :Array) :_* :$]
+ :Value (list '| :JSONString :JSONObject :Array (pegs "true") (pegs "false") (pegs "null") :JSONNumber)
; Objects
:JSONObject '(| :EmptyObject :ContainingObject)
- :EmptyObject ["{" :_* "}"]
- :ContainingObject ["{" :_* :Members :_* "}"]
- :Members '(| [:Pair :_* "," :_* :Members] :Pair)
- :Pair [:JSONString :_* ":" :_* :Value]
+ :EmptyObject [\{ :_* \}]
+ :ContainingObject [\{ :_* :Members :_* \}]
+ :Members '(| [:Pair :_* \, :_* :Members] :Pair) ; Nests the structure significantly
+ :Pair [:JSONString :_* \: :_* :Value]
; Arrays
:Array '(| :EmptyArray :ContainingArray)
- :EmptyArray ["[" :_* "]"]
- :ContainingArray ["[" :_* :Elements :_* "]"]
- :Elements '(| [:Value :_* "," :_* :Elements] :Value)
+ :EmptyArray [\[ :_* \]]
+ :ContainingArray [\[ :_* :Elements :_* \]]
+ :Elements '(| [:Value :_* \, :_* :Elements] :Value) ; Nests the structure significantly
; Strings
:JSONString '(| :EmptyString :ContainingString)
- :EmptyString ["\"" "\""]
- :ContainingString ["\"" :Chars "\""]
- :Chars '(+ [(! :ControlCharacter) :Char])
- :ControlCharacter #"^[\u0000-\u001F]"
- :Char '(| :EscapedChar [(! "\"") :NonEscapedChar])
- :EscapedChar ["\\" '(| "\"" "\\" "/" "b" "f" "n" "r" "t" :Unicode)]
- :Unicode ["u" :HexDigit :HexDigit :HexDigit :HexDigit]
- :HexDigit #"^[0-9A-Fa-f]"
- :NonEscapedChar #"^." ; This is OK since the only way it's used is with appropriate guards.
+ :EmptyString [\" \"]
+ :ContainingString [\" :Chars \"]
+ :Chars [:GuardedChar '(* :GuardedChar)]
+ :GuardedChar ['(! :ControlCharacter) :Char]
+ :ControlCharacter (list 'a json-control-character)
+ :Char '(| :EscapedChar [(! \") :NonEscapedChar])
+ :EscapedChar [\\ '(| \" \\ \/ \b \f \n \r \t :Unicode)]
+ :Unicode [\u :HexDigit :HexDigit :HexDigit :HexDigit]
+ :HexDigit (lpegs '| "0123456789ABCDEFabcdef")
+ :NonEscapedChar :. ; This is OK since the only way it's used is with appropriate guards.
; Numbers
:JSONNumber '(| [:Int :Frac :Exp] [:Int :Exp] [:Int :Frac] :Int)
- :Int '(| ["-" :Digit1-9 :Digits] ["-" :Digit] [:Digit1-9 :Digits] :Digit)
- :Frac ["." :Digits]
+ :Int '(| [\- :Digit1-9 :Digits] [\- :Digit] [:Digit1-9 :Digits] :Digit)
+ :Frac [\. :Digits]
:Exp [:ExpLeader :Digits]
- :Digit #"^[0-9]"
- :Digit1-9 #"^[1-9]"
- :Digits #"^[0-9]+"
- :ExpLeader [#"^[eE]" #"^[+-]*"]
+ :Digit (lpegs '| "0123456789")
+ :Digit1-9 (lpegs '| "123456789")
+ :Digits [:Digit '(* :Digits)]
+ :ExpLeader ['(| \e \E) '(* (| \+ \-))]
})
@@ -6,7 +6,7 @@
; the terms of this license.
; You must not remove this notice, or any other, from this software.
-(ns com.lithinos.amotoen.markdown)
+(ns com.lithinos.amotoen.grammars.markdown)
(def grammar {
:Start {:Document #(let [ast (first (:Document %))]
@@ -6,53 +6,60 @@
; the terms of this license.
; You must not remove this notice, or any other, from this software.
-(ns com.lithinos.amotoen.minimark)
+(ns com.lithinos.amotoen.grammars.minimark
+ (:use [com.lithinos.amotoen.core]))
+
+(defn delimited
+ ([m] (delimited m m))
+ ([s e] [s [(list '% e) (list '* (list '% e))] e]))
(def grammar {
-; Start
- :Start :Content
+;Bulk
+ :Content [:Element '(* :Element) :$]
+ :Element '(| :_+ :Alphanumeric :Markup :.)
+ :Markup '(| :HRule :MDash :List :SS :U :H4 :H3 :H2 :H1 :B :I :Href :Pre)
+ :Markup-Guard (list '! (list '| :HRule :MDash :Unordered-List-Marker :Ordered-List-Marker
+ \^ (pegs "__") (pegs "====") (pegs "===") (pegs "==") \= (pegs "'''") (pegs "''") \[ (pegs "{{{")))
+ :Non-Markup [:Markup-Guard :Any-Char]
+ :NM+ [:Non-Markup '(* :Non-Markup)]
; Whitespace
- :N "\n"
- :_ " "
- :_* #"^\s*"
+ :N \newline
+ :_ \space
+ :_* '(* :_)
+ :_+ [:_ '(* :_)]
; Odds-n-Ends
- :Any-Char '(| :Empty-Line :Escaped-Char #"(?s)^.")
+ :Any-Char '(| :Empty-Line :Escaped-Char :.)
:Empty-Line [:N :N]
- :Escaped-Char ["!" #"^[A-Z!\[=]"]
- :Alphanumeric #"^[A-Za-z0-9]" ; No markup ever starts with an alphanumeric character
-;Bulk
- :Content ['(+ (| #"^\s+" :Alphanumeric :Markup #"^.")) :$]
- :Markup '(| :HRule :MDash :List :SS :U :H4 :H3 :H2 :H1 :B :I :Href :Pre)
- :Markup-Guard '(! (| :HRule :MDash :List-Marker "^" "__" "====" "===" "==" "=" "'''" "''" "[" "{{{"))
- :Non-Markup [:Markup-Guard '(| :Any-Char)]
- :NM+ '(+ :Non-Markup)
+ :Escaped-Char [\! (pegs "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\\[=]")]
+ :Alphanumeric (pegs "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") ; No markup ever starts with an alphanumeric character
;Links
:Href '(| :Href-Straight :Href-Explained)
- :Href-Straight ["[" :Href-Body "]"]
- :Href-Explained ["[" :Href-Text :_ :Href-Body "]"]
- :Href-Body #"^[^] ]+"
- :Href-Text #"^[^ ]+"
+ :Href-Straight [\[ :Href-Body \]]
+ :Href-Explained [\[ :Href-Text :_ :Href-Body \]]
+ :Href-Body [:Href-Body-Part '(* :Href-Body-Part)]
+ :Href-Body-Part '(% (| \] \space))
+ :Href-Text ['(% \space) '(* (% \space))]
;Lists
- :List-Marker '(| "-- " "-. ")
+ :Unordered-List-Marker (pegs "-- ")
+ :Ordered-List-Marker (pegs "-. ")
:List '(| :Ordered-List :Unordered-List)
- :Ordered-List '(+ [:_* "-. " :List-Body])
- :Unordered-List '(+ [:_* "-- " :List-Body])
- :List-Body '(+ :List-Chunk)
- :List-Chunk '(| :SS :U :B :I :Href :Pre #"^.")
+ :Ordered-List [:_* :Ordered-List-Marker :List-Body '(* [:_* :Ordered-List-Marker :List-Body])]
+ :Unordered-List [:_* :Unordered-List-Marker :List-Body '(* [:_* :Unordered-List-Marker :List-Body])]
+ :List-Body [:List-Chunk '(* :List-Chunk)]
+ :List-Chunk '(| :SS :U :B :I :Href :Pre :.)
;Superscript
- :SS ["^" :SS-Body "^"]
- :SS-Body '(+ [(! "^") :SS-Chunk])
- :SS-Chunk '(| :U :B :I :Href #"^.")
+ :SS [\^ :SS-Body \^]
+ :SS-Body ['(! \^) :SS-Chunk '(* [(! \^) :SS-Chunk])]
+ :SS-Chunk '(| :U :B :I :Href :.)
;Pre
- :Pre ["{{{" :Pre-Markup-Body "}}}"]
- :Pre-Markup-Body '(+ [(! "}}}") :Any-Char])
+ :Pre (delimited (pegs "{{{") (pegs "}}}"))
;Remaining
- :H1 ["=" #"^[^=]+" "="]
- :H2 ["==" '(+ [(! "==") #"^."]) "=="]
- :H3 ["===" '(+ [(! "===") #"^."]) "==="]
- :H4 ["====" '(+ [(! "====") #"^."]) "===="]
- :B ["'''" '(+ [(! "'''") #"^."]) "'''"]
- :I ["''" '(+ [(! "''") #"^."]) "''"]
- :U ["__" '(+ [(! "__") #"^."]) "__"]
- :HRule "----"
- :MDash "---"})
+ :H1 (delimited \=)
+ :H2 (delimited (pegs "=="))
+ :H3 (delimited (pegs "==="))
+ :H4 (delimited (pegs "===="))
+ :B (delimited (pegs "'''"))
+ :I (delimited (pegs "''"))
+ :U (delimited (pegs "__"))
+ :HRule (pegs "----")
+ :MDash (pegs "---")})
View
@@ -0,0 +1,22 @@
+{
+ "glossary": {
+ "title": "example glossary",
+ "GlossDiv": {
+ "title": "S",
+ "GlossList": {
+ "GlossEntry": {
+ "ID": "SGML",
+ "SortAs": "SGML",
+ "GlossTerm": "Standard Generalized Markup Language",
+ "Acronym": "SGML",
+ "Abbrev": "ISO 8879:1986",
+ "GlossDef": {
+ "para": "A meta-markup language, used to create markup languages such as DocBook.",
+ "GlossSeeAlso": ["GML", "XML"]
+ },
+ "GlossSee": "markup"
+ }
+ }
+ }
+ }
+}
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -0,0 +1,11 @@
+{"menu": {
+ "id": "file",
+ "value": "File",
+ "popup": {
+ "menuitem": [
+ {"value": "New", "onclick": "CreateNewDoc()"},
+ {"value": "Open", "onclick": "OpenDoc()"},
+ {"value": "Close", "onclick": "CloseDoc()"}
+ ]
+ }
+}}
Oops, something went wrong.

0 comments on commit ee60305

Please sign in to comment.