Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Pegasus speed. Minimark grammar working and related tests filled out.

  • Loading branch information...
commit 1fac7b2d952561eaf6794d94dcdd1165e4e1f3ce 1 parent 18c7e5e
@richard-lyman authored
View
76 src/com/lithinos/amotoen/core.clj
@@ -27,18 +27,18 @@
(defn wrap-string
"Reifies IAmotoen around a string 's', possibly at a given starting point 'j'.
- The function 'charAt' is the mechanism to walk through the string."
+ The function 'charAt' is part of the mechanism to walk through the string."
([#^String s] (wrap-string s 0))
([#^String s j] ; 'j' is where we currently are in the input... 'i' was skipped since it was too close to 'i'nput
- (let [j (ref j)]
+ (let [a (int-array 1 j)]
(reify IAmotoen
- (gp [t] @j)
- (sp [t k] (dosync (ref-set j k)))
- (end [t] (= @j (count s)))
- (c [t] (try (.charAt s @j) (catch Exception e nil)))
+ (gp [t] (aget a 0))
+ (sp [t k] (aset-int a 0 k))
+ (end [t] (= (aget a 0) (count s)))
+ (c [t] (try (.charAt s (aget a 0)) (catch Exception e nil)))
(m [t] (let [r (c t)]
(when (nil? r) (throw (Exception. "Consuming nil")))
- (dosync (alter j inc))
+ (aset-int a 0 (+ 1 (aget a 0)))
r))))))
(defn lpegs
@@ -46,34 +46,35 @@
[t s] (reverse (into '() (cons t (seq s)))))
(defn pegs
- "Produces a rule that consumes each character in the string 's'."
+ "Produces a rule that consumes each character in the string 's' in the order given in 's'."
[s] (vec (seq s)))
-(def #^{:private true :doc "Starts at :Grammar."} grammar-grammar {
- :_* '(* :Whitespace)
- :_ [:Whitespace '(* :Whitespace)]
- :Grammar [\{ :_* :Rule '(* [:_ :Rule]) :_* \}]
- :Rule [:Keyword :_ :Body]
- :Keyword [\: '(| :AmotoenSymbol :ProvidedSymbol)]
- :ProvidedSymbol '(| :EndOfInput :AcceptAnything)
- :EndOfInput \$ ; If the Keyword ':$' is encountered, the wrapped input should be at the end
- :AcceptAnything \. ; If the Keyword ':.' is encountered, any character is accepted
- :Body '(| :Keyword :Char :Grouping :NotPredicate :AnyNot :AwareFunction :Function)
- :Grouping '(| :Sequence :Either :ZeroOrMore)
- :Sequence [\[ :_* :Body '(* [:_* :Body]) :_* \]]
- :Either [\( \| :_ :Body '(* [:_* :Body]) :_* \)]
- :NotPredicate [\( \! :_ :Body :_* \)]
- :ZeroOrMore [\( \* :_ :Body :_* \)]
- :AnyNot [\( \% :_ :Body :_* \)]
- :AwareFunction [\( \a :_ :CljReaderFn :_* \)]
- :Function [\( \f :_ :CljReaderFn :_ :Body :_* \)]
- :CljReaderFn [\# \< '(% \>) '(* (% \>)) \>]
- :Whitespace '(| \space \newline \return \tab \,)
- :Char [\\ (list '| (pegs "tab") (pegs "space") (pegs "newline") (pegs "return") '(% \space))]
- :AmotoenSymbol [:NonNumericCharacter '(* :AlphanumericCharactersPlus)] ; _Not_ the same as a Clojure Symbol
- :NonNumericCharacter (list '% (lpegs '| "0123456789"))
- :AlphanumericCharactersPlus (lpegs '| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789:/*+!-_?.")
-})
+(def #^{:private true
+ :doc "This grammar is the grammar for Amotoen grammars. It starts at :Grammar."}
+ grammar-grammar {
+ :_* '(* :Whitespace)
+ :_ [:Whitespace '(* :Whitespace)]
+ :Grammar [\{ :_* :Rule '(* [:_ :Rule]) :_* \}]
+ :Rule [:Keyword :_ :Body]
+ :Keyword [\: '(| :AmotoenSymbol :ProvidedSymbol)]
+ :ProvidedSymbol '(| :EndOfInput :AcceptAnything)
+ :EndOfInput \$ ; If the Keyword ':$' is encountered, the wrapped input should be at the end
+ :AcceptAnything \. ; If the Keyword ':.' is encountered, any character is accepted
+ :Body '(| :Keyword :Char :Grouping :NotPredicate :AnyNot :AwareFunction :Function)
+ :Grouping '(| :Sequence :Either :ZeroOrMore)
+ :Sequence [\[ :_* :Body '(* [:_* :Body]) :_* \]]
+ :Either [\( \| :_ :Body '(* [:_* :Body]) :_* \)]
+ :NotPredicate [\( \! :_ :Body :_* \)]
+ :ZeroOrMore [\( \* :_ :Body :_* \)]
+ :AnyNot [\( \% :_ :Body :_* \)]
+ :AwareFunction [\( \a :_ :CljReaderFn :_* \)]
+ :Function [\( \f :_ :CljReaderFn :_ :Body :_* \)]
+ :CljReaderFn [\# \< '(% \>) '(* (% \>)) \>]
+ :Whitespace '(| \space \newline \return \tab \,)
+ :Char [\\ (list '| (pegs "tab") (pegs "space") (pegs "newline") (pegs "return") '(% \space))]
+ :AmotoenSymbol [:NonNumericCharacter '(* :AlphanumericCharactersPlus)] ; _Not_ the same as a Clojure Symbol, though it should be a proper subset
+ :NonNumericCharacter (list '% (lpegs '| "0123456789"))
+ :AlphanumericCharactersPlus (lpegs '| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789:/*+!-_?.")})
(defn- either
"Returns the result of the first element in 'n' to successfully process something from 'w'."
@@ -87,7 +88,10 @@
(rest n)))))
(defn- any-not
- "See comments in code. Success if failure, failure if success."
+ "This will accept anything that is not 'b'. For instance, '(% :B) would accept
+ any single character that is not whatever matches :B. Success if failure, failure if success.
+ It will successfully match any single character if :B fails, and it will fail to match any
+ single character if :B succeeds."
[b g w]
(let [p (gp w)]
(if (or (pegasus b g w) (end w))
@@ -95,12 +99,12 @@
(m w)))) ; If we fail and aren't at the end, then we accept the current char
(defn debug
- "Very very inefficient and useful in some cases.
+ "Very very inefficient but useful in some cases.
Prints out 'i' number of characters from 'w' followed by 'n',
and then resets the position in 'w' as if nothing had been consumed."
[n w i]
(let [p (gp w)]
- (println ">> " (pr-str (apply str (doall (take i (repeatedly #(try (m w) (catch Exception e ""))))))) ":" n)
+ (println ">> " (pr-str (apply str (doall (take i (repeatedly #(try (m w) (catch Exception e ""))))))) ":" n)
(sp w p)))
(defn- try-char [n w]
View
180 src/com/lithinos/amotoen/grammars/minimark.clj
@@ -7,59 +7,139 @@
; You must not remove this notice, or any other, from this software.
(ns com.lithinos.amotoen.grammars.minimark
+ (:use [clojure.pprint])
(:use [com.lithinos.amotoen.core]))
+(defn containing [s b e] [s b (list '* b) e])
+
(defn delimited
([m] (delimited m m))
- ([s e] [s [(list '% e) (list '* (list '% e))] e]))
+ ([b e] (delimited b e e))
+ ([s b e] (containing s (list '% b) e)))
+
+(defn delimited-body [d b] (delimited (pegs d) b (pegs d)))
+
+(defn a-frequency-ordered-char [] ; Semi-frequency-ordered
+ (lpegs '| "etaoinsrh,.?bcdfgjklmpquvwxyz023456789~`@#$%&*()+}]|:;<>/\""))
+
+(defn one-or-more [b] [b (list '* b)])
+
+(defn one-or-more-not [b] [(list '% b) (list '* (list '% b))])
(def grammar {
-;Bulk
- :Content [:Element '(* :Element) :$]
- :Element '(| :_+ :Alphanumeric :Markup :.)
- :Markup '(| :HRule :MDash :List :SS :U :H4 :H3 :H2 :H1 :B :I :Href :Pre)
- :Markup-Guard (list '! (list '| :HRule :MDash :Unordered-List-Marker :Ordered-List-Marker
- \^ (pegs "__") (pegs "====") (pegs "===") (pegs "==") \= (pegs "'''") (pegs "''") \[ (pegs "{{{")))
- :Non-Markup [:Markup-Guard :Any-Char]
- :NM+ [:Non-Markup '(* :Non-Markup)]
-; Whitespace
- :N \newline
- :_ \space
- :_* '(* :_)
- :_+ [:_ '(* :_)]
-; Odds-n-Ends
- :Any-Char '(| :Empty-Line :Escaped-Char :.)
- :Empty-Line [:N :N]
- :Escaped-Char [\! (pegs "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\\[=]")]
- :Alphanumeric (pegs "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") ; No markup ever starts with an alphanumeric character
-;Links
- :Href '(| :Href-Straight :Href-Explained)
- :Href-Straight [\[ :Href-Body \]]
- :Href-Explained [\[ :Href-Text :_ :Href-Body \]]
- :Href-Body [:Href-Body-Part '(* :Href-Body-Part)]
- :Href-Body-Part '(% (| \] \space))
- :Href-Text ['(% \space) '(* (% \space))]
-;Lists
- :Unordered-List-Marker (pegs "-- ")
- :Ordered-List-Marker (pegs "-. ")
- :List '(| :Ordered-List :Unordered-List)
- :Ordered-List [:_* :Ordered-List-Marker :List-Body '(* [:_* :Ordered-List-Marker :List-Body])]
- :Unordered-List [:_* :Unordered-List-Marker :List-Body '(* [:_* :Unordered-List-Marker :List-Body])]
- :List-Body [:List-Chunk '(* :List-Chunk)]
- :List-Chunk '(| :SS :U :B :I :Href :Pre :.)
-;Superscript
- :SS [\^ :SS-Body \^]
- :SS-Body ['(! \^) :SS-Chunk '(* [(! \^) :SS-Chunk])]
- :SS-Chunk '(| :U :B :I :Href :.)
-;Pre
- :Pre (delimited (pegs "{{{") (pegs "}}}"))
-;Remaining
- :H1 (delimited \=)
- :H2 (delimited (pegs "=="))
- :H3 (delimited (pegs "==="))
- :H4 (delimited (pegs "===="))
- :B (delimited (pegs "'''"))
- :I (delimited (pegs "''"))
- :U (delimited (pegs "__"))
- :HRule (pegs "----")
- :MDash (pegs "---")})
+ :Content '(* (| :SafeChar :Markup :UnsafeChar))
+ :Markup '(| :HRule :MDash :List :SS :U :B :I :Href :Pre :H4 :H3 :H2 :H1)
+ :HRule (pegs "----")
+ :MDash (pegs "---")
+ :List '(| :OrderedList :UnorderedList)
+ :OrderedList (one-or-more [(pegs "1. ") :ListBody])
+ :UnorderedList (one-or-more [(pegs "-- ") :ListBody])
+ :ListBody [:ListContent \newline '(* (| \newline \space \tab))]
+ :ListContent '(* (| :ListSafeChar :SS :U :B :I :Href :Pre (% \newline)))
+ :SS (delimited \^)
+ :H4 (delimited-body "====" \=)
+ :H3 (delimited-body "===" \=)
+ :H2 (delimited-body "==" \=)
+ :H1 (delimited-body "=" \=)
+ :B (delimited (pegs "'''"))
+ :I (delimited (pegs "''"))
+ :U (delimited (pegs "__"))
+ :Pre (containing (pegs "{{{") :PreContent (pegs "}}}"))
+ :PreContent '(| [\! \}] (% [\} \} \}]))
+ :Href [\[ (list '| (delimited \[ \]) :HrefExplained) \]]
+ :HrefExplained [(one-or-more-not \space) \space (one-or-more-not \])]
+ :ListSafeChar (list '| \space :EscapedChar (a-frequency-ordered-char) \tab)
+ :SafeChar (list '| :EmptyLine :ListSafeChar \newline)
+ :EmptyLine [\newline \newline]
+ :EscapedChar [\! :UnsafeChar]
+ :UnsafeChar (lpegs '| "ETAOINSRH1BCDFGJKLMPQUVWXYZ!\\[={_^'-")
+})
+
+(defn list-safe-to-html [l] (if (map? l) (first (vals l)) l))
+
+(defn safe-to-html [l] (if (map? l) (first (vals l)) l))
+
+(defn content-to-html [l]
+ (cond
+ (map? l) (first (vals l))
+ (seq? l) (apply str (map content-to-html l))
+ true l))
+
+(defn one-or-more-to-str [l]
+ (if (char? (second l))
+ (apply str l)
+ (reduce (fn [a b] (str a b))
+ (first l)
+ (second l))))
+
+(defn delimited-to-html
+ ([l c] (delimited-to-html l c "span"))
+ ([l c t] (if (not (nil? l))
+ (str "<" t " class='" c "'>" (one-or-more-to-str (butlast (rest l))) "</" t ">"))))
+
+(defn href-to-html [v]
+ (if (not (nil? v))
+ (if (vector? (second v))
+ (let [l (one-or-more-to-str (butlast (rest (second v))))]
+ (str "<a href='" l "'>" l "</a>"))
+ (let [inside (first (vals (second v)))
+ link (one-or-more-to-str (first inside))
+ explanation (one-or-more-to-str (last inside))]
+ (str "<a href='" link "'>" explanation "</a>")))))
+
+(defn pre-to-html[v]
+ (if (not (nil? v))
+ (let [v (butlast (rest v))]
+ (if (= \newline (first (vals (first v))))
+ (str "<div class='pre-block'>"
+ (apply str (map #(first (vals %))
+ (butlast (second v))))
+ "</div>")
+ (str "<div class='pre-inline'>"
+ (reduce (fn [a b] (str a (first (vals b))))
+ (first (vals (first v)))
+ (second v))
+ "</div>")))))
+
+(defn list-to-html
+ ([v] (list-to-html v "ol"))
+ ([v t]
+ (if (not (nil? v))
+ (str "<" t ">"
+ (reduce (fn [a b] (if (empty? b) a (str a (str "<li>" (first (vals (last b))) "</li>"))))
+ (str "<li>" (first (vals (last (first v)))) "</li>")
+ (rest v))
+ "</" t ">"))))
+
+(defn list-content-to-html [l]
+ (reduce (fn [a b] (str a (if (map? b) (first (vals b)) b)))
+ (first l)
+ (rest l)))
+
+(def #^{:private true} to-html-fns {:Content content-to-html
+ :EscapedChar #(first (vals (second %)))
+ :ListSafeChar list-safe-to-html
+ :SafeChar safe-to-html
+ :MDash #(if (not (nil? %)) "&mdash;")
+ :HRule #(if (not (nil? %)) "<hr />")
+ :SS #(delimited-to-html % "superscript")
+ :U #(delimited-to-html % "underline")
+ :B #(delimited-to-html % "bold")
+ :I #(delimited-to-html % "italic")
+ :H4 #(delimited-to-html % "H4" "div")
+ :H3 #(delimited-to-html % "H3" "div")
+ :H2 #(delimited-to-html % "H2" "div")
+ :H1 #(delimited-to-html % "H1" "div")
+ :Pre pre-to-html
+ :Href href-to-html
+ :List #(first (vals %))
+ :Markup #(first (vals %))
+ :ListBody #(first (vals (first %)))
+ :OrderedList list-to-html
+ :UnorderedList #(list-to-html % "ul")
+ :ListContent #(list-content-to-html %)
+ :EmptyLine #(if (not (nil? %)) "<div class='empty-line' />") })
+
+(defn to-html
+ ([s] (to-html s grammar))
+ ([s g] (post-process :Content g (wrap-string s) to-html-fns)))
View
49 test/com/lithinos/amotoen/test/minimark.clj
@@ -13,3 +13,52 @@
(:use [clojure.pprint])
(:use [com.lithinos.amotoen.grammars.minimark]))
+(deftest single-safe-char (is (= \a (to-html "a"))))
+(deftest single-escaped-char (is (= \A (to-html "!A"))))
+(deftest multiple-safe-char (is (= "aaa" (to-html "aaa"))))
+(deftest multiple-escaped-char (is (= "AAA" (to-html "!A!A!A"))))
+(deftest mixed-char (is (= "aA" (to-html "a!A"))))
+(deftest multiple-mixed-char (is (= "aAaAaA" (to-html "a!Aa!Aa!A"))))
+
+(deftest single-safe-newline (is (= \newline (to-html "\n"))))
+(deftest single-safe-empty-line (is (= "<div class='empty-line' />" (to-html "\n\n"))))
+(deftest single-safe-space (is (= \space (to-html " "))))
+(deftest single-safe-tab (is (= \tab (to-html "\t"))))
+
+(deftest mix-safe-chars-and-whitepsace1 (is (= " a\tAa aaa<div class='empty-line' />\t\taA\n" (to-html " a\t!Aa aaa\n\n\t\ta!A\n"))))
+
+(deftest hrule (is (= "<hr />" (to-html "----"))))
+(deftest mdash (is (= "&mdash;" (to-html "---"))))
+(deftest superscript (is (= "<span class='superscript'>SuperScript</span>" (to-html "^SuperScript^"))))
+(deftest h4 (is (= "<div class='H4'> H4 </div>" (to-html "==== H4 ===="))))
+(deftest h3 (is (= "<div class='H3'> H3 </div>" (to-html "=== H3 ==="))))
+(deftest h2 (is (= "<div class='H2'> H2 </div>" (to-html "== H2 =="))))
+(deftest h1 (is (= "<div class='H1'> H1 </div>" (to-html "= H1 ="))))
+(deftest bold (is (= "<span class='bold'>Bold</span>" (to-html "'''Bold'''"))))
+(deftest italics (is (= "<span class='italic'>Italics</span>" (to-html "''Italics''"))))
+(deftest underlined (is (= "<span class='underline'>underlined</span>" (to-html "__underlined__"))))
+(deftest inline-pre (is (= "<div class='pre-inline'>Inline Pre</div>" (to-html "{{{Inline Pre}}}"))))
+(deftest block-pre (is (= "<div class='pre-block'>Block\nPre</div>" (to-html "{{{\nBlock\nPre\n}}}"))))
+(deftest href (is (= "<a href='href'>href</a>" (to-html "[[href]]"))))
+(deftest href-explained (is (= "<a href='href'>explained</a>" (to-html "[href explained]"))))
+
+(deftest list-numbered (is (= "<ol><li>Item</li></ol>" (to-html "1. Item\n"))))
+(deftest list-unordered (is (= "<ul><li>Item</li></ul>" (to-html "-- Item\n"))))
+
+(deftest multi-list-numbered (is (= "<ol><li>Item 1</li><li>Item 2</li></ol>" (to-html "1. Item 1\n1. Item 2\n"))))
+(deftest multi-list-unordered (is (= "<ul><li>Item 1</li><li>Item 2</li></ul>" (to-html "-- Item 1\n-- Item 2\n"))))
+
+(deftest list-numbered-with-markup
+ (is (= "<ol><li>Item <span class='superscript'>ss</span> <span class='underline'>u</span> <span class='bold'>b</span> <span class='italic'>i</span> <a href='href'>href</a> <div class='pre-inline'>pre</div> 1</li></ol>"
+ (to-html "1. Item ^ss^ __u__ '''b''' ''i'' [[href]] {{{pre}}} 1\n"))))
+(deftest list-unordered-with-markup
+ (is (= "<ul><li>Item <span class='superscript'>ss</span> <span class='underline'>u</span> <span class='bold'>b</span> <span class='italic'>i</span> <a href='href'>href</a> <div class='pre-inline'>pre</div> 1</li></ul>"
+ (to-html "-- Item ^ss^ __u__ '''b''' ''i'' [[href]] {{{pre}}} 1\n"))))
+
+(deftest multi-list-numbered-with-markup
+ (is (= "<ol><li>Item <span class='superscript'>ss</span> <span class='underline'>u</span> <span class='bold'>b</span> <span class='italic'>i</span> <a href='href'>href</a> <div class='pre-inline'>pre</div> 1</li><li>Item <span class='superscript'>ss</span> <span class='underline'>u</span> <span class='bold'>b</span> <span class='italic'>i</span> <a href='href'>href</a> <div class='pre-inline'>pre</div> 1</li></ol>"
+ (to-html "1. Item ^ss^ __u__ '''b''' ''i'' [[href]] {{{pre}}} 1\n1. Item ^ss^ __u__ '''b''' ''i'' [[href]] {{{pre}}} 1\n"))))
+(deftest multi-list-unordered-with-markup
+ (is (= "<ul><li>Item <span class='superscript'>ss</span> <span class='underline'>u</span> <span class='bold'>b</span> <span class='italic'>i</span> <a href='href'>href</a> <div class='pre-inline'>pre</div> 1</li><li>Item <span class='superscript'>ss</span> <span class='underline'>u</span> <span class='bold'>b</span> <span class='italic'>i</span> <a href='href'>href</a> <div class='pre-inline'>pre</div> 1</li></ul>"
+ (to-html "-- Item ^ss^ __u__ '''b''' ''i'' [[href]] {{{pre}}} 1\n-- Item ^ss^ __u__ '''b''' ''i'' [[href]] {{{pre}}} 1\n"))))
+
Please sign in to comment.
Something went wrong with that request. Please try again.