Permalink
Browse files

More work on finding and resolving sore spots.

  • Loading branch information...
1 parent 50da08b commit 9151e176745ef7eac566dfba7211d167e5b97a71 @richard-lyman committed Jul 19, 2012
Showing with 196 additions and 87 deletions.
  1. +132 −86 src/com/lithinos/amotoen/core.clj
  2. +64 −1 test/com/lithinos/amotoen/test/core.clj
View
218 src/com/lithinos/amotoen/core.clj
@@ -15,7 +15,9 @@
; n - next character in input
(declare pegasus)
+
(def ^:dynamic *currentK* (ref nil))
+
(defprotocol IPosition
(psdebug [t] "Some form of helpful debug info")
(in [t] "Indent - for debugging")
@@ -26,19 +28,35 @@
(end [t] "End of input")
(m [t] "Returns the 'c' then (inc pos)")
(c [t] "The character at pos"))
-(defn gen-ps
- ([s] (gen-ps s 0))
- ([s j]
+
+(defn gen-ps ; 'ps' is for a 'p'eggable 's'tring
+ ([#^String s] (gen-ps s 0))
+ ([#^String s j] ; 'j' is where we currently are in the input... 'i' was skipped since it was too close to 'i'nput
(let [j (ref j)
indent (ref 0)]
(reify IPosition
(psdebug [t]
(let [indent-string (apply str (take @indent (repeat " ")))
padding (apply str (take 60 (repeat " ")))
- x (str "'" (pr-str (try (subs s (max 0 (- @j 30)) (max 0 @j)) (catch Exception e ""))) "'")
- y (str " " (pr-str (c t)) " ")
- z (str "'" (pr-str (try (subs s (inc @j) (min (+ @j 30) (count s))) (catch Exception e ""))) "'")]
- (str (subs (str (if (< @j 0) (str "<-" (subs s 0 20)) (str x y z)) padding) 0 60) indent-string)))
+ before_j (str "'" (pr-str (try
+ (subs s
+ (max 0 (- @j 30))
+ (max 0 @j))
+ (catch Exception e ""))) "'")
+ at_j (str " " (pr-str (c t)) " ")
+ after_j (str "'" (pr-str (try
+ (subs s
+ (inc @j)
+ (min (+ @j 30) (count s)))
+ (catch Exception e ""))) "'")]
+ (str (subs
+ (str (if (< @j 0)
+ (str "<-" (subs s 0 20))
+ (str before_j at_j after_j))
+ padding)
+ 0
+ 60)
+ indent-string)))
(in [t] (dosync (alter indent inc)))
(de [t] (dosync (alter indent dec)))
(gp [t] @j)
@@ -47,42 +65,84 @@
(end [t] (= @j (count s)))
(m [t] (let [r (c t)] (dosync (alter j inc)) r))
(c [t] (try (.charAt s @j) (catch Exception e nil)))))))
+
(defn lpegs [t s] (reverse (into '() (cons t (seq s))))) ; This doesn't need to be fast, but shouldn't the following work? (list (cons t (seq s)))
(defn pegs [s] (vec (seq s)))
(def ^:dynamic *debug* (ref false))
-(defn- debug [w & args] (when @*debug* (print (psdebug w)) (print @*currentK* " ") (apply println args) (flush)))
+(defn- debug [w & args]
+ (when @*debug*
+ (print (psdebug w))
+ (print @*currentK* " ")
+ (apply println args)
+ (flush)))
(def #^{:private true} grammar-grammar {
- :Whitespace '(| \space \newline \tab \,)
:_* '(* :Whitespace)
:_ [:Whitespace '(* :Whitespace)]
:Grammar [\{ :_* :Rule '(* [:_ :Rule]) :_* \}]
:Rule [:Keyword :_ :Body]
- :Keyword [\: :ValidKeywordChar '(* :ValidKeywordChar)]
- :Body '(| :Keyword :Char :Grouping)
- :Grouping '(| :Sequence :Either :ZeroOrMore :AnyNot)
- :Sequence [\[ :_* :Body '(* [:_* :Body]) :_* \]]
- :Either [\( \| :_ :Body '(* [:_* :Body]) :_* \)]
- :ZeroOrMore [\( \* :_ :Body :_* \)]
- :AnyNot [\( \% :_ :Body :_* \)]
- :Char [\\ (list '| (pegs "tab") (pegs "space") (pegs "newline") '(% \space))]
- :ValidKeywordChar (lpegs '| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789:/*+!_?-")
+ :Keyword [\: :AmotoenSymbol]
+ :Body '(| :Keyword :Char :Grouping :AnyNot :AwareFunction :Function)
+ :Grouping '(| :Sequence :Either :ZeroOrMore)
+ :Sequence [\[ :_* :Body '(* [:_* :Body]) :_* \]]
+ :Either [\( \| :_ :Body '(* [:_* :Body]) :_* \)]
+ :ZeroOrMore [\( \* :_ :Body :_* \)]
+ :AnyNot [\( \% :_ :Body :_* \)]
+ :AwareFunction [\( \a :_ :Symbol :_ :Body :_* \)]
+ :Function [\( \f :_ :Symbol :_ :Body :_* \)]
+ :Whitespace '(| \space \newline \tab \,)
+ :Char [\\ (list '| (pegs "tab") (pegs "space") (pegs "newline") '(% \space))]
+ :Symbol '(| \/ :AmotoenSymbol)
+ :AmotoenSymbol [:NonNumericCharacter '(* :AlphanumericCharactersPlus)] ; _Not_ the same as a Clojure Symbol
+ :NonNumericCharacter (list '% (lpegs '| "0123456789"))
+ :AlphanumericCharactersPlus (lpegs '| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789:/*+!-_?.")
})
-(defn- either [n g w] (let [original (gp w)] (first (keep #(do (sp w original) (pegasus % g w)) (rest n)))))
+(defn- either [n g w]
+ #_(let [original (gp w)]
+ (loop [remaining (rest n)]
+ (if (nil? (seq remaining))
+ nil
+ (do
+ (sp w original)
+ (let [result (pegasus (first remaining) g w)]
+ (if (nil? result)
+ (recur (rest remaining))
+ result))))))
+ (let [original (gp w)] ; Why is this still the fastest?
+ (first
+ (keep
+ #(do
+ (sp w original)
+ (pegasus % g w))
+ (rest n))))
+ #_(let [original (gp w)]
+ #_(println "Processing either:" (first n) (rest n))
+ (first
+ (filter #(do #_(println "checking for nil:" %) (not (nil? %)))
+ (doall
+ (pmap
+ #(do #_(println "Running with:" %) (pegasus % g (clone w)))
+ (rest n))))))
+)
(defn- any-not [b g w]
(let [c (c w) p (gp w)]
(if (pegasus b g w)
(do (sp w p) nil) ; If we succeed, then we fail - that's the point of AnyNot... and rollback
- (do (debug w "AnyNot MATCH:" (pr-str b) c) (m w) c)))); If we fail, then we accept the current char
+ (do
+ #_(debug w "AnyNot MATCH:" (pr-str b) c)
+ (m w)
+ c)))); If we fail, then we accept the current char
(defn- try-char [n w]
(if (= n (c w))
- (do (debug w (str "MATCH: '" (pr-str n) "' with '" (pr-str (c w)) "'"))
+ (do
+ #_(debug w (str "MATCH: '" (pr-str n) "' with '" (pr-str (c w)) "'"))
(m w))
- (do #_(debug w (str "FAIL: '" (pr-str n) "' with '" (pr-str (c w)) "'"))
+ (do
+ #_(debug w (str "FAIL: '" (pr-str n) "' with '" (pr-str (c w)) "'"))
nil)))
(defn- peg-vec [n g w]
@@ -92,32 +152,50 @@
(if (empty? remaining)
result
(let [temp (pegasus (first remaining) g w)]
- (if temp (recur (rest remaining)
- (conj result temp))
- (do (sp w p)
- nil)))))))
+ (if temp
+ (recur (rest remaining)
+ (conj result temp))
+ (do
+ (sp w p)
+ nil)))))))
(defn- typed-list [n g w]
(let [t (first n)
b (second n)
- result (cond (= t '|) (let [temp (either n g w)] #_(debug w "Either returning:" (pr-str temp)) temp)
- (= t '%) (any-not b g w)
- (= t '*) (doall (take-while #(if (keyword? b) (b %) %)
- (repeatedly #(pegasus b g w))))
- (ifn? t) (t (pegasus (if (symbol? b) (rest n) b) g w)))]
- (if (and (seq? result) (= 1 (count result)))
+ result (cond (= t '|) (let [temp (either n g w)]
+ #_(debug w "Either returning:" (pr-str temp))
+ temp)
+ (= t '%) (any-not b g w)
+ (= t '*) (doall (take-while #(if (keyword? b)
+ (b %)
+ %)
+ (repeatedly #(pegasus b g w))))
+ (= t 'a) (b g w (pegasus (first (rest (rest n))) g w))
+ (= t 'f) (b (pegasus (first (rest (rest n))) g w)))]
+ (if (and
+ (seq? result)
+ (nil? (seq (rest result)))
+ (not (nil? (first result)))
+ )
(first result)
result)))
-(defn- p [w s n] (debug w s (pr-str n)))
-(defn- fp [w s n] (dosync (ref-set *debug* true)) (p w "c:" n) (dosync (ref-set *debug* false)))
+(defn- p [w s n] #_(debug w s (pr-str n)))
+(defn- fp [w s n]
+ (dosync (ref-set *debug* true))
+ (p w "c:" n)
+ (dosync (ref-set *debug* false)))
-; Accept a 'debug limit' - if 0 then always dump everything. If more than 0, only keep limit number of lines of debug and print out at end
(defn pegasus [n g w]
(in w)
(when (keyword? n) (dosync (ref-set *currentK* n)))
(let [result (cond
- (keyword? n)(do (p w "k:" n) (let [temp (pegasus (n g) g w)] (if temp {n temp} nil)))
+ (keyword? n)(do (p w "k:" n)
+ (when (nil? (n g)) (throw (Error. (str "Keyword '" n "' does not exist in grammar"))))
+ (let [temp (pegasus (n g) g w)]
+ (if temp
+ {n temp}
+ nil)))
(vector? n) (do #_(p w "v:" n) (peg-vec n g w))
(list? n) (do #_(p w "l:" n) (typed-list n g w))
(char? n) (do #_(p w "c:" n) (try-char n w))
@@ -126,57 +204,25 @@
(de w)
result))
-; If pegasus is given a keyword, but it doesn't exist in the given grammar, a useful error should be thrown
-(defn validate ([g] (validate g false))
+(defn validate
+ ([g] (validate g false))
([g d]
(dosync (ref-set *debug* d))
- (let [w (gen-ps (pr-str g))]
- (if (or (nil? (pegasus :Grammar grammar-grammar w))
- (not (end w)))
- (println "Fail")
- (println "Pass")))
- (dosync (ref-set *debug* false))))
-
-(defn vectors-reset-pos []
- (let [g {:S [(list '* (list '% (pegs "}}}"))) (pegs "}}}")]}
- i "a}}b}}}"
- r (pegasus :S g (gen-ps i))]
- (when (not= '{:S [(\a \} \} \b) [\} \} \}]]} r)
- (throw (Error. "Failed Vectors are not resetting the pos.")))
- (println "Pass")))
-
-(defn collapse-pegs []
- (let [custom-collapse #(apply str %)
- g {:S [(list custom-collapse (pegs "abcabc"))]}
- i "abcabc"
- r (pegasus :S g (gen-ps i))]
- (when (not= '{:S ["abcabc"]} r)
- (throw (Error. (str "pegs didn't collapse: " r))))
- (println "Pass")))
-
-(defn collapse-lpegs []
- (let [custom-collapse #(apply str %)
- g {:S [(list custom-collapse '* (lpegs '| "abc"))]}
- i "aabbcc"
- r (pegasus :S g (gen-ps i))]
- (when (not= '{:S ["aabbcc"]} r)
- (throw (Error. (str "lpegs didn't collapse: " r))))
- (println "Pass")))
-
-(defn collapse-keywords []
- (let [custom-collapse (fn [r] (apply str (map #(first (vals %)) r)))
- g {:S [(list custom-collapse '* '(| :A :B :C))] :A \a :B \b :C \c }
- i "aabbcc"
- r (pegasus :S g (gen-ps i))]
- (when (not= '{:S ["aabbcc"]} r)
- (throw (Error. (str "keywords didn't collapse: " r))))
- (println "Pass")))
-
-(defn self-check []
- (validate grammar-grammar)
- (vectors-reset-pos)
- (collapse-lpegs)
- (collapse-keywords)
- (collapse-pegs)
+ (let [w (gen-ps (pr-str g))
+ temp (pegasus :Grammar grammar-grammar w)
+ r (or (nil? temp)
+ (not (end w)))]
+ (dosync (ref-set *debug* false))
+ [r, temp])))
+
+(defn self-check [] (validate grammar-grammar))
+(defn self-ast []
+ (dosync (ref-set *debug* false))
+ (let [r (pr-str (pegasus
+ :Grammar
+ grammar-grammar
+ (gen-ps (pr-str grammar-grammar))))]
+ (dosync (ref-set *debug* false))
+ r)
)
View
65 test/com/lithinos/amotoen/test/core.clj
@@ -38,6 +38,69 @@ a")))
a")))))
+(defn test-grammars []
+ (doseq [g [ {:S \a}
+ {:S (pegs "}}}")}
+ {:S (list '% (pegs "}}}"))}
+ {:S (list '* (list '% (pegs "}}}")))}]]
+ (when (not (first (validate g))) (throw (Error. (str "Invalid grammar: " (pr-str g)))))))
+
+(defn vectors-reset-pos []
+ (let [g {:S [(list '* (list '% (pegs "}}}"))) (pegs "}}}")]}
+ i "a}}b}}}"
+ r (pegasus :S g (gen-ps i))]
+ (when (not= '{:S [(\a \} \} \b) [\} \} \}]]} r) (throw (Error. "Failed Vectors are not resetting the pos.")))
+ true))
+
+(defn collapse-pegs []
+ (let [custom-collapse #(apply str %)
+ g {:S [(list 'f custom-collapse (pegs "abcabc"))]}
+ i "abcabc"
+ r (pegasus :S g (gen-ps i))]
+ (when (not= '{:S ["abcabc"]} r) (throw (Error. (str "pegs didn't collapse: " r))))
+ true))
+
+(defn collapse-lpegs []
+ (let [custom-collapse #(apply str %)
+ g {:S [(list 'f custom-collapse (list '* (lpegs '| "abc")))]}
+ i "aabbcc"
+ r (pegasus :S g (gen-ps i))]
+ (when (not= '{:S ["aabbcc"]} r) (throw (Error. (str "lpegs didn't collapse: " r))))
+ true))
+
+(defn collapse-keywords []
+ (let [custom-collapse (fn [r] (apply str (map #(first (vals %)) r)))
+ g {:S [(list 'f custom-collapse '(* (| :A :B :C)))] :A \a :B \b :C \c }
+ i "aabbcc"
+ r (pegasus :S g (gen-ps i))]
+ (when (not= '{:S ["aabbcc"]} r) (throw (Error. (str "keywords didn't collapse: " r))))
+ true))
+
;(dosync (ref-set *debug* true))
(println "Single run") (time (self-check))
-;(println "20 runs") (time (doall (take 20 (repeatedly #(self-check)))))
+;(println "Single run" (self-check))
+;(println "Dump" (time (self-ast)))
+;(println "10 runs") (time (doall (take 10 (repeatedly #(self-check)))))
+(println "40 runs") (time (doall (take 40 (repeatedly #(self-check)))))
+
+;(println "start")
+;(let [i (ref 0) j (ref 0)]
+; (println (first (keep #(do (dosync (alter i inc)) (if (even? %) :a nil)) [1 1 1 1 2 1 1 1 2])))
+; (println (loop []
+; (dosync (alter j inc))
+; (when (< (rand-int 100) 90)
+; (recur))))
+; (println "end: " @i @j))
+
+
+;(test-grammars)
+;(vectors-reset-pos)
+;(collapse-lpegs)
+;(collapse-keywords)
+;(collapse-pegs)
+
+#_(try
+ (pegasus :S {:A :B} (gen-ps "fail"))
+ (throw (Error. "A useful error should be thrown when a keyword doesn't exist in a grammar"))
+ (catch Error e))
+

0 comments on commit 9151e17

Please sign in to comment.