Skip to content

Commit

Permalink
More work on finding and resolving sore spots.
Browse files Browse the repository at this point in the history
  • Loading branch information
richard-lyman committed Jul 19, 2012
1 parent 50da08b commit 9151e17
Show file tree
Hide file tree
Showing 2 changed files with 196 additions and 87 deletions.
218 changes: 132 additions & 86 deletions src/com/lithinos/amotoen/core.clj
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
; n - next character in input ; n - next character in input


(declare pegasus) (declare pegasus)

(def ^:dynamic *currentK* (ref nil)) (def ^:dynamic *currentK* (ref nil))

(defprotocol IPosition (defprotocol IPosition
(psdebug [t] "Some form of helpful debug info") (psdebug [t] "Some form of helpful debug info")
(in [t] "Indent - for debugging") (in [t] "Indent - for debugging")
Expand All @@ -26,19 +28,35 @@
(end [t] "End of input") (end [t] "End of input")
(m [t] "Returns the 'c' then (inc pos)") (m [t] "Returns the 'c' then (inc pos)")
(c [t] "The character at pos")) (c [t] "The character at pos"))
(defn gen-ps
([s] (gen-ps s 0)) (defn gen-ps ; 'ps' is for a 'p'eggable 's'tring
([s j] ([#^String s] (gen-ps s 0))
([#^String s j] ; 'j' is where we currently are in the input... 'i' was skipped since it was too close to 'i'nput
(let [j (ref j) (let [j (ref j)
indent (ref 0)] indent (ref 0)]
(reify IPosition (reify IPosition
(psdebug [t] (psdebug [t]
(let [indent-string (apply str (take @indent (repeat " "))) (let [indent-string (apply str (take @indent (repeat " ")))
padding (apply str (take 60 (repeat " "))) padding (apply str (take 60 (repeat " ")))
x (str "'" (pr-str (try (subs s (max 0 (- @j 30)) (max 0 @j)) (catch Exception e ""))) "'") before_j (str "'" (pr-str (try
y (str " " (pr-str (c t)) " ") (subs s
z (str "'" (pr-str (try (subs s (inc @j) (min (+ @j 30) (count s))) (catch Exception e ""))) "'")] (max 0 (- @j 30))
(str (subs (str (if (< @j 0) (str "<-" (subs s 0 20)) (str x y z)) padding) 0 60) indent-string))) (max 0 @j))
(catch Exception e ""))) "'")
at_j (str " " (pr-str (c t)) " ")
after_j (str "'" (pr-str (try
(subs s
(inc @j)
(min (+ @j 30) (count s)))
(catch Exception e ""))) "'")]
(str (subs
(str (if (< @j 0)
(str "<-" (subs s 0 20))
(str before_j at_j after_j))
padding)
0
60)
indent-string)))
(in [t] (dosync (alter indent inc))) (in [t] (dosync (alter indent inc)))
(de [t] (dosync (alter indent dec))) (de [t] (dosync (alter indent dec)))
(gp [t] @j) (gp [t] @j)
Expand All @@ -47,42 +65,84 @@
(end [t] (= @j (count s))) (end [t] (= @j (count s)))
(m [t] (let [r (c t)] (dosync (alter j inc)) r)) (m [t] (let [r (c t)] (dosync (alter j inc)) r))
(c [t] (try (.charAt s @j) (catch Exception e nil))))))) (c [t] (try (.charAt s @j) (catch Exception e nil)))))))

(defn lpegs [t s] (reverse (into '() (cons t (seq s))))) ; This doesn't need to be fast, but shouldn't the following work? (list (cons t (seq s))) (defn lpegs [t s] (reverse (into '() (cons t (seq s))))) ; This doesn't need to be fast, but shouldn't the following work? (list (cons t (seq s)))
(defn pegs [s] (vec (seq s))) (defn pegs [s] (vec (seq s)))


(def ^:dynamic *debug* (ref false)) (def ^:dynamic *debug* (ref false))
(defn- debug [w & args] (when @*debug* (print (psdebug w)) (print @*currentK* " ") (apply println args) (flush))) (defn- debug [w & args]
(when @*debug*
(print (psdebug w))
(print @*currentK* " ")
(apply println args)
(flush)))


(def #^{:private true} grammar-grammar { (def #^{:private true} grammar-grammar {
:Whitespace '(| \space \newline \tab \,)
:_* '(* :Whitespace) :_* '(* :Whitespace)
:_ [:Whitespace '(* :Whitespace)] :_ [:Whitespace '(* :Whitespace)]
:Grammar [\{ :_* :Rule '(* [:_ :Rule]) :_* \}] :Grammar [\{ :_* :Rule '(* [:_ :Rule]) :_* \}]
:Rule [:Keyword :_ :Body] :Rule [:Keyword :_ :Body]
:Keyword [\: :ValidKeywordChar '(* :ValidKeywordChar)] :Keyword [\: :AmotoenSymbol]
:Body '(| :Keyword :Char :Grouping) :Body '(| :Keyword :Char :Grouping :AnyNot :AwareFunction :Function)
:Grouping '(| :Sequence :Either :ZeroOrMore :AnyNot) :Grouping '(| :Sequence :Either :ZeroOrMore)
:Sequence [\[ :_* :Body '(* [:_* :Body]) :_* \]] :Sequence [\[ :_* :Body '(* [:_* :Body]) :_* \]]
:Either [\( \| :_ :Body '(* [:_* :Body]) :_* \)] :Either [\( \| :_ :Body '(* [:_* :Body]) :_* \)]
:ZeroOrMore [\( \* :_ :Body :_* \)] :ZeroOrMore [\( \* :_ :Body :_* \)]
:AnyNot [\( \% :_ :Body :_* \)] :AnyNot [\( \% :_ :Body :_* \)]
:Char [\\ (list '| (pegs "tab") (pegs "space") (pegs "newline") '(% \space))] :AwareFunction [\( \a :_ :Symbol :_ :Body :_* \)]
:ValidKeywordChar (lpegs '| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789:/*+!_?-") :Function [\( \f :_ :Symbol :_ :Body :_* \)]
:Whitespace '(| \space \newline \tab \,)
:Char [\\ (list '| (pegs "tab") (pegs "space") (pegs "newline") '(% \space))]
:Symbol '(| \/ :AmotoenSymbol)
:AmotoenSymbol [:NonNumericCharacter '(* :AlphanumericCharactersPlus)] ; _Not_ the same as a Clojure Symbol
:NonNumericCharacter (list '% (lpegs '| "0123456789"))
:AlphanumericCharactersPlus (lpegs '| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789:/*+!-_?.")
}) })


(defn- either [n g w] (let [original (gp w)] (first (keep #(do (sp w original) (pegasus % g w)) (rest n))))) (defn- either [n g w]
#_(let [original (gp w)]
(loop [remaining (rest n)]
(if (nil? (seq remaining))
nil
(do
(sp w original)
(let [result (pegasus (first remaining) g w)]
(if (nil? result)
(recur (rest remaining))
result))))))
(let [original (gp w)] ; Why is this still the fastest?
(first
(keep
#(do
(sp w original)
(pegasus % g w))
(rest n))))
#_(let [original (gp w)]
#_(println "Processing either:" (first n) (rest n))
(first
(filter #(do #_(println "checking for nil:" %) (not (nil? %)))
(doall
(pmap
#(do #_(println "Running with:" %) (pegasus % g (clone w)))
(rest n))))))
)


(defn- any-not [b g w] (defn- any-not [b g w]
(let [c (c w) p (gp w)] (let [c (c w) p (gp w)]
(if (pegasus b g w) (if (pegasus b g w)
(do (sp w p) nil) ; If we succeed, then we fail - that's the point of AnyNot... and rollback (do (sp w p) nil) ; If we succeed, then we fail - that's the point of AnyNot... and rollback
(do (debug w "AnyNot MATCH:" (pr-str b) c) (m w) c)))); If we fail, then we accept the current char (do
#_(debug w "AnyNot MATCH:" (pr-str b) c)
(m w)
c)))); If we fail, then we accept the current char


(defn- try-char [n w] (defn- try-char [n w]
(if (= n (c w)) (if (= n (c w))
(do (debug w (str "MATCH: '" (pr-str n) "' with '" (pr-str (c w)) "'")) (do
#_(debug w (str "MATCH: '" (pr-str n) "' with '" (pr-str (c w)) "'"))
(m w)) (m w))
(do #_(debug w (str "FAIL: '" (pr-str n) "' with '" (pr-str (c w)) "'")) (do
#_(debug w (str "FAIL: '" (pr-str n) "' with '" (pr-str (c w)) "'"))
nil))) nil)))


(defn- peg-vec [n g w] (defn- peg-vec [n g w]
Expand All @@ -92,32 +152,50 @@
(if (empty? remaining) (if (empty? remaining)
result result
(let [temp (pegasus (first remaining) g w)] (let [temp (pegasus (first remaining) g w)]
(if temp (recur (rest remaining) (if temp
(conj result temp)) (recur (rest remaining)
(do (sp w p) (conj result temp))
nil))))))) (do
(sp w p)
nil)))))))


(defn- typed-list [n g w] (defn- typed-list [n g w]
(let [t (first n) (let [t (first n)
b (second n) b (second n)
result (cond (= t '|) (let [temp (either n g w)] #_(debug w "Either returning:" (pr-str temp)) temp) result (cond (= t '|) (let [temp (either n g w)]
(= t '%) (any-not b g w) #_(debug w "Either returning:" (pr-str temp))
(= t '*) (doall (take-while #(if (keyword? b) (b %) %) temp)
(repeatedly #(pegasus b g w)))) (= t '%) (any-not b g w)
(ifn? t) (t (pegasus (if (symbol? b) (rest n) b) g w)))] (= t '*) (doall (take-while #(if (keyword? b)
(if (and (seq? result) (= 1 (count result))) (b %)
%)
(repeatedly #(pegasus b g w))))
(= t 'a) (b g w (pegasus (first (rest (rest n))) g w))
(= t 'f) (b (pegasus (first (rest (rest n))) g w)))]
(if (and
(seq? result)
(nil? (seq (rest result)))
(not (nil? (first result)))
)
(first result) (first result)
result))) result)))


(defn- p [w s n] (debug w s (pr-str n))) (defn- p [w s n] #_(debug w s (pr-str n)))
(defn- fp [w s n] (dosync (ref-set *debug* true)) (p w "c:" n) (dosync (ref-set *debug* false))) (defn- fp [w s n]
(dosync (ref-set *debug* true))
(p w "c:" n)
(dosync (ref-set *debug* false)))


; Accept a 'debug limit' - if 0 then always dump everything. If more than 0, only keep limit number of lines of debug and print out at end
(defn pegasus [n g w] (defn pegasus [n g w]
(in w) (in w)
(when (keyword? n) (dosync (ref-set *currentK* n))) (when (keyword? n) (dosync (ref-set *currentK* n)))
(let [result (cond (let [result (cond
(keyword? n)(do (p w "k:" n) (let [temp (pegasus (n g) g w)] (if temp {n temp} nil))) (keyword? n)(do (p w "k:" n)
(when (nil? (n g)) (throw (Error. (str "Keyword '" n "' does not exist in grammar"))))
(let [temp (pegasus (n g) g w)]
(if temp
{n temp}
nil)))
(vector? n) (do #_(p w "v:" n) (peg-vec n g w)) (vector? n) (do #_(p w "v:" n) (peg-vec n g w))
(list? n) (do #_(p w "l:" n) (typed-list n g w)) (list? n) (do #_(p w "l:" n) (typed-list n g w))
(char? n) (do #_(p w "c:" n) (try-char n w)) (char? n) (do #_(p w "c:" n) (try-char n w))
Expand All @@ -126,57 +204,25 @@
(de w) (de w)
result)) result))


; If pegasus is given a keyword, but it doesn't exist in the given grammar, a useful error should be thrown (defn validate
(defn validate ([g] (validate g false)) ([g] (validate g false))
([g d] ([g d]
(dosync (ref-set *debug* d)) (dosync (ref-set *debug* d))
(let [w (gen-ps (pr-str g))] (let [w (gen-ps (pr-str g))
(if (or (nil? (pegasus :Grammar grammar-grammar w)) temp (pegasus :Grammar grammar-grammar w)
(not (end w))) r (or (nil? temp)
(println "Fail") (not (end w)))]
(println "Pass"))) (dosync (ref-set *debug* false))
(dosync (ref-set *debug* false)))) [r, temp])))


(defn vectors-reset-pos [] (defn self-check [] (validate grammar-grammar))
(let [g {:S [(list '* (list '% (pegs "}}}"))) (pegs "}}}")]} (defn self-ast []
i "a}}b}}}" (dosync (ref-set *debug* false))
r (pegasus :S g (gen-ps i))] (let [r (pr-str (pegasus
(when (not= '{:S [(\a \} \} \b) [\} \} \}]]} r) :Grammar
(throw (Error. "Failed Vectors are not resetting the pos."))) grammar-grammar
(println "Pass"))) (gen-ps (pr-str grammar-grammar))))]

(dosync (ref-set *debug* false))
(defn collapse-pegs [] r)
(let [custom-collapse #(apply str %)
g {:S [(list custom-collapse (pegs "abcabc"))]}
i "abcabc"
r (pegasus :S g (gen-ps i))]
(when (not= '{:S ["abcabc"]} r)
(throw (Error. (str "pegs didn't collapse: " r))))
(println "Pass")))

(defn collapse-lpegs []
(let [custom-collapse #(apply str %)
g {:S [(list custom-collapse '* (lpegs '| "abc"))]}
i "aabbcc"
r (pegasus :S g (gen-ps i))]
(when (not= '{:S ["aabbcc"]} r)
(throw (Error. (str "lpegs didn't collapse: " r))))
(println "Pass")))

(defn collapse-keywords []
(let [custom-collapse (fn [r] (apply str (map #(first (vals %)) r)))
g {:S [(list custom-collapse '* '(| :A :B :C))] :A \a :B \b :C \c }
i "aabbcc"
r (pegasus :S g (gen-ps i))]
(when (not= '{:S ["aabbcc"]} r)
(throw (Error. (str "keywords didn't collapse: " r))))
(println "Pass")))

(defn self-check []
(validate grammar-grammar)
(vectors-reset-pos)
(collapse-lpegs)
(collapse-keywords)
(collapse-pegs)
) )


65 changes: 64 additions & 1 deletion test/com/lithinos/amotoen/test/core.clj
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -38,6 +38,69 @@ a")))
a"))))) a")))))


(defn test-grammars []
(doseq [g [ {:S \a}
{:S (pegs "}}}")}
{:S (list '% (pegs "}}}"))}
{:S (list '* (list '% (pegs "}}}")))}]]
(when (not (first (validate g))) (throw (Error. (str "Invalid grammar: " (pr-str g)))))))

(defn vectors-reset-pos []
(let [g {:S [(list '* (list '% (pegs "}}}"))) (pegs "}}}")]}
i "a}}b}}}"
r (pegasus :S g (gen-ps i))]
(when (not= '{:S [(\a \} \} \b) [\} \} \}]]} r) (throw (Error. "Failed Vectors are not resetting the pos.")))
true))

(defn collapse-pegs []
(let [custom-collapse #(apply str %)
g {:S [(list 'f custom-collapse (pegs "abcabc"))]}
i "abcabc"
r (pegasus :S g (gen-ps i))]
(when (not= '{:S ["abcabc"]} r) (throw (Error. (str "pegs didn't collapse: " r))))
true))

(defn collapse-lpegs []
(let [custom-collapse #(apply str %)
g {:S [(list 'f custom-collapse (list '* (lpegs '| "abc")))]}
i "aabbcc"
r (pegasus :S g (gen-ps i))]
(when (not= '{:S ["aabbcc"]} r) (throw (Error. (str "lpegs didn't collapse: " r))))
true))

(defn collapse-keywords []
(let [custom-collapse (fn [r] (apply str (map #(first (vals %)) r)))
g {:S [(list 'f custom-collapse '(* (| :A :B :C)))] :A \a :B \b :C \c }
i "aabbcc"
r (pegasus :S g (gen-ps i))]
(when (not= '{:S ["aabbcc"]} r) (throw (Error. (str "keywords didn't collapse: " r))))
true))

;(dosync (ref-set *debug* true)) ;(dosync (ref-set *debug* true))
(println "Single run") (time (self-check)) (println "Single run") (time (self-check))
;(println "20 runs") (time (doall (take 20 (repeatedly #(self-check))))) ;(println "Single run" (self-check))
;(println "Dump" (time (self-ast)))
;(println "10 runs") (time (doall (take 10 (repeatedly #(self-check)))))
(println "40 runs") (time (doall (take 40 (repeatedly #(self-check)))))

;(println "start")
;(let [i (ref 0) j (ref 0)]
; (println (first (keep #(do (dosync (alter i inc)) (if (even? %) :a nil)) [1 1 1 1 2 1 1 1 2])))
; (println (loop []
; (dosync (alter j inc))
; (when (< (rand-int 100) 90)
; (recur))))
; (println "end: " @i @j))


;(test-grammars)
;(vectors-reset-pos)
;(collapse-lpegs)
;(collapse-keywords)
;(collapse-pegs)

#_(try
(pegasus :S {:A :B} (gen-ps "fail"))
(throw (Error. "A useful error should be thrown when a keyword doesn't exist in a grammar"))
(catch Error e))

0 comments on commit 9151e17

Please sign in to comment.