diff --git a/README.md b/README.md index b9cb198..cb9e9ec 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,65 @@ # Clearley -A flexible, programmable parser library in Clojure. +The better, easier way to parse, in Clojure. There's an Earley parser under the hood, +hence the name Clearley. +Clearley fits naturally in Clojure and handles all context-free grammars, +no strings attached. Just define your parse rules and go. + +## Example + +Here's a simple calculator written in Clearley. Parse rules and actions +are defined together in a style similar to defn. + +```clojure +(use 'clearley.core) + +(defrule sum + ([sum \+ term] (+ sum term)) ; left associative + ([sum \- term] (- sum term)) + ([term] term)) +(defrule term + ([term \* number] (* term number)) + ([term \/ number] (/ term number)) + ([number] number)) +(defrule number + ([\- number] (- number)) + ([number digit] (+ (* 10 number) digit)) + ([digit] digit)) +; The below converts a char digit to a Clojure number +(def digit (token-range \0 \9 (fn [c] (- (int c) (int \0))))) + +(def my-calculator (build-parser sum)) + +user=> (execute my-calculator "1+1") +2 +``` + +More examples can be found in test/clearley/examples. + +## Usage + +Clearley is alpha software. All the base planned functionality of a context-free +grammar parser is there. The next steps are to work on documentation, +verify edge cases, and work on internals. + +Documentation coming soon. + +## Why yet another parser library? + +Clearley arose when a few projects I'd worked on, and planned to work on in the future, +needed a parser. Unfortunately, parsing is "the solved problem that isn't"[1]. +The world of parsers is a hazardous one, full of booby traps like +left-recursion, "shift-reduce conflicts", lookahead sets, noncomposability of grammars; +and impeding you as you navigate this minefield are requirements to handle things +like tokenization, AST processing, and if you're unlucky a multi-stage build process. 
+This makes it difficult to use parsers in an exploratory, flexible, agile manner. +Yet the theory of context-free grammars is simple and provably tractable by computer-- +so why should parsing be hard? + +## References + +[1] "Parsing: the solved problem that isn't". Laurence Tratt. Retrieved June 18, 2012 at +. ## License diff --git a/TODO.md b/TODO.md index de90e93..96c389f 100644 --- a/TODO.md +++ b/TODO.md @@ -2,11 +2,11 @@ Alpha: -* Flesh out defrule unit tests. -* Less verbose tokenization and character classes. Top-down tokenizers. +* Edge case examination. Beta: +* Better error reporting for user parsers. * Grammar composition. * JSON example. * Clean up the software internals. diff --git a/src/clearley/core.clj b/src/clearley/core.clj index fb15edf..17192e2 100644 --- a/src/clearley/core.clj +++ b/src/clearley/core.clj @@ -2,8 +2,9 @@ "A generalized context-free grammar parser. It will accept any seq of inputs, not just text, and parse any context-free grammar. Emphasis is on ease of use, versatility, and dynamic/exploratory programming." - (:require (clojure string)) - (:use (clearley utils rules))) + (require [clojure string] + [clojure.pprint :as pp]) + (use [clearley utils rules])) ; TODO: empty rule? ; TODO: get rid of this protocol? @@ -19,6 +20,14 @@ (rule-str [_] (separate-str clauses " "))) +#_(defmethod clojure.core/print-method clearley.core.RuleImpl [rule writer] + (.write writer (rule-str rule))) + +(defmethod clojure.pprint/simple-dispatch clearley.rules.Rule [rule] + (clojure.pprint/write-out (rule-str rule))) + +(prefer-method clojure.pprint/simple-dispatch clearley.rules.Rule clojure.lang.IPersistentMap) + ; TODO: rule macro, rulefn fn (defn rule "Creates a rule associated with a parse action that can be called @@ -301,6 +310,11 @@ "was given " (count subactions)) e))))))) +(defn execute + "Parses some input and executes the parse actions." + [parser input] + (take-action (parse parser input))) + ; Defrule begins here. 
; TODO: experiment with using a parser for defrule ; instead of all these macro helpers... would make a convincing POC for earley parsing! diff --git a/test/clearley/examples/calculator.clj b/test/clearley/examples/calculator.clj index 36d5b27..1eb7d31 100644 --- a/test/clearley/examples/calculator.clj +++ b/test/clearley/examples/calculator.clj @@ -1,7 +1,5 @@ (ns clearley.examples.calculator (use clearley.core - clearley.test.utils - lazytest.deftest clojure.math.numeric-tower)) (defrule sum @@ -25,13 +23,15 @@ (defrule parenexpr ([\( sum \)] sum)) (defrule number - ; TODO: error when it's numexpr not number? + ; TODO: explore error when given numexpr ([number digit] (+ (* 10 number) digit)) ([digit] digit)) (def digit (token-range \0 \9 (fn [c] (- (int c) (int \0))))) (def my-calculator (build-parser sum)) +(use 'lazytest.deftest 'clearley.test.utils) + (deftest simple-calculator-test (with-parser my-calculator (is-action 1 "1") diff --git a/test/clearley/examples/json.clj b/test/clearley/examples/json.clj index 3ca16e6..164a3e4 100644 --- a/test/clearley/examples/json.clj +++ b/test/clearley/examples/json.clj @@ -191,9 +191,9 @@ (is-action [1 [2]] "[1,[2]]") (is-action [[1] [2]] "[[1],[2]]") (is-action [1 true "yo" [2 3]] "[1,true,\"yo\" ,[2,3]]")) -; TODO: why doesn't this one work? HULK SMASH! - ;(is-action [1 true "yo" [2 3]] " [1,true ,\"yo\" , [ 2,3]] ")) +; (is-action [1 true "yo" [2 3]] " [1,true ,\"yo\" , [ 2,3]] ")) +; Only Objects are valid json parses. 
(def-parser-test json-test json-parser (not-parsing "1") (not-parsing "true") @@ -205,4 +205,4 @@ (is-action {:a 1 :b 2} "{\"a\" : 1, \"b\" : 2}") (is-action {:a 1 :b true :c "3"} "{\"a\" : 1, \"b\" : true, \"c\" : \"3\"}")) -; TODO fix the brokenness when vectors are embedded +; TODO fix the vector embedding reduction case diff --git a/test/clearley/examples/simplest_calculator.clj b/test/clearley/examples/simplest_calculator.clj index ca31ad2..bbf2b45 100644 --- a/test/clearley/examples/simplest_calculator.clj +++ b/test/clearley/examples/simplest_calculator.clj @@ -13,9 +13,8 @@ ([\- number] (- number)) ([number digit] (+ (* 10 number) digit)) ([digit] digit)) -; todo: 'or-token' or something instead -(defrule digit [(a-digit (map (comp token char) (range (int \0) (inc (int \9)))))] - (- (int a-digit) (int \0))) +; The below converts a char digit to a Clojure number +(def digit (token-range \0 \9 (fn [c] (- (int c) (int \0))))) (def my-calculator (build-parser sum))