diff --git a/benchmark/clearley/benchmark/json/small-test-formatted.json b/benchmark/clearley/benchmark/json/small-test-formatted.json new file mode 100644 index 0000000..fcd0846 --- /dev/null +++ b/benchmark/clearley/benchmark/json/small-test-formatted.json @@ -0,0 +1,77 @@ + + { + "id_str": "150739698292244480", + "entities": { + "hashtags": [], + "urls": [ + { + "url": "http://t.co/sEXkB2zC", + "indices": [ + 76, + 96 + ], + "expanded_url": "http://fb.me/XIaFd0yI", + "display_url": "fb.me/XIaFd0yI" + } + ], + "user_mentions": [] + }, + "in_reply_to_status_id": null, + "contributors": null, + "place": null, + "truncated": false, + "geo": null, + "favorited": false, + "created_at": "Sun Dec 25 00:48:39 +0000 2011", + "user": { + "id_str": "8394862", + "contributors_enabled": false, + "lang": "en", + "protected": false, + "url": "http://mrgray.com/$", + "profile_use_background_image": true, + "name": "mralexgray", + "default_profile_image": false, + "friends_count": 306, + "profile_text_color": "D95B43", + "statuses_count": 63, + "profile_background_image_url": "http://a2.twimg.com/profile_background_images/88810365/x86b7f9c12df0fa38ce1a4f29b759706.png", + "utc_offset": -18000, + "description": "Fierceness Incarnate", + "is_translator": false, + "created_at": "Fri Aug 24 01:00:57 +0000 2007", + "profile_link_color": "C02942", + "following": null, + "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/88810365/x86b7f9c12df0fa38ce1a4f29b759706.png", + "favourites_count": 4, + "follow_request_sent": null, + "geo_enabled": true, + "profile_background_color": "53777A", + "profile_background_tile": true, + "followers_count": 121, + "profile_image_url": "http://a2.twimg.com/profile_images/21449002/Yo.I.Ain_t.Barbie_normal.jpg", + "screen_name": "mralexgray", + "show_all_inline_media": true, + "profile_sidebar_fill_color": "ECD078", + "profile_image_url_https": "https://si0.twimg.com/profile_images/21449002/Yo.I.Ain_t.Barbie_normal.jpg", + 
"listed_count": 1, + "notifications": null, + "location": "NYC", + "id": 8394862, + "default_profile": false, + "verified": false, + "time_zone": "Quito", + "profile_sidebar_border_color": "542437" + }, + "in_reply_to_user_id": null, + "retweet_count": 0, + "in_reply_to_screen_name": null, + "in_reply_to_status_id_str": null, + "possibly_sensitive": false, + "retweeted": false, + "source": "Facebook", + "in_reply_to_user_id_str": null, + "coordinates": null, + "id": 150739698292244480, + "text": "let me put my hands together…. my thumbs were broken in an acting accident. http://t.co/sEXkB2zC" + } diff --git a/benchmark/clearley/benchmark/json/small-test.json b/benchmark/clearley/benchmark/json/small-test.json new file mode 100644 index 0000000..b99c45b --- /dev/null +++ b/benchmark/clearley/benchmark/json/small-test.json @@ -0,0 +1 @@ +{ "id_str": "150739698292244480", "entities": { "hashtags": [], "urls": [ { "url": "http://t.co/sEXkB2zC", "indices": [ 76, 96 ], "expanded_url": "http://fb.me/XIaFd0yI", "display_url": "fb.me/XIaFd0yI" } ], "user_mentions": [] }, "in_reply_to_status_id": null, "contributors": null, "place": null, "truncated": false, "geo": null, "favorited": false, "created_at": "Sun Dec 25 00:48:39 +0000 2011", "user": { "id_str": "8394862", "contributors_enabled": false, "lang": "en", "protected": false, "url": "http://mrgray.com/$", "profile_use_background_image": true, "name": "mralexgray", "default_profile_image": false, "friends_count": 306, "profile_text_color": "D95B43", "statuses_count": 63, "profile_background_image_url": "http://a2.twimg.com/profile_background_images/88810365/x86b7f9c12df0fa38ce1a4f29b759706.png", "utc_offset": -18000, "description": "Fierceness Incarnate", "is_translator": false, "created_at": "Fri Aug 24 01:00:57 +0000 2007", "profile_link_color": "C02942", "following": null, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/88810365/x86b7f9c12df0fa38ce1a4f29b759706.png", 
"favourites_count": 4, "follow_request_sent": null, "geo_enabled": true, "profile_background_color": "53777A", "profile_background_tile": true, "followers_count": 121, "profile_image_url": "http://a2.twimg.com/profile_images/21449002/Yo.I.Ain_t.Barbie_normal.jpg", "screen_name": "mralexgray", "show_all_inline_media": true, "profile_sidebar_fill_color": "ECD078", "profile_image_url_https": "https://si0.twimg.com/profile_images/21449002/Yo.I.Ain_t.Barbie_normal.jpg", "listed_count": 1, "notifications": null, "location": "NYC", "id": 8394862, "default_profile": false, "verified": false, "time_zone": "Quito", "profile_sidebar_border_color": "542437" }, "in_reply_to_user_id": null, "retweet_count": 0, "in_reply_to_screen_name": null, "in_reply_to_status_id_str": null, "possibly_sensitive": false, "retweeted": false, "source": "Facebook", "in_reply_to_user_id_str": null, "coordinates": null, "id": 150739698292244480, "text": "let me put my hands together…. my thumbs were broken in an acting accident. 
http://t.co/sEXkB2zC" }
\ No newline at end of file
diff --git a/benchmark/clearley/benchmark/json/test.clj b/benchmark/clearley/benchmark/json/test.clj
new file mode 100644
index 0000000..124fbe8
--- /dev/null
+++ b/benchmark/clearley/benchmark/json/test.clj
@@ -0,0 +1,28 @@
+(ns clearley.benchmark.json.test
+  "Benchmarks the example JSON parser against the bundled JSON fixtures."
+  (:require [clojure.java.io :as io]
+            [clearley.examples.json :as json])
+  (:use [clearley core]
+        [criterium core]))
+
+; Resource directory (relative to the classpath root) holding the fixtures
+(def prefix "clearley/benchmark/json/")
+
+(defn bench-parser
+  "Slurps the resource named prefix + filename into a string, prints its
+  character count, then benchmarks parsing it with json/json-parser."
+  [filename]
+  (println "Loading" filename "into memory")
+  (let [loaded-file (-> (str prefix filename)
+                      io/resource io/reader slurp)]
+    (println "Chars loaded:" (count loaded-file))
+    (println "Benchmarking")
+    (bench (parse json/json-parser loaded-file))))
+
+(defn -main
+  "Benchmarks the single-line fixture first, then the pretty-printed one."
+  [& args]
+  (bench-parser "small-test.json")
+  ; With no lookahead, each consecutive space adds O(1) ambiguity
+  (bench-parser "small-test-formatted.json")
+  )
diff --git a/project.clj b/project.clj
index f106878..117b162 100644
--- a/project.clj
+++ b/project.clj
@@ -2,7 +2,11 @@
   :description "FIXME: write description"
   :dependencies [[org.clojure/clojure "1.4.0"]
                  [org.clojure/math.numeric-tower "0.0.1" :scope "test"]
-                 [com.stuartsierra/lazytest "2.0.0-SNAPSHOT" :scope "test"]]
-  :profiles {:bigtest {:test-paths ["bigtest"]}}
+                 [com.stuartsierra/lazytest "2.0.0-SNAPSHOT" :scope "test"]
+                 [criterium "0.3.1" :scope "test"]]
+  :profiles {:bigtest {:test-paths ["bigtest"]}
+             :benchmark {:source-paths ["benchmark" "test"]
+                         :resource-paths ["benchmark"]
+                         :main clearley.benchmark.json.test}}
   :repositories {"stuartsierra-releases" "http://stuartsierra.com/maven2"
                  "stuartsierra-snapshots" "http://stuartsierra.com/m2snapshots"})
diff --git a/src/clearley/earley.clj b/src/clearley/earley.clj
index 5f5ce1a..afe4644 100644
--- a/src/clearley/earley.clj
+++ b/src/clearley/earley.clj
@@ -121,9 +121,13 @@
 (defn reduce-ostream [ostream]
   (first (reduce reduce-ostream-helper '() ostream)))
 
+#_(defn parse [input-str grammar tokenizer goal]
+  (npda/run-automaton-2 (new-item-set [(new-item ::goal goal)] grammar)
+                        input-str tokenizer))
+
 (defn parse-charts [input-str grammar tokenizer goal]
   (npda/run-automaton (new-item-set [(new-item ::goal goal)] grammar)
-  input-str tokenizer))
+                      input-str tokenizer))
 
 ; Searches states for completed parse of the goal rule, returning all matches
 (defn scan-goal [chart]