Permalink
Browse files

Add lazy lucene analyzer.

  • Loading branch information...
1 parent c5a0d07 commit 015d61f604fbf57c54cced9bdf69781a2b6e5510 @drewr drewr committed Sep 28, 2011
Showing with 23 additions and 12 deletions.
  1. +10 −2 src/esperanto/lucene.clj
  2. +13 −10 test/esperanto/test/lucene.clj
View
@@ -4,8 +4,7 @@
(org.apache.lucene.document Document Field
Field$Store Field$Index)
(org.apache.lucene.index IndexWriter IndexWriter$MaxFieldLength
- Term)
- (org.apache.lucene.util Version)))
+ Term)))
(defn index [writer fields]
(let [doc (Document.)]
@@ -21,3 +20,12 @@
(recur st (conj! v (.term term)))
(persistent! v)))))
+(defn token-seq
+  "Returns a lazy seq of the term strings produced by running `analyzer`
+  over the java.io.Reader `rdr`.
+
+  Tokens are pulled from the underlying TokenStream one at a time, only as
+  the seq is realized. When the stream reports no more tokens it is ended
+  and closed. A seq that is never realized to its end leaves the stream
+  open; callers that stop early are responsible for that cleanup."
+  [analyzer rdr]
+  (let [stream (.tokenStream analyzer "field" rdr)
+        term   (.addAttribute stream TermAttribute)
+        step   (fn step []
+                 (lazy-seq
+                   (if (.incrementToken stream)
+                     ;; `term` is a single attribute object that the stream
+                     ;; updates in place, so read its text now, before the
+                     ;; next incrementToken call overwrites it.
+                     (cons (.term term) (step))
+                     ;; Exhausted: finish and release the stream, then
+                     ;; return nil so the lazy seq terminates.
+                     (do (.end stream)
+                         (.close stream)
+                         nil))))]
+    (step)))
+
@@ -1,10 +1,14 @@
(ns esperanto.test.lucene
(:use [esperanto.lucene] :reload)
(:use [clojure.test])
- (:import (java.io File)))
+ (:import (java.io File StringReader)
+ (org.apache.lucene.analysis.standard StandardAnalyzer)
+ (org.apache.lucene.util Version)))
(def ^:dynamic *dir* nil)
+(def text "The quick brown fox jumps over the lazy dog")
+
(defn dir-fixture [f]
(binding [*dir* (-> (File. "tmp")
(File.
@@ -18,12 +22,11 @@
(use-fixtures :each dir-fixture)
-(deftest t-index-document-1
- (println "****" (str *dir*)))
-
-(deftest t-index-document-2
- (println "****" (str *dir*)))
-
-(deftest t-index-document-3
- (println "****" (str *dir*)))
-
+;; The eager `analyze` and the lazy `token-seq` must yield the same terms
+;; for the sample sentence in `text`.
+(deftest t-analyze
+  (let [expected     ["quick" "brown" "fox" "jumps" "over" "lazy" "dog"]
+        ;; each assertion gets its own freshly constructed analyzer
+        new-analyzer (fn [] (StandardAnalyzer. Version/LUCENE_31))
+        eager        (analyze (new-analyzer) text)
+        lazy         (token-seq (new-analyzer) (StringReader. text))]
+    (is (= expected eager))
+    (is (= expected lazy))))

0 comments on commit 015d61f

Please sign in to comment.