Permalink
Browse files

Quick and dirty update to Lucene 3.3

  • Loading branch information...
1 parent 173e61f commit 1268840c9421bf53c4c06f43f3c84dfbeec5bfc3 @markrmiller committed Jul 19, 2011
View
@@ -1,6 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
- <classpathentry kind="src" path="src"/>
+ <classpathentry kind="src" path="src/test"/>
+ <classpathentry kind="src" path="src/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
+ <classpathentry kind="lib" path="lib/lucene-core-3.3.0.jar" sourcepath="/branch_3x"/>
+ <classpathentry kind="lib" path="lib/lucene-spellchecker-3.3.0.jar" sourcepath="/branch_3x"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3"/>
<classpathentry kind="output" path="bin"/>
</classpath>
View
@@ -133,7 +133,7 @@
<fileset dir="${build.classes.dir}" includes="**/*.properties" />
<manifest>
<attribute name="Main-Class" value="com.mhs.qsol.QsolParser" />
- <attribute name="Class-Path" value="lib/lucene-core-2.1-dev.jar" />
+ <attribute name="Class-Path" value="lib/lucene-core-3.3.0.jar" />
</manifest>
</jar>
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -21,8 +21,8 @@
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeFilter;
import java.text.DateFormat;
import java.text.ParseException;
@@ -53,7 +53,7 @@ public Query buildDateQuery(String field, String date, Locale locale) {
throw new QsolParseException(e);
}
- final Filter filter = RangeFilter.Less(field, DateTools.dateToString(
+ final Filter filter = TermRangeFilter.Less(field, DateTools.dateToString(
date1, Resolution.DAY));
return new ConstantScoreQuery(filter);
@@ -66,7 +66,7 @@ public Query buildDateQuery(String field, String date, Locale locale) {
throw new QsolParseException("Could not parse date", e);
}
- final Filter filter = RangeFilter.More(field, DateTools.dateToString(
+ final Filter filter = TermRangeFilter.More(field, DateTools.dateToString(
date1, Resolution.DAY));
return new ConstantScoreQuery(filter);
@@ -84,7 +84,7 @@ public Query buildDateQuery(String field, String date, Locale locale) {
if ((date1 != null) && (date2 != null)) {
}
- return new ConstantScoreQuery(new RangeFilter(field, DateTools
+ return new ConstantScoreQuery(new TermRangeFilter(field, DateTools
.dateToString(date1, Resolution.DAY), DateTools.dateToString(date2,
Resolution.DAY), true, true));
} else {
@@ -15,22 +15,11 @@
*/
package com.mhs.qsol;
-import com.mhs.qsol.queryparser.QueryParser;
-import com.mhs.qsol.queryparser.QueryParserConstants;
-import com.mhs.qsol.syntaxtree.Node;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.store.Directory;
-
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
-
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -41,6 +30,16 @@
import java.util.logging.LogManager;
import java.util.regex.Pattern;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
+
+import com.mhs.qsol.queryparser.QueryParser;
+import com.mhs.qsol.queryparser.QueryParserConstants;
+import com.mhs.qsol.syntaxtree.Node;
+
/**
* Converts any valid Qsol syntax into an Apache Lucene Query object.
* <p>
@@ -185,7 +184,7 @@ public static void main(String[] args) {
Query result = null;
try {
- result = parser.parse("allFields", query, new StandardAnalyzer());
+ result = parser.parse("allFields", query, new StandardAnalyzer(Version.LUCENE_33));
} catch (QsolSyntaxException e) {
System.out.println("syntax exception:");
e.printStackTrace();
@@ -32,16 +32,18 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
@@ -437,7 +439,7 @@ public Query visit(SearchToken n, Query query) {
term2 = term2.toLowerCase();
}
- return new ConstantScoreRangeQuery(field, term1, term2, inclusive1,
+ return new TermRangeQuery(field, term1, term2, inclusive1,
inclusive2);
} else {
throw new RuntimeException(
@@ -664,9 +666,15 @@ protected Query tokenToQuery(String token) {
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
+ CharTermAttribute charTermAtrib = source.getAttribute(CharTermAttribute.class);
+ OffsetAttribute offsetAtrib = source.getAttribute(OffsetAttribute.class);
+
while (true) {
try {
- t = source.next();
+ if (!source.incrementToken()) {
+ break;
+ }
+ t = new Token(charTermAtrib.buffer(), 0, charTermAtrib.length(), offsetAtrib.startOffset(), offsetAtrib.endOffset());
} catch (IOException e) {
t = null;
}
@@ -697,7 +705,7 @@ protected Query tokenToQuery(String token) {
t = v.get(0);
- TermQuery termQuery = new TermQuery(new Term(field, t.termText()));
+ TermQuery termQuery = new TermQuery(new Term(field, new String(t.buffer(), 0, t.length())));
termQuery.setBoost(this.boost);
return termQuery;
@@ -711,7 +719,7 @@ protected Query tokenToQuery(String token) {
t = v.get(i);
TermQuery currentQuery = new TermQuery(
- new Term(field, t.termText()));
+ new Term(field, new String(t.buffer(), 0, t.length())));
currentQuery.setBoost(this.boost);
q.add(currentQuery, BooleanClause.Occur.SHOULD);
@@ -725,8 +733,8 @@ protected Query tokenToQuery(String token) {
// TODO: handle this?
// if (t.getPositionIncrement() == 0) {
// }
- SpanQuery termQuery = new SpanTermQuery(new Term(field, v.get(i)
- .termText()));
+ Token t2 = v.get(i);
+ SpanQuery termQuery = new SpanTermQuery(new Term(field, new String(t2.buffer(), 0, t2.length())));
termQuery.setBoost(this.boost);
clauses.set(i, termQuery);
}
@@ -740,8 +748,8 @@ protected Query tokenToQuery(String token) {
SpanTermQuery[] clauses = new SpanTermQuery[v.size()];
for (int i = 0; i < v.size(); i++) {
- SpanTermQuery spanQuery = new SpanTermQuery(new Term(field, v.get(i)
- .termText()));
+ Token t2 = v.get(i);
+ SpanTermQuery spanQuery = new SpanTermQuery(new Term(field, new String(t2.buffer(), 0, t2.length())));
spanQuery.setBoost(boost);
clauses[i] = spanQuery;
}
@@ -2,6 +2,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
@@ -94,12 +95,13 @@ private String getTerm(String term) {
return "";
}
TokenStream source = analyzer.tokenStream("", new StringReader(term));
+ CharTermAttribute charTermAtrib = source.getAttribute(CharTermAttribute.class);
String anaTerm = null;
try {
- anaTerm = source.next().termText();
-
- if (source.next() != null) {
+ source.incrementToken();
+ anaTerm = charTermAtrib.toString();
+ if (source.incrementToken()) {
return term;
}
} catch (IOException e2) {
@@ -1,62 +0,0 @@
-package com.mhs.qsol.filters;
-
-/**
- * Copyright 2006 Mark Miller (markrmiller@gmail.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.WildcardTermEnum;
-
-import java.io.IOException;
-
-import java.util.BitSet;
-
-public class WildcardFilter extends Filter {
- private Term term;
-
- public WildcardFilter(Term term) {
- this.term = term;
- }
-
- @Override
- public BitSet bits(IndexReader reader) throws IOException {
- BitSet bits = new BitSet(reader.maxDoc());
- WildcardTermEnum enumerator = new WildcardTermEnum(reader, term);
- TermDocs termDocs = reader.termDocs();
-
- try {
- do {
- Term term = enumerator.term();
-
- if (term != null) {
- termDocs.seek(term);
-
- while (termDocs.next()) {
- bits.set(termDocs.doc());
- }
- } else {
- break;
- }
- } while (enumerator.next());
- } finally {
- termDocs.close();
- enumerator.close();
- }
-
- return bits;
- }
-}
@@ -15,6 +15,30 @@
*/
package com.mhs.qsol.proximity;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+
import com.mhs.qsol.QsolParseException;
import com.mhs.qsol.QsolParser.Operator;
import com.mhs.qsol.QsolToQueryVisitor;
@@ -47,29 +71,6 @@
import com.mhs.qsol.syntaxtree.SearchToken;
import com.mhs.qsol.visitor.GJDepthFirst;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
public class ProximityVisitor extends GJDepthFirst<Query, Query> {
private static final Pattern BOOST_EXTRACTOR = Pattern
.compile("^(.*?)\\^(\\d+(?:\\.\\d+)?)$");
@@ -463,14 +464,19 @@ protected Query tokenToQuery(String token) {
token = removeEscapeChars(token);
TokenStream source = analyzer.tokenStream(field, new StringReader(token));
+ CharTermAttribute charTermAtrib = source.getAttribute(CharTermAttribute.class);
+ OffsetAttribute offsetAtrib = source.getAttribute(OffsetAttribute.class);
ArrayList<Token> v = new ArrayList<Token>();
Token t;
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
while (true) {
try {
- t = source.next();
+ if (!source.incrementToken()) {
+ break;
+ }
+ t = new Token(charTermAtrib.buffer(), 0, charTermAtrib.length(), offsetAtrib.startOffset(), offsetAtrib.endOffset());
} catch (IOException e) {
t = null;
}
@@ -498,7 +504,7 @@ protected Query tokenToQuery(String token) {
return null;
} else if (v.size() == 1) {
t = v.get(0);
- SpanTermQuery stq = new SpanTermQuery(new Term(field, t.termText()));
+ SpanTermQuery stq = new SpanTermQuery(new Term(field, new String(t.buffer(), 0, t.length())));
stq.setBoost(this.boost);
return stq;
} else {
@@ -522,8 +528,8 @@ protected Query tokenToQuery(String token) {
// TODO: handle this?
// if (t.getPositionIncrement() == 0) {
// }
- clauses.set(i, new SpanTermQuery(new Term(field, v.get(i)
- .termText())));
+ Token t2 = v.get(i);
+ clauses.set(i, new SpanTermQuery(new Term(field, new String(t2.buffer(), 0, t2.length()))));
}
SpanNearQuery query = new SpanNearQuery((SpanQuery[]) clauses
@@ -533,9 +539,10 @@ protected Query tokenToQuery(String token) {
}
} else {
SpanTermQuery[] clauses = new SpanTermQuery[v.size()];
-
+
for (int i = 0; i < v.size(); i++) {
- clauses[i] = new SpanTermQuery(new Term(field, v.get(i).termText()));
+ Token t2 = v.get(i);
+ clauses[i] = new SpanTermQuery(new Term(field, new String(t2.buffer(), 0, t2.length())));
}
SpanNearQuery query = new SpanNearQuery(clauses, slop, true);
Oops, something went wrong.

0 comments on commit 1268840

Please sign in to comment.