+ Log.Debug( m=>m("result is {0}", random.NextDouble()) );
 + Log.Debug(delegate(FormatMessageHandler m) { m("result is {0}", random.NextDouble()); });
+
+
+ // configure for capturing
+ CapturingLoggerFactoryAdapter adapter = new CapturingLoggerFactoryAdapter();
+ LogManager.Adapter = adapter;
+
+ // reset capture state
+ adapter.Clear();
+ // log something
+ ILog log = LogManager.GetCurrentClassLogger();
+ log.DebugFormat("Current Time:{0}", DateTime.Now);
+
+ // check logged data
+ Assert.AreEqual(1, adapter.LoggerEvents.Count);
+ Assert.AreEqual(LogLevel.Debug, adapter.LastEvent.Level);
+
+
+ <system.diagnostics>
+ <sharedListeners>
+ <add name="Diagnostics"
+ type="Common.Logging.Simple.CommonLoggingTraceListener, Common.Logging"
+ initializeData="DefaultTraceEventType=Information; LoggerNameFormat={listenerName}.{sourceName}">
+ <filter type="System.Diagnostics.EventTypeFilter" initializeData="Information"/>
+ </add>
+ </sharedListeners>
+ <trace>
+ <listeners>
+ <add name="Diagnostics" />
+ </listeners>
+ </trace>
+ </system.diagnostics>
+
 + Primary purpose of this method is to allow us to parse and load configuration sections using the same API regardless of the .NET framework version.
 + Primary purpose of this method is to allow us to parse and load configuration sections using the same API regardless of the .NET framework version.
+
+ <configuration>
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" />
+ </sectionGroup>
+ </configSections>
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging">
+ <arg key="showLogName" value="true" />
 + <arg key="showDateTime" value="true" />
+ <arg key="level" value="ALL" />
+ <arg key="dateTimeFormat" value="yyyy/MM/dd HH:mm:ss:fff" />
+ </factoryAdapter>
+ </logging>
+ </common>
+ </configuration>
+
+
+
+ ILog log = LogManager.GetLogger(this.GetType());
+ ...
+ try
+ {
+ /* .... */
+ }
+ catch(Exception ex)
+ {
+ log.ErrorFormat("Hi {0}", ex, "dude");
+ }
+
+
+ The example below shows programmatic configuration of the underlying log system:
+
+
+ // create properties
+ NameValueCollection properties = new NameValueCollection();
+ properties["showDateTime"] = "true";
+
+ // set Adapter
+ Common.Logging.LogManager.Adapter = new
+ Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter(properties);
+
+
+
+ ILog log = LogManager.GetLogger(this.GetType());
+
+ log.DebugFormat("Hi {0}", "dude");
+
+
+ <configuration>
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" />
+ </sectionGroup>
+ </configSections>
+
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging">
+ <arg key="level" value="DEBUG" />
+ </factoryAdapter>
+ </logging>
+ </common>
+ </configuration>
+
+
+ public class ConsoleOutLogger : AbstractSimpleLogger
+ {
+ public ConsoleOutLogger(string logName, LogLevel logLevel, bool showLevel, bool showDateTime,
+ bool showLogName, string dateTimeFormat)
+ : base(logName, logLevel, showLevel, showDateTime, showLogName, dateTimeFormat)
+ {
+ }
+
+ protected override void WriteInternal(LogLevel level, object message, Exception e)
+ {
+ // Use a StringBuilder for better performance
+ StringBuilder sb = new StringBuilder();
+ FormatOutput(sb, level, message, e);
+
+ // Print to the appropriate destination
+ Console.Out.WriteLine(sb.ToString());
+ }
+ }
+
+ public class ConsoleOutLoggerFactoryAdapter : AbstractSimpleLoggerFactoryAdapter
+ {
+ public ConsoleOutLoggerFactoryAdapter(NameValueCollection properties)
+ : base(properties)
+ { }
+
+ protected override ILog CreateLogger(string name, LogLevel level, bool showLevel, bool
+ showDateTime, bool showLogName, string dateTimeFormat)
+ {
+ ILog log = new ConsoleOutLogger(name, level, showLevel, showDateTime, showLogName,
+ dateTimeFormat);
+ return log;
+ }
+ }
+
+
+ <configuration>
+
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging"
+ type="Common.Logging.ConfigurationSectionHandler, Common.Logging"
+ requirePermission="false" />
+ </sectionGroup>
+ </configSections>
+
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging">
+ <arg key="level" value="ALL" />
+ </factoryAdapter>
+ </logging>
+ </common>
+
+ </configuration>
+
+
+ <configuration>
+
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging"
+ type="Common.Logging.ConfigurationSectionHandler, Common.Logging"
+ requirePermission="false" />
+ </sectionGroup>
+ </configSections>
+
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.NoOpLoggerFactoryAdapter, Common.Logging">
+ <arg key="level" value="ALL" />
+ </factoryAdapter>
+ </logging>
+ </common>
+
+ </configuration>
+
+
+ <configuration>
+
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging"
+ type="Common.Logging.ConfigurationSectionHandler, Common.Logging"
+ requirePermission="false" />
+ </sectionGroup>
+ </configSections>
+
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.TraceLoggerFactoryAdapter, Common.Logging">
+ <arg key="level" value="ALL" />
+ </factoryAdapter>
+ </logging>
+ </common>
+
+ </configuration>
+
+
+ Log.Debug( m=>m("result is {0}", random.NextDouble()) );
 + Log.Debug(delegate(FormatMessageHandler m) { m("result is {0}", random.NextDouble()); });
+
+
+ // configure for capturing
+ CapturingLoggerFactoryAdapter adapter = new CapturingLoggerFactoryAdapter();
+ LogManager.Adapter = adapter;
+
+ // reset capture state
+ adapter.Clear();
+ // log something
+ ILog log = LogManager.GetCurrentClassLogger();
+ log.DebugFormat("Current Time:{0}", DateTime.Now);
+
+ // check logged data
+ Assert.AreEqual(1, adapter.LoggerEvents.Count);
+ Assert.AreEqual(LogLevel.Debug, adapter.LastEvent.Level);
+
+
+ <system.diagnostics>
+ <sharedListeners>
+ <add name="Diagnostics"
+ type="Common.Logging.Simple.CommonLoggingTraceListener, Common.Logging"
+ initializeData="DefaultTraceEventType=Information; LoggerNameFormat={listenerName}.{sourceName}">
+ <filter type="System.Diagnostics.EventTypeFilter" initializeData="Information"/>
+ </add>
+ </sharedListeners>
+ <trace>
+ <listeners>
+ <add name="Diagnostics" />
+ </listeners>
+ </trace>
+ </system.diagnostics>
+
 + Primary purpose of this method is to allow us to parse and load configuration sections using the same API regardless of the .NET framework version.
 + Primary purpose of this method is to allow us to parse and load configuration sections using the same API regardless of the .NET framework version.
+
+ <configuration>
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" />
+ </sectionGroup>
+ </configSections>
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging">
+ <arg key="showLogName" value="true" />
 + <arg key="showDateTime" value="true" />
+ <arg key="level" value="ALL" />
+ <arg key="dateTimeFormat" value="yyyy/MM/dd HH:mm:ss:fff" />
+ </factoryAdapter>
+ </logging>
+ </common>
+ </configuration>
+
+
+
+ ILog log = LogManager.GetLogger(this.GetType());
+ ...
+ try
+ {
+ /* .... */
+ }
+ catch(Exception ex)
+ {
+ log.ErrorFormat("Hi {0}", ex, "dude");
+ }
+
+
+ The example below shows programmatic configuration of the underlying log system:
+
+
+ // create properties
+ NameValueCollection properties = new NameValueCollection();
+ properties["showDateTime"] = "true";
+
+ // set Adapter
+ Common.Logging.LogManager.Adapter = new
+ Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter(properties);
+
+
+
+ ILog log = LogManager.GetLogger(this.GetType());
+
+ log.DebugFormat("Hi {0}", "dude");
+
+
+ <configuration>
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" />
+ </sectionGroup>
+ </configSections>
+
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging">
+ <arg key="level" value="DEBUG" />
+ </factoryAdapter>
+ </logging>
+ </common>
+ </configuration>
+
+
+ public class ConsoleOutLogger : AbstractSimpleLogger
+ {
+ public ConsoleOutLogger(string logName, LogLevel logLevel, bool showLevel, bool showDateTime,
+ bool showLogName, string dateTimeFormat)
+ : base(logName, logLevel, showLevel, showDateTime, showLogName, dateTimeFormat)
+ {
+ }
+
+ protected override void WriteInternal(LogLevel level, object message, Exception e)
+ {
+ // Use a StringBuilder for better performance
+ StringBuilder sb = new StringBuilder();
+ FormatOutput(sb, level, message, e);
+
+ // Print to the appropriate destination
+ Console.Out.WriteLine(sb.ToString());
+ }
+ }
+
+ public class ConsoleOutLoggerFactoryAdapter : AbstractSimpleLoggerFactoryAdapter
+ {
+ public ConsoleOutLoggerFactoryAdapter(NameValueCollection properties)
+ : base(properties)
+ { }
+
+ protected override ILog CreateLogger(string name, LogLevel level, bool showLevel, bool
+ showDateTime, bool showLogName, string dateTimeFormat)
+ {
+ ILog log = new ConsoleOutLogger(name, level, showLevel, showDateTime, showLogName,
+ dateTimeFormat);
+ return log;
+ }
+ }
+
+
+ <configuration>
+
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging"
+ type="Common.Logging.ConfigurationSectionHandler, Common.Logging"
+ requirePermission="false" />
+ </sectionGroup>
+ </configSections>
+
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging">
+ <arg key="level" value="ALL" />
+ </factoryAdapter>
+ </logging>
+ </common>
+
+ </configuration>
+
+
+ <configuration>
+
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging"
+ type="Common.Logging.ConfigurationSectionHandler, Common.Logging"
+ requirePermission="false" />
+ </sectionGroup>
+ </configSections>
+
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.NoOpLoggerFactoryAdapter, Common.Logging">
+ <arg key="level" value="ALL" />
+ </factoryAdapter>
+ </logging>
+ </common>
+
+ </configuration>
+
+
+ <configuration>
+
+ <configSections>
+ <sectionGroup name="common">
+ <section name="logging"
+ type="Common.Logging.ConfigurationSectionHandler, Common.Logging"
+ requirePermission="false" />
+ </sectionGroup>
+ </configSections>
+
+ <common>
+ <logging>
+ <factoryAdapter type="Common.Logging.Simple.TraceLoggerFactoryAdapter, Common.Logging">
+ <arg key="level" value="ALL" />
+ </factoryAdapter>
+ </logging>
+ </common>
+
+ </configuration>
+
+ Please note: It is not guaranteed, that
+ Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+ field.setOmitNorms(true);
+ field.setOmitTermFreqAndPositions(true);
+ document.add(field);
+
+
+ For optimal performance, re-use the TokenStream and Field instance
+ for more than one document:
+
+
+ NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ Field field = new Field(name, stream);
+ field.setOmitNorms(true);
+ field.setOmitTermFreqAndPositions(true);
+ Document document = new Document();
+ document.add(field);
+
+ for(all documents) {
+ stream.setIntValue(value)
+ writer.addDocument(document);
+ }
+
+
+ This stream is not intended to be used in analyzers;
+ it's more for iterating the different precisions during
+ indexing a specific numeric value.
+
+ NOTE: as token streams are only consumed once
+ the document is added to the index, if you index more
+ than one numeric field, use a separate
+ PerFieldAnalyzerWrapper aWrapper =
+ new PerFieldAnalyzerWrapper(new StandardAnalyzer());
+ aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
+ aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+
+
+ In this example, StandardAnalyzer will be used for all fields except "firstname"
+ and "lastname", for which KeywordAnalyzer will be used.
+
+ A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+ and query parsing.
+
+ class MyAnalyzer extends Analyzer {
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+ return new PorterStemFilter(new LowerCaseTokenizer(reader));
+ }
+ }
+
+
+ TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
+ TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+ TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+ TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
+ source2.addSinkTokenStream(sink1);
+ source2.addSinkTokenStream(sink2);
+ TokenStream final1 = new LowerCaseFilter(source1);
+ TokenStream final2 = source2;
+ TokenStream final3 = new EntityDetect(sink1);
+ TokenStream final4 = new URLDetect(sink2);
+ d.add(new Field("f1", final1));
+ d.add(new Field("f2", final2));
+ d.add(new Field("f3", final3));
+ d.add(new Field("f4", final4));
+
+ In this example,
+ ...
+ TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream());
+ TokenStream final2 = source2.newSinkTokenStream();
+ sink1.consumeAllTokens();
+ sink2.consumeAllTokens();
+ ...
+
+ In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
+ Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+
+ return reusableToken.reinit(string, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+
+
+ public String toString() {
+ return "start=" + startOffset + ",end=" + endOffset;
+ }
+
+
+ This method may be overridden by subclasses.
+
+ public int hashCode() {
+ int code = startOffset;
+ code = code * 31 + endOffset;
+ return code;
+ }
+
+
+ see also
+ document.add(new NumericField(name).setIntValue(value));
+
+
+ For optimal performance, re-use the
+
+ NumericField field = new NumericField(name);
+ Document document = new Document();
+ document.add(field);
+
+ for(all documents) {
+ ...
+ field.setIntValue(value)
+ writer.addDocument(document);
+ ...
+ }
+
+
+ The .Net native types
+ boolean skipTo(int target) {
+ do {
+ if (!next())
+ return false;
+ } while (target > doc());
+ return true;
+ }
+
+ Some implementations are considerably more efficient than that.
+
+ java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+
+
+ IndexReader reader = ...
+ ...
+ IndexReader newReader = r.reopen();
+ if (newReader != reader) {
+ ... // reader was reopened
+ reader.close();
+ }
+ reader = newReader;
+ ...
+
+
+ Be sure to synchronize that code so that other threads,
+ if present, can never use reader after it has been
+ closed and before it's switched to newReader.
+
+ NOTE: If this reader is a near real-time
+ reader (obtained from This is called each time the writer completed a commit. + This gives the policy a chance to remove old commit points + with each commit.
+ +The policy may now choose to delete old commit points
+ by calling method
This method is only called when
Note: the last CommitPoint is the most recent one, + i.e. the "front index state". Be careful not to delete it, + unless you know for sure what you are doing, and unless + you can afford to lose the index content while doing that.
+
+ try {
+ writer.close();
+ } finally {
+ if (IndexWriter.isLocked(directory)) {
+ IndexWriter.unlock(directory);
+ }
+ }
+
+
+ after which, you must be certain not to use the writer
+ instance anymore.
+
+ NOTE: if this method hits an OutOfMemoryError
+ you should immediately close the writer, again. See above for details.
+
+
+ try {
+ writer.close();
+ } finally {
+ if (IndexWriter.isLocked(directory)) {
+ IndexWriter.unlock(directory);
+ }
+ }
+
+
+ after which, you must be certain not to use the writer
+ instance anymore.
+
+ NOTE: if this method hits an OutOfMemoryError
+ you should immediately close the writer, again. See above for details.
+
+
+ pq.top().change();
+ pq.updateTop();
+
+ instead of
+
+ o = pq.pop();
+ o.change();
+ pq.push(o);
+
+
+ // extends getSentinelObject() to return a non-null value.
+ PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits);
+ // save the 'top' element, which is guaranteed to not be null.
+ MyObject pqTop = pq.top();
+ <...>
+ // now in order to add a new element, which is 'better' than top (after
+ // you've verified it is better), it is as simple as:
 + pqTop.change();
+ pqTop = pq.updateTop();
+
+
+ NOTE: if this method returns a non-null value, it will be called by
+
+ Query ::= ( Clause )*
+ Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+
+
+
+ Examples of appropriately formatted queries can be found in the query syntax
+ documentation.
+
+
+
+ In
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ When you pass a boost (title=>5 body=>10) you can get
+
+
+
+ +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+ (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+
+
+
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+
+
+ The code above would construct a query:
+
+
+ (filename:query) +(contents:query) -(description:query)
+
+
+
+ String[] query = {"query1", "query2", "query3"};
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+
+
+ The code above would construct a query:
+
+
+ (filename:query1) +(contents:query2) -(description:query3)
+
+
+
+
|
+ |
+ + frequency½ + | +
+ |
+ + 1 + log ( + | +
+
|
+ + ) + | +
+ queryNorm(q) =
+ |
+
+
|
+
+ |
+ + ∑ + | ++ ( + idf(t) · + t.Boost + ) 2 + | +
+ | t in q | ++ |
+ norm(t,d) =
+ |
+ + ∏ + | +
+ |
+
+ | field f in d named as t | ++ |
+ idf(searcher.docFreq(term), searcher.MaxDoc);
+
+
+ Note that
+ int advance(int target) {
+ int doc;
+ while ((doc = nextDoc()) < target) {
+ }
+ return doc;
+ }
+
+
+ Some implementations are considerably more efficient than that.
+
 + NOTE: certain implementations may return a different value (each
+ time) if called several times in a row with the same target.
+
+ NOTE: this method may be called with
+ Searcher searcher = new IndexSearcher(indexReader);
+ final BitSet bits = new BitSet(indexReader.MaxDoc);
+ searcher.search(query, new Collector() {
+ private int docBase;
+
+ // ignore scorer
+ public void setScorer(Scorer scorer) {
+ }
+
+ // accept docs out of order (for a BitSet it doesn't matter)
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+
+ public void collect(int doc) {
+ bits.set(doc + docBase);
+ }
+
+ public void setNextReader(IndexReader reader, int docBase) {
+ this.docBase = docBase;
+ }
+ });
+
+
+ Not all collectors will need to rebase the docID. For
+ example, a collector that simply counts the total number
+ of hits would skip it.
+
+ NOTE: Prior to 2.9, Lucene silently filtered
+ out hits with score <= 0. As of 2.9, the core Collectors
+ no longer do that. It's very unusual to have such hits
+ (a negative query boost, or function query returning
+ negative custom scores, could cause it to happen). If
+ you need that behavior, use + ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ... ++
+ ModifiedScore = subQueryScore * valSrcScore ++
+ similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+ return (similarity > minimumSimilarity);
+ where distance is the Levenshtein distance for the two words.
+
 + Levenshtein distance (also known as edit distance) is a measure of similarity
+ between two strings where the distance is measured as the number of character
+ deletions, insertions or substitutions required to transform one string to
+ the other string.
+
+ PriorityQueue pq = new HitQueue(10, true); // pre-populate.
+ ScoreDoc top = pq.top();
+
+ // Add/Update one element.
+ top.score = 1.0f;
+ top.doc = 0;
+ top = (ScoreDoc) pq.updateTop();
+ int totalHits = 1;
+
+ // Now pop only the elements that were *truly* inserted.
+ // First, pop all the sentinel elements (there are pq.size() - totalHits).
+ for (int i = pq.size() - totalHits; i > 0; i--) pq.pop();
+
+ // Now pop the truly added elements.
+ ScoreDoc[] results = new ScoreDoc[totalHits];
+ for (int i = totalHits - 1; i >= 0; i--) {
+ results[i] = (ScoreDoc) pq.pop();
+ }
+
+
+ NOTE: This class pre-allocate a full array of
+ length
+ Filter f = NumericRangeFilter.newFloatRange("weight",
+ new Float(0.3f), new Float(0.10f),
+ true, true);
+
+
+ accepts all documents whose float valued "weight" field
+ ranges from 0.3 to 0.10, inclusive.
+ See
+ Query q = NumericRangeQuery.newFloatRange("weight",
+ new Float(0.3f), new Float(0.10f),
+ true, true);
+
+
+ matches all documents whose float valued "weight" field
+ ranges from 0.3 to 0.10, inclusive.
+
+ The performance of NumericRangeQuery is much better
+ than the corresponding Schindler, U, Diepenbroek, M, 2008. + Generic XML-based Framework for Metadata Portals. + Computers & Geosciences 34 (12), 1947-1955. + doi:10.1016/j.cageo.2008.02.023+ + A quote from this paper: Because Apache Lucene is a full-text + search engine and not a conventional database, it cannot handle numerical ranges + (e.g., field value is inside user defined bounds, even dates are numerical values). + We have developed an extension to Apache Lucene that stores + the numerical values in a special string-encoded format with variable precision + (all numerical values like doubles, longs, floats, and ints are converted to + lexicographic sortable string representations and stored with different precisions + (for a more detailed description of how the values are stored, + see
+ n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 )
+
+ (this formula is only correct, when + teacherid: 1 + studentfirstname: james + studentsurname: jones + + teacherid: 2 + studenfirstname: james + studentsurname: smith + studentfirstname: sally + studentsurname: jones ++ + a SpanNearQuery with a slop of 0 can be applied across two +
+ SpanQuery q1 = new SpanTermQuery(new Term("studentfirstname", "james"));
+ SpanQuery q2 = new SpanTermQuery(new Term("studentsurname", "jones"));
 + SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
+ Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false);
+
+ to search for 'studentfirstname:james studentsurname:jones' and find
+ teacherid 1 without matching teacherid 2 (which has a 'james' in position 0
+ and 'jones' in position 1).
+
+ Note: as
+ boolean skipTo(int target) {
+ do {
+ if (!next())
+ return false;
+ } while (target > doc());
+ return true;
+ }
+
+ Most implementations are considerably more efficient than that.
+
+ new Lock.With(directory.makeLock("my.lock")) {
+ public Object doBody() {
+ ... code to execute while locked ...
+ }
+ }.run();
+
+ cardinality | intersect_count | union | nextSetBit | get | iterator | +|
---|---|---|---|---|---|---|
50% full | 3.36 | 3.96 | 1.44 | 1.46 | 1.99 | 1.58 | +
1% full | 3.31 | 3.90 | 1.04 | 0.99 | +
cardinality | intersect_count | union | nextSetBit | get | iterator | +|
---|---|---|---|---|---|---|
50% full | 2.50 | 3.50 | 1.00 | 1.03 | 1.12 | 1.25 | +
1% full | 2.51 | 3.49 | 1.00 | 1.02 | +
Please note: It is not guaranteed, that
+ Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+ field.setOmitNorms(true);
+ field.setOmitTermFreqAndPositions(true);
+ document.add(field);
+
+
+ For optimal performance, re-use the TokenStream and Field instance
+ for more than one document:
+
+
+ NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ Field field = new Field(name, stream);
+ field.setOmitNorms(true);
+ field.setOmitTermFreqAndPositions(true);
+ Document document = new Document();
+ document.add(field);
+
+ for(all documents) {
+ stream.setIntValue(value)
+ writer.addDocument(document);
+ }
+
+
+ This stream is not intended to be used in analyzers;
+ it's more for iterating the different precisions during
+ indexing a specific numeric value.
+
+ NOTE: as token streams are only consumed once
+ the document is added to the index, if you index more
+ than one numeric field, use a separate
+ PerFieldAnalyzerWrapper aWrapper =
+ new PerFieldAnalyzerWrapper(new StandardAnalyzer());
+ aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
+ aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+
+
+ In this example, StandardAnalyzer will be used for all fields except "firstname"
+ and "lastname", for which KeywordAnalyzer will be used.
+
+ A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+ and query parsing.
+
+ class MyAnalyzer extends Analyzer {
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+ return new PorterStemFilter(new LowerCaseTokenizer(reader));
+ }
+ }
+
+
+ TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
+ TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+ TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+ TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
+ source2.addSinkTokenStream(sink1);
+ source2.addSinkTokenStream(sink2);
+ TokenStream final1 = new LowerCaseFilter(source1);
+ TokenStream final2 = source2;
+ TokenStream final3 = new EntityDetect(sink1);
+ TokenStream final4 = new URLDetect(sink2);
+ d.add(new Field("f1", final1));
+ d.add(new Field("f2", final2));
+ d.add(new Field("f3", final3));
+ d.add(new Field("f4", final4));
+
+ In this example,
+ ...
+ TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream());
+ TokenStream final2 = source2.newSinkTokenStream();
+ sink1.consumeAllTokens();
+ sink2.consumeAllTokens();
+ ...
+
+ In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
+ Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+
+ return reusableToken.reinit(string, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+
+
+ return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+
+
+ public String toString() {
+ return "start=" + startOffset + ",end=" + endOffset;
+ }
+
+
+ This method may be overridden by subclasses.
+
+ public int hashCode() {
+ int code = startOffset;
+ code = code * 31 + endOffset;
+ return code;
+ }
+
+
+ see also
+ document.add(new NumericField(name).setIntValue(value));
+
+
+ For optimal performance, re-use the
+
+ NumericField field = new NumericField(name);
+ Document document = new Document();
+ document.add(field);
+
+ for(all documents) {
+ ...
+ field.setIntValue(value)
+ writer.addDocument(document);
+ ...
+ }
+
+
+ The .Net native types
+ boolean skipTo(int target) {
+ do {
+ if (!next())
+ return false;
+ } while (target > doc());
+ return true;
+ }
+
+ Some implementations are considerably more efficient than that.
+
+ java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+
+
+ IndexReader reader = ...
+ ...
+ IndexReader newReader = r.reopen();
+ if (newReader != reader) {
+ ... // reader was reopened
+ reader.close();
+ }
+ reader = newReader;
+ ...
+
+
+ Be sure to synchronize that code so that other threads,
+ if present, can never use reader after it has been
+ closed and before it's switched to newReader.
+
+ NOTE: If this reader is a near real-time
+ reader (obtained from This is called each time the writer completed a commit. + This gives the policy a chance to remove old commit points + with each commit.
+ +The policy may now choose to delete old commit points
+ by calling method
This method is only called when
Note: the last CommitPoint is the most recent one, + i.e. the "front index state". Be careful not to delete it, + unless you know for sure what you are doing, and unless + you can afford to lose the index content while doing that.
+
+ try {
+ writer.close();
+ } finally {
+ if (IndexWriter.isLocked(directory)) {
+ IndexWriter.unlock(directory);
+ }
+ }
+
+
+ after which, you must be certain not to use the writer
+ instance anymore.
+
+ NOTE: if this method hits an OutOfMemoryError
+ you should immediately close the writer, again. See above for details.
+
+
+ try {
+ writer.close();
+ } finally {
+ if (IndexWriter.isLocked(directory)) {
+ IndexWriter.unlock(directory);
+ }
+ }
+
+
+ after which, you must be certain not to use the writer
+ instance anymore.
+
+ NOTE: if this method hits an OutOfMemoryError
+ you should immediately close the writer, again. See above for details.
+
+
+ pq.top().change();
+ pq.updateTop();
+
+ instead of
+
+ o = pq.pop();
+ o.change();
+ pq.push(o);
+
+
+ // extends getSentinelObject() to return a non-null value.
+ PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits);
+ // save the 'top' element, which is guaranteed to not be null.
+ MyObject pqTop = pq.top();
+ <...>
+ // now in order to add a new element, which is 'better' than top (after
+ // you've verified it is better), it is as simple as:
 + pqTop.change();
+ pqTop = pq.updateTop();
+
+
+ NOTE: if this method returns a non-null value, it will be called by
+
+ Query ::= ( Clause )*
+ Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+
+
+
+ Examples of appropriately formatted queries can be found in the query syntax
+ documentation.
+
+
+
+ In
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ When you pass a boost (title=>5 body=>10) you can get
+
+
+
+ +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+ (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+
+
+
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+
+
+ The code above would construct a query:
+
+
+ (filename:query) +(contents:query) -(description:query)
+
+
+
+ String[] query = {"query1", "query2", "query3"};
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+
+
+ The code above would construct a query:
+
+
+ (filename:query1) +(contents:query2) -(description:query3)
+
+
+
+
+ tf(t in d) = frequency½
+
+ idf(t) = 1 + log( numDocs / (docFreq + 1) )
+
+ queryNorm(q) = 1 / sumOfSquaredWeights½
+
+ sumOfSquaredWeights = q.Boost² · ∑ ( idf(t) · t.Boost )²   (summed over each term t in q)
+
+ norm(t,d) = doc.Boost · lengthNorm(field) · ∏ f.Boost   (product over each field f in d named as t)
+ idf(searcher.docFreq(term), searcher.MaxDoc);
+
+
+ Note that
+ int advance(int target) {
+ int doc;
+ while ((doc = nextDoc()) < target) {
+ }
+ return doc;
+ }
+
+
+ Some implementations are considerably more efficient than that.
+
+ NOTE: certain implementations may return a different value (each
+ time) if called several times in a row with the same target.
+
+ NOTE: this method may be called with
+ Searcher searcher = new IndexSearcher(indexReader);
+ final BitSet bits = new BitSet(indexReader.MaxDoc);
+ searcher.search(query, new Collector() {
+ private int docBase;
+
+ // ignore scorer
+ public void setScorer(Scorer scorer) {
+ }
+
+ // accept docs out of order (for a BitSet it doesn't matter)
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+
+ public void collect(int doc) {
+ bits.set(doc + docBase);
+ }
+
+ public void setNextReader(IndexReader reader, int docBase) {
+ this.docBase = docBase;
+ }
+ });
+
+
+ Not all collectors will need to rebase the docID. For
+ example, a collector that simply counts the total number
+ of hits would skip it.
+
+ NOTE: Prior to 2.9, Lucene silently filtered
+ out hits with score <= 0. As of 2.9, the core Collectors
+ no longer do that. It's very unusual to have such hits
+ (a negative query boost, or function query returning
+ negative custom scores, could cause it to happen). If
+ you need that behavior, use + ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ... ++
+ ModifiedScore = subQueryScore * valSrcScore ++
+ similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+ return (similarity > minimumSimilarity);
+ where distance is the Levenshtein distance for the two words.
+
+ Levenshtein distance (also known as edit distance) is a measure of similarity
+ between two strings where the distance is measured as the number of character
+ deletions, insertions or substitutions required to transform one string to
+ the other string.
+
+ PriorityQueue pq = new HitQueue(10, true); // pre-populate.
+ ScoreDoc top = pq.top();
+
+ // Add/Update one element.
+ top.score = 1.0f;
+ top.doc = 0;
+ top = (ScoreDoc) pq.updateTop();
+ int totalHits = 1;
+
+ // Now pop only the elements that were *truly* inserted.
+ // First, pop all the sentinel elements (there are pq.size() - totalHits).
+ for (int i = pq.size() - totalHits; i > 0; i--) pq.pop();
+
+ // Now pop the truly added elements.
+ ScoreDoc[] results = new ScoreDoc[totalHits];
+ for (int i = totalHits - 1; i >= 0; i--) {
+ results[i] = (ScoreDoc) pq.pop();
+ }
+
+
+ NOTE: This class pre-allocate a full array of
+ length
+ Filter f = NumericRangeFilter.newFloatRange("weight",
+ new Float(0.03f), new Float(0.10f),
+ true, true);
+
+
+ accepts all documents whose float valued "weight" field
+ ranges from 0.03 to 0.10, inclusive.
+ See
+ Query q = NumericRangeQuery.newFloatRange("weight",
+ new Float(0.03f), new Float(0.10f),
+ true, true);
+
+
+ matches all documents whose float valued "weight" field
+ ranges from 0.03 to 0.10, inclusive.
+
+ The performance of NumericRangeQuery is much better
+ than the corresponding Schindler, U, Diepenbroek, M, 2008. + Generic XML-based Framework for Metadata Portals. + Computers & Geosciences 34 (12), 1947-1955. + doi:10.1016/j.cageo.2008.02.023+ + A quote from this paper: Because Apache Lucene is a full-text + search engine and not a conventional database, it cannot handle numerical ranges + (e.g., field value is inside user defined bounds, even dates are numerical values). + We have developed an extension to Apache Lucene that stores + the numerical values in a special string-encoded format with variable precision + (all numerical values like doubles, longs, floats, and ints are converted to + lexicographic sortable string representations and stored with different precisions + (for a more detailed description of how the values are stored, + see
+ n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 )
+
+ (this formula is only correct, when + teacherid: 1 + studentfirstname: james + studentsurname: jones + + teacherid: 2 + studentfirstname: james + studentsurname: smith + studentfirstname: sally + studentsurname: jones ++ + a SpanNearQuery with a slop of 0 can be applied across two +
+ SpanQuery q1 = new SpanTermQuery(new Term("studentfirstname", "james"));
+ SpanQuery q2 = new SpanTermQuery(new Term("studentsurname", "jones"));
+ SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
+ Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false);
+
+ to search for 'studentfirstname:james studentsurname:jones' and find
+ teacherid 1 without matching teacherid 2 (which has a 'james' in position 0
+ and 'jones' in position 1).
+
+ Note: as
+ boolean skipTo(int target) {
+ do {
+ if (!next())
+ return false;
+ } while (target > doc());
+ return true;
+ }
+
+ Most implementations are considerably more efficient than that.
+
+ new Lock.With(directory.makeLock("my.lock")) {
+ public Object doBody() {
+ ... code to execute while locked ...
+ }
+ }.run();
+
+ cardinality | intersect_count | union | nextSetBit | get | iterator | +|
---|---|---|---|---|---|---|
50% full | 3.36 | 3.96 | 1.44 | 1.46 | 1.99 | 1.58 | +
1% full | 3.31 | 3.90 | 1.04 | 0.99 | +
cardinality | intersect_count | union | nextSetBit | get | iterator | +|
---|---|---|---|---|---|---|
50% full | 2.50 | 3.50 | 1.00 | 1.03 | 1.12 | 1.25 | +
1% full | 2.51 | 3.49 | 1.00 | 1.02 | +
Key = true
, represents a unique primary key to the document.
+
+ Key fields are used to replace or delete documents.
+
+ [DocumentKey(FieldName="Type", Value="Customer")]
+ public class Customer
+ {
+ }
+
+
+ public IStreamedData ExecuteInMemory (IStreamedData input)
+ {
+ ArgumentUtility.CheckNotNull ("input", input);
+ return InvokeGenericExecuteMethod<StreamedSequence, StreamedValue> (input, ExecuteInMemory<object>);
+ }
+
+ public StreamedValue ExecuteInMemory<T> (StreamedSequence input)
+ {
+ var sequence = input.GetTypedSequence<T> ();
+ var result = sequence.Sequence.Count ();
+ return new StreamedValue (result);
+ }
+
+
+ var result = (from s in Students
+ select s).Any();
+
+
+ var result = (from s in Students
+ select s).All(s => s.First == "Hugo");
+
+
+ var result = (from s in Students
+ select s.Name).Aggregate((allNames, name) => allNames + " " + name);
+
+
+ var result = (from s in Students
+ select s).Aggregate(0, (totalAge, s) => totalAge + s.Age);
+
+
+ var query = from s in Students
+ join a in Addresses on s.AddressID equals a.ID into addresses
+ from a in addresses
+ select new { s, a };
+
+
+ var query = (from s in Students
+ select s.ID).Average();
+
+
+ var query = (from s in Students
+ select s.ID).Cast<int>();
+
+
+ var query = (from s in Students
+ select s).Contains (student);
+
+
+ var query = (from s in Students
+ select s).DefaultIfEmpty ("student");
+
+
+ var query = (from s in Students
+ select s).Except(students2);
+
+
+ var query = (from s in Students
+ select s).Intersect(students2);
+
+
+ var query = (from s in Students
+ select s).LongCount();
+
+
+ var query = (from s in Students
+ select s.ID).OfType<int>();
+
+
+ var query = (from s in Students
+ select s).Reverse();
+
+
+ var query = (from s in Students
+ select s).Skip (3);
+
+
+ var query = (from s in Students
+ select s.ID).Sum();
+
+
+ var query = (from s in Students
+ select s.ID).Max();
+
+
+ var query = (from s in Students
+ select s.ID).Min();
+
+
+ var query = (from s in Students
+ select s).Last();
+
+
+ var query = (from s in Students
+ select s).Take(3);
+
+
+ var query = (from s in Students
+ select s).First();
+
+
+ var query = (from s in Students
+ select s).Single();
+
+
+ var query = (from s in Students
+ select s).Distinct();
+
+
+ var query = (from s in Students
+ select s).Count();
+
+
+ var query = (from s in Students
+ select s).Union(students2);
+
+
+ MainSource (...)
+ .Select (x => x)
+ .Distinct ()
+ .Select (x => x)
+
+
+ Naively, the last Select node would resolve (via Distinct and Select) to the
+ MainSource (MainSource (...).Select (x => x).Distinct ())
+ .Select (x => x)
+
+
+ Now, the last Select node resolves to the new
+ x.GroupBy (k => key, e => element, (k, g) => result)
+
+ is therefore equivalent to:
+
+ c.GroupBy (k => key, e => element).Select (grouping => resultSub)
+
+ where resultSub is the same as result with k and g substituted with grouping.Key and grouping, respectively.
+
+ from c in Customers
+ from o in (from oi in OrderInfos where oi.Customer == c orderby oi.OrderDate select oi.Order)
+ orderby o.Product.Name
+ select new { c, o }
+
+ This will be transformed into:
+
+ from c in Customers
+ from oi in OrderInfos
+ where oi.Customer == c
+ orderby oi.OrderDate
+ orderby oi.Order.Product.Name
+ select new { c, oi.Order }
+
+ As another example, take the following query:
+
+ from c in (from o in Orders select o.Customer)
+ where c.Name.StartsWith ("Miller")
+ select c
+
+ (This query is never produced by the
+ from o in Orders
+ where o.Customer.Name.StartsWith ("Miller")
+ select o
+
+
+ var query = from s in Students
+ where s.First == "Hugo"
+ group s by s.Country;
+
+
+ var query = from s in Students
+ join a in Addresses on s.AddressID equals a.ID
+ select new { s, a };
+
+
+ from order in ...
+ select order.OrderItems.Count()
+
+ In this query, the
+ var query = from s in Students
+ where s.First == "Hugo"
+ select s;
+
+ ("o", o);
+ }
+ ]]>
+ In some other cases, the input value is returned unmodified. This makes it easier to use the argument checks in calls to base class constructors
+ or property setters:
+
+