diff --git a/.gitignore b/.gitignore index bdc3535..c29d4cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,108 +1,49 @@ -# Build Folders (you can keep bin if you'd like, to store dlls and pdbs) -[Bb]in/ -[Oo]bj/ +#OS junk files +[Tt]humbs.db +*.DS_Store -# mstest test results -TestResults - -## Ignore Visual Studio temporary files, build results, and -## files generated by popular Visual Studio add-ons. - -# User-specific files -*.suo +#Visual Studio files +*.[Oo]bj *.user -*.sln.docstates - -# Build results -[Dd]ebug/ -[Rr]elease/ -x64/ +*.aps +*.pch +*.vspscc +*.vssscc *_i.c *_p.c -*.ilk -*.meta -*.obj -*.pch -*.pdb -*.pgc -*.pgd -*.rsp -*.sbr +*.ncb +*.suo *.tlb -*.tli *.tlh -*.tmp +*.bak +*.[Cc]ache +*.ilk *.log -*.vspscc -*.vssscc -.builds - -# Visual C++ cache files -ipch/ -*.aps -*.ncb -*.opensdf +*.lib +*.sbr *.sdf +*.opensdf +*.unsuccessfulbuild +ipch/ +obj/ +[Bb]in +[Dd]ebug*/ +[Rr]elease*/ +Ankh.NoLoad -# Visual Studio profiler -*.psess -*.vsp -*.vspx - -# Guidance Automation Toolkit -*.gpState - -# ReSharper is a .NET coding add-in -_ReSharper* - -# NCrunch -*.ncrunch* -.*crunch*.local.xml - -# Installshield output folder -[Ee]xpress - -# DocProject is a documentation generator add-in -DocProject/buildhelp/ -DocProject/Help/*.HxT -DocProject/Help/*.HxC -DocProject/Help/*.hhc -DocProject/Help/*.hhk -DocProject/Help/*.hhp -DocProject/Help/Html2 -DocProject/Help/html - -# Click-Once directory -publish - -# Publish Web Output -*.Publish.xml - -# NuGet Packages Directory -packages +#Tooling +_ReSharper*/ +*.resharper +[Tt]est[Rr]esult* -# Windows Azure Build Output -csx -*.build.csdef +#Project files +[Bb]uild/ -# Windows Store app package directory -AppPackages/ +#Subversion files +.svn -# Others -[Bb]in -[Oo]bj -sql -TestResults -[Tt]est[Rr]esult* -*.Cache -ClientBin -[Ss]tyle[Cc]op.* +# Office Temp Files ~$* -*.dbmdl -Generated_Code #added for RIA/Silverlight projects -# Backup & report files from converting an old project file to a newer -# Visual Studio version. Backup files are not needed, because we have git ;-) -_UpgradeReport_Files/ -Backup*/ -UpgradeLog*.XML + +*/App_Data/* diff --git a/packages/Common.Logging.2.1.2/Common.Logging.2.1.2.nupkg b/packages/Common.Logging.2.1.2/Common.Logging.2.1.2.nupkg new file mode 100644 index 0000000..80ffdfd Binary files /dev/null and b/packages/Common.Logging.2.1.2/Common.Logging.2.1.2.nupkg differ diff --git a/packages/Common.Logging.2.1.2/Common.Logging.2.1.2.nuspec b/packages/Common.Logging.2.1.2/Common.Logging.2.1.2.nuspec new file mode 100644 index 0000000..fca3365 --- /dev/null +++ b/packages/Common.Logging.2.1.2/Common.Logging.2.1.2.nuspec @@ -0,0 +1,16 @@ + + + + Common.Logging + 2.1.2 + Aleksandar Seovic, Mark Pollack, Erich Eichinger, Stephen Bohlen + Aleksandar Seovic, Mark Pollack, Erich Eichinger, Stephen Bohlen + http://netcommon.sourceforge.net/ + false + Common.Logging library introduces a simple abstraction to allow you to select a specific logging implementation at runtime. 
+ en-US + + + + + \ No newline at end of file diff --git a/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.dll b/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.dll new file mode 100644 index 0000000..0c35fd3 Binary files /dev/null and b/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.dll differ diff --git a/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.pdb b/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.pdb new file mode 100644 index 0000000..d26b618 Binary files /dev/null and b/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.pdb differ diff --git a/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.xml b/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.xml new file mode 100644 index 0000000..caaa614 --- /dev/null +++ b/packages/Common.Logging.2.1.2/lib/net35/Common.Logging.xml @@ -0,0 +1,3304 @@ + + + + Common.Logging + + + + + This assembly contains the core functionality of the Common.Logging framework. + In particular, checkout and for usage information. + + + + + Provides base implementation suitable for almost all logger adapters + + Erich Eichinger + + + + A simple logging interface abstracting logging APIs. + + + + Implementations should defer calling a message's until the message really needs + to be logged to avoid performance penalties. + + + Each log method offers to pass in a instead of the actual message. + Using this style has the advantage to defer possibly expensive message argument evaluation and formatting (and formatting arguments!) until the message gets + actually logged. If the message is not logged at all (e.g. due to settings), + you won't have to pay the peformance penalty of creating the message. + + + + The example below demonstrates using callback style for creating the message, where the call to the + and the underlying only happens, if level is enabled: + + Log.Debug( m=>m("result is {0}", random.NextDouble()) ); + Log.Debug(delegate(m) { m("result is {0}", random.NextDouble()); }); + + + + Mark Pollack + Bruno Baia + Erich Eichinger + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. 
+ + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. 
+ The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. 
+ + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. 
+ + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Holds the method for writing a message to the log system. + + + + + Creates a new logger instance using for + writing log events to the underlying log system. + + + + + + Override this method to use a different method than + for writing log events to the underlying log system. + + + Usually you don't need to override thise method. The default implementation returns + null to indicate that the default handler should be + used. + + + + + Actually sends the message to the underlying log system. + + the level of this log event. + the message to log + the exception to log (may be null) + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. 
+ the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Debug of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Debug. + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. 
+ A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Info of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Info. + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Warn of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Warn. + + + + Log a message with the level. + + An that supplies culture-specific formatting Warnrmation. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting Warnrmation. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. 
+ The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Error of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Error. + + + + Log a message with the level. + + An that supplies culture-specific formatting Errorrmation. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting Errorrmation. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. 
+ A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Fatal of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Fatal. + + + + Log a message with the level. + + An that supplies culture-specific formatting Fatalrmation. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting Fatalrmation. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. 
+ + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Format message on demand. + + + + + Initializes a new instance of the class. + + The format message callback. + + + + Initializes a new instance of the class. + + The format provider. + The format message callback. + + + + Calls and returns result. + + + + + + Format string on demand. + + + + + Initializes a new instance of the class. + + The format provider. + The message. + The args. + + + + Runs on supplied arguemnts. + + string + + + + Represents a method responsible for writing a message to the log system. + + + + + An implementation of that caches loggers handed out by this factory. + + + Implementors just need to override . + + Erich Eichinger + + + + LoggerFactoryAdapter interface is used internally by LogManager + Only developers wishing to write new Common.Logging adapters need to + worry about this interface. + + Gilles Bayon + + + + Get a ILog instance by type. + + The type to use for the logger + + + + + Get a ILog instance by name. + + The name of the logger + + + + + Creates a new instance, the logger cache being case-sensitive. + + + + + Creates a new instance, the logger cache being . + + + + + + Purges all loggers from cache + + + + + Create the specified named logger instance + + + Derived factories need to implement this method to create the + actual logger instance. + + + + + Get a ILog instance by . + + Usually the of the current class. + + An ILog instance either obtained from the internal cache or created by a call to . + + + + + Get a ILog instance by name. + + Usually a 's Name or FullName property. + + An ILog instance either obtained from the internal cache or created by a call to . + + + + + Get or create a ILog instance by name. + + Usually a 's Name or FullName property. + + An ILog instance either obtained from the internal cache or created by a call to . + + + + + A logger created by that + sends all log events to the owning adapter's + + Erich Eichinger + + + + Abstract class providing a standard implementation of simple loggers. + + Erich Eichinger + + + + Creates and initializes a the simple logger. + + The name, usually type name of the calling class, of the logger. + The current logging threshold. Messages recieved that are beneath this threshold will not be logged. + Include level in the log message. + Include the current time in the log message. + Include the instance name in the log message. + The date and time format to use in the log message. + + + + Appends the formatted message to the specified . + + the that receíves the formatted message. + + + + + + + Determines if the given log level is currently enabled. + + + + + + + The name of the logger. + + + + + Include the current log level in the log message. + + + + + Include the current time in the log message. + + + + + Include the instance name in the log message. + + + + + The current logging threshold. Messages recieved that are beneath this threshold will not be logged. + + + + + The date and time format to use in the log message. + + + + + Determines Whether is set. + + + + + Returns if the current is greater than or + equal to . If it is, all messages will be sent to . + + + + + Returns if the current is greater than or + equal to . If it is, all messages will be sent to . + + + + + Returns if the current is greater than or + equal to . 
If it is, only messages with a of + , , , and + will be sent to . + + + + + Returns if the current is greater than or + equal to . If it is, only messages with a of + , , and + will be sent to . + + + + + Returns if the current is greater than or + equal to . If it is, only messages with a of + and will be sent to . + + + + + Returns if the current is greater than or + equal to . If it is, only messages with a of + will be sent to . + + + + + The adapter that created this logger instance. + + + + + Clears all captured events + + + + + Resets the to null. + + + + + Holds the list of logged events. + + + To access this collection in a multithreaded application, put a lock on the list instance. + + + + + instances send their captured log events to this method. + + + + + Create a new logger instance. + + + + + Create a new and send it to + + + + + + + + Holds the last log event received from any of this adapter's loggers. + + + + + A logging event captured by + + Erich Eichinger + + + + The logger that logged this event + + + + + The level used to log this event + + + + + The raw message object + + + + + A logged exception + + + + + Create a new event instance + + + + + Retrieves the formatted message text + + + + + An adapter, who's loggers capture all log events and send them to . + Retrieve the list of log events from . + + + This logger factory is mainly for debugging and test purposes. + + This is an example how you might use this adapter for testing: + + // configure for capturing + CapturingLoggerFactoryAdapter adapter = new CapturingLoggerFactoryAdapter(); + LogManager.Adapter = adapter; + + // reset capture state + adapter.Clear(); + // log something + ILog log = LogManager.GetCurrentClassLogger(); + log.DebugFormat("Current Time:{0}", DateTime.Now); + + // check logged data + Assert.AreEqual(1, adapter.LoggerEvents.Count); + Assert.AreEqual(LogLevel.Debug, adapter.LastEvent.Level); + + + + Erich Eichinger + + + + Clears all captured events + + + + + Resets the to null. + + + + + Holds the list of logged events. + + + To access this collection in a multithreaded application, put a lock on the list instance. + + + + + instances send their captured log events to this method. + + + + + Get a instance for the given type. + + + + + Get a instance for the given name. + + + + + Holds the last log event received from any of this adapter's loggers. + + + + + A implementation sending all System.Diagnostics.Trace output to + the Common.Logging infrastructure. + + + This listener captures all output sent by calls to System.Diagnostics.Trace and + and and sends it to an instance.
+ The instance to be used is obtained by calling + . The name of the logger is created by passing + this listener's and any source or category passed + into this listener (see or for example). +
+ + The snippet below shows how to add and configure this listener to your app.config: + + <system.diagnostics> + <sharedListeners> + <add name="Diagnostics" + type="Common.Logging.Simple.CommonLoggingTraceListener, Common.Logging" + initializeData="DefaultTraceEventType=Information; LoggerNameFormat={listenerName}.{sourceName}"> + <filter type="System.Diagnostics.EventTypeFilter" initializeData="Information"/> + </add> + </sharedListeners> + <trace> + <listeners> + <add name="Diagnostics" /> + </listeners> + </trace> + </system.diagnostics> + + + Erich Eichinger +
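Once a listener like the one in the snippet above is registered, existing System.Diagnostics tracing code needs no changes to end up in Common.Logging. A minimal illustrative sketch of the producing side follows (TraceDemo is a made-up class, not part of this package):

    using System.Diagnostics;

    static class TraceDemo
    {
        static void Run()
        {
            // With the "Diagnostics" listener registered under <trace><listeners>,
            // these calls are routed through it to the configured Common.Logging
            // adapter, alongside any other registered trace listeners.
            Trace.TraceInformation("application started");
            Trace.TraceWarning("disk space low: {0} MB free", 512);
        }
    }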
+ + + Creates a new instance with the default name "Diagnostics" and "Trace". + + + + + Creates a new instance initialized with properties from the . string. + + + is a semicolon separated string of name/value pairs, where each pair has + the form key=value. E.g. + "Name=MyLoggerName;LogLevel=Debug" + + a semicolon separated list of name/value pairs. + + + + Creates a new instance initialized with the specified properties. + + name/value configuration properties. + + + + Logs the given message to the Common.Logging infrastructure. + + the eventType + the name or category name passed into e.g. . + the id of this event + the message format + the message arguments + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Sets the default to use for logging + all events emitted by .Write(...) and + .WriteLine(...) methods. + + + This listener captures all output sent by calls to and + sends it to an instance using the specified + on . + + + + + Format to use for creating the logger name. Defaults to "{listenerName}.{sourceName}". + + + Available placeholders are: + + {listenerName}: the configured name of this listener instance. + {sourceName}: the trace source name an event originates from (see e.g. . + + + + + + The exception that is thrown when a configuration system error has occurred with Common.Logging + + Mark Pollack + + + Creates a new instance of the ObjectsException class. + + + + Creates a new instance of the ConfigurationException class. with the specified message. + + + A message about the exception. + + + + + Creates a new instance of the ConfigurationException class with the specified message + and root cause. + + + A message about the exception. + + + The root exception that is being wrapped. + + + + + Creates a new instance of the ConfigurationException class. + + + The + that holds the serialized object data about the exception being thrown. + + + The + that contains contextual information about the source or destination. + + + + + Implementation of that uses the standard .NET + configuration APIs, ConfigurationSettings in 1.x and ConfigurationManager in 2.0 + + Mark Pollack + + + + Interface for basic operations to read .NET application configuration information. + + Provides a simple abstraction to handle BCL API differences between .NET 1.x and 2.0. Also + useful for testing scenarios. + Mark Pollack + + + + Parses the configuration section and returns the resulting object. + + +

+ Primary purpose of this method is to allow us to parse and + load configuration sections using the same API regardless + of the .NET framework version. +

+
+ Name of the configuration section. + Object created by a corresponding . + +
+ + + Parses the configuration section and returns the resulting object. + + Name of the configuration section. + + Object created by a corresponding . + + +

+ Primary purpose of this method is to allow us to parse and + load configuration sections using the same API regardless + of the .NET framework version. +

+
+ +
+ + + This namespace contains convenience base classes for implementing your own s. + + + + + Various utility methods for using during factory and logger instance configuration + + Erich Eichinger + + + + Initialize all members before any of this class' methods can be accessed (avoids beforeFieldInit) + + + + + Adds the parser to the list of known type parsers. + + + .NET intrinsic types are pre-registerd: short, int, long, float, double, decimal, bool + + + + + Retrieves the named value from the specified . + + may be null + the value's key + if is not null, the value returned by values[name]. null otherwise. + + + + Retrieves the named value from the specified . + + may be null + the value's key + the default value, if not found + if is not null, the value returned by values[name]. null otherwise. + + + + Returns the first nonnull, nonempty value among its arguments. + + + Returns null, if the initial list was null or empty. + + + + + + Returns the first nonnull, nonempty value among its arguments. + + + Also + + + + + Tries parsing into an enum of the type of . + + the default value to return if parsing fails + the string value to parse + the successfully parsed value, otherwise. + + + + Tries parsing into the specified return type. + + the default value to return if parsing fails + the string value to parse + the successfully parsed value, otherwise. + + + + Throws a if is null. + + + + + Throws a if is null. + + + + + Throws a if an object of type is not + assignable to type . + + + + + Throws a if an object of type is not + assignable to type . + + + + + Ensures any exception thrown by the given is wrapped with an + . + + + If already throws a ConfigurationException, it will not be wrapped. + + the action to execute + the message to be set on the thrown + args to be passed to to format the message + + + + Ensures any exception thrown by the given is wrapped with an + . + + + If already throws a ConfigurationException, it will not be wrapped. + + the action to execute + the message to be set on the thrown + args to be passed to to format the message + + + + A delegate converting a string representation into the target type + + + + + An anonymous action delegate with no arguments and no return value. + + + + + + An anonymous action delegate with no arguments and no return value. + + + + + + The type of method that is passed into e.g. + and allows the callback method to "submit" it's message to the underlying output system. + + the format argument as in + the argument list as in + + Erich Eichinger + + + + Used in an application's configuration file (App.Config or Web.Config) to configure the logging subsystem. + + + An example configuration section that writes log messages to the Console using the + built-in Console Logger. 
+ + <configuration> + <configSections> + <sectionGroup name="common"> + <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" /> + </sectionGroup> + </configSections> + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging"> + <arg key="showLogName" value="true" /> + <arg key="showDataTime" value="true" /> + <arg key="level" value="ALL" /> + <arg key="dateTimeFormat" value="yyyy/MM/dd HH:mm:ss:fff" /> + </factoryAdapter> + </logging> + </common> + </configuration> + + + + + + Ensure static fields get initialized before any class member + can be accessed (avoids beforeFieldInit) + + + + + Constructor + + + + + Retrieves the of the logger the use by looking at the logFactoryAdapter element + of the logging configuration element. + + + + A object containing the specified type that implements + along with zero or more properties that will be + passed to the logger factory adapter's constructor as an . + + + + + Verifies that the logFactoryAdapter element appears once in the configuration section. + + settings of a parent section - atm this must always be null + Additional information about the configuration process. + The configuration section to apply an XPath query too. + + A object containing the specified logFactoryAdapter type + along with user supplied configuration properties. + + + + + Verifies that the logFactoryAdapter element appears once in the configuration section. + + The parent of the current item. + Additional information about the configuration process. + The configuration section to apply an XPath query too. + + A object containing the specified logFactoryAdapter type + along with user supplied configuration properties. + + + + + The 7 possible logging levels + + Gilles Bayon + + + + All logging levels + + + + + A trace logging level + + + + + A debug logging level + + + + + A info logging level + + + + + A warn logging level + + + + + An error logging level + + + + + A fatal logging level + + + + + Do not log anything. + + + + + Use the LogManager's or + methods to obtain instances for logging. + + + For configuring the underlying log system using application configuration, see the example + at . + For configuring programmatically, see the example section below. + + + The example below shows the typical use of LogManager to obtain a reference to a logger + and log an exception: + + + ILog log = LogManager.GetLogger(this.GetType()); + ... + try + { + /* .... */ + } + catch(Exception ex) + { + log.ErrorFormat("Hi {0}", ex, "dude"); + } + + + The example below shows programmatic configuration of the underlying log system: + + + // create properties + NameValueCollection properties = new NameValueCollection(); + properties["showDateTime"] = "true"; + + // set Adapter + Common.Logging.LogManager.Adapter = new + Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter(properties); + + + + + + + + Gilles Bayon + + + + The name of the default configuration section to read settings from. + + + You can always change the source of your configuration settings by setting another instance + on . + + + + + Performs static 1-time init of LogManager by calling + + + + + Reset the infrastructure to its default settings. This means, that configuration settings + will be re-read from section <common/logging> of your app.config. + + + This is mainly used for unit testing, you wouldn't normally use this in your applications.
+ Note: instances already handed out from this LogManager are not(!) affected. + Resetting LogManager only affects new instances being handed out. +
+
+ + + Reset the infrastructure to its default settings. This means, that configuration settings + will be re-read from section <common/logging> of your app.config. + + + This is mainly used for unit testing, you wouldn't normally use this in your applications.
+ Note: instances already handed out from this LogManager are not(!) affected. + Resetting LogManager only affects new instances being handed out. +
+ + the instance to obtain settings for + re-initializing the LogManager. + +
+ + + Gets the logger by calling + on the currently configured using the type of the calling class. + + + This method needs to inspect the in order to determine the calling + class. This of course comes with a performance penalty, thus you shouldn't call it too + often in your application. + + + the logger instance obtained from the current + + + + Gets the logger by calling + on the currently configured using the specified type. + + the logger instance obtained from the current + + + + Gets the logger by calling + on the currently configured using the specified type. + + The type. + the logger instance obtained from the current + + + + Gets the logger by calling + on the currently configured using the specified name. + + The name. + the logger instance obtained from the current + + + + Builds the logger factory adapter. + + a factory adapter instance. Is never null. + + + + Builds a instance from the given + using . + + + the instance. Is never null + + + + Gets the configuration reader used to initialize the LogManager. + + Primarily used for testing purposes but maybe useful to obtain configuration + information from some place other than the .NET application configuration file. + The configuration reader. + + + + Gets or sets the adapter. + + The adapter. + + + + Container used to hold configuration information from config file. + + Gilles Bayon + + + + + + + The type + that will be used for creating + + + Additional user supplied properties that are passed to the + 's constructor. + + + + + The type that will be used for creating + instances. + + + + + Additional user supplied properties that are passed to the 's constructor. + + + + + This namespace contains all core classes making up the Common.Logging framework. + + + + + This namespace contains out-of-the-box adapters to intrinsic systems, namely + , and the + all output suppressing . + For unit testing, you may also want to have a look at + that allows to easily inspect logged messages. + To route messages logged through the infrastructure back into + Common.Logging, you can use + + + + + This namespace contains various utility classes. + + + + +
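The LogManager documentation above points out that GetCurrentClassLogger has to inspect the call stack and should therefore not be called too often. A common way to honour that advice, sketched here with a hypothetical MyService class, is to resolve the logger once into a static field:

    using Common.Logging;

    public class MyService
    {
        // Resolved once per type, so the stack inspection done by
        // GetCurrentClassLogger() happens only during type initialization.
        private static readonly ILog Log = LogManager.GetCurrentClassLogger();

        public void DoWork()
        {
            Log.Debug("starting work");
        }
    }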

Overview

+ + There are a variety of logging implementations for .NET currently in use: log4net, Enterprise + Library Logging, and NLog, to name the most popular. The downside of having different implementations + is that they do not share a common interface and therefore impose a particular logging + implementation on the users of your library. To solve this dependency problem the Common.Logging + library introduces a simple abstraction to allow you to select a specific logging implementation at + runtime. + + + The library is based on work done by the developers of IBatis.NET and its usage is inspired by + log4net. Many thanks to the developers of those projects! + +

Usage

+ + The core logging library Common.Logging provides the base logging interface as + well as the global that you use to instrument your code: + + + ILog log = LogManager.GetLogger(this.GetType()); + + log.DebugFormat("Hi {0}", "dude"); + + + To output the information logged, you need to tell Common.Logging what underlying logging system + to use. Common.Logging already includes simple console and trace-based logger implementations + usable out of the box. Adding the following configuration snippet to your app.config causes + Common.Logging to output all information to the console: + + + <configuration> + <configSections> + <sectionGroup name="common"> + <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" /> + </sectionGroup> + </configSections> + + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging"> + <arg key="level" value="DEBUG" /> + </factoryAdapter> + </logging> + </common> + </configuration> + +
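The same console adapter can also be wired up without touching app.config. This sketch mirrors the programmatic-configuration example that appears in the LogManager documentation elsewhere in this file; per its docs, an overload taking a name/value collection of the settings listed there also exists:

    using Common.Logging;
    using Common.Logging.Simple;

    static class LoggingBootstrap
    {
        static void Configure()
        {
            // Default settings per the AbstractSimpleLoggerFactoryAdapter docs:
            // LogLevel.All, showDateTime = true, showLogName = true.
            LogManager.Adapter = new ConsoleOutLoggerFactoryAdapter();
        }
    }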

Customizing

+ + In case you want to integrate your own logging system that is not yet supported by Common.Logging, you can easily + implement your own plugin by implementing . + For convenience there is a base implementation available that usually + makes implementing your own adapter a breeze. + +
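As a concrete illustration of that plugin point, here is a condensed, hypothetical adapter that writes to standard error. StderrLogger and StderrLoggerFactoryAdapter are made-up names; the shape of the overrides follows the ConsoleOutLogger sample shown in the AbstractSimpleLoggerFactoryAdapter documentation further down in this file:

    using System;
    using System.Text;
    using Common.Logging;
    using Common.Logging.Simple;

    // Hypothetical logger that sends formatted messages to Console.Error.
    public class StderrLogger : AbstractSimpleLogger
    {
        public StderrLogger(string logName, LogLevel logLevel, bool showLevel,
                            bool showDateTime, bool showLogName, string dateTimeFormat)
            : base(logName, logLevel, showLevel, showDateTime, showLogName, dateTimeFormat)
        { }

        protected override void WriteInternal(LogLevel level, object message, Exception e)
        {
            var sb = new StringBuilder();
            FormatOutput(sb, level, message, e); // formatting helper from the base class
            Console.Error.WriteLine(sb.ToString());
        }
    }

    public class StderrLoggerFactoryAdapter : AbstractSimpleLoggerFactoryAdapter
    {
        // null: let the base class fall back to its documented defaults
        // (its ArgUtils-based lookups accept a missing collection).
        public StderrLoggerFactoryAdapter() : base(null) { }

        protected override ILog CreateLogger(string name, LogLevel level, bool showLevel,
            bool showDateTime, bool showLogName, string dateTimeFormat)
        {
            return new StderrLogger(name, level, showLevel, showDateTime, showLogName, dateTimeFormat);
        }
    }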

<system.diagnostics> Integration

+ + If your code already uses the .NET framework's built-in System.Diagnostics.Trace + system, you can use to redirect all trace output to the + Common.Logging infrastructure. + +
+
+ + + Base factory implementation for creating simple instances. + + Default settings are LogLevel.All, showDateTime = true, showLogName = true, and no DateTimeFormat. + The keys in the NameValueCollection to configure this adapter are the following + + level + showDateTime + showLogName + dateTimeFormat + + + Here is an example how to implement your own logging adapter: + + public class ConsoleOutLogger : AbstractSimpleLogger + { + public ConsoleOutLogger(string logName, LogLevel logLevel, bool showLevel, bool showDateTime, + bool showLogName, string dateTimeFormat) + : base(logName, logLevel, showLevel, showDateTime, showLogName, dateTimeFormat) + { + } + + protected override void WriteInternal(LogLevel level, object message, Exception e) + { + // Use a StringBuilder for better performance + StringBuilder sb = new StringBuilder(); + FormatOutput(sb, level, message, e); + + // Print to the appropriate destination + Console.Out.WriteLine(sb.ToString()); + } + } + + public class ConsoleOutLoggerFactoryAdapter : AbstractSimpleLoggerFactoryAdapter + { + public ConsoleOutLoggerFactoryAdapter(NameValueCollection properties) + : base(properties) + { } + + protected override ILog CreateLogger(string name, LogLevel level, bool showLevel, bool + showDateTime, bool showLogName, string dateTimeFormat) + { + ILog log = new ConsoleOutLogger(name, level, showLevel, showDateTime, showLogName, + dateTimeFormat); + return log; + } + } + + + + + + Gilles Bayon + Mark Pollack + Erich Eichinger + + + + Initializes a new instance of the class. + + + Looks for level, showDateTime, showLogName, dateTimeFormat items from + for use when the GetLogger methods are called. + for more information on how to use the + standard .NET application configuraiton file (App.config/Web.config) + to configure this adapter. + + The name value collection, typically specified by the user in + a configuration section named common/logging. + + + + Initializes a new instance of the class with + default settings for the loggers created by this factory. + + + + + Create the specified logger instance + + + + + Derived factories need to implement this method to create the + actual logger instance. + + a new logger instance. Must never be null! + + + + The default to use when creating new instances. + + + + + The default setting to use when creating new instances. + + + + + The default setting to use when creating new instances. + + + + + The default setting to use when creating new instances. + + + + + The default setting to use when creating new instances. + + + + + Sends log messages to . + + Gilles Bayon + + + + Creates and initializes a logger that writes messages to . + + The name, usually type name of the calling class, of the logger. + The current logging threshold. Messages recieved that are beneath this threshold will not be logged. + Include the current log level in the log message. + Include the current time in the log message. + Include the instance name in the log message. + The date and time format to use in the log message. + + + + Do the actual logging by constructing the log message using a then + sending the output to . + + The of the message. + The log message. + An optional associated with the message. + + + + Factory for creating instances that write data to . 
+ + + + Below is an example how to configure this adapter: + + <configuration> + + <configSections> + <sectionGroup name="common"> + <section name="logging" + type="Common.Logging.ConfigurationSectionHandler, Common.Logging" + requirePermission="false" /> + </sectionGroup> + </configSections> + + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging"> + <arg key="level" value="ALL" /> + </factoryAdapter> + </logging> + </common> + + </configuration> + + + + + + + Gilles Bayon + Mark Pollack + Erich Eichinger + + + + Initializes a new instance of the class using default + settings. + + + + + Initializes a new instance of the class. + + + Looks for level, showDateTime, showLogName, dateTimeFormat items from + for use when the GetLogger methods are called. + for more information on how to use the + standard .NET application configuraiton file (App.config/Web.config) + to configure this adapter. + + The name value collection, typically specified by the user in + a configuration section named common/logging. + + + + Initializes a new instance of the class with + default settings for the loggers created by this factory. + + + + + Creates a new instance. + + + + + Silently ignores all log messages. + + Gilles Bayon + Erich Eichinger + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Ignores message. + + An that supplies culture-specific formatting information. 
+ A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Warnrmation. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Warnrmation. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Errorrmation. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Errorrmation. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. 
+ + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Fatalrmation. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Fatalrmation. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Always returns . + + + + + Always returns . + + + + + Always returns . + + + + + Always returns . + + + + + Always returns . + + + + + Always returns . + + + + + Factory for creating instances that silently ignores + logging requests. + + + This logger adapter is the default used by Common.Logging if unconfigured. Using this logger adapter is the most efficient + way to suppress any logging output. + + Below is an example how to configure this adapter: + + <configuration> + + <configSections> + <sectionGroup name="common"> + <section name="logging" + type="Common.Logging.ConfigurationSectionHandler, Common.Logging" + requirePermission="false" /> + </sectionGroup> + </configSections> + + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.NoOpLoggerFactoryAdapter, Common.Logging"> + <arg key="level" value="ALL" /> + </factoryAdapter> + </logging> + </common> + + </configuration> + + + + + + Gilles Bayon + + + + Constructor + + + + + Constructor + + + + + Get a ILog instance by type + + + + + + + Get a ILog instance by type name + + + + + + + Logger sending everything to the trace output stream using . + + + Beware not to use in combination with this logger as + this would result in an endless loop for obvious reasons! + + + + Gilles Bayon + Erich Eichinger + + + + Creates a new TraceLogger instance. + + whether to use or for logging. + the name of this logger + the default log level to use + Include the current log level in the log message. + Include the current time in the log message. + Include the instance name in the log message. + The date and time format to use in the log message. 
+ + + + Determines if the given log level is currently enabled. + checks if is true. + + + + + Do the actual logging. + + + + + + + + Called after deserialization completed. + + + + + Used to defer message formatting until it is really needed. + + + This class also improves performance when multiple + s are configured. + + + + + Factory for creating instances that send + everything to the output stream. + + + Beware not to use in combination with this logger factory + as this would result in an endless loop for obvious reasons! + + Below is an example how to configure this adapter: + + <configuration> + + <configSections> + <sectionGroup name="common"> + <section name="logging" + type="Common.Logging.ConfigurationSectionHandler, Common.Logging" + requirePermission="false" /> + </sectionGroup> + </configSections> + + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.TraceLoggerFactoryAdapter, Common.Logging"> + <arg key="level" value="ALL" /> + </factoryAdapter> + </logging> + </common> + + </configuration> + + + + + + + Gilles Bayon + Mark Pollack + Erich Eichinger + + + + Initializes a new instance of the class using default settings. + + + + + Initializes a new instance of the class. + + + Looks for level, showDateTime, showLogName, dateTimeFormat items from + for use when the GetLogger methods are called. + for more information on how to use the + standard .NET application configuraiton file (App.config/Web.config) + to configure this adapter. + + The name value collection, typically specified by the user in + a configuration section named common/logging. + + + + Initializes a new instance of the class with + default settings for the loggers created by this factory. + + + + + Creates a new instance. + + + + + Whether to use .TraceXXXX(string,object[]) methods for logging + or . + + + + + Indicates classes or members to be ignored by NCover + + + Note, the name is chosen, because TestDriven.NET uses it as //ea argument to "Test With... Coverage" + + Erich Eichinger + +
+
diff --git a/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.dll b/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.dll new file mode 100644 index 0000000..d5a9c97 Binary files /dev/null and b/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.dll differ diff --git a/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.pdb b/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.pdb new file mode 100644 index 0000000..c0a1bda Binary files /dev/null and b/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.pdb differ diff --git a/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.xml b/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.xml new file mode 100644 index 0000000..caaa614 --- /dev/null +++ b/packages/Common.Logging.2.1.2/lib/net40/Common.Logging.xml @@ -0,0 +1,3304 @@ + + + + Common.Logging + + + + + This assembly contains the core functionality of the Common.Logging framework. + In particular, checkout and for usage information. + + + + + Provides base implementation suitable for almost all logger adapters + + Erich Eichinger + + + + A simple logging interface abstracting logging APIs. + + + + Implementations should defer calling a message's until the message really needs + to be logged to avoid performance penalties. + + + Each log method offers to pass in a instead of the actual message. + Using this style has the advantage to defer possibly expensive message argument evaluation and formatting (and formatting arguments!) until the message gets + actually logged. If the message is not logged at all (e.g. due to settings), + you won't have to pay the peformance penalty of creating the message. + + + + The example below demonstrates using callback style for creating the message, where the call to the + and the underlying only happens, if level is enabled: + + Log.Debug( m=>m("result is {0}", random.NextDouble()) ); + Log.Debug(delegate(m) { m("result is {0}", random.NextDouble()); }); + + + + Mark Pollack + Bruno Baia + Erich Eichinger + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. 
+ + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. 
+ The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. 
+ + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. 
+ + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Checks if this logger is enabled for the level. + + + + + Holds the method for writing a message to the log system. + + + + + Creates a new logger instance using for + writing log events to the underlying log system. + + + + + + Override this method to use a different method than + for writing log events to the underlying log system. + + + Usually you don't need to override thise method. The default implementation returns + null to indicate that the default handler should be + used. + + + + + Actually sends the message to the underlying log system. + + the level of this log event. + the message to log + the exception to log (may be null) + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack trace of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack trace. + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. 
+ the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Debug of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Debug. + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. 
+ A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Info of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Info. + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Warn of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Warn. + + + + Log a message with the level. + + An that supplies culture-specific formatting Warnrmation. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting Warnrmation. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. 
+ The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Error of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Error. + + + + Log a message with the level. + + An that supplies culture-specific formatting Errorrmation. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting Errorrmation. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. 
+ A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Log a message object with the level. + + The message object to log. + + + + Log a message object with the level including + the stack Fatal of the passed + as a parameter. + + The message object to log. + The exception to log, including its stack Fatal. + + + + Log a message with the level. + + An that supplies culture-specific formatting Fatalrmation. + The format of the message object to log. + + + + + Log a message with the level. + + An that supplies culture-specific formatting Fatalrmation. + The format of the message object to log. + The exception to log. + + + + + Log a message with the level. + + The format of the message object to log. + the list of format arguments + + + + Log a message with the level. + + The format of the message object to log. + The exception to log. + the list of format arguments + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Log a message with the level using a callback to obtain the message + + + Using this method avoids the cost of creating a message and evaluating message arguments + that probably won't be logged due to loglevel settings. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. 
+ + + Override this in your derived class to comply with the underlying logging system + + + + + Checks if this logger is enabled for the level. + + + Override this in your derived class to comply with the underlying logging system + + + + + Format message on demand. + + + + + Initializes a new instance of the class. + + The format message callback. + + + + Initializes a new instance of the class. + + The format provider. + The format message callback. + + + + Calls and returns result. + + + + + + Format string on demand. + + + + + Initializes a new instance of the class. + + The format provider. + The message. + The args. + + + + Runs on supplied arguemnts. + + string + + + + Represents a method responsible for writing a message to the log system. + + + + + An implementation of that caches loggers handed out by this factory. + + + Implementors just need to override . + + Erich Eichinger + + + + LoggerFactoryAdapter interface is used internally by LogManager + Only developers wishing to write new Common.Logging adapters need to + worry about this interface. + + Gilles Bayon + + + + Get a ILog instance by type. + + The type to use for the logger + + + + + Get a ILog instance by name. + + The name of the logger + + + + + Creates a new instance, the logger cache being case-sensitive. + + + + + Creates a new instance, the logger cache being . + + + + + + Purges all loggers from cache + + + + + Create the specified named logger instance + + + Derived factories need to implement this method to create the + actual logger instance. + + + + + Get a ILog instance by . + + Usually the of the current class. + + An ILog instance either obtained from the internal cache or created by a call to . + + + + + Get a ILog instance by name. + + Usually a 's Name or FullName property. + + An ILog instance either obtained from the internal cache or created by a call to . + + + + + Get or create a ILog instance by name. + + Usually a 's Name or FullName property. + + An ILog instance either obtained from the internal cache or created by a call to . + + + + + A logger created by that + sends all log events to the owning adapter's + + Erich Eichinger + + + + Abstract class providing a standard implementation of simple loggers. + + Erich Eichinger + + + + Creates and initializes a the simple logger. + + The name, usually type name of the calling class, of the logger. + The current logging threshold. Messages recieved that are beneath this threshold will not be logged. + Include level in the log message. + Include the current time in the log message. + Include the instance name in the log message. + The date and time format to use in the log message. + + + + Appends the formatted message to the specified . + + the that receíves the formatted message. + + + + + + + Determines if the given log level is currently enabled. + + + + + + + The name of the logger. + + + + + Include the current log level in the log message. + + + + + Include the current time in the log message. + + + + + Include the instance name in the log message. + + + + + The current logging threshold. Messages recieved that are beneath this threshold will not be logged. + + + + + The date and time format to use in the log message. + + + + + Determines Whether is set. + + + + + Returns if the current is greater than or + equal to . If it is, all messages will be sent to . + + + + + Returns if the current is greater than or + equal to . If it is, all messages will be sent to . + + + + + Returns if the current is greater than or + equal to . 
If it is, only messages with a of + , , , and + will be sent to . + + + + + Returns if the current is greater than or + equal to . If it is, only messages with a of + , , and + will be sent to . + + + + + Returns if the current is greater than or + equal to . If it is, only messages with a of + and will be sent to . + + + + + Returns if the current is greater than or + equal to . If it is, only messages with a of + will be sent to . + + + + + The adapter that created this logger instance. + + + + + Clears all captured events + + + + + Resets the to null. + + + + + Holds the list of logged events. + + + To access this collection in a multithreaded application, put a lock on the list instance. + + + + + instances send their captured log events to this method. + + + + + Create a new logger instance. + + + + + Create a new and send it to + + + + + + + + Holds the last log event received from any of this adapter's loggers. + + + + + A logging event captured by + + Erich Eichinger + + + + The logger that logged this event + + + + + The level used to log this event + + + + + The raw message object + + + + + A logged exception + + + + + Create a new event instance + + + + + Retrieves the formatted message text + + + + + An adapter, who's loggers capture all log events and send them to . + Retrieve the list of log events from . + + + This logger factory is mainly for debugging and test purposes. + + This is an example how you might use this adapter for testing: + + // configure for capturing + CapturingLoggerFactoryAdapter adapter = new CapturingLoggerFactoryAdapter(); + LogManager.Adapter = adapter; + + // reset capture state + adapter.Clear(); + // log something + ILog log = LogManager.GetCurrentClassLogger(); + log.DebugFormat("Current Time:{0}", DateTime.Now); + + // check logged data + Assert.AreEqual(1, adapter.LoggerEvents.Count); + Assert.AreEqual(LogLevel.Debug, adapter.LastEvent.Level); + + + + Erich Eichinger + + + + Clears all captured events + + + + + Resets the to null. + + + + + Holds the list of logged events. + + + To access this collection in a multithreaded application, put a lock on the list instance. + + + + + instances send their captured log events to this method. + + + + + Get a instance for the given type. + + + + + Get a instance for the given name. + + + + + Holds the last log event received from any of this adapter's loggers. + + + + + A implementation sending all System.Diagnostics.Trace output to + the Common.Logging infrastructure. + + + This listener captures all output sent by calls to System.Diagnostics.Trace and + and and sends it to an instance.
+ The instance to be used is obtained by calling + . The name of the logger is created by passing + this listener's and any source or category passed + into this listener (see or for example). +
+ + The snippet below shows how to add and configure this listener to your app.config: + + <system.diagnostics> + <sharedListeners> + <add name="Diagnostics" + type="Common.Logging.Simple.CommonLoggingTraceListener, Common.Logging" + initializeData="DefaultTraceEventType=Information; LoggerNameFormat={listenerName}.{sourceName}"> + <filter type="System.Diagnostics.EventTypeFilter" initializeData="Information"/> + </add> + </sharedListeners> + <trace> + <listeners> + <add name="Diagnostics" /> + </listeners> + </trace> + </system.diagnostics> + + + Erich Eichinger +
+ + + Creates a new instance with the default name "Diagnostics" and "Trace". + + + + + Creates a new instance initialized with properties from the . string. + + + is a semicolon separated string of name/value pairs, where each pair has + the form key=value. E.g. + "Name=MyLoggerName;LogLevel=Debug" + + a semicolon separated list of name/value pairs. + + + + Creates a new instance initialized with the specified properties. + + name/value configuration properties. + + + + Logs the given message to the Common.Logging infrastructure. + + the eventType + the name or category name passed into e.g. . + the id of this event + the message format + the message arguments + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by . + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Writes message to logger provided by + + + + + Sets the default to use for logging + all events emitted by .Write(...) and + .WriteLine(...) methods. + + + This listener captures all output sent by calls to and + sends it to an instance using the specified + on . + + + + + Format to use for creating the logger name. Defaults to "{listenerName}.{sourceName}". + + + Available placeholders are: + + {listenerName}: the configured name of this listener instance. + {sourceName}: the trace source name an event originates from (see e.g. . + + + + + + The exception that is thrown when a configuration system error has occurred with Common.Logging + + Mark Pollack + + + Creates a new instance of the ObjectsException class. + + + + Creates a new instance of the ConfigurationException class. with the specified message. + + + A message about the exception. + + + + + Creates a new instance of the ConfigurationException class with the specified message + and root cause. + + + A message about the exception. + + + The root exception that is being wrapped. + + + + + Creates a new instance of the ConfigurationException class. + + + The + that holds the serialized object data about the exception being thrown. + + + The + that contains contextual information about the source or destination. + + + + + Implementation of that uses the standard .NET + configuration APIs, ConfigurationSettings in 1.x and ConfigurationManager in 2.0 + + Mark Pollack + + + + Interface for basic operations to read .NET application configuration information. + + Provides a simple abstraction to handle BCL API differences between .NET 1.x and 2.0. Also + useful for testing scenarios. + Mark Pollack + + + + Parses the configuration section and returns the resulting object. + + +

+ Primary purpose of this method is to allow us to parse and + load configuration sections using the same API regardless + of the .NET framework version. +

+
+ Name of the configuration section. + Object created by a corresponding . + +
+ + + Parses the configuration section and returns the resulting object. + + Name of the configuration section. + + Object created by a corresponding . + + +

+ Primary purpose of this method is to allow us to parse and + load configuration sections using the same API regardless + of the .NET framework version. +

+
+ +
+ + + This namespace contains convenience base classes for implementing your own s. + + + + + Various utility methods for using during factory and logger instance configuration + + Erich Eichinger + + + + Initialize all members before any of this class' methods can be accessed (avoids beforeFieldInit) + + + + + Adds the parser to the list of known type parsers. + + + .NET intrinsic types are pre-registerd: short, int, long, float, double, decimal, bool + + + + + Retrieves the named value from the specified . + + may be null + the value's key + if is not null, the value returned by values[name]. null otherwise. + + + + Retrieves the named value from the specified . + + may be null + the value's key + the default value, if not found + if is not null, the value returned by values[name]. null otherwise. + + + + Returns the first nonnull, nonempty value among its arguments. + + + Returns null, if the initial list was null or empty. + + + + + + Returns the first nonnull, nonempty value among its arguments. + + + Also + + + + + Tries parsing into an enum of the type of . + + the default value to return if parsing fails + the string value to parse + the successfully parsed value, otherwise. + + + + Tries parsing into the specified return type. + + the default value to return if parsing fails + the string value to parse + the successfully parsed value, otherwise. + + + + Throws a if is null. + + + + + Throws a if is null. + + + + + Throws a if an object of type is not + assignable to type . + + + + + Throws a if an object of type is not + assignable to type . + + + + + Ensures any exception thrown by the given is wrapped with an + . + + + If already throws a ConfigurationException, it will not be wrapped. + + the action to execute + the message to be set on the thrown + args to be passed to to format the message + + + + Ensures any exception thrown by the given is wrapped with an + . + + + If already throws a ConfigurationException, it will not be wrapped. + + the action to execute + the message to be set on the thrown + args to be passed to to format the message + + + + A delegate converting a string representation into the target type + + + + + An anonymous action delegate with no arguments and no return value. + + + + + + An anonymous action delegate with no arguments and no return value. + + + + + + The type of method that is passed into e.g. + and allows the callback method to "submit" it's message to the underlying output system. + + the format argument as in + the argument list as in + + Erich Eichinger + + + + Used in an application's configuration file (App.Config or Web.Config) to configure the logging subsystem. + + + An example configuration section that writes log messages to the Console using the + built-in Console Logger. 
+ + <configuration> + <configSections> + <sectionGroup name="common"> + <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" /> + </sectionGroup> + </configSections> + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging"> + <arg key="showLogName" value="true" /> + <arg key="showDataTime" value="true" /> + <arg key="level" value="ALL" /> + <arg key="dateTimeFormat" value="yyyy/MM/dd HH:mm:ss:fff" /> + </factoryAdapter> + </logging> + </common> + </configuration> + + + + + + Ensure static fields get initialized before any class member + can be accessed (avoids beforeFieldInit) + + + + + Constructor + + + + + Retrieves the of the logger the use by looking at the logFactoryAdapter element + of the logging configuration element. + + + + A object containing the specified type that implements + along with zero or more properties that will be + passed to the logger factory adapter's constructor as an . + + + + + Verifies that the logFactoryAdapter element appears once in the configuration section. + + settings of a parent section - atm this must always be null + Additional information about the configuration process. + The configuration section to apply an XPath query too. + + A object containing the specified logFactoryAdapter type + along with user supplied configuration properties. + + + + + Verifies that the logFactoryAdapter element appears once in the configuration section. + + The parent of the current item. + Additional information about the configuration process. + The configuration section to apply an XPath query too. + + A object containing the specified logFactoryAdapter type + along with user supplied configuration properties. + + + + + The 7 possible logging levels + + Gilles Bayon + + + + All logging levels + + + + + A trace logging level + + + + + A debug logging level + + + + + A info logging level + + + + + A warn logging level + + + + + An error logging level + + + + + A fatal logging level + + + + + Do not log anything. + + + + + Use the LogManager's or + methods to obtain instances for logging. + + + For configuring the underlying log system using application configuration, see the example + at . + For configuring programmatically, see the example section below. + + + The example below shows the typical use of LogManager to obtain a reference to a logger + and log an exception: + + + ILog log = LogManager.GetLogger(this.GetType()); + ... + try + { + /* .... */ + } + catch(Exception ex) + { + log.ErrorFormat("Hi {0}", ex, "dude"); + } + + + The example below shows programmatic configuration of the underlying log system: + + + // create properties + NameValueCollection properties = new NameValueCollection(); + properties["showDateTime"] = "true"; + + // set Adapter + Common.Logging.LogManager.Adapter = new + Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter(properties); + + + + + + + + Gilles Bayon + + + + The name of the default configuration section to read settings from. + + + You can always change the source of your configuration settings by setting another instance + on . + + + + + Performs static 1-time init of LogManager by calling + + + + + Reset the infrastructure to its default settings. This means, that configuration settings + will be re-read from section <common/logging> of your app.config. + + + This is mainly used for unit testing, you wouldn't normally use this in your applications.
+ Note: instances already handed out from this LogManager are not(!) affected. + Resetting LogManager only affects new instances being handed out. +
+
+ + + Reset the infrastructure to its default settings. This means, that configuration settings + will be re-read from section <common/logging> of your app.config. + + + This is mainly used for unit testing, you wouldn't normally use this in your applications.
+ Note: instances already handed out from this LogManager are not(!) affected. + Resetting LogManager only affects new instances being handed out. +
+ + the instance to obtain settings for + re-initializing the LogManager. + +
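+ 
+ As a rough, hypothetical illustration (not part of the shipped documentation): a unit test could re-initialize
+ the LogManager from an in-memory reader instead of App.config. The LogSetting constructor arguments follow the
+ description of that class further down in this file; the exact namespaces of IConfigurationReader and LogSetting
+ are assumptions to verify against the Common.Logging 2.1.2 assembly.
+ 
+ using System.Collections.Specialized;
+ using Common.Logging;
+ using Common.Logging.Simple;
+ 
+ // returns a LogSetting directly instead of parsing an XML configuration section
+ public class InMemoryConfigurationReader : IConfigurationReader
+ {
+     public object GetSection(string sectionName)
+     {
+         NameValueCollection properties = new NameValueCollection();
+         properties["level"] = "DEBUG";
+         // adapter type plus the properties passed to its constructor
+         return new LogSetting(typeof(ConsoleOutLoggerFactoryAdapter), properties);
+     }
+ }
+ 
+ // typically called from a test fixture setup:
+ // LogManager.Reset(new InMemoryConfigurationReader());
+ 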
+ + + Gets the logger by calling + on the currently configured using the type of the calling class. + + + This method needs to inspect the in order to determine the calling + class. This of course comes with a performance penalty, thus you shouldn't call it too + often in your application. + + + the logger instance obtained from the current + + + + Gets the logger by calling + on the currently configured using the specified type. + + the logger instance obtained from the current + + + + Gets the logger by calling + on the currently configured using the specified type. + + The type. + the logger instance obtained from the current + + + + Gets the logger by calling + on the currently configured using the specified name. + + The name. + the logger instance obtained from the current + + + + Builds the logger factory adapter. + + a factory adapter instance. Is never null. + + + + Builds a instance from the given + using . + + + the instance. Is never null + + + + Gets the configuration reader used to initialize the LogManager. + + Primarily used for testing purposes but maybe useful to obtain configuration + information from some place other than the .NET application configuration file. + The configuration reader. + + + + Gets or sets the adapter. + + The adapter. + + + + Container used to hold configuration information from config file. + + Gilles Bayon + + + + + + + The type + that will be used for creating + + + Additional user supplied properties that are passed to the + 's constructor. + + + + + The type that will be used for creating + instances. + + + + + Additional user supplied properties that are passed to the 's constructor. + + + + + This namespace contains all core classes making up the Common.Logging framework. + + + + + This namespace contains out-of-the-box adapters to intrinsic systems, namely + , and the + all output suppressing . + For unit testing, you may also want to have a look at + that allows to easily inspect logged messages. + To route messages logged through the infrastructure back into + Common.Logging, you can use + + + + + This namespace contains various utility classes. + + + + +

Overview

+ + There are a variety of logging implementations for .NET currently in use: log4net, Enterprise + Library Logging, and NLog, to name the most popular. The downside of having different implementations + is that they do not share a common interface and therefore impose a particular logging + implementation on the users of your library. To solve this dependency problem, the Common.Logging + library introduces a simple abstraction that allows you to select a specific logging implementation at + runtime. + + The library is based on work done by the developers of IBatis.NET and its usage is inspired by + log4net. Many thanks to the developers of those projects! + +

Usage

+ + The core logging library Common.Logging provides the base logging interface as + well as the global that you use to instrument your code: + + + ILog log = LogManager.GetLogger(this.GetType()); + + log.DebugFormat("Hi {0}", "dude"); + + + To output the information logged, you need to tell Common.Logging, what underlying logging system + to use. Common.Logging already includes simple console and trace based logger implementations + usable out of the box. Adding the following configuration snippet to your app.config causes + Common.Logging to output all information to the console: + + + <configuration> + <configSections> + <sectionGroup name="common"> + <section name="logging" type="Common.Logging.ConfigurationSectionHandler, Common.Logging" /> + </sectionGroup> + </configSections> + + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging"> + <arg key="level" value="DEBUG" /> + </factoryAdapter> + </logging> + </common> + </configuration> + +

Customizing

+ + In case you want to integrate a logging system that is not yet supported by Common.Logging, you can easily + implement your own plugin by implementing . + For convenience there is a base implementation available that usually + makes implementing your own adapter a breeze; a rough sketch follows below. +
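+ 
+ The fragment below is only a sketch of that extension point (it is not part of the original documentation).
+ It implements the two GetLogger overloads of ILoggerFactoryAdapter and delegates to the bundled ConsoleOutLogger;
+ the ConsoleOutLogger constructor arguments follow the parameter list documented further down in this file and
+ should be verified against the 2.1.2 assembly.
+ 
+ using System;
+ using Common.Logging;
+ using Common.Logging.Simple;
+ 
+ public class MyLoggerFactoryAdapter : ILoggerFactoryAdapter
+ {
+     // name-based overload: hand out a console logger that logs everything
+     public ILog GetLogger(string name)
+     {
+         return new ConsoleOutLogger(name, LogLevel.All, true, true, true, null);
+     }
+ 
+     // type-based overload: reuse the name-based one
+     public ILog GetLogger(Type type)
+     {
+         return GetLogger(type.FullName);
+     }
+ }
+ 
+ The adapter is then activated either programmatically via LogManager.Adapter = new MyLoggerFactoryAdapter();
+ or by naming the type in the factoryAdapter element of the common/logging configuration section.
+ 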

<system.diagnostics> Integration

+ + If your code already uses the .NET framework's built-in System.Diagnostics.Trace + system, you can use to redirect all trace output to the + Common.Logging infrastructure. + +
+
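+ 
+ The lines below sketch that redirection. The listener type name is stripped from the XML above, so
+ Common.Logging.Simple.CommonLoggingTraceListener is an assumption; check the assembly for the exact type
+ before using it. The listener can equally be registered in the <system.diagnostics> section of App.config.
+ 
+ using System.Diagnostics;
+ 
+ // route System.Diagnostics.Trace output into the configured Common.Logging adapter
+ Trace.Listeners.Add(new Common.Logging.Simple.CommonLoggingTraceListener());
+ Trace.TraceInformation("this message now ends up in Common.Logging");
+ 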
+ + + Base factory implementation for creating simple instances. + + Default settings are LogLevel.All, showDateTime = true, showLogName = true, and no DateTimeFormat. + The keys in the NameValueCollection to configure this adapter are the following + + level + showDateTime + showLogName + dateTimeFormat + + + Here is an example how to implement your own logging adapter: + + public class ConsoleOutLogger : AbstractSimpleLogger + { + public ConsoleOutLogger(string logName, LogLevel logLevel, bool showLevel, bool showDateTime, + bool showLogName, string dateTimeFormat) + : base(logName, logLevel, showLevel, showDateTime, showLogName, dateTimeFormat) + { + } + + protected override void WriteInternal(LogLevel level, object message, Exception e) + { + // Use a StringBuilder for better performance + StringBuilder sb = new StringBuilder(); + FormatOutput(sb, level, message, e); + + // Print to the appropriate destination + Console.Out.WriteLine(sb.ToString()); + } + } + + public class ConsoleOutLoggerFactoryAdapter : AbstractSimpleLoggerFactoryAdapter + { + public ConsoleOutLoggerFactoryAdapter(NameValueCollection properties) + : base(properties) + { } + + protected override ILog CreateLogger(string name, LogLevel level, bool showLevel, bool + showDateTime, bool showLogName, string dateTimeFormat) + { + ILog log = new ConsoleOutLogger(name, level, showLevel, showDateTime, showLogName, + dateTimeFormat); + return log; + } + } + + + + + + Gilles Bayon + Mark Pollack + Erich Eichinger + + + + Initializes a new instance of the class. + + + Looks for level, showDateTime, showLogName, dateTimeFormat items from + for use when the GetLogger methods are called. + for more information on how to use the + standard .NET application configuraiton file (App.config/Web.config) + to configure this adapter. + + The name value collection, typically specified by the user in + a configuration section named common/logging. + + + + Initializes a new instance of the class with + default settings for the loggers created by this factory. + + + + + Create the specified logger instance + + + + + Derived factories need to implement this method to create the + actual logger instance. + + a new logger instance. Must never be null! + + + + The default to use when creating new instances. + + + + + The default setting to use when creating new instances. + + + + + The default setting to use when creating new instances. + + + + + The default setting to use when creating new instances. + + + + + The default setting to use when creating new instances. + + + + + Sends log messages to . + + Gilles Bayon + + + + Creates and initializes a logger that writes messages to . + + The name, usually type name of the calling class, of the logger. + The current logging threshold. Messages recieved that are beneath this threshold will not be logged. + Include the current log level in the log message. + Include the current time in the log message. + Include the instance name in the log message. + The date and time format to use in the log message. + + + + Do the actual logging by constructing the log message using a then + sending the output to . + + The of the message. + The log message. + An optional associated with the message. + + + + Factory for creating instances that write data to . 
+ + + + Below is an example how to configure this adapter: + + <configuration> + + <configSections> + <sectionGroup name="common"> + <section name="logging" + type="Common.Logging.ConfigurationSectionHandler, Common.Logging" + requirePermission="false" /> + </sectionGroup> + </configSections> + + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.ConsoleOutLoggerFactoryAdapter, Common.Logging"> + <arg key="level" value="ALL" /> + </factoryAdapter> + </logging> + </common> + + </configuration> + + + + + + + Gilles Bayon + Mark Pollack + Erich Eichinger + + + + Initializes a new instance of the class using default + settings. + + + + + Initializes a new instance of the class. + + + Looks for level, showDateTime, showLogName, dateTimeFormat items from + for use when the GetLogger methods are called. + for more information on how to use the + standard .NET application configuraiton file (App.config/Web.config) + to configure this adapter. + + The name value collection, typically specified by the user in + a configuration section named common/logging. + + + + Initializes a new instance of the class with + default settings for the loggers created by this factory. + + + + + Creates a new instance. + + + + + Silently ignores all log messages. + + Gilles Bayon + Erich Eichinger + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack trace. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Ignores message. + + An that supplies culture-specific formatting information. 
+ A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Debug. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting information. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Info. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Warnrmation. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Warnrmation. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Warn. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Errorrmation. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Errorrmation. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. 
+ + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Error. + + + + Ignores message. + + + + + + Ignores message. + + + + + + + Ignores message. + + The format of the message object to log. + + + + + Ignores message. + + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Fatalrmation. + The format of the message object to log. + the list of message format arguments + + + + Ignores message. + + An that supplies culture-specific formatting Fatalrmation. + The format of the message object to log. + The exception to log. + the list of message format arguments + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + + + + Ignores message. + + An that supplies culture-specific formatting information. + A callback used by the logger to obtain the message if log level is matched + The exception to log, including its stack Fatal. + + + + Always returns . + + + + + Always returns . + + + + + Always returns . + + + + + Always returns . + + + + + Always returns . + + + + + Always returns . + + + + + Factory for creating instances that silently ignores + logging requests. + + + This logger adapter is the default used by Common.Logging if unconfigured. Using this logger adapter is the most efficient + way to suppress any logging output. + + Below is an example how to configure this adapter: + + <configuration> + + <configSections> + <sectionGroup name="common"> + <section name="logging" + type="Common.Logging.ConfigurationSectionHandler, Common.Logging" + requirePermission="false" /> + </sectionGroup> + </configSections> + + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.NoOpLoggerFactoryAdapter, Common.Logging"> + <arg key="level" value="ALL" /> + </factoryAdapter> + </logging> + </common> + + </configuration> + + + + + + Gilles Bayon + + + + Constructor + + + + + Constructor + + + + + Get a ILog instance by type + + + + + + + Get a ILog instance by type name + + + + + + + Logger sending everything to the trace output stream using . + + + Beware not to use in combination with this logger as + this would result in an endless loop for obvious reasons! + + + + Gilles Bayon + Erich Eichinger + + + + Creates a new TraceLogger instance. + + whether to use or for logging. + the name of this logger + the default log level to use + Include the current log level in the log message. + Include the current time in the log message. + Include the instance name in the log message. + The date and time format to use in the log message. 
+ + + + Determines if the given log level is currently enabled. + checks if is true. + + + + + Do the actual logging. + + + + + + + + Called after deserialization completed. + + + + + Used to defer message formatting until it is really needed. + + + This class also improves performance when multiple + s are configured. + + + + + Factory for creating instances that send + everything to the output stream. + + + Beware not to use in combination with this logger factory + as this would result in an endless loop for obvious reasons! + + Below is an example how to configure this adapter: + + <configuration> + + <configSections> + <sectionGroup name="common"> + <section name="logging" + type="Common.Logging.ConfigurationSectionHandler, Common.Logging" + requirePermission="false" /> + </sectionGroup> + </configSections> + + <common> + <logging> + <factoryAdapter type="Common.Logging.Simple.TraceLoggerFactoryAdapter, Common.Logging"> + <arg key="level" value="ALL" /> + </factoryAdapter> + </logging> + </common> + + </configuration> + + + + + + + Gilles Bayon + Mark Pollack + Erich Eichinger + + + + Initializes a new instance of the class using default settings. + + + + + Initializes a new instance of the class. + + + Looks for level, showDateTime, showLogName, dateTimeFormat items from + for use when the GetLogger methods are called. + for more information on how to use the + standard .NET application configuraiton file (App.config/Web.config) + to configure this adapter. + + The name value collection, typically specified by the user in + a configuration section named common/logging. + + + + Initializes a new instance of the class with + default settings for the loggers created by this factory. + + + + + Creates a new instance. + + + + + Whether to use .TraceXXXX(string,object[]) methods for logging + or . + + + + + Indicates classes or members to be ignored by NCover + + + Note, the name is chosen, because TestDriven.NET uses it as //ea argument to "Test With... Coverage" + + Erich Eichinger + +
+
diff --git a/packages/Lucene.Net.3.0.3/Lucene.Net.3.0.3.nupkg b/packages/Lucene.Net.3.0.3/Lucene.Net.3.0.3.nupkg new file mode 100644 index 0000000..c8314d2 Binary files /dev/null and b/packages/Lucene.Net.3.0.3/Lucene.Net.3.0.3.nupkg differ diff --git a/packages/Lucene.Net.3.0.3/Lucene.Net.3.0.3.nuspec b/packages/Lucene.Net.3.0.3/Lucene.Net.3.0.3.nuspec new file mode 100644 index 0000000..e1b8ee1 --- /dev/null +++ b/packages/Lucene.Net.3.0.3/Lucene.Net.3.0.3.nuspec @@ -0,0 +1,25 @@ + + + + Lucene.Net + 3.0.3 + Lucene.Net + Lucene.Net Community + Lucene.Net Community + http://www.apache.org/licenses/LICENSE-2.0.html + http://lucenenet.apache.org/ + https://svn.apache.org/repos/asf/lucene.net/trunk/branding/logo/lucene-net-icon-128x128.png + false + Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users. + This package contains only the core Lucene.Net assembly. + Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users. + Copyright 2006-2012 The Apache Software Foundation + lucene.net core text search information retrieval lucene apache + + + + + + + + \ No newline at end of file diff --git a/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.XML b/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.XML new file mode 100644 index 0000000..5303121 --- /dev/null +++ b/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.XML @@ -0,0 +1,21744 @@ + + + + Lucene.Net + + + + An Analyzer builds TokenStreams, which analyze text. It thus represents a + policy for extracting index terms from text. +

+ Typical implementations first build a Tokenizer, which breaks the stream of + characters from the Reader into raw Tokens. One or more TokenFilters may + then be applied to the output of the Tokenizer. +

+
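+ 
+ A minimal sketch of that pattern (an illustration, not part of the original documentation): a Tokenizer
+ produces the raw tokens and a TokenFilter post-processes them. It assumes the Lucene.Net 3.0.3 core types
+ WhitespaceTokenizer and LowerCaseFilter with their single-argument constructors.
+ 
+ using System.IO;
+ using Lucene.Net.Analysis;
+ 
+ // an Analyzer that splits on whitespace and lower-cases each token
+ public class SimpleLowerCaseAnalyzer : Analyzer
+ {
+     public override TokenStream TokenStream(string fieldName, TextReader reader)
+     {
+         // Tokenizer first, then chain TokenFilters on top of it
+         return new LowerCaseFilter(new WhitespaceTokenizer(reader));
+     }
+ }
+ 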
+ + Creates a TokenStream which tokenizes all the text in the provided + Reader. Must be able to handle null field name for + backward compatibility. + + + + Creates a TokenStream that is allowed to be re-used + from the previous time that the same thread called + this method. Callers that do not need to use more + than one TokenStream at the same time from this + analyzer should use this method for better + performance. + + + + This is only present to preserve + back-compat of classes that subclass a core analyzer + and override tokenStream but not reusableTokenStream + + + Java uses Class<? extends Analyer> to constrain to + only Types that inherit from Analyzer. C# does not have a generic type class, + ie Type<t>. The method signature stays the same, and an exception may + still be thrown, if the method doesn't exist. + + + + Invoked before indexing a Fieldable instance if + terms have already been added to that field. This allows custom + analyzers to place an automatic position increment gap between + Fieldable instances using the same field name. The default value + position increment gap is 0. With a 0 position increment gap and + the typical default token position increment of 1, all terms in a field, + including across Fieldable instances, are in successive positions, allowing + exact PhraseQuery matches, for instance, across Fieldable instance boundaries. + + + Fieldable name being indexed. + + position increment gap, added to the next token emitted from + + + + Just like , except for + Token offsets instead. By default this returns 1 for + tokenized fields and, as if the fields were joined + with an extra space character, and 0 for un-tokenized + fields. This method is only called if the field + produced at least one token for indexing. + + + the field just indexed + + offset gap, added to the next token emitted from + + + + Frees persistent resources used by this Analyzer + + + Used by Analyzers that implement reusableTokenStream + to retrieve previously saved TokenStreams for re-use + by the same thread. + + + + This class converts alphabetic, numeric, and symbolic Unicode characters + which are not in the first 127 ASCII characters (the "Basic Latin" Unicode + block) into their ASCII equivalents, if one exists. 
+ + Characters from the following Unicode blocks are converted; however, only + those characters with reasonable ASCII alternatives are converted: + + + C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf + Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf + Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf + Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf + Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf + Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf + IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf + Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf + Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf + General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf + Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf + Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf + Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf + Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf + Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf + Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf + + + See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode + + The set of character conversions supported by this class is a superset of + those supported by Lucene's which strips + accents from Latin1 characters. For example, 'À' will be replaced by + 'a'. + + + + A TokenFilter is a TokenStream whose input is another TokenStream. +

+ This is an abstract class; subclasses must override . + +

+ + +
+ + A TokenStream enumerates the sequence of tokens, either from + s of a or from query text. +

+ This is an abstract class. Concrete subclasses are: + + , a TokenStream whose input is a Reader; and + , a TokenStream whose input is another + TokenStream. + + A new TokenStream API has been introduced with Lucene 2.9. This API + has moved from being based to based. While + still exists in 2.9 as a convenience class, the preferred way + to store the information of a is to use s. +

+ TokenStream now extends , which provides + access to all of the token s for the TokenStream. + Note that only one instance per is created and reused + for every token. This approach reduces object creation and allows local + caching of references to the s. See + for further details. +

+ The workflow of the new TokenStream API is as follows: + + Instantiation of TokenStream/s which add/get + attributes to/from the . + The consumer calls . + The consumer retrieves attributes from the stream and stores local + references to all attributes it wants to access + The consumer calls until it returns false and + consumes the attributes after each call. + The consumer calls so that any end-of-stream operations + can be performed. + The consumer calls to release any resource when finished + using the TokenStream + + To make sure that filters and consumers know which attributes are available, + the attributes must be added during instantiation. Filters and consumers are + not required to check for availability of attributes in + . +

+ You can find some example code for the new API in the analysis package level + Javadoc. +
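+ 
+ As a rough sketch of that workflow (this is not the package-level example referred to above): the loop below
+ consumes a stream through the attribute-based API. It assumes the Lucene.Net 3.0.3 names ITermAttribute
+ (namespace Lucene.Net.Analysis.Tokenattributes) and WhitespaceTokenizer; how the stream is finally released
+ (Close or Dispose) differs between ports, so it is left as a comment.
+ 
+ using System.IO;
+ using Lucene.Net.Analysis;
+ using Lucene.Net.Analysis.Tokenattributes;
+ 
+ TokenStream stream = new WhitespaceTokenizer(new StringReader("The quick brown fox"));
+ // retrieve the attribute once before consuming; the same instance is reused for every token
+ ITermAttribute termAttr = stream.AddAttribute<ITermAttribute>();
+ while (stream.IncrementToken())
+ {
+     System.Console.WriteLine(termAttr.Term);   // "The", "quick", "brown", "fox"
+ }
+ stream.End();
+ // release the stream when finished (Close/Dispose, depending on the exact API surface)
+ 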

+ Sometimes it is desirable to capture a current state of a TokenStream + , e.g. for buffering purposes (see , + ). For this use case + and + can be used. +

+
+ + An AttributeSource contains a list of different s, + and methods to add and get them. There can only be a single instance + of an attribute in the same AttributeSource instance. This is ensured + by passing in the actual type of the Attribute (Class<Attribute>) to + the , which then checks if an instance of + that type is already present. If yes, it returns the instance, otherwise + it creates a new instance and returns it. + + + + An AttributeSource using the default attribute factory . + + + An AttributeSource that uses the same attributes as the supplied one. + + + An AttributeSource using the supplied for creating new instances. + + + Returns a new iterator that iterates the attribute classes + in the same order they were added in. + Signature for Java 1.5: public Iterator<Class<? extends Attribute>> getAttributeClassesIterator() + + Note that this return value is different from Java in that it enumerates over the values + and not the keys + + + + Returns a new iterator that iterates all unique Attribute implementations. + This iterator may contain less entries that , + if one instance implements more than one Attribute interface. + Signature for Java 1.5: public Iterator<AttributeImpl> getAttributeImplsIterator() + + + + a cache that stores all interfaces for known implementation classes for performance (slow reflection) + + + + Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. +

Please note: It is not guaranteed that att is added to + the AttributeSource, because the provided attributes may already exist. + You should always retrieve the attributes you want using after adding them + with this method, and cast them to your class. + The recommended way to use custom implementations is using an +

+
+
+ + The caller must pass in a Class<? extends Attribute> value. + This method first checks if an instance of that class is + already in this AttributeSource and returns it. Otherwise a + new instance is created, added to this AttributeSource and returned. + + + + The caller must pass in a Class<? extends Attribute> value. + Returns true, iff this AttributeSource contains the passed-in Attribute. + \ + + + + The caller must pass in a Class<? extends Attribute> value. + Returns the instance of the passed in Attribute contained in this AttributeSource + + + IllegalArgumentException if this AttributeSource does not contain the Attribute. + It is recommended to always use even in consumers + of TokenStreams, because you cannot know if a specific TokenStream really uses + a specific Attribute. will automatically make the attribute + available. If you want to only use the attribute, if it is available (to optimize + consuming), use . + + + + Resets all Attributes in this AttributeSource by calling + on each Attribute implementation. + + + + Captures the state of all Attributes. The return value can be passed to + to restore the state of this or another AttributeSource. + + + + Restores this state by copying the values of all attribute implementations + that this state contains into the attributes implementations of the targetStream. + The targetStream must contain a corresponding instance for each argument + contained in this state (e.g. it is not possible to restore the state of + an AttributeSource containing a TermAttribute into a AttributeSource using + a Token instance as implementation). + + Note that this method does not affect attributes of the targetStream + that are not contained in this state. In other words, if for example + the targetStream contains an OffsetAttribute, but this state doesn't, then + the value of the OffsetAttribute remains unchanged. It might be desirable to + reset its value to the default, in which case the caller should first + call on the targetStream. + + + + Performs a clone of all instances returned in a new + AttributeSource instance. This method can be used to e.g. create another TokenStream + with exactly the same attributes (using ) + + + + Returns the used AttributeFactory. + + + Returns true, iff this AttributeSource has any attributes + + + An AttributeFactory creates instances of s. + + + returns an for the supplied interface class. + + + This is the default factory that creates s using the + class name of the supplied interface class by appending Impl to it. + + + + This class holds the state of an AttributeSource. + + + + + + + A TokenStream using the default attribute factory. + + + A TokenStream that uses the same attributes as the supplied one. + + + A TokenStream using the supplied AttributeFactory for creating new instances. + + + Consumers (i.e., ) use this method to advance the stream to + the next token. Implementing classes must implement this method and update + the appropriate s with the attributes of the next + token. + + The producer must make no assumptions about the attributes after the + method has been returned: the caller may arbitrarily change it. If the + producer needs to preserve the state for subsequent calls, it can use + to create a copy of the current attribute state. + + This method is called for every token of a document, so an efficient + implementation is crucial for good performance. To avoid calls to + and , + references to all s that this stream uses should be + retrieved during instantiation. 
+ + To ensure that filters and consumers know which attributes are available, + the attributes must be added during instantiation. Filters and consumers + are not required to check for availability of attributes in + . + + + false for end of stream; true otherwise + + + This method is called by the consumer after the last token has been + consumed, after returned false + (using the new TokenStream API). Streams implementing the old API + should upgrade to use this feature. +

+ This method can be used to perform any end-of-stream operations, such as + setting the final offset of a stream. The final offset of a stream might + differ from the offset of the last token, e.g. in case one or more whitespace characters + followed the last token, but a was used. +

+ IOException +
+ + Resets this stream to the beginning. This is an optional operation, so + subclasses may or may not implement this method. is not needed for + the standard indexing process. However, if the tokens of a + TokenStream are intended to be consumed more than once, it is + necessary to implement . Note that if your TokenStream + caches tokens and feeds them back again after a reset, it is imperative + that you clone the tokens when you store them away (on the first pass) as + well as when you return them (on future passes after ). + + + + Releases resources associated with this stream. + + + The source of tokens for this filter. + + + Construct a token stream filtering the given input. + + + Performs end-of-stream operations, if any, and calls then end() on the + input TokenStream.

+ NOTE: Be sure to call super.end() first when overriding this method. +

+
+ + Reset the filter as well as the input TokenStream. + + + Converts characters above ASCII to their ASCII equivalents. For example, + accents are removed from accented characters. + + The string to fold + + The number of characters in the input string + + + + + * Base utility class for implementing a . + * You subclass this, and then record mappings by calling + * , and then invoke the correct + * method to correct an offset. + + + + Subclasses of CharFilter can be chained to filter CharStream. + They can be used as with additional offset + correction. s will automatically use + if a CharFilter/CharStream subclass is used. + + + $Id$ + + + + + CharStream adds + functionality over . All Tokenizers accept a + CharStream instead of as input, which enables + arbitrary character based filtering before tokenization. + The method fixed offsets to account for + removal or insertion of characters, so that the offsets + reported in the tokens match the character offsets of the + original Reader. + + + + Called by CharFilter(s) and Tokenizer to correct token offset. + + + offset as seen in the output + + corrected offset based on the input + + + + Subclass may want to override to correct the current offset. + current offset + corrected offset + + + Chains the corrected offset through the input + CharFilter. + + + + This class can be used if the token attributes of a TokenStream + are intended to be consumed more than once. It caches + all token attribute states locally in a List. + +

CachingTokenFilter implements the optional method + , which repositions the + stream to the first Token. +

+
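+ 
+ A short sketch of that two-pass usage follows (again an illustration under the assumption of the 3.0.3 core
+ type and member names, not shipped documentation):
+ 
+ using System.IO;
+ using Lucene.Net.Analysis;
+ 
+ TokenStream source = new WhitespaceTokenizer(new StringReader("cache me twice"));
+ CachingTokenFilter cached = new CachingTokenFilter(source);
+ 
+ while (cached.IncrementToken()) { /* first pass fills the internal cache */ }
+ cached.Reset();                                  // reposition to the first token
+ while (cached.IncrementToken()) { /* second pass replays the cached states */ }
+ 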
+ + A simple class that stores Strings as char[]'s in a + hash table. Note that this is not a general purpose + class. For example, it cannot remove items from the + set, nor does it resize its hash table to be smaller, + etc. It is designed to be quick to test if a char[] + is in the set without the necessity of converting it + to a String first. +

Please note: This class implements but + does not behave as it should in all cases. The generic type is + , because you can add any object + that has a string representation. The add methods will use + and store the result using a + buffer. The methods behave the same way. + The method returns an IEnumerable. + For type safety, {@link #stringIterator()} is also provided. +

+
+ + Create set with enough capacity to hold startSize + terms + + + + Create set from a Collection of char[] or String + + + Create set from entries + + + true if the len chars of text starting at off + are in the set + + + + Returns true if the String is in the set + + + Add this char[] directly to the set. + If ignoreCase is true for this Set, the text array will be directly modified. + The user should never modify this text array after calling this method. + + + + + Returns an unmodifiable . This allows to provide + unmodifiable views of internal sets for "read-only" use + + A Set for which the unmodifiable set it returns. + A new unmodifiable + ArgumentNullException of the given set is null + + + + returns a copy of the given set as a . If the given set + is a the ignoreCase property will be preserved. + + A set to copy + a copy of the given set as a . If the given set + is a the ignoreCase property will be preserved. + + + Adds all of the elements in the specified collection to this collection + + + Wrapper that calls UnionWith + + + + The IEnumerator<String> for this set. Strings are constructed on the fly, + so use nextCharArray for more efficient access + + + + do not modify the returned char[] + + + CharReader is a Reader wrapper. It reads chars from + Reader and outputs , defining an + identify function method that + simply returns the provided offset. + + + + An abstract base class for simple, character-oriented tokenizers. + + + A Tokenizer is a TokenStream whose input is a Reader. +

+ This is an abstract class; subclasses must override +

+ NOTE: Subclasses overriding must call + before setting attributes. +

+
+ + The text source for this Tokenizer. + + + Construct a tokenizer with null input. + + + Construct a token stream processing the given input. + + + Construct a tokenizer with null input using the given AttributeFactory. + + + Construct a token stream processing the given input using the given AttributeFactory. + + + Construct a token stream processing the given input using the given AttributeSource. + + + Construct a token stream processing the given input using the given AttributeSource. + + + Return the corrected offset. If is a subclass + this method calls , else returns currentOff. + + offset as seen in the output + + corrected offset based on the input + + + + + + Expert: Reset the tokenizer to a new reader. Typically, an + analyzer (in its reusableTokenStream method) will use + this to re-use a previously created tokenizer. + + + + Returns true iff a character should be included in a token. This + tokenizer generates as tokens adjacent sequences of characters which + satisfy this predicate. Characters for which this is false are used to + define token boundaries and are not included in tokens. + + + + Called on each token character to normalize it before it is added to the + token. The default implementation does nothing. Subclasses may use this + to, e.g., lowercase tokens. + + + + A filter that replaces accented characters in the ISO Latin 1 character set + (ISO-8859-1) by their unaccented equivalent. The case will not be altered. +

+ For instance, 'À' will be replaced by 'a'. +

+ +

If you build a new index, use + which covers a superset of Latin 1. + This class is included for use with existing indexes and will be removed + in a future release (possibly Lucene 4.0) +
+ + To replace accented characters in a String by unaccented equivalents. + + + "Tokenizes" the entire stream as a single token. This is useful + for data like zip codes, ids, and some product names. + + + + Emits the entire input as a single token. + + + Removes words that are too long or too short from the stream. + + + Build a filter that removes words that are too long or too + short from the text. + + + + Returns the next input Token whose term() is the right len + + + A LetterTokenizer is a tokenizer that divides text at non-letters. That's + to say, it defines tokens as maximal strings of adjacent letters, as defined + by java.lang.Character.isLetter() predicate. + Note: this does a decent job for most European languages, but does a terrible + job for some Asian languages, where words are not separated by spaces. + + + + Construct a new LetterTokenizer. + + + Construct a new LetterTokenizer using a given . + + + Construct a new LetterTokenizer using a given . + + + Collects only characters which satisfy + . + + + + Normalizes token text to lower case. + + + LowerCaseTokenizer performs the function of LetterTokenizer + and LowerCaseFilter together. It divides text at non-letters and converts + them to lower case. While it is functionally equivalent to the combination + of LetterTokenizer and LowerCaseFilter, there is a performance advantage + to doing the two tasks at once, hence this (redundant) implementation. +

+ Note: this does a decent job for most European languages, but does a terrible + job for some Asian languages, where words are not separated by spaces. +

+
+ + Construct a new LowerCaseTokenizer. + + + Construct a new LowerCaseTokenizer using a given . + + + Construct a new LowerCaseTokenizer using a given . + + + Converts char to lower case + . + + + + Simplistic that applies the mappings + contained in a to the character + stream, and correcting the resulting changes to the + offsets. + + + + Default constructor that takes a . + + + Easy-use constructor that takes a . + + + Holds a map of String input to String output, to be used + with . + + + + Records a replacement to be applied to the inputs + stream. Whenever singleMatch occurs in + the input, it will be replaced with + replacement. + + + input String to be replaced + + output String + + + + Expert: This class provides a + for indexing numeric values that can be used by + or . + +

Note that for simple usage, is + recommended. disables norms and + term freqs, as they are not usually needed during + searching. If you need to change these settings, you + should use this class. + +

See for capabilities of fields + indexed numerically.

+ +

Here's an example usage, for an int field: + + + Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)); + field.setOmitNorms(true); + field.setOmitTermFreqAndPositions(true); + document.add(field); + + +

For optimal performance, re-use the TokenStream and Field instance + for more than one document: + + + NumericTokenStream stream = new NumericTokenStream(precisionStep); + Field field = new Field(name, stream); + field.setOmitNorms(true); + field.setOmitTermFreqAndPositions(true); + Document document = new Document(); + document.add(field); + + for(all documents) { + stream.setIntValue(value) + writer.addDocument(document); + } + + +

This stream is not intended to be used in analyzers; + it's more for iterating the different precisions during + indexing a specific numeric value.

+ +

NOTE: as token streams are only consumed once + the document is added to the index, if you index more + than one numeric field, use a separate NumericTokenStream + instance for each.

+ +

See for more details on the + precisionStep + parameter as well as how numeric fields work under the hood.

+ +

NOTE: This API is experimental and + might change in incompatible ways in the next release. + Since 2.9 +

+
+ + The full precision token gets this token type assigned. + + + The lower precision tokens gets this token type assigned. + + + Creates a token stream for numeric values using the default precisionStep + (4). The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Creates a token stream for numeric values with the specified + precisionStep. The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Expert: Creates a token stream for numeric values with the specified + precisionStep using the given . + The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Expert: Creates a token stream for numeric values with the specified + precisionStep using the given + . + The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Initializes the token stream with the supplied long value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value)) + + + + Initializes the token stream with the supplied int value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value)) + + + + Initializes the token stream with the supplied double value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value)) + + + + Initializes the token stream with the supplied float value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value)) + + + + This analyzer is used to facilitate scenarios where different + fields require different analysis techniques. Use + to add a non-default analyzer on a field name basis. + +

Example usage: + + + PerFieldAnalyzerWrapper aWrapper = + new PerFieldAnalyzerWrapper(new StandardAnalyzer()); + aWrapper.addAnalyzer("firstname", new KeywordAnalyzer()); + aWrapper.addAnalyzer("lastname", new KeywordAnalyzer()); + + +

In this example, StandardAnalyzer will be used for all fields except "firstname" + and "lastname", for which KeywordAnalyzer will be used. + +

A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing + and query parsing. +

+
+ + Constructs with default analyzer. + + + Any fields not specifically + defined to use a different analyzer will use the one provided here. + + + + Constructs with default analyzer and a map of analyzers to use for + specific fields. + + + Any fields not specifically + defined to use a different analyzer will use the one provided here. + + a Map (String field name to the Analyzer) to be + used for those fields + + + + Defines an analyzer to use for the specified field. + + + field name requiring a non-default analyzer + + non-default analyzer to use for field + + + + Return the positionIncrementGap from the analyzer assigned to fieldName + + + Return the offsetGap from the analyzer assigned to field + + + Transforms the token stream as per the Porter stemming algorithm. + Note: the input to the stemming filter must already be in lower case, + so you will need to use LowerCaseFilter or LowerCaseTokenizer farther + down the Tokenizer chain in order for this to work properly! +

+ To use this filter with other analyzers, you'll want to write an + Analyzer class that sets up the TokenStream chain as you want it. + To use this with LowerCaseTokenizer, for example, you'd write an + analyzer like this: +

+ + class MyAnalyzer extends Analyzer { + public final TokenStream tokenStream(String fieldName, Reader reader) { + return new PorterStemFilter(new LowerCaseTokenizer(reader)); + } + } + +

+
+ + + Stemmer, implementing the Porter Stemming Algorithm + + The Stemmer class transforms a word into its root form. The input + word can be provided a character at time (by calling add()), or at once + by calling one of the various stem(something) methods. + + + + reset() resets the stemmer so it can stem another word. If you invoke + the stemmer by calling add(char) and then stem(), you must call reset() + before starting another word. + + + + Add a character to the word being stemmed. When you are finished + adding characters, you can call stem(void) to process the word. + + + + After a word has been stemmed, it can be retrieved by toString(), + or a reference to the internal buffer can be retrieved by getResultBuffer + and getResultLength (which is generally more efficient.) + + + + Stem a word provided as a String. Returns the result as a String. + + + Stem a word contained in a char[]. Returns true if the stemming process + resulted in a word different from the input. You can retrieve the + result with getResultLength()/getResultBuffer() or toString(). + + + + Stem a word contained in a portion of a char[] array. Returns + true if the stemming process resulted in a word different from + the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Stem a word contained in a leading portion of a char[] array. + Returns true if the stemming process resulted in a word different + from the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Stem the word placed into the Stemmer buffer through calls to add(). + Returns true if the stemming process resulted in a word different + from the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Test program for demonstrating the Stemmer. It reads a file and + stems each word, writing the result to standard out. + Usage: Stemmer file-name + + + + Returns the length of the word resulting from the stemming process. + + + Returns a reference to a character buffer containing the results of + the stemming process. You also need to consult getResultLength() + to determine the length of the result. + + + + An that filters + with + + + + Filters with , + and , using a list of English stop + words. + + +

+ You must specify the required compatibility when creating + StandardAnalyzer: + + As of 2.9, StopFilter preserves position increments + As of 2.4, Tokens incorrectly identified as acronyms are corrected (see + LUCENE-1608) + +

+
+ + Default maximum allowed token length + + + Specifies whether deprecated acronyms should be replaced with HOST type. + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + + Specifies whether deprecated acronyms should be replaced with HOST type. + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + + An unmodifiable set containing some common English words that are usually not + useful for searching. + + + + Builds an analyzer with the default stop words (). + + Lucene version to match see above + + + Builds an analyzer with the given stop words. + Lucene version to match See above /> + + + stop words + + + + Builds an analyzer with the stop words from the given file. + + + Lucene version to match See above /> + + + File to read stop words from + + + + Builds an analyzer with the stop words from the given reader. + + + Lucene version to match See above /> + + + Reader to read stop words from + + + + Constructs a filtered by a + , a and a . + + + + Set maximum allowed token length. If a token is seen + that exceeds this length then it is discarded. This + setting only takes effect the next time tokenStream or + reusableTokenStream is called. + + + + Normalizes tokens extracted with . + + + Construct filtering in. + + + Returns the next token in the stream, or null at EOS. +

Removes 's from the end of words. +

Removes dots from acronyms. +

+
+ + A grammar-based tokenizer constructed with JFlex + +

This should be a good tokenizer for most European-language documents: + + + Splits words at punctuation characters, removing punctuation. However, a + dot that's not followed by whitespace is considered part of a token. + Splits words at hyphens, unless there's a number in the token, in which case + the whole token is interpreted as a product number and is not split. + Recognizes email addresses and internet hostnames as one token. + + +

Many applications have specific tokenizer needs. If this tokenizer does + not suit your application, please consider copying this source code + directory to your project and maintaining your own grammar-based tokenizer. + + +

+ You must specify the required compatibility when creating + StandardTokenizer: + + As of 2.4, Tokens incorrectly identified as acronyms are corrected (see + LUCENE-1608) + +

+
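A hedged illustration of the splitting rules above, using the tokenizer directly rather than through StandardAnalyzer; the constructor taking a compatibility version and a TextReader is assumed, and the exact token boundaries depend on the grammar shipped with your release.

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Analysis.Tokenattributes;
    using Version = Lucene.Net.Util.Version;

    class StandardTokenizerSketch
    {
        static void Main()
        {
            // Per the rules above: "support@example.com" should come through as one
            // token (email rule), "XL-1000" should stay whole (hyphen next to a digit),
            // and the trailing punctuation should be dropped.
            var reader = new StringReader("Mail support@example.com about the XL-1000.");
            var tokenizer = new StandardTokenizer(Version.LUCENE_29, reader);
            ITermAttribute term = tokenizer.AddAttribute<ITermAttribute>();

            while (tokenizer.IncrementToken())
            {
                Console.WriteLine(term.Term);
            }
        }
    }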
+ + this solves a bug where HOSTs that end with '.' are identified + as ACRONYMs. + + + + A private instance of the JFlex-constructed scanner + + + String token types that correspond to token type int constants + + + Creates a new instance of the + . Attaches + the input to the newly created JFlex scanner. + + + + The input reader + + See http://issues.apache.org/jira/browse/LUCENE-1068 + + + + Creates a new StandardTokenizer with a given . + + + Creates a new StandardTokenizer with a given + + + + + + (non-Javadoc) + + + + + + Remove in 3.X and make true the only valid value + See https://issues.apache.org/jira/browse/LUCENE-1068 + + Set to true to replace mischaracterized acronyms as HOST. + + + + Set the max allowed token length. Any token longer + than this is skipped. + + + + This class is a scanner generated by + JFlex 1.4.1 + on 9/4/08 6:49 PM from the specification file + /tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex + + + + This character denotes the end of file + + + initial size of the lookahead buffer + + + lexical states + + + Translates characters to character classes + + + Translates characters to character classes + + + Translates DFA states to action switch labels. + + + Translates a state to a row index in the transition table + + + The transition table of the DFA + + + ZZ_ATTRIBUTE[aState] contains the attributes of state aState + + + the input device + + + the current state of the DFA + + + the current lexical state + + + this buffer contains the current text to be matched and is + the source of the yytext() string + + + + the textposition at the last accepting state + + + the textposition at the last state to be included in yytext + + + the current text position in the buffer + + + startRead marks the beginning of the yytext() string in the buffer + + + endRead marks the last character in the buffer, that has been read + from input + + + + number of newlines encountered up to the start of the matched text + + + the number of characters up to the start of the matched text + + + the number of characters from the last newline up to the start of the + matched text + + + + zzAtBOL == true <=> the scanner is currently at the beginning of a line + + + zzAtEOF == true <=> the scanner is at the EOF + + + this solves a bug where HOSTs that end with '.' are identified + as ACRONYMs. + + + + Fills Lucene token with the current token text. + + + Fills TermAttribute with the current token text. + + + Creates a new scanner + There is also a java.io.InputStream version of this constructor. + + + the java.io.Reader to read input from. + + + + Creates a new scanner. + There is also java.io.Reader version of this constructor. + + + the java.io.Inputstream to read input from. + + + + Unpacks the compressed character translation table. + + + the packed character translation table + + the unpacked character translation table + + + + Refills the input buffer. + + false, iff there was new input. + + + if any I/O-Error occurs + + + + Closes the input stream. + + + Resets the scanner to read from a new input stream. + Does not close the old reader. + + All internal variables are reset, the old input stream + cannot be reused (internal buffer is discarded and lost). + Lexical state is set to ZZ_INITIAL. + + + the new input stream + + + + Returns the current lexical state. + + + Enters a new lexical state + + + the new lexical state + + + + Returns the text matched by the current regular expression. 
+ + + Returns the character at position pos from the + matched text. + + It is equivalent to yytext().charAt(pos), but faster + + + the position of the character to fetch. + A value from 0 to yylength()-1. + + + the character at position pos + + + + Returns the length of the matched text region. + + + Reports an error that occured while scanning. + + In a wellformed scanner (no or only correct usage of + yypushback(int) and a match-all fallback rule) this method + will only be called with things that "Can't Possibly Happen". + If this method is called, something is seriously wrong + (e.g. a JFlex bug producing a faulty scanner etc.). + + Usual syntax/scanner level error handling should be done + in error fallback rules. + + + the code of the errormessage to display + + + + Pushes the specified amount of characters back into the input stream. + + They will be read again by then next call of the scanning method + + + the number of characters to be read again. + This number must not be greater than yylength()! + + + + Resumes scanning until the next regular expression is matched, + the end of input is encountered or an I/O-Error occurs. + + + the next token + + if any I/O-Error occurs + + + + Filters with and + . + + +

+ You must specify the required compatibility when creating + StopAnalyzer: + + As of 2.9, position increments are preserved + +

+
+ + An unmodifiable set containing some common English words that are not usually useful + for searching. + + + + Builds an analyzer which removes words in ENGLISH_STOP_WORDS. + + + Builds an analyzer with the stop words from the given set. + + + Builds an analyzer with the stop words from the given file. + + + + + See above + + File to load stop words from + + + + Builds an analyzer with the stop words from the given reader. + + + See above + + Reader to load stop words from + + + + Filters LowerCaseTokenizer with StopFilter. + + + Filters LowerCaseTokenizer with StopFilter. + + + Removes stop words from a token stream. + + + Construct a token stream filtering the given input. + If stopWords is an instance of (true if + makeStopSet() was used to construct the set) it will be directly used + and ignoreCase will be ignored since CharArraySet + directly controls case sensitivity. +

+ If stopWords is not an instance of , + a new CharArraySet will be constructed and ignoreCase will be + used to specify the case sensitivity of that set. +

+ true if token positions should record the removed stop words + Input TokenStream + A Set of strings or char[] or any other ToString()-able set representing the stopwords + if true, all words are lower cased first +
+ + Constructs a filter which removes words from the input + TokenStream that are named in the Set. + + true if token positions should record the removed stop words + Input stream + A Set of strings or char[] or any other ToString()-able set representing the stopwords + + + + Builds a Set from an array of stop words, + appropriate for passing into the StopFilter constructor. + This permits this stopWords construction to be cached once when + an Analyzer is constructed. + + + passing false to ignoreCase + + + Builds a Set from an array of stop words, + appropriate for passing into the StopFilter constructor. + This permits this stopWords construction to be cached once when + an Analyzer is constructed. + + A list of strings or char[] or any other ToString()-able list representing the stop words + passing false to ignoreCase + + + + An array of stopwords + If true, all words are lower cased first. + a Set containing the words + + + + A List of Strings or char[] or any other toString()-able list representing the stopwords + if true, all words are lower cased first + A Set ()containing the words + + + Returns the next input Token whose term() is not a stop word. + + + Returns version-dependent default for enablePositionIncrements. Analyzers + that embed StopFilter use this method when creating the StopFilter. Prior + to 2.9, this returns false. On 2.9 or later, it returns true. + + + + If true, this StopFilter will preserve + positions of the incoming tokens (ie, accumulate and + set position increments of the removed stop tokens). + Generally, true is best as it does not + lose information (positions of the original tokens) + during indexing. + +

When this is enabled and a token is stopped + (omitted), the position increment of the following + token is incremented. + +

NOTE: be sure to also + set if + you use QueryParser to create queries. +

+
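A minimal sketch of the construction pattern described above, assuming a MakeStopSet(string[], bool) overload and the constructor that takes the enablePositionIncrements flag first; both exist in the 2.9/3.x line, but treat the exact overloads as assumptions.

    using System.IO;
    using Lucene.Net.Analysis;

    class StopFilterSketch
    {
        static TokenStream BuildChain(TextReader reader)
        {
            // Built once and cached by the caller in a real analyzer, so the
            // CharArraySet construction cost is paid a single time.
            var stopWords = StopFilter.MakeStopSet(new[] { "the", "a", "an" }, true);

            TokenStream stream = new WhitespaceTokenizer(reader);
            stream = new LowerCaseFilter(stream);

            // true => accumulate position increments over the removed stop words,
            // so phrase queries keep seeing the original token positions.
            return new StopFilter(true, stream, stopWords);
        }
    }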
+ + This TokenFilter provides the ability to set aside attribute states + that have already been analyzed. This is useful in situations where multiple fields share + many common analysis steps and then go their separate ways. +

+ It is also useful for doing things like entity extraction or proper noun analysis as + part of the analysis workflow and saving off those tokens for use in another field. + + + TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1)); + TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream(); + TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream(); + TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2)); + source2.addSinkTokenStream(sink1); + source2.addSinkTokenStream(sink2); + TokenStream final1 = new LowerCaseFilter(source1); + TokenStream final2 = source2; + TokenStream final3 = new EntityDetect(sink1); + TokenStream final4 = new URLDetect(sink2); + d.add(new Field("f1", final1)); + d.add(new Field("f2", final2)); + d.add(new Field("f3", final3)); + d.add(new Field("f4", final4)); + + In this example, sink1 and sink2 will both get tokens from both + reader1 and reader2 after the whitespace tokenizer, + and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired. + It is important that tees are consumed before sinks (in the above example, the tee field names must be less than, i.e. sort before, the sink field names). If you are not sure which stream is consumed first, you can simply + add another sink and then pass all tokens to the sinks at once using . + This TokenFilter is exhausted after that. In that case, change + the example above to: + + ... + TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream()); + TokenStream final2 = source2.newSinkTokenStream(); + sink1.consumeAllTokens(); + sink2.consumeAllTokens(); + ... + + In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.

Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. +

+
+ + Instantiates a new TeeSinkTokenFilter. + + + Returns a new that receives all tokens consumed by this stream. + + + Returns a new that receives all tokens consumed by this stream + that pass the supplied filter. + + + + + + Adds a created by another TeeSinkTokenFilter + to this one. The supplied stream will also receive all consumed tokens. + This method can be used to pass tokens from two different tees to one sink. + + + + TeeSinkTokenFilter passes all tokens to the added sinks + when itself is consumed. To be sure, that all tokens from the input + stream are passed to the sinks, you can call this methods. + This instance is exhausted after this, but all sinks are instant available. + + + + A filter that decides which states to store in the sink. + + + Returns true, iff the current state of the passed-in shall be stored + in the sink. + + + + Called by . This method does nothing by default + and can optionally be overridden. + + + + A Token is an occurrence of a term from the text of a field. It consists of + a term's text, the start and end offset of the term in the text of the field, + and a type string. +

+ The start and end offsets permit applications to re-associate a token with + its source text, e.g., to display highlighted query terms in a document + browser, or to show matching text fragments in a KWIC display, etc. +

+ The type is a string, assigned by a lexical analyzer + (a.k.a. tokenizer), naming the lexical or syntactic class that the token + belongs to. For example an end of sentence marker token might be implemented + with type "eos". The default token type is "word". +

+ A Token can optionally have metadata (a.k.a. Payload) in the form of a variable + length byte array. Use and + to retrieve the payloads from the index. +

+

+
+

NOTE: As of 2.9, Token implements all interfaces + that are part of core Lucene and can be found in the namespace. + Even though it is not necessary to use Token anymore, with the new TokenStream API it can + be used as a convenience class that implements all s, which is especially useful + to easily switch from the old to the new TokenStream API. +

+

Tokenizers and TokenFilters should try to re-use a Token instance when + possible for best performance, by implementing the + API. + Failing that, to create a new Token you should first use + one of the constructors that starts with null text. To load + the token from a char[] use . + To load from a String use or . + Alternatively you can get the Token's termBuffer by calling either , + if you know that your text is shorter than the capacity of the termBuffer + or , if there is any possibility + that you may need to grow the buffer. Fill in the characters of your term into this + buffer, with if loading from a string, + or with , and finally call to + set the length of the term text. See LUCENE-969 + for details.

+

Typical Token reuse patterns: + + Copying text from a string (type is reset to if not + specified):
+ + return reusableToken.reinit(string, startOffset, endOffset[, type]); + +
+ Copying some text from a string (type is reset to + if not specified):
+ + return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]); + +
+ Copying text from char[] buffer (type is reset to + if not specified):
+ + return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]); + +
+ Copying some text from a char[] buffer (type is reset to + if not specified):
+ + return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]); + +
+ Copying from one Token to another (type is reset to + if not specified):
+ + return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]); + +
+
+ A few things to note: + + clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one. + Because TokenStreams can be chained, one cannot assume that the Token's current type is correct. + The startOffset and endOffset represent the start and end offsets in the + source text, so be careful in adjusting them. + When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again. + +

+

+ + +
+ + Base class for Attributes that can be added to a + . +

+ Attributes are used to add data in a dynamic, yet type-safe way to a source + of usually streamed objects, e. g. a . +

+
+ + Base interface for attributes. + + + Clears the values in this AttributeImpl and resets it to its + default value. If this implementation implements more than one Attribute interface + it clears all. + + + + The default implementation of this method accesses all declared + fields of this object and prints the values in the following syntax: + + + public String toString() { + return "start=" + startOffset + ",end=" + endOffset; + } + + + This method may be overridden by subclasses. + + + + Subclasses must implement this method and should compute + a hashCode similar to this: + + public int hashCode() { + int code = startOffset; + code = code * 31 + endOffset; + return code; + } + + + see also + + + + All values used for computation of + should be checked here for equality. + + see also + + + + Copies the values from this Attribute into the passed-in + target attribute. The target implementation must support all the + Attributes this implementation supports. + + + + Shallow clone. Subclasses must override this if they + need to clone any members deeply, + + + + The term text of a Token. + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use + to increase it. After + altering the buffer be sure to call + to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + , + , or + + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use first. + + the truncated length + + + + Returns the Token's term text. + + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use and + directly instead. If you really need a + String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + A Token's lexical type. The Default value is "word". + + + Gets or sets this Token's lexical type. Defaults to "word". + + + The positionIncrement determines the position of this token + relative to the previous Token in a TokenStream, used in phrase + searching. + +

The default value is one. + +

Some common uses for this are: + + Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token. + + Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words. + + + +

+ + +
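The stemming case above, sketched with explicit Token instances (property-style setters as in the 3.x .NET port; older builds use SetPositionIncrement). A real filter would do this inside its token-producing method, but the increments are the point here:

    using Lucene.Net.Analysis;

    class PositionIncrementSketch
    {
        static void Emit()
        {
            // The original word advances the position as usual.
            var original = new Token("ran", 0, 3);
            original.PositionIncrement = 1;

            // The injected stem is stacked at the same position, so a phrase
            // query containing either "ran" or "run" still lines up.
            var injectedStem = new Token("run", 0, 3);
            injectedStem.PositionIncrement = 0;
        }
    }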
+ + Gets or sets the position increment. The default value is one. + + + the distance from the prior term + + + This attribute can be used to pass different flags down the chain, + eg from one TokenFilter to another one. + + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from , although they do share similar purposes. + The flags can be used to encode information about the token for use by other s. + + +

+ The bits +
+ + The start and end character offset of a Token. + + + Set the starting and ending offset. + See StartOffset() and EndOffset() + + + + Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. + + + + Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + The payload of a Token. See also . + + + Returns this Token's payload. + + + Constructs a Token will null text. + + + Constructs a Token with null text and start & end + offsets. + + start offset in the source text + end offset in the source text + + + Constructs a Token with null text and start & end + offsets plus the Token type. + + start offset in the source text + end offset in the source text + the lexical type of this Token + + + Constructs a Token with null text and start & end + offsets plus flags. NOTE: flags is EXPERIMENTAL. + + start offset in the source text + end offset in the source text + The bits to set for this token + + + Constructs a Token with the given term text, and start + & end offsets. The type defaults to "word." + NOTE: for better indexing speed you should + instead use the char[] termBuffer methods to set the + term text. + + term text + start offset + end offset + + + Constructs a Token with the given text, start and end + offsets, & type. NOTE: for better indexing + speed you should instead use the char[] termBuffer + methods to set the term text. + + term text + start offset + end offset + token type + + + Constructs a Token with the given text, start and end + offsets, & type. NOTE: for better indexing + speed you should instead use the char[] termBuffer + methods to set the term text. + + + + + token type bits + + + Constructs a Token with the given term buffer (offset + & length), start and end + offsets + + + + + + + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + the index in the buffer of the first character to copy + the number of characters to copy + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use + to increase it. After + altering the buffer be sure to call + to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + , + , or + + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Allocates a buffer char[] of at least newSize, without preserving the existing content. 
+ its always used in places that set the content + + minimum size of the buffer + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use first. + + the truncated length + + + + Set the starting and ending offset. + See StartOffset() and EndOffset() + + + + Resets the term text, payload, flags, and positionIncrement, + startOffset, endOffset and token type to default. + + + + Makes a clone, but replaces the term buffer & + start/end offset in the process. This is more + efficient than doing a full clone (and then calling + setTermBuffer) because it saves a wasted copy of the old + termBuffer. + + + + Shorthand for calling , + , + , + , + + + this Token instance + + + + Shorthand for calling , + , + , + + on Token.DEFAULT_TYPE + + this Token instance + + + + Shorthand for calling , + , + , + + + + this Token instance + + + + Shorthand for calling , + , + , + + + + this Token instance + + + + Shorthand for calling , + , + , + + on Token.DEFAULT_TYPE + + this Token instance + + + + Shorthand for calling , + , + , + + on Token.DEFAULT_TYPE + + this Token instance + + + + Copy the prototype token's fields into this one. Note: Payloads are shared. + + + + + Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + + + + + + + Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + + + + + + + + + + + + Convenience factory that returns Token as implementation for the basic + attributes and return the default impl (with "Impl" appended) for all other + attributes. + @since 3.0 + + + + Set the position increment. This determines the position of this token + relative to the previous Token in a , used in phrase + searching. + +

The default value is one. + +

Some common uses for this are: + + Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token. + + Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words. + + +

+ the distance from the prior term + + +
+ + Returns the Token's term text. + + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use and + directly instead. If you really need a + String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + Gets or sets this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to , as the term text may have been altered by a + stemmer or some other filter. + + + + Gets or sets this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + Returns this Token's lexical type. Defaults to "word". + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from , although they do share similar purposes. + The flags can be used to encode information about the token for use by other s. + + +

+ The bits +
+ + Returns this Token's payload. + + + + Expert: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes + and for all other attributes calls the given delegate factory. + + + + + Expert: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes + and for all other attributes calls the given delegate factory. + + + + This attribute can be used to pass different flags down the tokenizer chain, + eg from one TokenFilter to another one. + + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from , although they do share similar purposes. + The flags can be used to encode information about the token for use by other s. + + +

+ The bits +
+ + The start and end character offset of a Token. + + + Set the starting and ending offset. + See StartOffset() and EndOffset() + + + + Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. + + + + Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + The payload of a Token. See also . + + + Initialize this attribute with no payload. + + + Initialize this attribute with the given payload. + + + Returns this Token's payload. + + + The positionIncrement determines the position of this token + relative to the previous Token in a , used in phrase + searching. + +

The default value is one. + +

Some common uses for this are: + + Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token. + + Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words. + + +

+
+ + Set the position increment. The default value is one. + + + the distance from the prior term + + + The term text of a Token. + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use + to increase it. After + altering the buffer be sure to call + to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + , + , or + + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Allocates a buffer char[] of at least newSize, without preserving the existing content. + its always used in places that set the content + + minimum size of the buffer + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use first. + + the truncated length + + + + Returns the Token's term text. + + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use and + directly instead. If you + really need a String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + A Token's lexical type. The Default value is "word". + + + Returns this Token's lexical type. Defaults to "word". + + + An Analyzer that uses . + + + A WhitespaceTokenizer is a tokenizer that divides text at whitespace. + Adjacent sequences of non-Whitespace characters form tokens. + + + + Construct a new WhitespaceTokenizer. + + + Construct a new WhitespaceTokenizer using a given . + + + Construct a new WhitespaceTokenizer using a given . + + + Collects only characters which do not satisfy + . + + + + Loader for text files that represent a list of stopwords. + + + Loads a text file and adds every line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the file should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + File containing the wordlist + A HashSet with the file's words + + + Loads a text file and adds every non-comment line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the file should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). 
+ + File containing the wordlist + The comment string to ignore + A HashSet with the file's words + + + Reads lines from a Reader and adds every line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the Reader should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + Reader containing the wordlist + A HashSet with the reader's words + + + Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the Reader should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + + Reader containing the wordlist + + The string representing a comment. + + A HashSet with the reader's words + + + + Reads a stem dictionary. Each line contains: + word\tstem + (i.e. two tab seperated words) + + + stem dictionary that overrules the stemming algorithm + + IOException + + + + + + + + + Synonymous with . + +

WARNING: This interface may change within minor versions, despite Lucene's backward compatibility requirements. + This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards + compatibility promises remain intact. For example, Lucene can still + read and write indices created within the same major version. +

+ + +

+
+ + Return the raw byte[] for the binary field. Note that + you must also call and + to know which range of bytes in this + returned array belong to the field. + + reference to the Field value as byte[]. + + + Return the raw byte[] for the binary field. Note that + you must also call and + to know which range of bytes in this + returned array belong to the field.

+ About reuse: if you pass in the result byte[] and it is + used, likely the underlying implementation will hold + onto this byte[] and return it in future calls to + or . + So if you subsequently re-use the same byte[] elsewhere + it will alter this Fieldable's value. +

+ User defined buffer that will be used if + possible. If this is null or not large enough, a new + buffer is allocated + + reference to the Field value as byte[]. + +
+ + Gets or sets the boost factor for hits for this field. This value will be + multiplied into the score of all hits on this field of this + document. +

The boost is multiplied by of the document + containing this field. If a document has multiple fields with the same + name, all such values are multiplied together. This product is then + used to compute the norm factor for the field. By + default, in the + method, the boost value is multiplied + by the + and then rounded by before it is stored in the + index. One should attempt to ensure that this product does not overflow + the range of that encoding. + +

The default value is 1.0. + +

Note: this value is not stored directly with the document in the index. + Documents returned from and + may thus not have the same value present as when + this field was indexed. + +
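A small sketch of how the field and document boosts described above combine, assuming the property-style setters of the 3.x .NET port (older builds expose SetBoost instead):

    using Lucene.Net.Documents;

    class BoostSketch
    {
        static Document Build()
        {
            var doc = new Document();
            var title = new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.ANALYZED);

            title.Boost = 2.0f;   // field-level boost
            doc.Add(title);
            doc.Boost = 1.5f;     // document-level boost

            // At index time the norm for "title" is computed from roughly
            // 2.0 * 1.5 * lengthNorm and then encoded into a single byte,
            // which is why the exact value is not recoverable later.
            return doc;
        }
    }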

+ + + + + + +
+ + Returns the name of the field as an interned string. + For example "date", "title", "body", ... + + + + The value of the field as a String, or null. +

+ For indexing, if isStored()==true, the stringValue() will be used as the stored field value + unless isBinary()==true, in which case GetBinaryValue() will be used. + + If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. + If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null, + else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens. +

+
+ + The value of the field as a Reader, which can be used at index time to generate indexed tokens. + + + + + The TokenStream for this field to be used when indexing, or null. + + + + + True if the value of the field is to be stored in the index for return + with search hits. + + + + True if the value of the field is to be indexed, so that it may be + searched on. + + + + True if the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. + + + + True if the term or terms used to index this field are stored as a term + vector, available from . + These methods do not provide access to the original content of the field, + only to terms used to index it. If the original content must be + preserved, use the stored attribute instead. + + + + + + + True if terms are stored as term vector together with their offsets + (start and end positon in source text). + + + + True if terms are stored as term vector together with their token positions. + + + True if the value of the field is stored as binary + + + + True if norms are omitted for this indexed field. + + Expert: + If set, omit normalization factors associated with this indexed field. + This effectively disables indexing boosts and length normalization for this field. + + + + + Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving + it's values via or is only valid as long as the that + retrieved the is still open. + + + true if this field can be loaded lazily + + + Returns offset into byte[] segment that is used as value, if Field is not binary + returned value is undefined + + index of the first character in byte[] segment that represents this Field value + + + Returns length of byte[] segment that is used as value, if Field is not binary + returned value is undefined + + length of byte[] segment that represents this Field value + + + Expert: + + If set, omit term freq, positions and payloads from + postings for this field. + + + NOTE: While this option reduces storage space + required in the index, it also means any query + requiring positional information, such as + or + + subclasses will silently fail to find results. + + + + Return the raw byte[] for the binary field. Note that + you must also call and + to know which range of bytes in this + returned array belong to the field. + + reference to the Field value as byte[]. + + + Prints a Field for human consumption. + + + Gets or sets the boost factor for hits for this field. + +

The default value is 1.0. + +

Note: this value is not stored directly with the document in the index. + Documents returned from and + may thus not have the same value present as when + this field was indexed. +

+
+ + Returns the name of the field as an interned string. + For example "date", "title", "body", ... + + + + True iff the value of the field is to be stored in the index for return + with search hits. It is an error for this to be true if a field is + Reader-valued. + + + + True iff the value of the field is to be indexed, so that it may be + searched on. + + + + True iff the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. + + + + True iff the term or terms used to index this field are stored as a term + vector, available from . + These methods do not provide access to the original content of the field, + only to terms used to index it. If the original content must be + preserved, use the stored attribute instead. + + + + + + + True iff terms are stored as term vector together with their offsets + (start and end position in source text). + + + + True iff terms are stored as term vector together with their token positions. + + + True iff the value of the filed is stored as binary + + + Returns length of byte[] segment that is used as value, if Field is not binary + returned value is undefined + + length of byte[] segment that represents this Field value + + + Returns offset into byte[] segment that is used as value, if Field is not binary + returned value is undefined + + index of the first character in byte[] segment that represents this Field value + + + True if norms are omitted for this indexed field + + + Expert: + + If set, omit term freq, positions and payloads from + postings for this field. + +

NOTE: While this option reduces storage space + required in the index, it also means any query + requiring positional information, such as + or subclasses will + silently fail to find results. +

+
+ + Simple utility class providing static methods to + compress and decompress binary data for stored fields. + This class uses java.util.zip.Deflater and Inflater + classes to compress and decompress. + + + + Compresses the specified byte range using the + specified compressionLevel (constants are defined in + java.util.zip.Deflater). + + + + Compresses the specified byte range, with default BEST_COMPRESSION level + + + Compresses all bytes in the array, with default BEST_COMPRESSION level + + + Compresses the String value, with default BEST_COMPRESSION level + + + Compresses the String value using the specified + compressionLevel (constants are defined in + java.util.zip.Deflater). + + + + Decompress the byte array previously returned by + compress + + + + Decompress the byte array previously returned by + compressString back into a String + + + + Provides support for converting dates to strings and vice-versa. + The strings are structured so that lexicographic sorting orders by date, + which makes them suitable for use as field values and search terms. + +

Note that this class saves dates with millisecond granularity, + which is bad for and , as those + queries are expanded to a BooleanQuery with a potentially large number + of terms when searching. Thus you might want to use + instead. + +

+ Note: dates before 1970 cannot be used, and therefore cannot be + indexed when using this class. See for an + alternative without such a limitation. + +

+ Another approach is , which provides + a sortable binary representation (prefix encoded) of numeric values, which + date/time are. + For indexing a , convert it to unix timestamp as + long and + index this as a numeric value with + and use to query it. + +

+ If you build a new index, use DateTools or NumericField + instead. + This class is included for use with existing + indices and will be removed in a future release (possibly Lucene 4.0). +
+ + Converts a Date to a string suitable for indexing. + RuntimeException if the date specified in the + method argument is before 1970 + + + + Converts a millisecond time to a string suitable for indexing. + RuntimeException if the time specified in the + method argument is negative, that is, before 1970 + + + + Converts a string-encoded date into a millisecond time. + + + Converts a string-encoded date into a Date object. + + + Provides support for converting dates to strings and vice-versa. + The strings are structured so that lexicographic sorting orders + them by date, which makes them suitable for use as field values + and search terms. + +

This class also helps you to limit the resolution of your dates. Do not + save dates with a finer resolution than you really need, as then + RangeQuery and PrefixQuery will require more memory and become slower. + +

Compared to DateField, the strings generated by the methods + in this class take slightly more space, unless your selected resolution + is set to Resolution.DAY or lower. + +

+ Another approach is , which provides + a sortable binary representation (prefix encoded) of numeric values, which + date/time are. + For indexing a , convert it to unix timestamp as + long and + index this as a numeric value with + and use to query it. +

+
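A round-trip sketch for the conversions described above; DateToString, StringToDate and the Resolution enum are assumed to keep these names in the .NET port:

    using System;
    using Lucene.Net.Documents;

    class DateToolsSketch
    {
        static void Main()
        {
            DateTime now = DateTime.UtcNow;

            // Lexicographically sortable, truncated to whole days (format yyyyMMdd).
            string encoded = DateTools.DateToString(now, DateTools.Resolution.DAY);

            // Comes back with everything below the chosen resolution zeroed out.
            DateTime decoded = DateTools.StringToDate(encoded);

            Console.WriteLine("{0} -> {1:u}", encoded, decoded);
        }
    }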
+ + Converts a Date to a string suitable for indexing. + + + the date to be converted + + the desired resolution, see + + + a string in format yyyyMMddHHmmssSSS or shorter, + depending on resolution; using GMT as timezone + + + + Converts a millisecond time to a string suitable for indexing. + + + the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT + + the desired resolution, see + + + a string in format yyyyMMddHHmmssSSS or shorter, + depending on resolution; using GMT as timezone + + + + Converts a string produced by timeToString or + DateToString back to a time, represented as the + number of milliseconds since January 1, 1970, 00:00:00 GMT. + + + the date string to be converted + + the number of milliseconds since January 1, 1970, 00:00:00 GMT + + ParseException if dateString is not in the + expected format + + + + Converts a string produced by timeToString or + DateToString back to a time, represented as a + Date object. + + + the date string to be converted + + the parsed time as a Date object + + ParseException if dateString is not in the + expected format + + + + Limit a date's resolution. For example, the date 2004-09-21 13:50:11 + will be changed to 2004-09-01 00:00:00 when using + Resolution.MONTH. + + + + The desired resolution of the date to be returned + + the date with all values more precise than resolution + set to 0 or 1 + + + + Limit a date's resolution. For example, the date 1095767411000 + (which represents 2004-09-21 13:50:11) will be changed to + 1093989600000 (2004-09-01 00:00:00) when using + Resolution.MONTH. + + + The time in milliseconds (not ticks). + The desired resolution of the date to be returned + + the date with all values more precise than resolution + set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT + + + + Specifies the time granularity. + + + Documents are the unit of indexing and search. + + A Document is a set of fields. Each field has a name and a textual value. + A field may be stored with the document, in which + case it is returned with search hits on the document. Thus each document + should typically contain one or more stored fields which uniquely identify + it. + +

Note that fields which are not stored are + not available in documents retrieved from the index, e.g. with , + or . +
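A minimal sketch of the stored/unstored distinction just described, using the Field.Store and Field.Index names of the 2.9/3.x API:

    using Lucene.Net.Documents;

    class DocumentSketch
    {
        static Document Build(string id, string body)
        {
            var doc = new Document();

            // Stored and indexed: searchable, and returned with search hits.
            doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Indexed only: searchable, but Get("body") on a document retrieved
            // from the index returns null because the text was never stored.
            doc.Add(new Field("body", body, Field.Store.NO, Field.Index.ANALYZED));

            return doc;
        }
    }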

+
+ + Constructs a new document with no fields. + + +

Adds a field to a document. Several fields may be added with + the same name. In this case, if the fields are indexed, their text is + treated as though appended for the purposes of search.

+

Note that, like the removeField(s) methods, add only makes sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new changed version of that + document has to be added.

+

+
+ +

Removes field with the specified name from the document. + If multiple fields exist with this name, this method removes the first field that has been added. + If there is no field with the specified name, the document remains unchanged.

+

Note that the removeField(s) methods like the add method only make sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new changed version of that + document has to be added.

+

+
+ +

Removes all fields with the given name from the document. + If there is no field with the specified name, the document remains unchanged.

+

Note that the removeField(s) methods like the add method only make sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new changed version of that + document has to be added.

+

+
+ + Returns a field with the given name if any exist in this document, or + null. If multiple fields exists with this name, this method returns the + first value added. + Do not use this method with lazy loaded fields. + + + + Returns a field with the given name if any exist in this document, or + null. If multiple fields exists with this name, this method returns the + first value added. + + + + Returns the string value of the field with the given name if any exist in + this document, or null. If multiple fields exist with this name, this + method returns the first value added. If only binary fields with this name + exist, returns null. + + + + Returns a List of all the fields in a document. +

Note that fields which are not stored are + not available in documents retrieved from the + index, e.g. or . +

+
+ + Returns an array of s with the given name. + Do not use with lazy loaded fields. + This method returns an empty array when there are no + matching fields. It never returns null. + + + the name of the field + + a Field[] array + + + + Returns an array of s with the given name. + This method returns an empty array when there are no + matching fields. It never returns null. + + + the name of the field + + a Fieldable[] array + + + + Returns an array of values of the field specified as the method parameter. + This method returns an empty array when there are no + matching fields. It never returns null. + + the name of the field + + a String[] of field values + + + + Returns an array of byte arrays for of the fields that have the name specified + as the method parameter. This method returns an empty + array when there are no matching fields. It never + returns null. + + + the name of the field + + a byte[][] of binary field values + + + + Returns an array of bytes for the first (or only) field that has the name + specified as the method parameter. This method will return null + if no binary fields with the specified name are available. + There may be non-binary fields with the same name. + + + the name of the field. + + a byte[] containing the binary field value or null + + + + Prints the fields of a document for human consumption. + + + Gets or sets, at indexing time, the boost factor. + + The default is 1.0 + +

Note that once a document is indexed this value is no longer available + from the index. At search time, for retrieved documents, this method always + returns 1. This however does not mean that the boost value set at indexing + time was ignored - it was just combined with other indexing time factors and + stored elsewhere, for better indexing and search performance. (For more + information see the "norm(t,d)" part of the scoring formula in + Similarity.) +

+
+ + A field is a section of a Document. Each field has two parts, a name and a + value. Values may be free text, provided as a String or as a Reader, or they + may be atomic keywords, which are not further processed. Such keywords may + be used to represent dates, urls, etc. Fields are optionally stored in the + index, so that they may be returned with hits on the document. + + + +

Expert: change the value of this field. This can + be used during indexing to re-use a single Field + instance to improve indexing speed by avoiding GC cost + of new'ing and reclaiming Field instances. Typically + a single Document instance is re-used as + well. This helps most on small documents.

+ +

Each Field instance should only be used once + within a single Document instance. See ImproveIndexingSpeed + for details.

+

+
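A sketch of the re-use pattern described above, assuming SetValue(string) and IndexWriter.AddDocument keep their usual names in the port being used:

    using Lucene.Net.Documents;
    using Lucene.Net.Index;

    class FieldReuseSketch
    {
        static void IndexAll(IndexWriter writer, string[] bodies)
        {
            // One Field and one Document instance are re-used for every row,
            // avoiding per-document allocation and GC pressure.
            var body = new Field("body", "", Field.Store.NO, Field.Index.ANALYZED);
            var doc = new Document();
            doc.Add(body);

            foreach (string text in bodies)
            {
                body.SetValue(text);      // swap in the new value
                writer.AddDocument(doc);  // re-submit the same Document instance
            }
        }
    }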
+ + Expert: change the value of this field. See setValue(String). + + + Expert: change the value of this field. See setValue(String). + + + Expert: change the value of this field. See setValue(String). + + + Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true. + May be combined with stored values from stringValue() or GetBinaryValue() + + + + Create a field by specifying its name, value and how it will + be saved in the index. Term vectors will not be stored in the index. + + + The name of the field + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + NullPointerException if name or value is null + IllegalArgumentException if the field is neither stored nor indexed + + + Create a field by specifying its name, value and how it will + be saved in the index. + + + The name of the field + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + Whether term vector should be stored + + NullPointerException if name or value is null + IllegalArgumentException in any of the following situations: + + the field is neither stored nor indexed + the field is not indexed but termVector is TermVector.YES + + + + + Create a field by specifying its name, value and how it will + be saved in the index. + + + The name of the field + + Whether to .intern() name or not + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + Whether term vector should be stored + + NullPointerException if name or value is null + IllegalArgumentException in any of the following situations: + + the field is neither stored nor indexed + the field is not indexed but termVector is TermVector.YES + + + + + Create a tokenized and indexed field that is not stored. Term vectors will + not be stored. The Reader is read only when the Document is added to the index, + i.e. you may not close the Reader until + has been called. + + + The name of the field + + The reader with the content + + NullPointerException if name or reader is null + + + Create a tokenized and indexed field that is not stored, optionally with + storing term vectors. The Reader is read only when the Document is added to the index, + i.e. you may not close the Reader until + has been called. + + + The name of the field + + The reader with the content + + Whether term vector should be stored + + NullPointerException if name or reader is null + + + Create a tokenized and indexed field that is not stored. Term vectors will + not be stored. This is useful for pre-analyzed fields. + The TokenStream is read only when the Document is added to the index, + i.e. you may not close the TokenStream until + has been called. + + + The name of the field + + The TokenStream with the content + + NullPointerException if name or tokenStream is null + + + Create a tokenized and indexed field that is not stored, optionally with + storing term vectors. This is useful for pre-analyzed fields. + The TokenStream is read only when the Document is added to the index, + i.e. you may not close the TokenStream until + has been called. 
+ + + The name of the field + + The TokenStream with the content + + Whether term vector should be stored + + NullPointerException if name or tokenStream is null + + + Create a stored field with binary value. Optionally the value may be compressed. + + + The name of the field + + The binary value + + How value should be stored (compressed or not) + + IllegalArgumentException if store is Store.NO + + + Create a stored field with binary value. Optionally the value may be compressed. + + + The name of the field + + The binary value + + Starting offset in value where this Field's bytes are + + Number of bytes to use for this Field, starting at offset + + How value should be stored (compressed or not) + + IllegalArgumentException if store is Store.NO + + + The value of the field as a String, or null. If null, the Reader value or + binary value is used. Exactly one of stringValue(), + readerValue(), and getBinaryValue() must be set. + + + + The value of the field as a Reader, or null. If null, the String value or + binary value is used. Exactly one of stringValue(), + readerValue(), and getBinaryValue() must be set. + + + + The TokesStream for this field to be used when indexing, or null. If null, the Reader value + or String value is analyzed to produce the indexed tokens. + + + + Specifies whether and how a field should be stored. + + + Store the original field value in the index. This is useful for short texts + like a document's title which should be displayed with the results. The + value is stored in its original form, i.e. no analyzer is used before it is + stored. + + + + Do not store the field value in the index. + + + Specifies whether and how a field should be indexed. + + + Do not index the field value. This field can thus not be searched, + but one can still access its contents provided it is + stored. + + + + Index the tokens produced by running the field's + value through an Analyzer. This is useful for + common text. + + + + Index the field's value without using an Analyzer, so it can be searched. + As no analyzer is used the value will be stored as a single term. This is + useful for unique Ids like product numbers. + + + + Expert: Index the field's value without an Analyzer, + and also disable the storing of norms. Note that you + can also separately enable/disable norms by setting + . No norms means that + index-time field and document boosting and field + length normalization are disabled. The benefit is + less memory usage as norms take up one byte of RAM + per indexed field for every document in the index, + during searching. Note that once you index a given + field with norms enabled, disabling norms will + have no effect. In other words, for this to have the + above described effect on a field, all instances of + that field must be indexed with NOT_ANALYZED_NO_NORMS + from the beginning. + + + + Expert: Index the tokens produced by running the + field's value through an Analyzer, and also + separately disable the storing of norms. See + for what norms are + and why you may want to disable them. + + + + Specifies whether and how a field should have term vectors. + + + Do not store term vectors. + + + Store the term vectors of each document. A term vector is a list + of the document's terms and their number of occurrences in that document. 
+ + + + Store the term vector + token position information + + + + + + + Store the term vector + Token offset information + + + + + + + Store the term vector + Token position and offset information + + + + + + + + + + + + Get the best representation of a TermVector given the flags. + + + + Similar to a + java.io.FileFilter, the FieldSelector allows one to make decisions about + what Fields get loaded on a by + + + + + the field to accept or reject + + an instance of + if the named fieldName should be loaded. + + + + Provides information about what should be done with this Field + + + + + + + + Load this every time the is loaded, reading in the data as it is encountered. + and should not return null. +

+ should be called by the Reader. +

+
+ + Lazily load this . This means the is valid, but it may not actually contain its data until + invoked. SHOULD NOT BE USED. is safe to use and should + return a valid instance of a . +

+ should be called by the Reader. +

+
+ + Do not load the . and should return null. + is not called. +

+ should not be called by the Reader. +

+
+ + Load this field as in the case, but immediately return from loading for the . Thus, the + Document may not have its complete set of Fields. and should + both be valid for this +

+ should be called by the Reader. +

+
+ + Expert: Load the size of this rather than its value. + Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value. + The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0] + + + + Expert: Like but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded + + + Load the First field and break. +

+ See +

+
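As a rough illustration of the FieldSelectorResult options above, the following hedged sketch implements a selector that loads an ID field eagerly, defers a large body field, and skips everything else; exact member casing, and whether FieldSelectorResult is an enum or a set of static fields, should be verified against the shipped assembly.

    using Lucene.Net.Documents;

    // Illustrative selector: eager "id", lazy "body", nothing else.
    public class IdAndBodySelector : FieldSelector
    {
        public FieldSelectorResult Accept(string fieldName)
        {
            if (fieldName == "id") return FieldSelectorResult.LOAD;
            if (fieldName == "body") return FieldSelectorResult.LAZY_LOAD;
            return FieldSelectorResult.NO_LOAD;
        }
    }

    // Typical use: Document doc = reader.Document(docId, new IdAndBodySelector());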
+ + A based on a Map of field names to s + + Create a MapFieldSelector + maps from field names (String) to s + + + Create a MapFieldSelector + fields to LOAD. List of Strings. All other fields are NO_LOAD. + + + Create a MapFieldSelector + fields to LOAD. All other fields are NO_LOAD. + + + Load field according to its associated value in fieldSelections + a field name + + the fieldSelections value that field maps to or NO_LOAD if none. + + + Provides support for converting longs to Strings, and back again. The strings + are structured so that lexicographic sorting order is preserved. + +

+ That is, if l1 is less than l2 for any two longs l1 and l2, then + NumberTools.longToString(l1) is lexicographically less than + NumberTools.longToString(l2). (Similarly for "greater than" and "equals".) + +

+ This class handles all long values (unlike + ). + +

+ For new indexes use instead, which + provides a sortable binary representation (prefix encoded) of numeric + values. + To index and efficiently query numeric values use + and . + This class is included for use with existing + indices and will be removed in a future release (possibly Lucene 4.0). + +
+ + Equivalent to longToString(Long.MIN_VALUE) + + + Equivalent to longToString(Long.MAX_VALUE) + + + The length of (all) strings returned by + + + Converts a long to a String suitable for indexing. + + + Converts a String that was returned by back to a + long. + + + IllegalArgumentException + if the input is null + + NumberFormatException + if the input does not parse (it was not a String returned by + longToString()). + + + +
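A brief sketch of the round trip described above, assuming the .NET port exposes LongToString/StringToLong; the point is that the encoded strings sort lexicographically in the same order as the underlying longs:

    using Lucene.Net.Documents;

    string lo = NumberTools.LongToString(5L);
    string hi = NumberTools.LongToString(1000L);

    // Lexicographic order of the encodings matches numeric order of the values.
    bool ordered = string.CompareOrdinal(lo, hi) < 0;   // true

    // The encoding is reversible.
    long roundTripped = NumberTools.StringToLong(lo);   // 5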

This class provides a that enables indexing + of numeric values for efficient range filtering and + sorting. Here's an example usage, adding an int value: + + document.add(new NumericField(name).setIntValue(value)); + + + For optimal performance, re-use the + NumericField and instance for more than + one document: + + + NumericField field = new NumericField(name); + Document document = new Document(); + document.add(field); + + for(all documents) { + ... + field.setIntValue(value); + writer.addDocument(document); + ... + } + +

The .Net native types int, long, + float and double are + directly supported. However, any value that can be + converted into these native types can also be indexed. + For example, date/time values represented by a + can be translated into a long + value using the java.util.Date.getTime method. If you + don't need millisecond precision, you can quantize the + value, either by dividing the result of + java.util.Date.getTime or using the separate getters + (for year, month, etc.) to construct an int or + long value.

+ +

To perform range querying or filtering against a + NumericField, use or + . To sort according to a + NumericField, use the normal numeric sort types, eg + NumericField values + can also be loaded directly from .

+ +

By default, a NumericField's value is not stored but + is indexed for range filtering and sorting. You can use + the + constructor if you need to change these defaults.

+ +

You may add the same field name as a NumericField to + the same document more than once. Range querying and + filtering will be the logical OR of all values; so a range query + will hit all documents that have at least one value in + the range. However sort behavior is not defined. If you need to sort, + you should separately index a single-valued NumericField.

+ +

A NumericField will consume somewhat more disk space + in the index than an ordinary single-valued field. + However, for a typical index that includes substantial + textual content per document, this increase will likely + be in the noise.

+ +

Within Lucene, each numeric value is indexed as a + trie structure, where each term is logically + assigned to larger and larger pre-defined brackets (which + are simply lower-precision representations of the value). + The step size between each successive bracket is called the + precisionStep, measured in bits. Smaller + precisionStep values result in larger number + of brackets, which consumes more disk space in the index + but may result in faster range search performance. The + default value, 4, was selected for a reasonable tradeoff + of disk space consumption versus performance. You can + use the expert constructor + if you'd + like to change the value. Note that you must also + specify a congruent value when creating + or . + For low cardinality fields larger precision steps are good. + If the cardinality is < 100, it is fair + to use , which produces one + term per value. + +

For more information on the internals of numeric trie + indexing, including the precisionStep + configuration, see . The format of + indexed values is described in . + +

If you only need to sort by numeric value, and never + run range querying/filtering, you can index using a + precisionStep of . + This will minimize disk space consumed.

+ +

More advanced users can instead use + directly, when indexing numbers. This + class is a wrapper around this token stream type for + easier, more intuitive usage.

+ +

NOTE: This class is only used during + indexing. When retrieving the stored field value from a + instance after search, you will get a + conventional instance where the numeric + values are returned as s (according to + toString(value) of the used data type). + +

NOTE: This API is + experimental and might change in incompatible ways in the + next release. + +

+ 2.9 + +
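The congruent precisionStep requirement mentioned above is easiest to see in code. This sketch (Lucene.Net 2.9-style API assumed; the field name, values, and the existence of an open IndexWriter are illustrative) indexes a long value and queries it with the same step:

    using Lucene.Net.Documents;
    using Lucene.Net.Search;

    const int precisionStep = 6;   // must match between indexing and querying

    // Indexing side: a re-usable field instance, as recommended above.
    var priceField = new NumericField("price", precisionStep, Field.Store.YES, true);
    var doc = new Document();
    doc.Add(priceField);
    priceField.SetLongValue(1250L);
    // writer.AddDocument(doc);    // assumes an open IndexWriter named 'writer'

    // Query side: same field name and the same precisionStep.
    Query q = NumericRangeQuery.NewLongRange("price", precisionStep, 1000L, 2000L, true, true);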
+ + Creates a field for numeric values using the default precisionStep + (4). The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + This constructor creates an indexed, but not stored field. + + the field name + + + + Creates a field for numeric values using the default precisionStep + (4). The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + + the field name + + if the field should be stored in plain text form + (according to toString(value) of the used data type) + + if the field should be indexed using + + + + Creates a field for numeric values with the specified + precisionStep. The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + This constructor creates an indexed, but not stored field. + + the field name + + the used precision step + + + + Creates a field for numeric values with the specified + precisionStep. The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + + the field name + + the used precision step + + if the field should be stored in plain text form + (according to toString(value) of the used data type) + + if the field should be indexed using + + + + Returns always null for numeric fields + + + Initializes the field with the supplied long value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).SetLongValue(value)) + + + + Initializes the field with the supplied int value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setIntValue(value)) + + + + Initializes the field with the supplied double value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setDoubleValue(value)) + + + + Initializes the field with the supplied float value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setFloatValue(value)) + + + + Returns a for indexing the numeric value. + + + Returns always null for numeric fields + + + Returns the numeric value as a string (how it is stored, when is chosen). + + + Returns the current numeric value as a subclass of , null if not yet initialized. + + + Declare what fields to load normally and what fields to load lazily + + + + + + Pass in the Set of names to load and the Set of names to load lazily. If both are null, the + Document will not have any on it. + + A Set of field names to load. May be empty, but not null + + A Set of field names to load lazily. May be empty, but not null + + + + Indicate whether to load the field with the given name or not. If the is not in either of the + initializing Sets, then is returned. If a Field name + is in both fieldsToLoad and lazyFieldsToLoad, lazy has precedence. + + + The name to check + + The + + + + + Base class for enumerating all but deleted docs. + +

+ NOTE: this class is meant only to be used internally + by Lucene; it's only public so it can be shared across + packages. This means the API is freely subject to + change, and the class could be removed entirely, in any + Lucene release. Use directly at your own risk! +

+
+ + TermDocs provides an interface for enumerating <document, frequency> + pairs for a term.

The document portion names each document containing + the term. Documents are indicated by number. The frequency portion gives + the number of times the term occurred in each document.

The pairs are + ordered by document number. +

+ +
+ + Sets this to the data for a term. + The enumeration is reset to the start of the data for this term. + + + + Sets this to the data for the current term in a . + This may be optimized in some implementations. + + + + Moves to the next pair in the enumeration.

Returns true iff there is + such a next pair in the enumeration. +

+
+ + Attempts to read multiple entries from the enumeration, up to length of + docs. Document numbers are stored in docs, and term + frequencies are stored in freqs. The freqs array must be as + long as the docs array. + +

Returns the number of entries read. Zero is only returned when the + stream has been exhausted. +

+
+ + Skips entries to the first beyond the current whose document number is + greater than or equal to target.

Returns true iff there is such + an entry.

Behaves as if written: + boolean skipTo(int target) { + do { + if (!next()) + return false; + } while (target > doc()); + return true; + } + + Some implementations are considerably more efficient than that. +

+
+ + Frees associated resources. + + + Returns the current document number.

This is invalid until + is called for the first time. +

+
+ + Returns the frequency of the term within the current document.

This + is invalid until is called for the first time. +

+
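A minimal sketch of walking a TermDocs enumeration as described above, assuming the PascalCase surface of the .NET port (Seek/Next/Doc/Freq/Close) and a made-up field name; the reader is passed in already open:

    using System;
    using Lucene.Net.Index;

    static void PrintPostings(IndexReader reader)
    {
        TermDocs td = reader.TermDocs();              // unpositioned enumerator
        try
        {
            td.Seek(new Term("contents", "lucene"));  // position on one term's postings
            while (td.Next())
            {
                // Doc() is the containing document, Freq() the within-document frequency.
                Console.WriteLine("doc {0}: freq {1}", td.Doc(), td.Freq());
            }
        }
        finally
        {
            td.Close();
        }
    }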
+ + Holds buffered deletes, by docID, term or query. We + hold two instances of this class: one for the deletes + prior to the last flush, the other for deletes after + the last flush. This is so if we need to abort + (discard all buffered docs) we can also discard the + buffered deletes yet keep the deletes done during + previously flushed segments. + + + + Abstract base class for input from a file in a . A + random-access input stream. Used for all Lucene index input operations. + + + + + + Reads and returns a single byte. + + + + + Reads a specified number of bytes into an array at the specified offset. + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + + + Reads a specified number of bytes into an array at the + specified offset with control over whether the read + should be buffered (callers who have their own buffer + should pass in "false" for useBuffer). Currently only + respects this parameter. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + set to false if the caller will handle + buffering. + + + + + + Reads four bytes and returns an int. + + + + + Reads an int stored in variable-length format. Reads between one and + five bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Reads eight bytes and returns a long. + + + + + Reads a long stored in variable-length format. Reads between one and + nine bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + Call this if readString should read characters stored + in the old modified UTF8 format (length in java chars + and java's modified UTF8 encoding). This is used for + indices written pre-2.4 See LUCENE-510 for details. + + + + Reads a string. + + + + + Reads Lucene's old "modified UTF-8" encoded + characters into an array. + + the array to read characters into + + the offset in the array to start storing characters + + the number of characters to read + + + + -- please use readString or readBytes + instead, and construct the string + from those utf8 bytes + + + + Expert + + Similar to but does not do any conversion operations on the bytes it is reading in. It still + has to invoke just as does, but it does not need a buffer to store anything + and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine + how many more bytes to read + + The number of chars to read + + this method operates on old "modified utf8" encoded + strings + + + + Closes the stream to futher operations. + + + Sets current position in this file, where the next read will occur. + + + + + The number of bytes in the file. + + + Returns a clone of this stream. + +

Clones of a stream access the same data, and are positioned at the same + point as the stream they were cloned from. + +

Expert: Subclasses must ensure that clones may be positioned at + different points in the input from each other and from the stream they + were cloned from. +

+
+ + Returns the current position in this file, where the next read will + occur. + + + + + + Class to write byte streams into slices of shared + byte[]. This is used by DocumentsWriter to hold the + posting list for many terms in RAM. + + + + Set up the writer to write at address. + + + Write byte into byte slice stream + + + Basic tool and API to check the health of an index and + write a new segments file that removes reference to + problematic segments. + +

As this tool checks every byte in the index, on a large + index it can take quite a long time to run. + +

+ WARNING: this tool and API are new and + experimental and are subject to change suddenly in the + next release. Please make a complete backup of your + index before using this to fix your index! +

+
+ + Create a new CheckIndex on the directory. + + + Set infoStream where messages should go. If null, no + messages are printed + + + + Returns a instance detailing + the state of the index. + +

As this method checks every byte in the index, on a large + index it can take quite a long time to run. + +

WARNING: make sure + you only call this when the index is not opened by any + writer. +

+
+ + Returns a instance detailing + the state of the index. + + + list of specific segment names to check + +

As this method checks every byte in the specified + segments, on a large index it can take quite a long + time to run. + +

WARNING: make sure + you only call this when the index is not opened by any + writer. + + + +

Test field norms. +
+ + Test the term index. + + + Test stored fields for a segment. + + + Test term vectors for a segment. + + + Repairs the index using previously returned result + from . Note that this does not + remove any of the unreferenced files after it's done; + you must separately open an , which + deletes unreferenced files when it's created. + +

WARNING: this writes a + new segments file into the index, effectively removing + all documents in broken segments from the index. + BE CAREFUL. + +

WARNING: Make sure you only call this when the + index is not opened by any writer. +

+
+ + Command-line interface to check and fix an index. +

+ Run it like this: + + java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] + + + -fix: actually write a new segments_N file, removing any problematic segments + -segment X: only check the specified + segment(s). This can be specified multiple times, + to check more than one segment, eg -segment _2 + -segment _a. You can't use this with the -fix + option. + +

WARNING: -fix should only be used on an emergency basis as it will cause + documents (perhaps many) to be permanently removed from the index. Always make + a backup copy of your index before running this! Do not run this tool on an index + that is actively being written to. You have been warned! +

Run without -fix, this tool will open the index, report version information + and report any exceptions it hits and what action it would take if -fix were + specified. With -fix, this tool will remove any segments that have issues and + write a new segments_N file. This means all documents contained in the affected + segments will be removed. +

+ This tool exits with exit code 1 if the index cannot be opened or has any + corruption, else 0. +

+
+ + Returned from detailing the health and status of the index. + +

+ WARNING: this API is new and experimental and is + subject to change suddenly in the next release. +

+
+ + True if no problems were found with the index. + + + True if we were unable to locate and load the segments_N file. + + + True if we were unable to open the segments_N file. + + + True if we were unable to read the version number from segments_N file. + + + Name of latest segments_N file in the index. + + + Number of segments in the index. + + + String description of the version of the index. + + + Empty unless you passed specific segments list to check as optional 3rd argument. + + CheckIndex.CheckIndex_Renamed_Method(System.Collections.IList) + + + + True if the index was created with a newer version of Lucene than the CheckIndex tool. + + + List of instances, detailing status of each segment. + + + Directory index is in. + + + SegmentInfos instance containing only segments that + had no problems (this is used with the + method to repair the index. + + + + How many documents will be lost to bad segments. + + + How many bad segments were found. + + + True if we checked only specific segments () + was called with non-null + argument). + + + + Holds the userData of the last commit in the index + + + Holds the status of each segment in the index. + See . + +

+ WARNING: this API is new and experimental and is + subject to change suddenly in the next release. +

+
+ + Name of the segment. + + + Document count (does not take deletions into account). + + + True if segment is compound file format. + + + Number of files referenced by this segment. + + + Net size (MB) of the files referenced by this + segment. + + + + Doc store offset, if this segment shares the doc + store files (stored fields and term vectors) with + other segments. This is -1 if it does not share. + + + + String of the shared doc store segment, or null if + this segment does not share the doc store files. + + + + True if the shared doc store files are compound file + format. + + + + True if this segment has pending deletions. + + + Name of the current deletions file name. + + + Number of deleted documents. + + + True if we were able to open a SegmentReader on this + segment. + + + + Number of fields in this segment. + + + True if at least one of the fields in this segment + does not omitTermFreqAndPositions. + + + + + + Map<String, String> that includes certain + debugging details that IndexWriter records into + each segment it creates + + + + Status for testing of field norms (null if field norms could not be tested). + + + Status for testing of indexed terms (null if indexed terms could not be tested). + + + Status for testing of stored fields (null if stored fields could not be tested). + + + Status for testing of term vectors (null if term vectors could not be tested). + + + Status from testing field norms. + + + Number of fields successfully tested + + + Exception thrown during term index test (null on success) + + + Status from testing term index. + + + Total term count + + + Total frequency across all terms. + + + Total number of positions. + + + Exception thrown during term index test (null on success) + + + Status from testing stored fields. + + + Number of documents tested. + + + Total number of stored fields tested. + + + Exception thrown during stored fields test (null on success) + + + Status from testing stored fields. + + + Number of documents tested. + + + Total number of term vectors tested. + + + Exception thrown during term vector test (null on success) + + + Optimized implementation. + + + Overridden by SegmentTermPositions to skip in prox stream. + + + Optimized implementation. + + + Class for accessing a compound stream. + This class implements a directory, but is limited to only read operations. + Directory methods that would normally modify data throw an exception. + + + + A Directory is a flat list of files. Files may be written once, when they + are created. Once a file is created it may only be opened for read, or + deleted. Random access is permitted both when reading and writing. + +

+ Java's i/o APIs are not used directly; rather, all i/o + goes through this API. This permits things such as: + implementation of RAM-based indices; + implementation of indices stored in a database, via JDBC; + implementation of an index as a single file; + + + Directory locking is implemented by an instance of + , and can be changed for each Directory + instance using . +

+
+ + Holds the LockFactory instance (implements locking for + this Directory instance). + + + + Returns an array of strings, one for each file in the directory. + + + + Returns true iff a file with the given name exists. + + + Returns the time the named file was last modified. + + + Set the modified time of an existing file to now. + + + Removes an existing file in the directory. + + + Returns the length of a file in the directory. + + + Creates a new, empty file in the directory with the given name. + Returns a stream writing this file. + + + + Ensure that any writes to this file are moved to + stable storage. Lucene uses this to properly commit + changes to the index, to prevent a machine/OS crash + from corrupting the index. + + + + Returns a stream reading an existing file. + + + Returns a stream reading an existing file, with the + specified read buffer size. The particular Directory + implementation may ignore the buffer size. Currently + the only Directory implementations that respect this + parameter are and + . + + + + Construct a . + the name of the lock file + + + + Attempt to clear (forcefully unlock and remove) the + specified lock. Only call this at a time when you are + certain this lock is no longer in use. + + name of the lock to be cleared. + + + + Closes the store. + + + Set the LockFactory that this Directory instance should + use for its locking implementation. Each * instance of + LockFactory should only be used for one directory (ie, + do not share a single instance across multiple + Directories). + + + instance of . + + + + Return a string identifier that uniquely differentiates + this Directory instance from other Directory instances. + This ID should be the same if two Directory instances + (even in different JVMs and/or on different machines) + are considered "the same index". This is how locking + "scopes" to the right index. + + + + Copy contents of a directory src to a directory dest. + If a file in src already exists in dest then the + one in dest will be blindly overwritten. + +

NOTE: the source directory cannot change + while this method is running. Otherwise the results + are undefined and you could easily hit a + FileNotFoundException. + +

NOTE: this method only copies files that look + like index files (ie, have extensions matching the + known extensions of index files). + +

+ source directory + + destination directory + + if true, call method on source directory + + IOException +
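As a small illustration of the copy operation just described, this sketch clones an on-disk index into a RAMDirectory (the source path is made up); the final argument asks Copy to close the source directory when it finishes:

    using Lucene.Net.Store;

    Directory source = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\indexes\products"));
    Directory dest = new RAMDirectory();

    // Only index-looking files are copied, and the source must not change while this runs.
    Directory.Copy(source, dest, true);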
+ + AlreadyClosedException if this Directory is closed + + + Get the LockFactory that this Directory instance is + using for its locking implementation. Note that this + may be null for Directory implementations that provide + their own locking implementation. + + + + Returns an array of strings, one for each file in the directory. + + + Returns true iff a file with the given name exists. + + + Returns the time the compound file was last modified. + + + Set the modified time of the compound file to now. + + + Not implemented + UnsupportedOperationException + + + Not implemented + UnsupportedOperationException + + + Returns the length of a file in the directory. + IOException if the file does not exist + + + Not implemented + UnsupportedOperationException + + + Not implemented + UnsupportedOperationException + + + Implementation of an IndexInput that reads from a portion of the + compound file. The visibility is left as "package" *only* because + this helps with testing since JUnit test cases in a different class + can then access package fields of this class. + + + + Base implementation class for buffered . + + + Default buffer size + + + Inits BufferedIndexInput with a specific bufferSize + + + Change the buffer size used by this IndexInput + + + Expert: implements buffer refill. Reads bytes from the current position + in the input. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + Expert: implements seek. Sets current position in this file, where the + next will occur. + + + + + + + + + + Expert: implements buffer refill. Reads bytes from the current + position in the input. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + Expert: implements seek. Sets current position in this file, where + the next will occur. + + + + + + Combines multiple files into a single compound file. + The file format:
+ + VInt fileCount + {Directory} + fileCount entries with the following structure: + + long dataOffset + String fileName + + {File Data} + fileCount entries with the raw data of the corresponding file + + + The fileCount integer indicates how many files are contained in this compound + file. The {directory} that follows has that many entries. Each directory entry + contains a long pointer to the start of this file's data section, and a String + with that file's name. +
+
+ + Create the compound stream in the specified file. The file name is the + entire name (no extensions are added). + + NullPointerException if dir or name is null + + + Add a source stream. file is the string by which the + sub-stream will be known in the compound stream. + + + IllegalStateException if this writer is closed + NullPointerException if file is null + IllegalArgumentException if a file with the same name + has been added already + + + + Merge files with the extensions added up to now. + All files with these extensions are combined sequentially into the + compound stream. After successful merge, the source files + are deleted. + + IllegalStateException if close() had been called before or + if no file has been added to this object + + + + Copy the contents of the file with specified extension into the + provided output stream. Use the provided buffer for moving data + to reduce memory allocation. + + + + Returns the directory of the compound file. + + + Returns the name of the compound file. + + + source file + + + temporary holder for the start of directory entry for this file + + + temporary holder for the start of this file's data section + + + A that runs each merge using a + separate thread, up until a maximum number of threads + () at which when a merge is + needed, the thread(s) that are updating the index will + pause until one or more merges completes. This is a + simple way to use concurrency in the indexing process + without having to create and manage application level + threads. + + + +

Expert: uses an instance + implementing this interface to execute the merges + selected by a . The default + MergeScheduler is .

+ +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+ +

NOTE: This class typically requires access to + package-private APIs (eg, SegmentInfos) to do its job; + if you implement your own MergePolicy, you'll need to put + it in package Lucene.Net.Index in order to use + these APIs. +

+
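In practice the scheduler is handed to an IndexWriter rather than driven directly. A hedged sketch follows (Lucene.Net 2.9-era writer API assumed; depending on the port version the thread cap is either a SetMaxThreadCount method or a MaxThreadCount property, and the analyzer/directory here are placeholders):

    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Index;
    using Lucene.Net.Store;

    Directory dir = new RAMDirectory();   // placeholder; any Directory will do
    var writer = new IndexWriter(dir,
                                 new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29),
                                 true, IndexWriter.MaxFieldLength.UNLIMITED);

    var scheduler = new ConcurrentMergeScheduler();
    scheduler.SetMaxThreadCount(2);        // cap background merge threads at two
    writer.SetMergeScheduler(scheduler);   // merges now run on background threads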
+ + Run the merges provided by . + + + Close this MergeScheduler. + + + Return the priority that merge threads run at. By + default the priority is 1 plus the priority of (ie, + slightly higher priority than) the first thread that + calls merge. + + + + Set the priority that merge threads run at. + + + Does the actual merge, by calling + + + Create and return a new MergeThread + + + Called when an exception is hit in a background merge + thread + + + + Used for testing + + + Used for testing + + + Used for testing + + + Used for testing + + + Used for testing + + + Gets or sets the max # simultaneous threads that may be + running. If a merge is necessary yet we already have + this many threads running, the incoming thread (that + is calling add/updateDocument) will block until + a merge thread has completed. + + + + + Support class used to handle threads + + + + + This interface should be implemented by any class whose instances are intended + to be executed by a thread. + + + + + This method has to be implemented in order that starting of the thread causes the object's + run method to be called in that separately executing thread. + + + + + The instance of System.Threading.Thread + + + + + Initializes a new instance of the ThreadClass class + + + + + Initializes a new instance of the Thread class. + + The name of the thread + + + + Initializes a new instance of the Thread class. + + A ThreadStart delegate that references the methods to be invoked when this thread begins executing + + + + Initializes a new instance of the Thread class. + + A ThreadStart delegate that references the methods to be invoked when this thread begins executing + The name of the thread + + + + This method has no functionality unless the method is overridden + + + + + Causes the operating system to change the state of the current thread instance to ThreadState.Running + + + + + Interrupts a thread that is in the WaitSleepJoin thread state + + + + + Blocks the calling thread until a thread terminates + + + + + Blocks the calling thread until a thread terminates or the specified time elapses + + Time of wait in milliseconds + + + + Blocks the calling thread until a thread terminates or the specified time elapses + + Time of wait in milliseconds + Time of wait in nanoseconds + + + + Resumes a thread that has been suspended + + + + + Raises a ThreadAbortException in the thread on which it is invoked, + to begin the process of terminating the thread. Calling this method + usually terminates the thread + + + + + Raises a ThreadAbortException in the thread on which it is invoked, + to begin the process of terminating the thread while also providing + exception information about the thread termination. + Calling this method usually terminates the thread. + + An object that contains application-specific information, such as state, which can be used by the thread being aborted + + + + Suspends the thread, if the thread is already suspended it has no effect + + + + + Obtain a String that represents the current object + + A String that represents the current object + + + + Gets the currently running thread + + The currently running thread + + + + Gets the current thread instance + + + + + Gets or sets the name of the thread + + + + + Gets or sets a value indicating the scheduling priority of a thread + + + + + Gets a value indicating the execution status of the current thread + + + + + Gets or sets a value indicating whether or not a thread is a background thread. 
+ + + + This exception is thrown when Lucene detects + an inconsistency in the index. + + + + Implements the skip list reader for the default posting list format + that stores positions and payloads. + + + + + This abstract class reads skip lists with multiple levels. + + See for the information about the encoding + of the multi level skip lists. + + Subclasses must implement the abstract method + which defines the actual format of the skip data. + + + + Returns the id of the doc to which the last call of + has skipped. + + + + Skips entries to the first beyond the current whose document number is + greater than or equal to target. Returns the current doc count. + + + + Seeks the skip entry on the given level + + + initializes the reader + + + Loads the skip levels + + + Subclasses must implement the actual skip data encoding in this method. + + + the level skip data shall be read from + + the skip stream to read from + + + + Copies the values of the last read skip entry on this level + + + used to buffer the top skip levels + + + Returns the freq pointer of the doc to which the last call of + has skipped. + + + + Returns the prox pointer of the doc to which the last call of + has skipped. + + + + Returns the payload length of the payload stored just before + the doc to which the last call of + has skipped. + + + + Implements the skip list writer for the default posting list format + that stores positions and payloads. + + + + + This abstract class writes skip lists with multiple levels. + + Example for skipInterval = 3: + c (skip level 2) + c c c (skip level 1) + x x x x x x x x x x (skip level 0) + d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) + 3 6 9 12 15 18 21 24 27 30 (df) + + d - document + x - skip data + c - skip data with child pointer + + Skip level i contains every skipInterval-th entry from skip level i-1. + Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))). + + Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1. + This guarantess a logarithmic amount of skips to find the target document. + + While this class takes care of writing the different skip levels, + subclasses must define the actual format of the skip data. + + + + + Subclasses must implement the actual skip data encoding in this method. + + + the level skip data shall be writting for + + the skip buffer to write to + + + + Writes the current skip data to the buffers. The current document frequency determines + the max level is skip data is to be written to. + + + the current document frequency + + IOException + + + Writes the buffered skip lists to the given output. + + + the IndexOutput the skip lists shall be written to + + the pointer the skip list starts + + + + Sets the values for the current skip data. + + + An IndexReader which reads indexes with multiple segments. + + + IndexReader is an abstract class, providing an interface for accessing an + index. Search of an index is done entirely through this abstract interface, + so that any subclass which implements it is searchable. +

Concrete subclasses of IndexReader are usually constructed with a call to + one of the static open() methods, e.g. + . +

For efficiency, in this API documents are often referred to via + document numbers, non-negative integers which each name a unique + document in the index. These document numbers are ephemeral--they may change + as documents are added to and deleted from an index. Clients should thus not + rely on a given document having the same number between sessions. +

An IndexReader can be opened on a directory for which an IndexWriter is + opened already, but it cannot be used to delete documents from the index then. +

+ NOTE: for backwards API compatibility, several methods are not listed + as abstract, but have no useful implementations in this base class and + instead always throw UnsupportedOperationException. Subclasses are + strongly encouraged to override these methods, but in many cases may not + need to. +

+

+ NOTE: as of 2.4, it's possible to open a read-only + IndexReader using the static open methods that accept the + boolean readOnly parameter. Such a reader has + better concurrency as it's not necessary to synchronize on the + isDeleted method. You must explicitly specify false + if you want to make changes with the resulting IndexReader. +

+

NOTE: + instances are completely thread + safe, meaning multiple threads can call any of its methods, + concurrently. If your application requires external + synchronization, you should not synchronize on the + IndexReader instance; use your own + (non-Lucene) objects instead. +

+
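A short sketch of the read-only usage recommended below, assuming the static Open(Directory, bool readOnly) overload described in this section; the path is illustrative:

    using Lucene.Net.Index;
    using Lucene.Net.Store;

    Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\indexes\products"));

    // readOnly = true gives better concurrency when no deletes or norm changes are needed.
    IndexReader reader = IndexReader.Open(dir, true);
    try
    {
        System.Console.WriteLine("live docs: {0}", reader.NumDocs());
    }
    finally
    {
        reader.Close();
    }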
+ + Expert: increments the refCount of this IndexReader + instance. RefCounts are used to determine when a + reader can be closed safely, i.e. as soon as there are + no more references. Be sure to always call a + corresponding , in a finally clause; + otherwise the reader may never be closed. Note that + simply calls decRef(), which means that + the IndexReader will not really be closed until + has been called for all outstanding + references. + + + + + + + Expert: decreases the refCount of this IndexReader + instance. If the refCount drops to 0, then pending + changes (if any) are committed to the index and this + reader is closed. + + + IOException in case an IOException occurs in commit() or doClose() + + + + + + + AlreadyClosedException if this IndexReader is closed + + + Returns an IndexReader reading the index in the given + Directory. You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the reader. + + the index directory + true if no changes (deletions, norms) will be made with this IndexReader + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns an IndexReader reading the index in the given + . You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the reader. + + the commit point to open + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, with a custom + . You should pass readOnly=true, + since it gives much better concurrent performance, + unless you intend to do write operations (delete + documents or change norms) with the reader. + + the index directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, with a custom + . You should pass readOnly=true, + since it gives much better concurrent performance, + unless you intend to do write operations (delete + documents or change norms) with the reader. + + the index directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + Subsamples which indexed + terms are loaded into RAM. This has the same effect as + IndexWriter.SetTermIndexInterval + except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1. Set this + to -1 to skip loading the terms index entirely. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, using a specific commit and with + a custom . 
You should pass + readOnly=true, since it gives much better concurrent + performance, unless you intend to do write operations + (delete documents or change norms) with the reader. + + the specific to open; + see to list all commits + in a directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, using a specific commit and with + a custom . You should pass + readOnly=true, since it gives much better concurrent + performance, unless you intend to do write operations + (delete documents or change norms) with the reader. + + the specific to open; + see to list all commits + in a directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + Subsambles which indexed + terms are loaded into RAM. This has the same effect as + IndexWriter.SetTermIndexInterval + except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1. Set this + to -1 to skip loading the terms index entirely. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Refreshes an IndexReader if the index has changed since this instance + was (re)opened. +

+ Opening an IndexReader is an expensive operation. This method can be used + to refresh an existing IndexReader to reduce these costs. This method + tries to only load segments that have changed or were created after the + IndexReader was (re)opened. +

+ If the index has not changed since this instance was (re)opened, then this + call is a NOOP and returns this instance. Otherwise, a new instance is + returned. The old instance is not closed and remains usable.
+

+ If the reader is reopened, even though they share + resources internally, it's safe to make changes + (deletions, norms) with the new reader. All shared + mutable state obeys "copy on write" semantics to ensure + the changes are not seen by other readers. +

+ You can determine whether a reader was actually reopened by comparing the + old instance with the instance returned by this method: + + IndexReader reader = ... + ... + IndexReader newReader = reader.reopen(); + if (newReader != reader) { + ... // reader was reopened + reader.close(); + } + reader = newReader; + ... + + + Be sure to synchronize that code so that other threads, + if present, can never use reader after it has been + closed and before it's switched to newReader. + +

NOTE: If this reader is a near real-time + reader (obtained from , + reopen() will simply call writer.getReader() again for + you, though this may change in the future. + +

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error +
+ + Just like , except you can change the + readOnly of the original reader. If the index is + unchanged but readOnly is different then a new reader + will be returned. + + + + Expert: reopen this reader on a specific commit point. + This always returns a readOnly reader. If the + specified commit point matches what this reader is + already on, and this reader is already readOnly, then + this same instance is returned; if it is not already + readOnly, a readOnly clone is returned. + + + + Efficiently clones the IndexReader (sharing most + internal state). +

+ On cloning a reader with pending changes (deletions, + norms), the original reader transfers its write lock to + the cloned reader. This means only the cloned reader + may make further changes to the index, and commit the + changes to the index on close, but the old reader still + reflects all changes made up until it was cloned. +

+ Like , it's safe to make changes to + either the original or the cloned reader: all shared + mutable state obeys "copy on write" semantics to ensure + the changes are not seen by other readers. +

+

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error +
+ + Clones the IndexReader and optionally changes readOnly. A readOnly + reader cannot open a writeable reader. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Returns the directory associated with this index. The Default + implementation returns the directory specified by subclasses when + delegating to the IndexReader(Directory) constructor, or throws an + UnsupportedOperationException if one was not specified. + + UnsupportedOperationException if no directory + + + Returns the time the index in the named directory was last modified. + Do not use this to check whether the reader is still up-to-date, use + instead. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Reads version number from segments files. The version number is + initialized with a timestamp and then increased by one for each change of + the index. + + + where the index resides. + + version number. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Reads commitUserData, previously passed to + , + from current index segments file. This will return null if + + has never been called for this index. + + where the index resides. + + commit userData. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + + + + + Check whether any new changes have occurred to the index since this + reader was opened. + +

+ If this reader is based on a Directory (ie, was created by calling + + Open(Store.Directory) + , or on a reader based on a Directory), then + this method checks whether any further commits + have occurred in that directory. +

+ +

+ If instead this reader is a near real-time reader (ie, obtained by a call + to , or by calling on a near + real-time reader), then this method checks if either a new commit has + occurred, or any new uncommitted changes have taken place via the writer. + Note that even if the writer has only performed merging, this method will + still return false. +

+ +

+ In any event, if this returns false, you should call to + get a new reader that sees the changes. +

+ +

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error + UnsupportedOperationException unless overridden in subclass +
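Combining IsCurrent above with Reopen from earlier in this file, a typical refresh helper looks roughly like this (a sketch only; member casing follows the .NET port conventions assumed throughout these examples):

    using Lucene.Net.Index;

    // Returns the reader the caller should use from now on.
    static IndexReader RefreshIfStale(IndexReader reader)
    {
        if (reader.IsCurrent())
            return reader;              // nothing changed; keep the existing reader

        IndexReader newReader = reader.Reopen();
        if (newReader != reader)
            reader.Close();             // release the stale instance once swapped out
        return newReader;
    }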
+ + Checks is the index is optimized (if it has a single segment and + no deletions). Not implemented in the IndexReader base class. + + &lt;c&gt;true&lt;/c&gt; if the index is optimized; &lt;c&gt;false&lt;/c&gt; otherwise + UnsupportedOperationException unless overridden in subclass + + + Return an array of term frequency vectors for the specified document. + The array contains a vector for each vectorized field in the document. + Each vector contains terms and frequencies for all terms in a given vectorized field. + If no such fields existed, the method returns null. The term vectors that are + returned may either be of type + or of type if + positions or offsets have been stored. + + + document for which term frequency vectors are returned + + array of term frequency vectors. May be null if no term vectors have been + stored for the specified document. + + IOException if index cannot be accessed + + + + + Return a term frequency vector for the specified document and field. The + returned vector contains terms and frequencies for the terms in + the specified field of this document, if the field had the storeTermVector + flag set. If termvectors had been stored with positions or offsets, a + is returned. + + + document for which the term frequency vector is returned + + field for which the term frequency vector is returned. + + term frequency vector May be null if field does not exist in the specified + document or term vector was not stored. + + IOException if index cannot be accessed + + + + + Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of + the . + + The number of the document to load the vector for + + The name of the field to load + + The to process the vector. Must not be null + + IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. + + + + + Map all the term vectors for all fields in a Document + The number of the document to load the vector for + + The to process the vector. Must not be null + + IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. + + + Returns true if an index exists at the specified directory. + If the directory does not exist or if there is no index in it. + + the directory to check for an index + + true if an index exists; false otherwise + + IOException if there is a problem with accessing the index + + + Returns the number of documents in this index. + + + Returns the stored fields of the nth + Document in this index. +

+ NOTE: for performance reasons, this method does not check if the + requested document is deleted, and therefore asking for a deleted document + may yield unspecified results. Usually this is not required, however you + can call with the requested document ID to verify + the document is not deleted. + +

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error +
+ + Get the at the nth position. The may be used to determine + what s to load and how they should + be loaded. NOTE: If this Reader (more specifically, the underlying + FieldsReader) is closed before the lazy + is loaded, an exception may be + thrown. If you want the value of a lazy + to be available after closing, you + must explicitly load it or fetch the Document again with a new loader.

+ NOTE: for performance reasons, this method does not check if the + requested document is deleted, and therefore asking for a deleted document + may yield unspecified results. Usually this is not required, however you + can call with the requested document ID to verify + the document is not deleted. + +

+ Get the document at the nth position + + The to use to determine what + Fields should be loaded on the Document. May be null, in which case + all Fields will be loaded. + + The stored fields of the + at the nth position + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + + + + + + +
+ + Returns true if document n has been deleted + + + Returns true if there are norms stored for this field. + + + + Returns the byte-encoded normalization factor for the named field of + every document. This is used by the search code to score documents. + + + + + + Reads the byte-encoded normalization factor for the named field of every + document. This is used by the search code to score documents. + + + + + Expert: Resets the normalization factor for the named field of the named + document. The norm represents the product of the field's boost + and its length normalization. Thus, to preserve the length normalization + values when resetting this, one should base the new value upon the old. + + NOTE: If this field does not store norms, then + this method call will silently do nothing. + + + + + If the index has changed since this reader was opened + + + If the index is corrupt + + + If another writer has this index open (write.lock could not be obtained) + + + If there is a low-level IO error + + + + Implements setNorm in subclass. + + + + Expert: Resets the normalization factor for the named field of the named document. + + + + + If the index has changed since this reader was opened + + + If the index is corrupt + + + If another writer has this index open (write.lock could not be obtained) + + + If there is a low-level IO error + + + + Returns an enumeration of all the terms in the index. The + enumeration is ordered by Term.compareTo(). Each term is greater + than all that precede it in the enumeration. Note that after + calling terms(), must be called + on the resulting enumeration before calling other methods such as + . + + + If there is a low-level IO error + + + + Returns an enumeration of all terms starting at a given term. If + the given term does not exist, the enumeration is positioned at the + first term greater than the supplied term. The enumeration is + ordered by Term.compareTo(). Each term is greater than all that + precede it in the enumeration. + + + If there is a low-level IO error + + + + Returns the number of documents containing the term t. + If there is a low-level IO error + + + Returns an enumeration of all the documents which contain + term. For each document, the document number, the frequency of + the term in that document is also provided, for use in + search scoring. If term is null, then all non-deleted + docs are returned with freq=1. + Thus, this method implements the mapping: +

+ Term    =>    <docNum, freq>* + +

The enumeration is ordered by document number. Each document number + is greater than all that precede it in the enumeration. +

+ If there is a low-level IO error +
+ + Returns an unpositioned enumerator. + If there is a low-level IO error + + + Returns an enumeration of all the documents which contain + term. For each document, in addition to the document number + and frequency of the term in that document, a list of all of the ordinal + positions of the term in the document is available. Thus, this method + implements the mapping: + +

+ Term    =>    <docNum, freq, + <pos1, pos2, ... + posfreq-1> + >* + +

This positional information facilitates phrase and proximity searching. +

The enumeration is ordered by document number. Each document number is + greater than all that precede it in the enumeration. +

+ If there is a low-level IO error +
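To illustrate the positional enumeration just described, here is a hedged sketch that prints every position of one term (PascalCase members and a made-up field/term are assumed):

    using System;
    using Lucene.Net.Index;

    static void PrintPositions(IndexReader reader)
    {
        TermPositions tp = reader.TermPositions(new Term("contents", "lucene"));
        try
        {
            while (tp.Next())
            {
                Console.Write("doc {0}:", tp.Doc());
                for (int i = 0; i < tp.Freq(); i++)
                    Console.Write(" {0}", tp.NextPosition());   // ordinal position within the doc
                Console.WriteLine();
            }
        }
        finally
        {
            tp.Close();
        }
    }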
+ + Returns an unpositioned enumerator. + If there is a low-level IO error + + + + Deletes the document numbered docNum. Once a document is + deleted it will not appear in TermDocs or TermPostitions enumerations. + Attempts to read its field with the + method will result in an error. The presence of this document may still be + reflected in the statistic, though + this will be corrected eventually as the index is further modified. + + + If the index has changed since this reader was opened + + If the index is corrupt + + If another writer has this index open (write.lock could not be obtained) + + If there is a low-level IO error + + + Implements deletion of the document numbered docNum. + Applications should call or . + + + + + Deletes all documents that have a given term indexed. + This is useful if one uses a document field to hold a unique ID string for + the document. Then to delete such a document, one merely constructs a + term with the appropriate field and the unique ID string as its text and + passes it to this method. + See for information about when this deletion will + become effective. + + The number of documents deleted + + If the index has changed since this reader was opened + + If the index is corrupt + + If another writer has this index open (write.lock could not be obtained) + + If there is a low-level IO error + + + Undeletes all documents currently marked as deleted in this index. + + + + If the index has changed since this reader was opened + + If the index is corrupt + + If another writer has this index open (write.lock could not be obtained) + + If there is a low-level IO error + + + Implements actual undeleteAll() in subclass. + + + + Does nothing by default. Subclasses that require a write lock for + index modifications must implement this method. + + + + + + + + Opaque Map (String -> String) + that's recorded into the segments file in the index, + and retrievable by + + + + + Commit changes resulting from delete, undeleteAll, or + setNorm operations + + If an exception is hit, then either no changes or all + changes will have been committed to the index + (transactional semantics). + + If there is a low-level IO error + + + Commit changes resulting from delete, undeleteAll, or + setNorm operations + + If an exception is hit, then either no changes or all + changes will have been committed to the index + (transactional semantics). + + If there is a low-level IO error + + + Implements commit. + + + Closes files associated with this index. + Also saves any new deletions to disk. + No other methods should be called after this has been called. + + If there is a low-level IO error + + + Implements close. + + + Get a list of unique field names that exist in this index and have the specified + field option information. + + specifies which field option should be available for the returned fields + + Collection of Strings indicating the names of the fields. + + + + + + Prints the filename and size of each file within a given compound file. + Add the -extract flag to extract files to the current working directory. + In order to make the extracted version of the index work, you have to copy + the segments file from the compound index into the directory where the extracted files are stored. + + Usage: Lucene.Net.Index.IndexReader [-extract] <cfsfile> + + + + Returns all commit points that exist in the Directory. + Normally, because the default is + , there would be only + one commit point. But if you're using a custom + then there could be many commits. 
+ Once you have a given commit, you can open a reader on + it by calling + There must be at least one commit in + the Directory, else this method throws . + Note that if a commit is in + progress while this method is running, that commit + may or may not appear in the returned array. + + + + Expert: returns the sequential sub readers that this + reader is logically composed of. For example, + IndexSearcher uses this API to drive searching by one + sub reader at a time. If this reader is not composed + of sequential child readers, it should return null. + If this method returns an empty array, that means this + reader is a null reader (for example a MultiReader + that has no sub readers). +

+ NOTE: You should not try using sub-readers returned by + this method to make any changes (setNorm, deleteDocument, + etc.). While this might succeed for one composite reader + (like MultiReader), it will most likely lead to index + corruption for other readers (like DirectoryReader obtained + through . Use the parent reader directly. +

+
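+
+ Example (a minimal sketch): deleting by a unique-ID term through a read-write IndexReader,
+ as described above. The path, the field name and the 3.0.x-style calls (FSDirectory.Open,
+ IndexReader.Open(dir, readOnly), DeleteDocuments(Term)) are assumptions; member spellings
+ differ slightly in 2.9.x.
+
+     using Lucene.Net.Index;
+     using Lucene.Net.Store;
+
+     Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
+     IndexReader reader = IndexReader.Open(dir, false);       // readOnly=false so deletes are allowed
+     try
+     {
+         // Delete every document whose "id" field holds the unique ID "42".
+         int deleted = reader.DeleteDocuments(new Term("id", "42"));
+         System.Console.WriteLine("deleted {0} document(s)", deleted);
+     }
+     finally
+     {
+         reader.Close();                                      // closing saves the deletions to the Directory
+     }
+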
+ + Expert: returns the current refCount for this reader + + + Version number when this IndexReader was opened. Not implemented in the + IndexReader base class. + +

+ If this reader is based on a Directory (ie, was created by calling + , or + on a reader based on a Directory), then + this method returns the version recorded in the commit that the reader + opened. This version is advanced every time is + called. +

+ +

+ If instead this reader is a near real-time reader (ie, obtained by a call + to , or by calling on a near + real-time reader), then this method returns the version of the last + commit done by the writer. Note that even as further changes are made + with the writer, the version will not change until a commit is + completed. Thus, you should not rely on this method to determine when a + near real-time reader should be opened. Use instead. +

+ +

+ UnsupportedOperationException + unless overridden in subclass + +
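+
+ Example (a minimal sketch): rather than comparing version numbers directly, a reader is
+ usually refreshed with IsCurrent()/Reopen(), which the note above alludes to. The variable
+ "reader" is assumed to be an already-open IndexReader, as in the earlier sketch.
+
+     if (!reader.IsCurrent())                        // a newer commit exists in the Directory
+     {
+         IndexReader refreshed = reader.Reopen();    // reuses unchanged segments where possible
+         if (refreshed != reader)
+         {
+             reader.Close();
+             reader = refreshed;
+         }
+     }
+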
+ + Retrieve the String userData optionally passed to + . + This will return null if + + has never been called for this index. + + + + + + Returns one greater than the largest possible document number. + This may be used to, e.g., determine how big to allocate an array which + will have an element for every document number in an index. + + + + Returns the number of deleted documents. + + + Returns the stored fields of the nth + Document in this index. +

+ NOTE: for performance reasons, this method does not check if the + requested document is deleted, and therefore asking for a deleted document + may yield unspecified results. Usually this is not required; however, you + can call with the requested document ID to verify + the document is not deleted. +

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error +
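+
+ Example (a minimal sketch): guarding Document(n) with IsDeleted(n), as the note above
+ recommends. MaxDoc is shown as the 3.0.x property (a method in 2.9.x); "reader" and the
+ "id" field are assumptions.
+
+     for (int i = 0; i < reader.MaxDoc; i++)
+     {
+         if (reader.IsDeleted(i))
+             continue;                                // Document(i) itself does not check deletions
+         Lucene.Net.Documents.Document doc = reader.Document(i);
+         System.Console.WriteLine(doc.Get("id"));
+     }
+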
+ + Returns true if any documents have been deleted + + + Expert: return the IndexCommit that this reader has + opened. This method is only implemented by those + readers that correspond to a Directory with its own + segments_N file. + +

WARNING: this API is new and experimental and + may suddenly change.

+

+
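+
+ Example (a minimal sketch): enumerating the commit points in a Directory, as described for
+ the commit-listing method above. SegmentsFileName and Generation are the 3.0.x property
+ spellings, and opening a point-in-time reader on a commit is assumed to be
+ IndexReader.Open(commit, readOnly); treat both as assumptions for other versions.
+
+     foreach (IndexCommit commit in IndexReader.ListCommits(dir))
+     {
+         System.Console.WriteLine("generation {0}: {1}", commit.Generation, commit.SegmentsFileName);
+         // A reader pinned to this exact point in time could be opened with:
+         // IndexReader pointInTime = IndexReader.Open(commit, true);
+     }
+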
+ + Expert + + + Returns the number of unique terms (across all fields) + in this reader. + + This method returns long, even though internally + Lucene cannot handle more than 2^31 unique terms, for + a possible future when this limitation is removed. + + + UnsupportedOperationException if this count + cannot be easily determined (eg Multi*Readers). + Instead, you should call + and ask each sub reader for + its unique term count. + + + + + For IndexReader implementations that use + TermInfosReader to read terms, this returns the + current indexDivisor as specified when the reader was + opened. + + + + Utility class for executing code that needs to do + something with the current segments file. This is + necessary with lock-less commits because from the time + you locate the current segments file name, until you + actually open it, read its contents, or check modified + time, etc., it could have been deleted due to a writer + commit finishing. + + + + A collection of segmentInfo objects with methods for operating on + those segments in relation to the file system. + +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + The file format version, a negative number. + + + This format adds details used for lockless commits. It differs + slightly from the previous format in that file names + are never re-used (write once). Instead, each file is + written to the next generation. For example, + segments_1, segments_2, etc. This allows us to not use + a commit lock. See file + formats for details. + + + + This format adds a "hasSingleNormFile" flag into each segment info. + See LUCENE-756 + for details. + + + + This format allows multiple segments to share a single + vectors and stored fields file. + + + + This format adds a checksum at the end of the file to + ensure all bytes were successfully written. + + + + This format adds the deletion count for each segment. + This way IndexWriter can efficiently report numDocs(). + + + + This format adds the boolean hasProx to record if any + fields in the segment store prox information (ie, have + omitTermFreqAndPositions==false) + + + + This format adds optional commit userData (String) storage. + + + This format adds optional per-segment String + dianostics storage, and switches userData to Map + + + + counts how often the index has been changed by adding or deleting docs. + starting with the current time in milliseconds forces to create unique version numbers. + + + + If non-null, information about loading segments_N files + + + + + Get the generation (N) of the current segments_N file + from a list of files. + + + -- array of file names to check + + + + Get the generation (N) of the current segments_N file + in the directory. + + + -- directory to search for the latest segments_N file + + + + Get the filename of the current segments_N file + from a list of files. + + + -- array of file names to check + + + + Get the filename of the current segments_N file + in the directory. + + + -- directory to search for the latest segments_N file + + + + Get the segments_N filename in use by this segment infos. + + + Parse the generation off the segments file name and + return it. + + + + Get the next segments_N filename that will be written. + + + Read a particular segmentFileName. Note that this may + throw an IOException if a commit is in process. + + + -- directory containing the segments file + + -- segment file to load + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + This version of read uses the retry logic (for lock-less + commits) to find the right segments file to load. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns a copy of this instance, also copying each + SegmentInfo. + + + + Current version number from segments file. + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns userData from latest segments file + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + If non-null, information about retries when loading + the segments file will be printed to this. + + + + Returns a new SegmentInfos containg the SegmentInfo + instances in the specified range first (inclusive) to + last (exclusive), so total number of segments returned + is last-first. + + + + Call this to start a commit. This writes the new + segments file, but writes an invalid checksum at the + end, so that it is not visible to readers. Once this + is called you must call to complete + the commit or to abort it. 
+ + + + Returns all file names referenced by SegmentInfo + instances matching the provided Directory (ie files + associated with any "external" segments are skipped). + The returned collection is recomputed on each + invocation. + + + + Writes & syncs to the Directory dir, taking care to + remove the segments file on exception + + + + Replaces all segments in this instance, but keeps + generation, version, counter so that future commits + remain write once. + + + + + Simple brute force implementation. + If size is equal, compare items one by one. + + SegmentInfos object to check equality for + true if lists are equal, false otherwise + + + + Calculate hash code of SegmentInfos + + hash code as in java version of ArrayList + + + version number when this SegmentInfos was generated. + + + Advanced: Gets or sets how many times to try loading the + segments.gen file contents to determine current segment + generation. This file is only referenced when the + primary method (listing the directory) fails. + + + + Advanced: set how many times to try incrementing the + gen when loading the segments file. This only runs if + the primary (listing directory) and secondary (opening + segments.gen file) methods fail to find the segments + file. + + + + + + + + Utility class for executing code that needs to do + something with the current segments file. This is + necessary with lock-less commits because from the time + you locate the current segments file name, until you + actually open it, read its contents, or check modified + time, etc., it could have been deleted due to a writer + commit finishing. + + + + Subclass must implement this. The assumption is an + IOException will be thrown if something goes wrong + during the processing that could have been caused by + a writer committing. + + + + Constants describing field properties, for example used for + . + + + + All fields + + + All indexed fields + + + All fields that store payloads + + + All fields that omit tf + + + All fields which are not indexed + + + All fields which are indexed with termvectors enabled + + + All fields which are indexed but don't have termvectors enabled + + + All fields with termvectors enabled. Please note that only standard termvector fields are returned + + + All fields with termvectors with position values enabled + + + All fields with termvectors with offset values enabled + + + All fields with termvectors with offset values and position values enabled + + + Construct reading the named set of readers. + + + This constructor is only used for + + + Checks is the index is optimized (if it has a single segment and no deletions) + &lt;c&gt;true&lt;/c&gt; if the index is optimized; &lt;c&gt;false&lt;/c&gt; otherwise + + + Tries to acquire the WriteLock on this directory. this method is only valid if this IndexReader is directory + owner. + + + StaleReaderException if the index has changed since this reader was opened + CorruptIndexException if the index is corrupt + Lucene.Net.Store.LockObtainFailedException + if another writer has this index open (write.lock could not be + obtained) + + IOException if there is a low-level IO error + + + Commit changes resulting from delete, undeleteAll, or setNorm operations +

+ If an exception is hit, then either no changes or all changes will have been committed to the index (transactional + semantics). + +

+ IOException if there is a low-level IO error +
+ + Returns the directory this index resides in. + + + + + + + Version number when this IndexReader was opened. + + + Expert: return the IndexCommit that this reader has opened. +

+

WARNING: this API is new and experimental and may suddenly change.

+

+
+ +

Expert: represents a single commit into an index as seen by the + or .

+ +

Changes to the content of an index are made visible + only after the writer who made that change commits by + writing a new segments file + (segments_N). This point in time, when the + action of writing of a new segments file to the directory + is completed, is an index commit.

+ +

Each index commit point has a unique segments file + associated with it. The segments file associated with a + later index commit point would have a larger N.

+ +

WARNING: This API is new and experimental and + may suddenly change.

+

+
+ + Delete this commit point. This only applies when using + the commit point in the context of IndexWriter's + IndexDeletionPolicy. +

+ Upon calling this, the writer is notified that this commit + point should be deleted. +

+ The decision that a commit point should be deleted is made by the deletion policy in effect, + and therefore this method should only be called by its or + methods.

+
+ + Two IndexCommits are equal if both their Directory and versions are equal. + + + Get the segments file (segments_N) associated + with this commit point. + + + + Returns all index files referenced by this commit point. + + + Returns the for the index. + + + Returns true if this commit is an optimized index. + + + Returns the version for this IndexCommit. This is the + same value that would + return if it were opened on this commit. + + + + Returns the generation (the _N in segments_N) for this + IndexCommit + + + + Convenience method that returns the last modified time + of the segments_N file corresponding to this index + commit, equivalent to + getDirectory().fileModified(getSegmentsFileName()). + + + + Returns userData, previously passed to + + for this commit. IDictionary is String -> String. + + + + Abstract class for enumerating terms. +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
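+
+ Example (a minimal sketch): walking every term in the index with a TermEnum, in
+ Term.CompareTo() order as stated above. Term, Field and Text are shown as 3.0.x properties
+ (methods in 2.9.x); "reader" is assumed to be an open IndexReader.
+
+     TermEnum terms = reader.Terms();
+     try
+     {
+         while (terms.Next())
+         {
+             Term t = terms.Term;
+             System.Console.WriteLine("{0}:{1} df={2}", t.Field, t.Text, terms.DocFreq());
+         }
+     }
+     finally
+     {
+         terms.Close();
+     }
+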
+ + Increments the enumeration to the next element. True if one exists. + + + Returns the docFreq of the current Term in the enumeration. + + + Closes the enumeration to further activity, freeing resources. + + + Closes the enumeration to further activity, freeing resources. + + + Returns the current Term in the enumeration. + + + Optimized implementation. + + + TermPositions provides an interface for enumerating the <document, + frequency, <position>* > tuples for a term.

The document and + frequency are the same as for a TermDocs. The positions portion lists the ordinal + positions of each occurrence of a term in a document. + +

+ + +
+ + Returns next position in the current document. It is an error to call + this more than times + without calling

This is + invalid until is called for + the first time. +

+
+ + Returns the payload data at the current term position. + This is invalid until is called for + the first time. + This method must not be called more than once after each call + of . However, payloads are loaded lazily, + so if the payload data for the current position is not needed, + this method may not be called at all for performance reasons.
+ +
+ the array into which the data of this payload is to be + stored, if it is big enough; otherwise, a new byte[] array + is allocated for this purpose. + + the offset in the array into which the data of this payload + is to be stored. + + a byte[] array containing the data of this payload + + IOException +
+ + Returns the length of the payload at the current term position. + This is invalid until is called for + the first time.
+
+ length of the current payload in number of bytes +
+ + Checks if a payload can be loaded at this position. +

+ Payloads can only be loaded once per call to + . + +

+ true if there is a payload available at this position that can be loaded +
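+
+ Example (a minimal sketch): iterating the <document, frequency, <position>*> tuples for one
+ term and reading payloads lazily, per the contract described above. Freq, IsPayloadAvailable
+ and PayloadLength are shown as 3.0.x properties (methods in 2.9.x); the field and term values
+ are assumptions.
+
+     TermPositions tp = reader.TermPositions(new Term("body", "lucene"));
+     try
+     {
+         while (tp.Next())                                // one entry per matching document
+         {
+             for (int i = 0; i < tp.Freq; i++)            // never call NextPosition() more than Freq times
+             {
+                 int position = tp.NextPosition();
+                 if (tp.IsPayloadAvailable)               // payload may be read at most once per position
+                 {
+                     byte[] payload = tp.GetPayload(new byte[tp.PayloadLength], 0);
+                 }
+             }
+         }
+     }
+     finally
+     {
+         tp.Close();
+     }
+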
+ + Process the document. If there is + something for this document to be done in docID order, + you should encapsulate that as a + DocumentsWriter.DocWriter and return it. + DocumentsWriter then calls finish() on this object + when it's its turn. + + + + Called when DocumentsWriter decides to create a new + segment + + + + Called when DocumentsWriter decides to close the doc + stores + + + + Called when an aborting exception is hit + + + Add a new thread + + + Called when DocumentsWriter is using too much RAM. + The consumer should free RAM, if possible, returning + true if any RAM was in fact freed. + + + + Processes all occurrences of a single field + + + This is just a "splitter" class: it lets you wrap two + DocFieldConsumer instances as a single consumer. + + + + Consumer returns this on each doc. This holds any + state that must be flushed synchronized "in docID + order". We gather these and flush them in order. + + + + This class accepts multiple added documents and directly + writes a single segment file. It does this more + efficiently than creating a single segment per document + (with DocumentWriter) and doing standard merges on those + segments. + + Each added document is passed to the , + which in turn processes the document and interacts with + other consumers in the indexing chain. Certain + consumers, like and + , digest a document and + immediately write bytes to the "doc store" files (ie, + they do not consume RAM per document, except while they + are processing the document). + + Other consumers, eg and + , buffer bytes in RAM and flush only + when a new segment is produced. + Once we have used our allowed RAM buffer, or the number + of added docs is large enough (in the case we are + flushing by doc count instead of RAM usage), we create a + real segment and flush it to the Directory. + + Threads: + + Multiple threads are allowed into addDocument at once. + There is an initial synchronized call to getThreadState + which allocates a ThreadState for this thread. The same + thread will get the same ThreadState over time (thread + affinity) so that if there are consistent patterns (for + example each thread is indexing a different content + source) then we make better use of RAM. Then + processDocument is called on that ThreadState without + synchronization (most of the "heavy lifting" is in this + call). Finally the synchronized "finishDocument" is + called to flush changes to the directory. + + When flush is called by IndexWriter we forcefully idle + all threads and flush only once they are all idle. This + means you can call flush with a given thread even while + other threads are actively adding/deleting documents. + + + Exceptions: + + Because this class directly updates in-memory posting + lists, and flushes stored fields and term vectors + directly to files in the directory, there are certain + limited times when an exception can corrupt this state. + For example, a disk full while flushing stored fields + leaves this file in a corrupt state. Or, an OOM + exception while appending to the in-memory posting lists + can corrupt that posting list. We call such exceptions + "aborting exceptions". In these cases we must call + abort() to discard all docs added since the last flush. + + All other exceptions ("non-aborting exceptions") can + still partially update the index structures. These + updates are consistent, but, they represent only a part + of the document seen up until the exception was hit. 
+ When this happens, we immediately mark the document as + deleted so that the document is always atomically ("all + or none") added to the index. + + + + Returns true if any of the fields in the current + buffered docs have omitTermFreqAndPositions==false + + + + If non-null, various details of indexing are printed + here. + + + + Set how much RAM we can use before flushing. + + + Closes the current open doc stores an returns the doc + store segment name. This returns null if there are * + no buffered documents. + + + + Called if we hit an exception at a bad time (when + updating the index files) and must discard all + currently buffered docs. This resets our state, + discarding any docs added since last flush. + + + + Reset after a flush + + + Flush all pending docs to a new segment + + + Build compound file for the segment we just flushed + + + Set flushPending if it is not already set and returns + whether it was set. This is used by IndexWriter to + trigger a single flush even when multiple threads are + trying to do so. + + + + Returns a free (idle) ThreadState that may be used for + indexing this one document. This call also pauses if a + flush is pending. If delTerm is non-null then we + buffer this deleted term after the thread state has + been acquired. + + + + Returns true if the caller (IndexWriter) should now + flush. + + + + Called whenever a merge has completed and the merged segments had deletions + + + Does the synchronized work to finish/flush the + inverted document. + + + + Gets or sets max buffered docs, which means we will flush by + doc count instead of by RAM usage. + + + + Get current segment name we are writing. + + + Returns how many docs are currently buffered in RAM. + + + Returns the current doc store segment we are writing + to. + + + + Returns the doc offset into the shared doc store for + the current buffered docs. + + + + The IndexingChain must define the method + which returns the DocConsumer that the DocumentsWriter calls to process the + documents. + + + + Consumer returns this on each doc. This holds any + state that must be flushed synchronized "in docID + order". We gather these and flush them in order. + + + + Expert: allocate a new buffer. + Subclasses can allocate differently. + + size of allocated buffer. + + allocated buffer. + + + + This is a DocConsumer that gathers all fields under the + same name, and calls per-field consumers to process field + by field. This class doesn't doesn't do any "real" work + of its own: it just forwards the fields to a + DocFieldConsumer. + + + + Holds all per thread, per field state. + + + Gathers all Fieldables for a document under the same + name, updates FieldInfos, and calls per-field consumers + to process field by field. + + Currently, only a single thread visits the fields, + sequentially, for processing. + + + + If there are fields we've seen but did not see again + in the last run, then free them up. + + + + This is a DocFieldConsumer that inverts each field, + separately, from a Document, and accepts a + InvertedTermsConsumer to process those terms. + + + + Holds state for inverting all occurrences of a single + field in the document. This class doesn't do anything + itself; instead, it forwards the tokens produced by + analysis to its own consumer + (InvertedDocConsumerPerField). It also interacts with an + endConsumer (InvertedDocEndConsumerPerField). + + + + This is a DocFieldConsumer that inverts each field, + separately, from a Document, and accepts a + InvertedTermsConsumer to process those terms. 
+ + + + Used by DocumentsWriter to maintain per-thread state. + We keep a separate Posting hash and other state for each + thread and then merge postings hashes from all threads + when writing the segment. + + + + Access to the Fieldable Info file that describes document fields and whether or + not they are indexed. Each segment has a separate Fieldable Info file. Objects + of this class are thread-safe for multiple readers, but only one thread can + be adding documents at a time, with no other reader or writer threads + accessing this object. + + + + Construct a FieldInfos object using the directory and the name of the file + IndexInput + + The directory to open the IndexInput from + + The name of the file to open the IndexInput from in the Directory + + IOException + + + Returns a deep clone of this FieldInfos instance. + + + Adds field info for a Document. + + + Returns true if any fields do not omitTermFreqAndPositions + + + Add fields that are indexed. Whether they have termvectors has to be specified. + + + The names of the fields + + Whether the fields store term vectors or not + + true if positions should be stored. + + true if offsets should be stored + + + + Assumes the fields are not storing term vectors. + + + The names of the fields + + Whether the fields are indexed or not + + + + + + + Calls 5 parameter add with false for all TermVector parameters. + + + The name of the Fieldable + + true if the field is indexed + + + + + + Calls 5 parameter add with false for term vector positions and offsets. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + true if the norms for the indexed field should be omitted + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + true if the norms for the indexed field should be omitted + + true if payloads should be stored for this field + + true if term freqs should be omitted for this field + + + + Return the fieldName identified by its number. + + + + + the fieldName or an empty string when the field + with the given number doesn't exist. + + + + Return the fieldinfo object referenced by the fieldNumber. 
+ + + the FieldInfo object or null when the given fieldNumber + doesn't exist. + + + + This class tracks the number and position / offset parameters of terms + being added to the index. The information collected in this class is + also used to calculate the normalization factor for a field. + +

WARNING: This API is new and experimental, and may suddenly + change.

+

+
+ + Re-initialize the state, using this boost value. + boost value to use. + + + + Get the last processed term position. + the position + + + Get total number of terms in this field. + the length + + + Get the number of terms with positionIncrement == 0. + the numOverlap + + + Get end offset of the last processed term. + the offset + + + Get boost value. This is the cumulative product of + document boost and field boost for all field instances + sharing the same field name. + + the boost + + + + + + + + + Constructs a new runtime exception with null as its + detail message. The cause is not initialized, and may subsequently be + initialized by a call to . + + + + Constructs a new runtime exception with the specified cause and a + detail message of (cause==null ? null : cause.toString()) + (which typically contains the class and detail message of + cause). +

+ This constructor is useful for runtime exceptions + that are little more than wrappers for other throwables. + +

+ the cause (which is saved for later retrieval by the + ). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.) + + 1.4 + +
+ + Constructs a new runtime exception with the specified detail message. + The cause is not initialized, and may subsequently be initialized by a + call to . + + + the detail message. The detail message is saved for + later retrieval by the method. + + + + Constructs a new runtime exception with the specified detail message and + cause.

Note that the detail message associated with + cause is not automatically incorporated in + this runtime exception's detail message. + +

+ the detail message (which is saved for later retrieval + by the method). + + the cause (which is saved for later retrieval by the + method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.) + + 1.4 + +
+ + For each Field, store a sorted collection of s +

+ This is not thread-safe. +

+
+ + The TermVectorMapper can be used to map Term Vectors into your own + structure instead of the parallel array structure used by + . +

+ It is up to the implementation to make sure it is thread-safe. + + + +

+
+ + + true if this mapper should tell Lucene to ignore positions even if they are stored + + similar to ignoringPositions + + + + Tell the mapper what to expect in regards to field, number of terms, offset and position storage. + This method will be called once before retrieving the vector for a field. + + This method will be called before . + + The field the vector is for + + The number of terms that need to be mapped + + true if the mapper should expect offset information + + true if the mapper should expect positions info + + + + Map the Term Vector information into your own structure + The term to add to the vector + + The frequency of the term in the document + + null if the offset is not specified, otherwise the offset into the field of the term + + null if the position is not specified, otherwise the position in the field of the term + + + + Passes down the index of the document whose term vector is currently being mapped, + once for each top level call to a term vector reader. +

+ Default implementation IGNORES the document number. Override if your implementation needs the document number. +

+ NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations. + +

+ index of document currently being mapped + +
+ + Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they + can be skipped over. Derived classes should set this to true if they want to ignore positions. The default + is false, meaning positions will be loaded if they are stored. + + false + + + + Same principle as , but applied to offsets. false by default. + + false + + + + A Comparator for sorting s + + + + Get the mapping between fields and terms, sorted by the comparator + + + A map between field names and <see cref="System.Collections.Generic.SortedDictionary{Object,Object}" />s per field. SortedSet entries are <see cref="TermVectorEntry" /> + + + Class responsible for access to stored document fields. +

+ It uses <segment>.fdt and <segment>.fdx files. +

+
+ + Returns a cloned FieldsReader that shares open + IndexInputs with the original one. It is the caller's + job not to close the original FieldsReader until all + clones are called (eg, currently SegmentReader manages + this logic). + + + + AlreadyClosedException if this FieldsReader is closed + + + Closes the underlying streams, including any ones associated with a + lazy implementation of a Field. This means that the Fields values will not be accessible. + + + IOException + + + Returns the length in bytes of each raw document in a + contiguous range of length numDocs starting with + startDocID. Returns the IndexInput (the fieldStream), + already seeked to the starting point for startDocID. + + + + Skip the field. We still have to read some of the information about the field, but can skip past the actual content. + This will have the most payoff on large fields. + + + + A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is + loaded. + + + + The value of the field as a Reader, or null. If null, the String value, + binary value, or TokenStream value is used. Exactly one of StringValue(), + ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. + + + + The value of the field as a TokenStream, or null. If null, the Reader value, + String value, or binary value is used. Exactly one of StringValue(), + ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. + + + + The value of the field as a String, or null. If null, the Reader value, + binary value, or TokenStream value is used. Exactly one of StringValue(), + ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. + + + + Bulk write a contiguous series of documents. The + lengths array is the length (in bytes) of each raw + document. The stream IndexInput is the + fieldsStream from which we should bulk-copy all + bytes. + + + + A FilterIndexReader contains another IndexReader, which it + uses as its basic source of data, possibly transforming the data along the + way or providing additional functionality. The class + FilterIndexReader itself simply implements all abstract methods + of IndexReader with versions that pass all requests to the + contained index reader. Subclasses of FilterIndexReader may + further override some of these methods and may also provide additional + methods and fields. + + + +

Construct a FilterIndexReader based on the specified base reader. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the base reader.

+

Note that base reader is closed if this FilterIndexReader is closed.

+

+ specified base reader. + +
+ + + If the subclass of FilteredIndexReader modifies the + contents of the FieldCache, you must override this + method to provide a different key */ + + + + + If the subclass of FilteredIndexReader modifies the + deleted docs, you must override this method to provide + a different key */ + + + + Base class for filtering implementations. + + + Base class for filtering implementations. + + + Base class for filtering implementations. + + + NOTE: this API is experimental and will likely change + + + Adds a new doc in this term. If this returns null + then we just skip consuming positions/payloads. + + + + Called when we are done adding docs to this term + + + Consumes doc and freq, writing them using the current + index file format + + + + Adds a new doc in this term. If this returns null + then we just skip consuming positions/payloads. + + + + Called when we are done adding docs to this term + + + Abstract API that consumes terms, doc, freq, prox and + payloads postings. Concrete implementations of this + actually do "something" with the postings (write it into + the index in a specific format). + + NOTE: this API is experimental and will likely change + + + + Add a new field + + + Called when we are done adding everything. + + + Add a new field + + + Called when we are done adding everything. + + + Add a new position & payload. If payloadLength > 0 + you must read those bytes from the IndexInput. + + + + Called when we are done adding positions & payloads + + + Add a new position & payload + + + Called when we are done adding positions & payloads + + + NOTE: this API is experimental and will likely change + + + Adds a new term in this field; term ends with U+FFFF + char + + + + Called when we are done adding terms to this field + + + Adds a new term in this field + + + Called when we are done adding terms to this field + + + Used by DocumentsWriter to merge the postings from + multiple ThreadStates when creating a segment + + + + This is the base class for an in-memory posting list, + keyed by a Token. maintains a hash + table holding one instance of this per unique Token. + Consumers of TermsHash () must + subclass this class with its own concrete class. + FreqProxTermsWriter.PostingList is a private inner class used + for the freq/prox postings, and + TermVectorsTermsWriter.PostingList is a private inner class + used to hold TermVectors postings. + + + + Implement this class to plug into the TermsHash + processor, which inverts and stores Tokens into a hash + table and provides an API for writing bytes into + multiple streams for each unique Token. + + + +

Expert: policy for deletion of stale index commits. + +

Implement this interface, and pass it to one + of the or + constructors, to customize when older + point-in-time commits + are deleted from the index directory. The default deletion policy + is , which always + removes old commits as soon as a new commit is done (this + matches the behavior before 2.2).

+ +

One expected use case for this (and the reason why it + was first created) is to work around problems with an + index directory accessed via filesystems like NFS because + NFS does not provide the "delete on last close" semantics + that Lucene's "point in time" search normally relies on. + By implementing a custom deletion policy, such as "a + commit is only removed once it has been stale for more + than X minutes", you can give your readers time to + refresh to the new commit before + removes the old commits. Note that doing so will + increase the storage requirements of the index. See LUCENE-710 + for details.

+

+
+ +

This is called once when a writer is first + instantiated to give the policy a chance to remove old + commit points.

+ +

The writer locates all index commits present in the + index directory and calls this method. The policy may + choose to delete some of the commit points, doing so by + calling method + of .

+ +

Note: the last CommitPoint is the most recent one, + i.e. the "front index state". Be careful not to delete it, + unless you know for sure what you are doing, and unless + you can afford to lose the index content while doing that. + +

+ List of current + point-in-time commits, + sorted by age (the 0th one is the oldest commit). + +
+ + +

This is called each time the writer completed a commit. + This gives the policy a chance to remove old commit points + with each commit.

+ +

The policy may now choose to delete old commit points + by calling method + of .

+ +

This method is only called when + or is called, or possibly not at + all if the is called.

+ +

Note: the last CommitPoint is the most recent one, + i.e. the "front index state". Be careful not to delete it, + unless you know for sure what you are doing, and unless + you can afford to lose the index content while doing that.

+
+ + List of , sorted by age (the 0th one is the oldest commit). + +
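+
+ Example (a minimal sketch): a deletion policy that, like the default
+ KeepOnlyLastCommitDeletionPolicy, removes everything except the newest commit whenever
+ OnInit or OnCommit is called. The generic method shape below matches the Lucene.Net 3.0.x
+ interface; older releases use non-generic lists, so treat the signatures (and the class
+ name) as assumptions.
+
+     using System.Collections.Generic;
+     using Lucene.Net.Index;
+
+     public class KeepNewestOnlyDeletionPolicy : IndexDeletionPolicy
+     {
+         public void OnInit<T>(IList<T> commits) where T : IndexCommit
+         {
+             DeleteAllButNewest(commits);
+         }
+
+         public void OnCommit<T>(IList<T> commits) where T : IndexCommit
+         {
+             DeleteAllButNewest(commits);
+         }
+
+         private static void DeleteAllButNewest<T>(IList<T> commits) where T : IndexCommit
+         {
+             // Commits are sorted by age; the last one is the "front index state" and must survive.
+             for (int i = 0; i < commits.Count - 1; i++)
+                 commits[i].Delete();
+         }
+     }
+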
+ + + This class keeps track of each SegmentInfos instance that + is still "live", either because it corresponds to a + segments_N file in the Directory (a "commit", i.e. a + committed SegmentInfos) or because it's an in-memory + SegmentInfos that a writer is actively updating but has + not yet committed. This class uses simple reference + counting to map the live SegmentInfos instances to + individual files in the Directory. + + The same directory file may be referenced by more than + one IndexCommit, i.e. more than one SegmentInfos. + Therefore we count how many commits reference each file. + When all the commits referencing a certain file have been + deleted, the refcount for that file becomes zero, and the + file is deleted. + + A separate deletion policy interface + (IndexDeletionPolicy) is consulted on creation (onInit) + and once per commit (onCommit), to decide when a commit + should be removed. + + It is the business of the IndexDeletionPolicy to choose + when to delete commit points. The actual mechanics of + file deletion, retrying, etc, derived from the deletion + of commit points is the business of the IndexFileDeleter. + + The current default deletion policy is + , which removes all + prior commits when a new commit has completed. This + matches the behavior before 2.2. + + Note that you must hold the write.lock before + instantiating this class. It opens segments_N file(s) + directly with no retry logic. + + + + because they are open and we are running on Windows), + so we will retry them again later: //// + + + Counts how many existing commits reference a file. + Maps String to RefCount (class below) instances: //// + + + This will have just 1 commit if you are using the + default delete policy (KeepOnlyLastCommitDeletionPolicy). + Other policies may leave commit points live for longer + in which case this list would be longer than 1: //// + + + non-commit checkpoint: //// + + + Change to true to see details of reference counts when + infoStream != null + + + + Initialize the deleter: find all previous commits in + the Directory, incref the files they reference, call + the policy to let it delete commits. This will remove + any files not referenced by any of the commits. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Remove the CommitPoints in the commitsToDelete List by + DecRef'ing all files from each SegmentInfos. + + + + Writer calls this when it has hit an error and had to + roll back, to tell us that there may now be + unreferenced files in the filesystem. So we re-list + the filesystem and delete such files. If segmentName + is non-null, we will only delete files corresponding to + that segment. + + + + For definition of "check point" see IndexWriter comments: + "Clarification: Check Points (and commits)". + + Writer calls this when it has made a "consistent + change" to the index, meaning new files are written to + the index and the in-memory SegmentInfos have been + modified to point to those files. + + This may or may not be a commit (segments_N may or may + not have been written). + + We simply incref the files referenced by the new + SegmentInfos and decref the files we had previously + seen (if any). + + If this is a commit, we also call the policy to give it + a chance to remove other commits. If any commits are + removed, we decref their files as well. + + + + Deletes the specified files, but only if they are new + (have not yet been incref'd). 
+ + + + Tracks the reference count for a single index file: + + + Holds details for each commit point. This class is + also passed to the deletion policy. Note: this class + has a natural ordering that is inconsistent with + equals. + + + + Called only be the deletion policy, to remove this + commit point from the index. + + + + Filename filter that accept filenames and extensions only created by Lucene. + + + Returns true if this is a file that would be contained + in a CFS file. This function should only be called on + files that pass the above "accept" (ie, are already + known to be a Lucene index file). + + + + Useful constants representing filenames and extensions used by lucene + + + Name of the index segment file + + + Name of the generation reference file name + + + Name of the index deletable file (only used in + pre-lockless indices) + + + + Extension of norms file + + + Extension of freq postings file + + + Extension of prox postings file + + + Extension of terms file + + + Extension of terms index file + + + Extension of stored fields index file + + + Extension of stored fields file + + + Extension of vectors fields file + + + Extension of vectors documents file + + + Extension of vectors index file + + + Extension of compound file + + + Extension of compound file for doc store files + + + Extension of deletes + + + Extension of field infos + + + Extension of plain norms + + + Extension of separate norms + + + Extension of gen file + + + This array contains all filename extensions used by + Lucene's index files, with two exceptions, namely the + extension made up from .f + a number and + from .s + a number. Also note that + Lucene's segments_N files do not have any + filename extension. + + + + File extensions that are added to a compound file + (same as above, minus "del", "gen", "cfs"). + + + + File extensions of old-style index files + + + File extensions for term vector support + + + Computes the full file name from base, extension and + generation. If the generation is -1, the file name is + null. If it's 0, the file name is + If it's > 0, the file name is + + + -- main part of the file name + + -- extension of the filename (including .) + + -- generation + + + + Returns true if the provided filename is one of the doc + store files (ends with an extension in + STORE_INDEX_EXTENSIONS). + + + + An IndexWriter creates and maintains an index. +

The create argument to the + constructor determines + whether a new index is created, or whether an existing index is + opened. Note that you can open an index with create=true + even while readers are using the index. The old readers will + continue to search the "point in time" snapshot they had opened, + and won't see the newly created index until they re-open. There are + also constructors + with no create argument which will create a new index + if there is not already an index at the provided path and otherwise + open the existing index.

+

In either case, documents are added with + and removed with or + . A document can be updated with + (which just deletes + and then adds the entire document). When finished adding, deleting + and updating documents, should be called.

+ +

These changes are buffered in memory and periodically + flushed to the (during the above method + calls). A flush is triggered when there are enough + buffered deletes (see ) + or enough added documents since the last flush, whichever + is sooner. For the added documents, flushing is triggered + either by RAM usage of the documents (see + ) or the number of added documents. + The default is to flush when RAM usage hits 16 MB. For + best indexing speed you should flush by RAM usage with a + large RAM buffer. Note that flushing just moves the + internal buffered state in IndexWriter into the index, but + these changes are not visible to IndexReader until either + or is called. A flush may + also trigger one or more segment merges which by default + run with a background thread so as not to block the + addDocument calls (see below + for changing the ). +

+ If an index will not have more documents added for a while and optimal search + performance is desired, then either the full + method or partial method should be + called before the index is closed. +

+ Opening an IndexWriter creates a lock file for the directory in use. Trying to open + another IndexWriter on the same directory will lead to a + . The + is also thrown if an IndexReader on the same directory is used to delete documents + from the index.

+

+ +

Expert: IndexWriter allows an optional + implementation to be + specified. You can use this to control when prior commits + are deleted from the index. The default policy is + which removes all prior + commits as soon as a new commit is done (this matches + behavior before 2.2). Creating your own policy can allow + you to explicitly keep previous "point in time" commits + alive in the index for some time, to allow readers to + refresh to the new commit without having the old commit + deleted out from under them. This is necessary on + filesystems like NFS that do not support "delete on last + close" semantics, which Lucene's "point in time" search + normally relies on.

+

Expert: + IndexWriter allows you to separately change + the and the . + The is invoked whenever there are + changes to the segments in the index. Its role is to + select which merges to do, if any, and return a + describing the merges. It + also selects merges to do for optimize(). (The default is + . Then, the + is invoked with the requested merges and + it decides when and how to run the merges. The default is + .

+

NOTE: if you hit an + OutOfMemoryError then IndexWriter will quietly record this + fact and block all future segment commits. This is a + defensive measure in case any internal state (buffered + documents and deletions) were corrupted. Any subsequent + calls to will throw an + IllegalStateException. The only course of action is to + call , which internally will call + , to undo any changes to the index since the + last commit. You can also just call + directly.

+

NOTE: + instances are completely thread + safe, meaning multiple threads can call any of its + methods, concurrently. If your application requires + external synchronization, you should not + synchronize on the IndexWriter instance as + this may cause deadlock; use your own (non-Lucene) objects + instead.

+ NOTE: if you call + Thread.Interrupt() on a thread that's within + IndexWriter, IndexWriter will try to catch this (eg, if + it's in a Wait() or Thread.Sleep()), and will then throw + the unchecked exception + and clear the interrupt status on the thread

+

+
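+
+ Example (a minimal sketch): the add/commit/close cycle described above. The analyzer version
+ constant, the field names and the MaxFieldLength choice are assumptions against the
+ Lucene.Net 3.0.x surface; adjust for the release actually packaged.
+
+     using Lucene.Net.Analysis.Standard;
+     using Lucene.Net.Documents;
+     using Lucene.Net.Index;
+     using Lucene.Net.Store;
+
+     Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
+     var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
+     var writer = new IndexWriter(dir, analyzer, true /* create */, IndexWriter.MaxFieldLength.LIMITED);
+
+     var doc = new Document();
+     doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
+     doc.Add(new Field("body", "hello lucene", Field.Store.YES, Field.Index.ANALYZED));
+     writer.AddDocument(doc);
+
+     writer.Commit();   // make the buffered changes visible to newly opened readers
+     writer.Close();    // commits any remaining changes and releases the write.lock
+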
+ + Name of the write lock in the index. + + + Value to denote a flush trigger is disabled + + + Default value is 16 MB (which means flush when buffered + docs consume 16 MB RAM). Change using . + + + + Default value is 10,000. Change using . + + + Default value is 128. Change using . + + + Default value for the write lock timeout (1,000). + + + + + Disabled by default (because IndexWriter flushes by RAM usage + by default). Change using . + + + + Disabled by default (because IndexWriter flushes by RAM usage + by default). Change using . + + + + Absolute hard maximum length for a term. If a term + arrives from the analyzer longer than this length, it + is skipped and a message is printed to infoStream, if + set (see ). + + + + Expert: returns a readonly reader, covering all committed as well as + un-committed changes to the index. This provides "near real-time" + searching, in that changes made during an IndexWriter session can be + quickly made available for searching without closing the writer nor + calling . + +

+ Note that this is functionally equivalent to calling Commit() and then + using to open a new reader. But the turnaround + time of this method should be faster since it avoids the potentially + costly .

+ + You must close the returned by this method once you are done using it. + +

+ It's near real-time because there is no hard + guarantee on how quickly you can get a new reader after + making changes with IndexWriter. You'll have to + experiment in your situation to determine if it's + fast enough. As this is a new and experimental + feature, please report back on your findings so we can + learn, improve and iterate.

+ +

The resulting reader supports + , but that call will simply forward + back to this method (though this may change in the + future).

+ +

The very first time this method is called, this + writer instance will make every effort to pool the + readers that it opens for doing merges, applying + deletes, etc. This means additional resources (RAM, + file descriptors, CPU time) will be consumed.

+ +

+ For lower latency on reopening a reader, you should call + to + pre-warm a newly merged segment before it's committed + to the index. This is important for minimizing index-to-search + delay after a large merge. +

If an addIndexes* call is running in another thread, + then this reader will only search those segments from + the foreign index that have been successfully copied + over, so far

. + +

NOTE: Once the writer is closed, any + outstanding readers may continue to be used. However, + if you attempt to reopen any of those readers, you'll + hit an .

+ +

NOTE: This API is experimental and might + change in incompatible ways in the next release.

+ +

+ IndexReader that covers entire index plus all + changes made so far by this IndexWriter instance + + + IOException +
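+
+ Example (a minimal sketch): near real-time search through the reader returned above,
+ continuing the writer sketch from earlier; the query field and term are assumptions.
+
+     writer.AddDocument(doc);                        // buffered, not yet committed
+
+     IndexReader nrtReader = writer.GetReader();     // sees the uncommitted document
+     var searcher = new Lucene.Net.Search.IndexSearcher(nrtReader);
+     var hits = searcher.Search(new Lucene.Net.Search.TermQuery(new Term("body", "lucene")), 10);
+     System.Console.WriteLine("{0} hit(s)", hits.TotalHits);
+
+     searcher.Close();
+     nrtReader.Close();                              // the caller must close the returned reader
+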
+ + Expert: like , except you can + specify which termInfosIndexDivisor should be used for + any newly opened readers. + + Subsambles which indexed + terms are loaded into RAM. This has the same effect as + except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1. Set this + to -1 to skip loading the terms index entirely. + + + + Obtain the number of deleted docs for a pooled reader. + If the reader isn't being pooled, the segmentInfo's + delCount is returned. + + + + Used internally to throw an + if this IndexWriter has been + closed. + + AlreadyClosedException if this IndexWriter is + + + Prints a message to the infoStream (if non-null), + prefixed with the identifying information for this + writer and the thread that's calling it. + + + + Expert: Set the Similarity implementation used by this IndexWriter. + + + + Constructs an IndexWriter for the index in d. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + d, replacing the index already there, if any. + + + the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + + + Constructs an IndexWriter for the index in + d, first creating it if it does not + already exist. + + + the index directory + + the analyzer to use + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + + + Expert: constructs an IndexWriter with a custom + , for the index in d, + first creating it if it does not already exist. Text + will be analyzed with a. + + + the index directory + + the analyzer to use + + see above + + whether or not to limit field lengths + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + + + Expert: constructs an IndexWriter with a custom + , for the index in d. + Text will be analyzed with a. If + create is true, then a new, empty index + will be created in d, replacing the index + already there, if any. + + + the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + see above + + , whether or not to limit field lengths. 
Value is in number of terms/tokens + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + + + Expert: constructs an IndexWriter with a custom + and , + for the index in d. + Text will be analyzed with a. If + create is true, then a new, empty index + will be created in d, replacing the index + already there, if any. + + + the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + see above + + whether or not to limit field lengths, value is in number of terms/tokens. See . + + the chain to be used to + process documents + + which commit to open + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + + + Expert: constructs an IndexWriter on specific commit + point, with a custom , for + the index in d. Text will be analyzed + with a. + +

This is only meaningful if you've used a + in that past that keeps more than + just the last commit. + +

This operation is similar to , + except that method can only rollback what's been done + with the current instance of IndexWriter since its last + commit, whereas this method can rollback to an + arbitrary commit point from the past, assuming the + has preserved past + commits. + +

+ the index directory + + the analyzer to use + + see above + + whether or not to limit field lengths, value is in number of terms/tokens. See . + + which commit to open + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + +
+ + Expert: set the merge policy used by this writer. + + + Expert: set the merge scheduler used by this writer. + + + The maximum number of terms that will be indexed for a single field in a + document. This limits the amount of memory required for indexing, so that + collections with very large files will not crash the indexing process by + running out of memory. This setting refers to the number of running terms, + not to the number of different terms.

+ Note: this silently truncates large documents, excluding from the + index all terms that occur further in the document. If you know your source + documents are large, be sure to set this value high enough to accommodate + the expected size. If you set it to Integer.MAX_VALUE, then the only limit + is your memory, but you should anticipate an OutOfMemoryError.

+ By default, no more than terms + will be indexed for a field. +

+
+ + Returns the maximum number of terms that will be + indexed for a single field in a document. + + + + + + Determines the minimal number of documents required + before the buffered in-memory documents are flushed as + a new Segment. Large values generally give faster + indexing. + +

When this is set, the writer will flush every + maxBufferedDocs added documents. Pass in + to prevent triggering a flush due + to number of buffered documents. Note that if flushing + by RAM usage is also enabled, then the flush will be + triggered by whichever comes first.

+ +

Disabled by default (writer flushes by RAM usage).

+ +

+ IllegalArgumentException if maxBufferedDocs is + enabled but smaller than 2, or it disables maxBufferedDocs + when ramBufferSize is already disabled + + + +
+ + If we are flushing by doc count (not by RAM usage), and + using LogDocMergePolicy then push maxBufferedDocs down + as its minMergeDocs, to keep backwards compatibility. + + + + Returns the number of buffered added documents that will + trigger a flush if enabled. + + + + + + Determines the amount of RAM that may be used for + buffering added documents and deletions before they are + flushed to the Directory. Generally for faster + indexing performance it's best to flush by RAM usage + instead of document count and use as large a RAM buffer + as you can. + +

When this is set, the writer will flush whenever + buffered documents and deletions use this much RAM. + Pass in to prevent + triggering a flush due to RAM usage. Note that if + flushing by document count is also enabled, then the + flush will be triggered by whichever comes first.

+ +

NOTE: the account of RAM usage for pending + deletions is only approximate. Specifically, if you + delete by Query, Lucene currently has no way to measure + the RAM usage of individual Queries so the accounting + will under-estimate and you should compensate by either + calling commit() periodically yourself, or by using + to flush by count + instead of RAM usage (each buffered delete Query counts + as one). + +

+ NOTE: because IndexWriter uses ints when managing its + internal storage, the absolute maximum value for this setting is somewhat + less than 2048 MB. The precise limit depends on various factors, such as + how large your documents are, how many fields have norms, etc., so it's + best to set this value comfortably under 2048. +

+ +

The default value is .

+ +

+ IllegalArgumentException if ramBufferSize is + enabled but non-positive, or it disables ramBufferSize + when maxBufferedDocs is already disabled + +
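+ A rough tuning sketch, continuing the writer from the constructor example above (the 48 MB figure is only an example):
+     // flush by RAM usage only: disable the document-count trigger
+     writer.setRAMBufferSizeMB(48.0);
+     writer.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);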
+ + Returns the value set by if enabled. + + +

Determines the minimal number of delete terms required before the buffered + in-memory delete terms are applied and flushed. If there are documents + buffered in memory at the time, they are merged and a new segment is + created.

+

Disabled by default (writer flushes by RAM usage).

+ +

+ IllegalArgumentException if maxBufferedDeleteTerms + is enabled but smaller than 1 + + + +
+ + Returns the number of buffered deleted terms that will + trigger a flush if enabled. + + + + + + If non-null, information about merges, deletes and a + message when maxFieldLength is reached will be printed + to this. + + + + Commits all changes to an index and closes all + associated files. Note that this may be a costly + operation, so, try to re-use a single writer instead of + closing and opening a new one. See for + caveats about write caching done by some IO devices. + +

If an Exception is hit during close, eg due to disk + full or some other reason, then both the on-disk index + and the internal state of the IndexWriter instance will + be consistent. However, the close will not be complete + even though part of it (flushing buffered documents) + may have succeeded, so the write lock will still be + held.

+ +

If you can correct the underlying cause (eg free up + some disk space) then you can call close() again. + Failing that, if you want to force the write lock to be + released (dangerous, because you may then lose buffered + docs in the IndexWriter instance) then you can do + something like this:

+ + + try { + writer.close(); + } finally { + if (IndexWriter.isLocked(directory)) { + IndexWriter.unlock(directory); + } + } + + + after which, you must be certain not to use the writer + instance anymore.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Commits all changes to an index and closes all + associated files. Note that this may be a costly + operation, so, try to re-use a single writer instead of + closing and opening a new one. See for + caveats about write caching done by some IO devices. + +

If an Exception is hit during close, eg due to disk + full or some other reason, then both the on-disk index + and the internal state of the IndexWriter instance will + be consistent. However, the close will not be complete + even though part of it (flushing buffered documents) + may have succeeded, so the write lock will still be + held.

+ +

If you can correct the underlying cause (eg free up + some disk space) then you can call close() again. + Failing that, if you want to force the write lock to be + released (dangerous, because you may then lose buffered + docs in the IndexWriter instance) then you can do + something like this:

+ + + try { + writer.close(); + } finally { + if (IndexWriter.isLocked(directory)) { + IndexWriter.unlock(directory); + } + } + + + after which, you must be certain not to use the writer + instance anymore.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Closes the index with or without waiting for currently + running merges to finish. This is only meaningful when + using a MergeScheduler that runs merges in background + threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

NOTE: it is dangerous to always call + close(false), especially when IndexWriter is not open + for very long, because this can result in "merge + starvation" whereby long merges will never have a + chance to finish. This will cause too many segments in + your index over time.

+ +

+ if true, this call will block + until all merges complete; else, it will ask all + running merges to abort, wait until those merges have + finished (which should be at most a few seconds), and + then return. + +
+ + Closes the index with or without waiting for currently + running merges to finish. This is only meaningful when + using a MergeScheduler that runs merges in background + threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

NOTE: it is dangerous to always call + close(false), especially when IndexWriter is not open + for very long, because this can result in "merge + starvation" whereby long merges will never have a + chance to finish. This will cause too many segments in + your index over time.

+ +

+ if true, this call will block + until all merges complete; else, it will ask all + running merges to abort, wait until those merges have + finished (which should be at most a few seconds), and + then return. + +
+ + Tells the docWriter to close its currently open shared + doc stores (stored fields & vectors files). + Return value specifies whether new doc store files are compound or not. + + + + Returns total number of docs in this index, including + docs not yet flushed (still in the RAM buffer), + not counting deletions. + + + + + + Returns total number of docs in this index, including + docs not yet flushed (still in the RAM buffer), and + including deletions. NOTE: buffered deletions + are not counted. If you really need these to be + counted you should call first. + + + + + + The maximum number of terms that will be indexed for a single field in a + document. This limits the amount of memory required for indexing, so that + collections with very large files will not crash the indexing process by + running out of memory.

+ Note that this effectively truncates large documents, excluding from the + index terms that occur further in the document. If you know your source + documents are large, be sure to set this value high enough to accommodate + the expected size. If you set it to Integer.MAX_VALUE, then the only limit + is your memory, but you should anticipate an OutOfMemoryError.

+ By default, no more than 10,000 terms will be indexed for a field. + +

+ + +
+ + Adds a document to this index. If the document contains more than + terms for a given field, the remainder are + discarded. + +

Note that if an Exception is hit (for example disk full) + then the index will be consistent, but this document + may not have been added. Furthermore, it's possible + the index will have one segment in non-compound format + even when using compound files (when a merge has + partially succeeded).

+ +

This method periodically flushes pending documents + to the Directory (see above), and + also periodically triggers segment merges in the index + according to the in use.

+ +

Merges temporarily consume space in the + directory. The amount of space required is up to 1X the + size of all segments being merged, when no + readers/searchers are open against the index, and up to + 2X the size of all segments being merged when + readers/searchers are open against the index (see + for details). The sequence of + primitive merge operations performed is governed by the + merge policy. + +

Note that each term in the document can be no longer + than 16383 characters, otherwise an + IllegalArgumentException will be thrown.

+ +

Note that it's possible to create an invalid Unicode + string in java if a UTF16 surrogate pair is malformed. + In this case, the invalid characters are silently + replaced with the Unicode replacement character + U+FFFD.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
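+ A minimal sketch of adding a document; the field names and values here are only examples:
+     Document doc = new Document();
+     doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
+     doc.add(new Field("body", "some text to index", Field.Store.NO, Field.Index.ANALYZED));
+     writer.addDocument(doc);   // analyzed with the writer's analyzer
+     writer.commit();           // make the change visible to newly opened readers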
+ + Adds a document to this index, using the provided analyzer instead of the + value of . If the document contains more than + terms for a given field, the remainder are + discarded. + +

See for details on + index and IndexWriter state after an Exception, and + flushing/merging temporary free space requirements.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) containing term. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the documents to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) containing any of the + terms. All deletes are flushed at the same time. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ array of terms to identify the documents + to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) matching the provided query. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the query to identify the documents to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) matching any of the provided queries. + All deletes are flushed at the same time. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ array of queries to identify the documents + to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Updates a document by first deleting the document(s) + containing term and then adding the new + document. The delete and then add are atomic as seen + by a reader on the same index (flush may happen only after + the add). + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the document(s) to be + deleted + + the document to be added + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Updates a document by first deleting the document(s) + containing term and then adding the new + document. The delete and then add are atomic as seen + by a reader on the same index (flush may happen only after + the add). + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the document(s) to be + deleted + + the document to be added + + the analyzer to use when analyzing the document + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
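+ A sketch of the delete and update variants described above (the terms and query are placeholders):
+     // delete-then-add, atomic from a reader's point of view
+     writer.updateDocument(new Term("id", "42"), doc);
+     // delete by term or by query
+     writer.deleteDocuments(new Term("id", "17"));
+     writer.deleteDocuments(new TermQuery(new Term("category", "obsolete")));
+     writer.commit();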
+ + If non-null, information about merges will be printed to this. + + + Requests an "optimize" operation on an index, priming the index + for the fastest available search. Traditionally this has meant + merging all segments into a single segment as is done in the + default merge policy, but individual merge policies may implement + optimize in different ways. + +

It is recommended that this method be called upon completion of indexing. In + environments with frequent updates, optimize is best done during low volume times, if at all. + +

+

See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

+ +

Note that optimize requires 2X the index size free + space in your Directory (3X if you're using compound + file format). For example, if your index + size is 10 MB then you need 20 MB free for optimize to + complete (30 MB if you're using compound file format).

+ +

If some but not all readers re-open while an + optimize is underway, this will cause > 2X temporary + space to be consumed as those new readers will then + hold open the partially optimized segments at that + time. It is best not to re-open readers while optimize + is running.

+ +

The actual temporary usage could be much less than + these figures (it depends on many factors).

+ +

In general, once the optimize completes, the total size of the + index will be less than the size of the starting index. + It could be quite a bit smaller (if there were many + pending deletes) or just slightly smaller.

+ +

If an Exception is hit during optimize(), for example + due to disk full, the index will not be corrupt and no + documents will have been lost. However, it may have + been partially optimized (some segments were merged but + not all), and it's possible that one of the segments in + the index will be in non-compound format even when + using compound file format. This will occur when the + Exception is hit during conversion of the segment into + compound format.

+ +

This call will optimize those segments present in + the index when the call started. If other threads are + still adding documents and flushing segments, those + newly created segments will not be optimized unless you + call optimize again.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + +
+ + Optimize the index down to <= maxNumSegments. If + maxNumSegments==1 then this is the same as + . + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ maximum number of segments left + in the index after optimization finishes + +
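+ For example (assuming the free disk space described above is available):
+     writer.optimize();    // merge everything down to a single segment
+     writer.optimize(5);   // or stop once at most 5 segments remain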
+ + Just like , except you can specify + whether the call should block until the optimize + completes. This is only meaningful with a + that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Just like , except you can + specify whether the call should block until the + optimize completes. This is only meaningful with a + that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Returns true if any merges in pendingMerges or + runningMerges are optimization merges. + + + + Just like , except you can + specify whether the call should block until the + operation completes. This is only meaningful with a + that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Expunges all deletes from the index. When an index + has many document deletions (or updates to existing + documents), it's best to either call optimize or + expungeDeletes to remove all unused data in the index + associated with the deleted documents. To see how + many deletions you have pending in your index, call + + This saves disk space and memory usage while + searching. expungeDeletes should be somewhat faster + than optimize since it does not insist on reducing the + index to a single segment (though, this depends on the + ; see .). Note that + this call does not first commit any buffered + documents, so you must do so yourself if necessary. + See also + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Expert: asks the mergePolicy whether any merges are + necessary now and if so, runs the requested merges and + then iterate (test again if merges are needed) until no + more merges are returned by the mergePolicy. + + Explicit calls to maybeMerge() are usually not + necessary. The most common case is when merge policy + parameters have changed. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Expert: the calls this method + to retrieve the next merge requested by the + MergePolicy + + + + Like getNextMerge() except only returns a merge if it's + external. + + + + Close the IndexWriter without committing + any changes that have occurred since the last commit + (or since it was opened, if commit hasn't been called). + This removes any temporary files that had been created, + after which the state of the index will be the same as + it was when commit() was last called or when this + writer was first opened. This also clears a previous + call to . + + IOException if there is a low-level IO error + + + Delete all documents in the index. + +

This method will drop all buffered documents and will + remove all segments from the index. This change will not be + visible until a has been called. This method + can be rolled back using .

+ +

NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).

+ +

NOTE: this method will forcefully abort all merges + in progress. If other threads are running + or any of the addIndexes methods, they + will receive s. +

+
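+ A small sketch of the two ways to discard work described above:
+     writer.deleteAll();   // drop every document (buffered and committed)
+     writer.commit();      // ...and publish the now-empty index
+     // or instead abandon everything done since the last commit:
+     writer.rollback();    // also closes the writer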
+ + Wait for any currently outstanding merges to finish. + +

It is guaranteed that any merges started prior to calling this method + will have completed once this method completes.

+

+
+ + Merges all segments from an array of indexes into this + index. + +

This may be used to parallelize batch indexing. A large document + collection can be broken into sub-collections. Each sub-collection can be + indexed in parallel, on a different thread, process or machine. The + complete index can then be created by merging sub-collection indexes + with this method. + +

NOTE: the index in each Directory must not be + changed (opened by a writer) while this method is + running. This method does not acquire a write lock in + each input Directory, so it is up to the caller to + enforce this. + +

NOTE: while this is running, any attempts to + add or delete documents (with another thread) will be + paused until this method completes. + +

This method is transactional in how Exceptions are + handled: it does not commit a new segments_N file until + all indexes are added. This means if an Exception + occurs (for example disk full), then either no indexes + will have been added or they all will have been.

+ +

Note that this requires temporary free space in the + Directory up to 2X the sum of all input indexes + (including the starting index). If readers/searchers + are open against the starting index, then temporary + free space required will be higher by the size of the + starting index (see for details). +

+ +

Once this completes, the final size of the index + will be less than the sum of all input index sizes + (including the starting index). It could be quite a + bit smaller (if there were many pending deletes) or + just slightly smaller.

+ +

+ This requires this index not be among those to be added. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Merges the provided indexes into this index. +

After this completes, the index is optimized.

+

The provided IndexReaders are not closed.

+ +

NOTE: while this is running, any attempts to + add or delete documents (with another thread) will be + paused until this method completes. + +

See for + details on transactional semantics, temporary free + space required in the Directory, and non-CFS segments + on an Exception.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
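+ A rough sketch of merging prebuilt indexes; dir1/dir2 and the readers are placeholders, and the Directory-based variant is called addIndexesNoOptimize in the 3.0.x Java API (renamed in later releases):
+     writer.addIndexesNoOptimize(new Directory[] { dir1, dir2 });
+     // or, if the sub-indexes are already open as readers:
+     writer.addIndexes(new IndexReader[] { reader1, reader2 });
+     writer.commit();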
+ + + A hook for extending classes to execute operations after pending added and + deleted documents have been flushed to the Directory but before the change + is committed (new segments_N file written). + + + + + A hook for extending classes to execute operations before pending added and + deleted documents are flushed to the Directory. + + + + Expert: prepare for commit. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ + +
+ +

Expert: prepare for commit, specifying + commitUserData Map (String -> String). This does the + first phase of 2-phase commit. This method does all steps + necessary to commit changes since this writer was + opened: flushes pending added and deleted docs, syncs + the index files, writes most of next segments_N file. + After calling this you must call either + to finish the commit, or + to revert the commit and undo all changes + done since the writer was opened.

+ + You can also just call directly + without prepareCommit first in which case that method + will internally call prepareCommit. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ Opaque Map (String->String) + that's recorded into the segments file in the index, + and retrievable by . + Note that when IndexWriter commits itself, during , the + commitUserData is unchanged (just carried over from + the prior commit). If this is null then the previous + commitUserData is kept. Also, the commitUserData will + only "stick" if there are actually changes in the + index to commit. + +
+ +

Commits all pending changes (added & deleted + documents, optimizations, segment merges, added + indexes, etc.) to the index, and syncs all referenced + index files, such that a reader will see the changes + and the index updates will survive an OS or machine + crash or power loss. Note that this does not wait for + any running background merges to finish. This may be a + costly operation, so you should test the cost in your + application and do it only when really necessary.

+ +

Note that this operation calls Directory.sync on + the index files. That call should not return until the + file contents & metadata are on stable storage. For + FSDirectory, this calls the OS's fsync. But, beware: + some hardware devices may in fact cache writes even + during fsync, and return before the bits are actually + on stable storage, to give the appearance of faster + performance. If you have such a device, and it does + not have a battery backup (for example) then on power + loss it may still lose data. Lucene cannot guarantee + consistency on such devices.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ + + + +
+ + Commits all changes to the index, specifying a + commitUserData Map (String -> String). This just + calls (if you didn't + already call it) and then . + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
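+ A sketch of the two-phase commit flow described above; the other resource taking part in the transaction is left abstract:
+     writer.prepareCommit();      // phase 1: flush, sync, write (but do not publish) segments_N
+     try {
+         // ... commit the other participants in the transaction here ...
+         writer.commit();         // phase 2: publish the new segments_N
+     } catch (IOException e) {
+         writer.rollback();       // revert everything since the last commit
+         throw e;
+     }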
+ + Flush all in-memory buffered updates (adds and deletes) + to the Directory. + + if true, we may merge segments (if + deletes or docs were flushed) if necessary + + if false we are allowed to keep + doc stores open to share with the next segment + + whether pending deletes should also + be flushed + + + + Expert: Return the total size of all index files currently cached in memory. + Useful for size management with flushRamDocs() + + + + Expert: Return the number of documents currently + buffered in RAM. + + + + Carefully merges deletes for the segments we just + merged. This is tricky because, although merging will + clear all deletes (compacts the documents), new + deletes may have been flushed to the segments since + the merge was started. This method "carries over" + such new deletes onto the newly merged segment, and + saves the resulting deletes file (incrementing the + delete generation for merge.info). If no deletes were + flushed, no new deletes file is saved. + + + + Merges the indicated segments, replacing them in the stack with a + single segment. + + + + Hook that's called when the specified merge is complete. + + + Checks whether this merge involves any segments + already participating in a merge. If not, this merge + is "registered", meaning we record that its segments + are now participating in a merge, and true is + returned. Else (the merge conflicts) false is + returned. + + + + Does initial setup for a merge, which is fast but holds + the synchronized lock on IndexWriter instance. + + + + Does finishing for a merge, which is fast but holds + the synchronized lock on IndexWriter instance. + + + + Does the actual (time-consuming) work of the merge, + but without holding synchronized lock on IndexWriter + instance + + + + Blocks until all files in syncing are sync'd + + + Walk through all files referenced by the current + segmentInfos and ask the Directory to sync each file, + if it wasn't already. If that succeeds, then we + prepare a new segments_N file but do not fully commit + it. + + + + Returns true iff the index in the named directory is + currently locked. + + the directory to check for a lock + + IOException if there is a low-level IO error + + + Forcibly unlocks the index in the named directory.

+ Caution: this should only be used by failure recovery code, + when it is known that no other process nor thread is in fact + currently accessing this index. +

+
+ + Casts current mergePolicy to LogMergePolicy, and throws + an exception if the mergePolicy is not a LogMergePolicy. + + + +

Gets or sets the current setting of whether newly flushed + segments will use the compound file format. Note that + this just returns the value previously set with + setUseCompoundFile(boolean), or the default value + (true). You cannot use this to query the status of + previously flushed segments.

+ +

Note that this method is a convenience method: it + just calls mergePolicy.getUseCompoundFile as long as + mergePolicy is an instance of . + Otherwise an IllegalArgumentException is thrown.

+ +

+
+ + Expert: Return the Similarity implementation used by this IndexWriter. + +

This defaults to the current value of . +

+
+ + Expert: Gets or sets the interval between indexed terms. Large values cause less + memory to be used by IndexReader, but slow random-access to terms. Small + values cause more memory to be used by an IndexReader, and speed + random-access to terms. + + This parameter determines the amount of computation required per query + term, regardless of the number of documents that contain that term. In + particular, it is the maximum number of other terms that must be + scanned before a term is located and its frequency and position information + may be processed. In a large index with user-entered query terms, query + processing time is likely to be dominated not by term lookup but rather + by the processing of frequency and positional data. In a small index + or when many uncommon query terms are generated (e.g., by wildcard + queries) term lookup may become a dominant cost. + + In particular, numUniqueTerms/interval terms are read into + memory by an IndexReader, and, on average, interval/2 terms + must be scanned for each random term access. + + + + + + + Expert: returns the current MergePolicy in use by this writer. + + + + + Expert: returns the current MergePolicy in use by this + writer. + + + + + +

Gets or sets the largest segment (measured by document + count) that may be merged with other segments. +

+ Small values (e.g., less than 10,000) are best for + interactive indexing, as this limits the length of + pauses while indexing to a few seconds. Larger values + are best for batched indexing and speedier + searches. +

+ The default value is . +

+ Note that this method is a convenience method: it + just calls mergePolicy.getMaxMergeDocs as long as + mergePolicy is an instance of . + Otherwise an IllegalArgumentException is thrown.

+ + The default merge policy () + also allows you to set this + limit by net size (in MB) of the segment, using + .

+

+ + +
+ + Gets or sets the termsIndexDivisor passed to any readers that + IndexWriter opens, for example when applying deletes + or creating a near-real-time reader in + . Default value is + . + + + Gets or sets the number of segments that are merged at + once and also controls the total number of segments + allowed to accumulate in the index. +

Determines how often segment indices are merged by addDocument(). With + smaller values, less RAM is used while indexing, and searches on + unoptimized indices are faster, but indexing speed is slower. With larger + values, more RAM is used during indexing, and while searches on unoptimized + indices are slower, indexing is faster. Thus larger values (> 10) are best + for batch index creation, and smaller values (< 10) for indices that are + interactively maintained. + +

Note that this method is a convenience method: it + just calls mergePolicy.setMergeFactor as long as + mergePolicy is an instance of . + Otherwise an IllegalArgumentException is thrown.

+ +

This must never be less than 2. The default value is 10. +

+
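+ For example, using the convenience setters mentioned above (the numbers are illustrative only):
+     // batch indexing: fewer, larger merges
+     writer.setMergeFactor(30);
+     // interactive indexing: shorter pauses, smaller segments
+     // writer.setMergeFactor(5);
+     // writer.setMaxMergeDocs(10000);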
+ + Gets or sets the default info stream. + If non-null, this will be the default infoStream used + by a newly instantiated IndexWriter. + + + + + + Returns the current infoStream in use by this writer. + + + + + Returns true if verbosing is enabled (i.e., infoStream != null). + + + Gets or sets allowed timeout when acquiring the write lock. + + + Gets or sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in + milliseconds). + + + + Returns the Directory used by this index. + + + Returns the analyzer used by this index. + + + Gets or sets the merged segment warmer. See + . + + + + Holds shared SegmentReader instances. IndexWriter uses + SegmentReaders for 1) applying deletes, 2) doing + merges, 3) handing out a real-time reader. This pool + reuses instances of the SegmentReaders in all these + places if it is in "near real-time mode" (getReader() + has been called on this instance). + + + + Forcefully clear changes for the specifed segments, + and remove from the pool. This is called on succesful merge. + + + + Release the segment reader (i.e. decRef it and close if there + are no more references. + + + + IOException + + + Release the segment reader (i.e. decRef it and close if there + are no more references. + + + + + IOException + + + Remove all our references to readers, and commits + any pending changes. + + + + Commit all segment reader in the pool. + IOException + + + Returns a ref to a clone. NOTE: this clone is not + enrolled in the pool, so you should simply close() + it when you're done (ie, do not call release()). + + + + Obtain a SegmentReader from the readerPool. The reader + must be returned by calling + + + + + + + + IOException + + + Obtain a SegmentReader from the readerPool. The reader + must be returned by calling + + + + + + + + + + + + + IOException + + + Specifies maximum field length (in number of tokens/terms) in constructors. + overrides the value set by + the constructor. + + + + Private type-safe-enum-pattern constructor. + + + instance name + + maximum field length + + + + Public constructor to allow users to specify the maximum field size limit. + + + The maximum field length + + + + Sets the maximum field length to . + + + Sets the maximum field length to + + + + + + If has been called (ie, this writer + is in near real-time mode), then after a merge + completes, this class can be invoked to warm the + reader on the newly merged segment, before the merge + commits. This is not required for near real-time + search, but will reduce search latency on opening a + new near real-time reader after a merge completes. + +

NOTE: This API is experimental and might + change in incompatible ways in the next release.

+ +

NOTE: warm is called before any deletes have + been carried over to the merged segment. +

+
+ + Add a new thread + + + Abort (called after hitting AbortException) + + + Flush a new segment + + + Close doc stores + + + Attempt to free RAM, returning true if any RAM was + freed + + + + This implementation that + keeps only the most recent commit and immediately removes + all prior commits after a new commit is done. This is + the default deletion policy. + + + + Deletes all commits except the most recent one. + + + Deletes all commits except the most recent one. + + + This is a that measures size of a + segment as the total byte size of the segment's files. + + + +

This class implements a that tries + to merge segments into levels of exponentially + increasing size, where each level has fewer segments than + the value of the merge factor. Whenever extra segments + (beyond the merge factor upper bound) are encountered, + all segments within the level are merged. You can get or + set the merge factor using and + respectively.

+ +

This class is abstract and requires a subclass to + define the method which specifies how a + segment's size is determined. + is one subclass that measures size by document count in + the segment. is another + subclass that measures size as the total byte size of the + file(s) for the segment.

+

+
+ +

Expert: a MergePolicy determines the sequence of + primitive merge operations to be used for overall merge + and optimize operations.

+ +

Whenever the segments in an index have been altered by + , either the addition of a newly + flushed segment, addition of many segments from + addIndexes* calls, or a previous merge that may now need + to cascade, invokes + to give the MergePolicy a chance to pick + merges that are now required. This method returns a + instance describing the set of + merges that should be done, or null if no merges are + necessary. When IndexWriter.optimize is called, it calls + and the MergePolicy should + then return the necessary merges.

+ +

Note that the policy can return more than one merge at + a time. In this case, if the writer is using + , the merges will be run + sequentially but if it is using + they will be run concurrently.

+ +

The default MergePolicy is + .

+ +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+ +

NOTE: This class typically requires access to + package-private APIs (e.g. SegmentInfos) to do its job; + if you implement your own MergePolicy, you'll need to put + it in package Lucene.Net.Index in order to use + these APIs. +

+
+ + Determine what set of merge operations are now necessary on the index. + calls this whenever there is a change to the segments. + This call is always synchronized on the instance so + only one thread at a time will call this method. + + + the total set of segments in the index + + + + Determine what set of merge operations is necessary in order to optimize + the index. calls this when its + method is called. This call is always + synchronized on the instance so only one thread at a + time will call this method. + + + the total set of segments in the index + + requested maximum number of segments in the index (currently this + is always 1) + + contains the specific SegmentInfo instances that must be merged + away. This may be a subset of all SegmentInfos. + + + + Determine what set of merge operations is necessary in order to expunge all + deletes from the index. + + + the total set of segments in the index + + + + Release all resources for the policy. + + + Release all resources for the policy. + + + Returns true if a newly flushed (not from merge) + segment should use the compound file format. + + + + Returns true if the doc store files should use the + compound file format. + + + + OneMerge provides the information necessary to perform + an individual primitive merge operation, resulting in + a single new segment. The merge spec includes the + subset of segments to be merged as well as whether the + new segment should use the compound file format. + + + + Record that an exception occurred while executing + this merge + + + + Retrieve previous exception set by + . + + + + Mark this merge as aborted. If this is called + before the merge is committed then the merge will + not be committed. + + + + Returns true if this merge was aborted. + + + A MergeSpecification instance provides the information + necessary to perform multiple merges. It simply + contains a list of instances. + + + + The subset of segments to be included in the primitive merge. + + + Exception thrown if there are any problems while + executing a merge. + + + + Returns the of the index that hit + the exception. + + + + Defines the allowed range of log(size) for each + level. A level is computed by taking the max segment + log size, minus LEVEL_LOG_SPAN, and finding all + segments falling within that range. + + + + Default merge factor, which is how many segments are + merged at a time + + + + Default maximum segment size. A segment of this size + + + + + Default noCFSRatio. If a merge's size is >= 10% of + the index, then we disable compound file for it. + See + + + + Gets or sets whether compound file format should be used for + newly flushed and newly merged segments. + + + + Sets whether compound file format should be used for + newly flushed and newly merged doc store + segment files (term vectors and stored fields). + + + + Returns true if newly flushed and newly merge doc + store segment files (term vectors and stored fields) + + + + + + Returns true if this single info is optimized (has no + pending norms or deletes, is in the same dir as the + writer, and matches the current compound file setting + + + + Returns the merges necessary to optimize the index. + This merge policy defines "optimized" to mean only one + segment in the index, where that segment has no + deletions pending nor separate norms, and it is in + compound file format if the current useCompoundFile + setting is true. This method returns multiple merges + (mergeFactor at a time) so the + in use may make use of concurrency. 
+ + + + Finds merges necessary to expunge all deletes from the + index. We simply merge adjacent segments that have + deletes, up to mergeFactor at a time. + + + + Checks if any merges are now necessary and returns a + if so. A merge + is necessary when there are more than + segments at a given level. When + multiple levels have too many segments, this method + will return multiple merges, allowing the + to use concurrency. + + + + Gets or sets how often segment indices are merged by + addDocument(). With smaller values, less RAM is used + while indexing, and searches on unoptimized indices are + faster, but indexing speed is slower. With larger + values, more RAM is used during indexing, and while + searches on unoptimized indices are slower, indexing is + faster. Thus larger values (> 10) are best for batch + index creation, and smaller values (< 10) for indices + that are interactively maintained. + + + + Gets or sets whether the segment size should be calibrated by + the number of deletes when choosing segments for merge. + + + + + Gets or sets the largest segment (measured by document + count) that may be merged with other segments. +

Determines the largest segment (measured by + document count) that may be merged with other segments. + Small values (e.g., less than 10,000) are best for + interactive indexing, as this limits the length of + pauses while indexing to a few seconds. Larger values + are best for batched indexing and speedier + searches.

+ +

The default value is .

+ +

The default merge policy () + also allows you to set this + limit by net size (in MB) of the segment, using + .

+

+
+ + + + + + Default maximum segment size. A segment of this size + + + + +

Gets or sets the largest segment (measured by total + byte size of the segment's files, in MB) that may be + merged with other segments. Small values (e.g., less + than 50 MB) are best for interactive indexing, as this + limits the length of pauses while indexing to a few + seconds. Larger values are best for batched indexing + and speedier searches.

+ +

Note that is also + used to check whether a segment is too large for + merging (it's either or).

+

+
+ + Gets or sets the minimum size for the lowest level segments. + Any segments below this size are considered to be on + the same level (even if they vary drastically in size) + and will be merged whenever there are mergeFactor of + them. This effectively truncates the "long tail" of + small segments that would otherwise be created into a + single level. If you set this too large, it could + greatly increase the merging cost during indexing (if + you flush many small segments). + + + + This is a that measures size of a + segment as the number of documents (not taking deletions + into account). + + + + + + + + Gets or sets the minimum size for the lowest level segments. + Any segments below this size are considered to be on + the same level (even if they vary drastically in size) + and will be merged whenever there are mergeFactor of + them. This effectively truncates the "long tail" of + small segments that would otherwise be created into a + single level. If you set this too large, it could + greatly increase the merging cost during indexing (if + you flush many small segments). + + + + Remaps docIDs after a merge has completed, where the + merged segments had at least one deletion. This is used + to renumber the buffered deletes in IndexWriter when a + merge of segments with deletions commits. + + + + Allows you to iterate over the for multiple s as + a single . + + + + + Creates a new MultipleTermPositions instance. + + + + + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + + false + + + A PriorityQueue maintains a partial ordering of its elements such that the + least element can always be found in constant time. Put()'s and pop()'s + require log(size) time. + +

NOTE: This class pre-allocates a full array of + length maxSize+1, in . + +

+
+ + Determines the ordering of objects in this priority queue. Subclasses + must define this one method. + + + + Subclass constructors must call this. + + + + Adds an Object to a PriorityQueue in log(size) time. If one tries to add + more objects than maxSize from initialize an + is thrown. + + the new 'top' element in the queue. + + + + Adds an Object to a PriorityQueue in log(size) time. + It returns the object (if any) that was + dropped off the heap because it was full. This can be + the given parameter (in case it is smaller than the + full heap's minimum, and couldn't be added), or another + object that was previously the smallest value in the + heap and now has been replaced by a larger one, or null + if the queue wasn't yet full with maxSize elements. + + + + Returns the least element of the PriorityQueue in constant time. + + + + Removes and returns the least element of the + PriorityQueue in log(size) time. + + + + Should be called when the Object at top changes values. + Still log(n) worst case, but it's at least twice as fast to + + pq.top().change(); + pq.updateTop(); + + instead of + + o = pq.pop(); + o.change(); + pq.push(o); + + + the new 'top' element. + + + Returns the number of elements currently stored in the PriorityQueue. + + + Removes all entries from the PriorityQueue. + + + This method can be overridden by extending classes to return a sentinel + object which will be used by to fill the queue, so + that the code which uses that queue can always assume it's full and only + change the top without attempting to insert any new object.
+ + Those sentinel values should always compare worse than any non-sentinel + value (i.e., should always favor the + non-sentinel values).
+ + By default, this method returns false, which means the queue will not be + filled with sentinel values. Otherwise, the value returned will be used to + pre-populate the queue. Adds sentinel values to the queue.
+ + If this method is extended to return a non-null value, then the following + usage pattern is recommended: + + + // extends getSentinelObject() to return a non-null value. + PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits); + // save the 'top' element, which is guaranteed to not be null. + MyObject pqTop = pq.top(); + <...> + // now in order to add a new element, which is 'better' than top (after + // you've verified it is better), it is as simple as: + pqTop.change(). + pqTop = pq.updateTop(); + + + NOTE: if this method returns a non-null value, it will be called by + times, relying on a new object to + be returned and will not check if it's null again. Therefore you should + ensure any call to this method creates a new instance and behaves + consistently, e.g., it cannot return null if it previously returned + non-null. + +
+ the sentinel object to use to pre-populate the queue, or null if sentinel objects are not supported. +
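+ A minimal subclass sketch, assuming the generic PriorityQueue with the protected lessThan/initialize members described above (the element type and class name are hypothetical):
+     final class LongQueue extends PriorityQueue<Long> {
+         LongQueue(int maxSize) { initialize(maxSize); }
+         @Override
+         protected boolean lessThan(Long a, Long b) { return a < b; }  // least value stays on top
+     }
+ 
+     LongQueue pq = new LongQueue(10);
+     pq.insertWithOverflow(42L);    // never grows beyond maxSize
+     Long top = pq.top();           // constant-time peek at the least stored value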
+ + An IndexReader which reads multiple indexes, appending + their content. + + + +

Construct a MultiReader aggregating the named set of (sub)readers. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the subreaders.

+

Note that all subreaders are closed if this MultiReader is closed.

+

+ set of (sub)readers + + IOException +
+ +

Construct a MultiReader aggregating the named set of (sub)readers. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the subreaders.

+

+ indicates whether the subreaders should be closed + when this MultiReader is closed + + set of (sub)readers + + IOException +
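+ For example (dir1/dir2 are placeholder directories; the boolean read-only open is the 3.0-style API):
+     IndexReader r1 = IndexReader.open(dir1, true);   // read-only
+     IndexReader r2 = IndexReader.open(dir2, true);
+     // searches see the union of both indexes; closing mr also closes r1 and r2
+     MultiReader mr = new MultiReader(new IndexReader[] { r1, r2 });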
+ + Tries to reopen the subreaders. +
+ If one or more subreaders could be re-opened (i. e. subReader.reopen() + returned a new instance != subReader), then a new MultiReader instance + is returned, otherwise this instance is returned. +

+ A re-opened instance might share one or more subreaders with the old + instance. Index modification operations result in undefined behavior + when performed before the old instance is closed. + (see ). +

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. + +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Clones the subreaders. + (see ). +
+

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. +

+
+ + If clone is true then we clone each of the subreaders + + + New IndexReader, or same one (this) if + reopen/clone is not necessary + + CorruptIndexException + IOException + + + Checks recursively if all subreaders are up to date. + + + Not implemented. + UnsupportedOperationException + + + Writes norms. Each thread X field accumulates the norms + for the doc/fields it saw, then the flush method below + merges all of these together into a single _X.nrm file. + + + + Produce _X.nrm if any document had a field with norms + not disabled + + + + Taps into DocInverter, as an InvertedDocEndConsumer, + which is called at the end of inverting each field. We + just look at the length for the field (docState.length) + and record the norm. + + + + An IndexReader which reads multiple, parallel indexes. Each index added + must have the same number of documents, but typically each contains + different fields. Each document contains the union of the fields of all + documents with the same document number. When searching, matches for a + query term are from the first index added that has the field. + +

This is useful, e.g., with collections that have large fields which + change rarely and small fields that change more frequently. The smaller + fields may be re-indexed in a new index and both indexes may be searched + together. + +

Warning: It is up to you to make sure all indexes + are created and modified the same way. For example, if you add + documents to one index, you need to add the same documents in the + same order to the other indexes. Failure to do so will result in + undefined behavior. +

+
+ + Construct a ParallelReader. +

Note that all subreaders are closed if this ParallelReader is closed.

+

+
+ + Construct a ParallelReader. + indicates whether the subreaders should be closed + when this ParallelReader is closed + + + + Add an IndexReader. + IOException if there is a low-level IO error + + + Add an IndexReader whose stored fields will not be returned. This can + accelerate search when stored fields are only needed from a subset of + the IndexReaders. + + + IllegalArgumentException if not all indexes contain the same number + of documents + + IllegalArgumentException if not all indexes have the same value + of + + IOException if there is a low-level IO error + + + Tries to reopen the subreaders.
+ If one or more subreaders could be re-opened (i. e. subReader.reopen() + returned a new instance != subReader), then a new ParallelReader instance + is returned, otherwise this instance is returned. +

+ A re-opened instance might share one or more subreaders with the old + instance. Index modification operations result in undefined behavior + when performed before the old instance is closed. + (see ). +

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. + +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
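+ A small usage sketch (mainDir/smallDir are placeholders; both indexes must contain the same documents in the same order):
+     ParallelReader pr = new ParallelReader();        // closes its subreaders by default
+     pr.add(IndexReader.open(mainDir, true));         // large, rarely re-indexed fields
+     pr.add(IndexReader.open(smallDir, true));        // small, frequently re-indexed fields
+     // document N of pr combines the fields of document N from both subreaders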
+ + Checks recursively if all subreaders are up to date. + + + Checks recursively if all subindexes are optimized + + + Not implemented. + UnsupportedOperationException + + + A Payload is metadata that can be stored together with each occurrence + of a term. This metadata is stored inline in the posting list of the + specific term. +

+ To store payloads in the index a has to be used that + produces payload data. +

+ Use and + to retrieve the payloads from the index.
+ +

+
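+ For example (the byte values are arbitrary):
+     byte[] bytes = new byte[] { 10, 20, 30, 40 };
+     Payload payload = new Payload(bytes, 1, 2);   // refers to bytes[1..2]; no copy is made
+     byte first = payload.byteAt(0);               // 20
+     byte[] copy = payload.toByteArray();          // defensive copy of the two payload bytes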
+ + the byte array containing the payload data + + + the offset within the byte array + + + the length of the payload data + + + Creates an empty payload and does not allocate a byte array. + + + Creates a new payload with the given array as data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + the data of this payload + + + + Creates a new payload with the given array as data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + the data of this payload + + the offset in the data byte array + + the length of the data + + + + Sets this payload's data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + + Gets or sets a reference to the underlying byte array + that holds this payload's data. Data is not copied. + + + + Gets or sets a reference to the underlying byte array + that holds this payload's data. Data is not copied. + + + + Returns the byte at the given index. + + + Allocates a new byte array, copies the payload data into it and returns it. + + + Copies the payload data to a byte array. + + + the target byte array + + the offset in the target byte array + + + + Clones this payload by creating a copy of the underlying + byte array. + + + + Returns the offset in the underlying byte array + + + Returns the length of the payload data. + + + For each Field, store position by position information. It ignores frequency information

+ This is not thread-safe. +

+
+ + A Map of Integer and TVPositionInfo + + + Callback for the TermVectorReader. + + + + + + + + + + + Callback mechanism used by the TermVectorReader + The field being read + + The number of terms in the vector + + Whether offsets are available + + Whether positions are available + + + + Never ignores positions. This mapper doesn't make much sense unless there are positions + false + + + Get the mapping between fields and terms, sorted by the comparator + + + A map between field names and a Map. The sub-Map key is the position as the integer, the value is <see cref="Lucene.Net.Index.PositionBasedTermVectorMapper.TVPositionInfo" />. + + + Container for a term at a position + + + + The position of the term + + + + Note, there may be multiple terms at the same position + A List of Strings + + + + Parallel list (to ) of TermVectorOffsetInfo objects. + There may be multiple entries since there may be multiple terms at a position + A List of TermVectorOffsetInfo objects, if offsets are store. + + + + $Id + +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Clones the norm bytes. May be overridden by subclasses. New and experimental. + Byte array to clone + + New BitVector + + + + Clones the deleteDocs BitVector. May be overridden by subclasses. New and experimental. + BitVector to clone + + New BitVector + + + + + + + + Read norms into a pre-allocated array. + + + Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. + TermVectorsReader + + + + Return a term frequency vector for the specified document and field. The + vector returned contains term numbers and frequencies for all terms in + the specified field of this document, if the field had storeTermVector + flag set. If the flag was not set, the method returns null. + + IOException + + + Return an array of term frequency vectors for the specified document. + The array contains a vector for each vectorized field in the document. + Each vector vector contains term numbers and frequencies for all terms + in a given vectorized field. + If no such fields existed, the method returns null. + + IOException + + + Returns the directory this index resides in. + + + Lotsa tests did hacks like:
+ SegmentReader reader = (SegmentReader) IndexReader.open(dir);
+ They broke. This method serves as a hack to keep those hacks working; we do it with R/W access for the tests (backwards compatibility). +
+
+ + Return the name of the segment this reader is reading. + + + Return the SegmentInfo of the segment this reader is reading. + + + Sets the initial value + + + Java's builtin ThreadLocal has a serious flaw: + it can take an arbitrarily long amount of time to + dereference the things you had stored in it, even once the + ThreadLocal instance itself is no longer referenced. + This is because there is single, master map stored for + each thread, which all ThreadLocals share, and that + master map only periodically purges "stale" entries. + + While not technically a memory leak, because eventually + the memory will be reclaimed, it can take a long time + and you can easily hit OutOfMemoryError because from the + GC's standpoint the stale entries are not reclaimaible. + + This class works around that, by only enrolling + WeakReference values into the ThreadLocal, and + separately holding a hard reference to each stored + value. When you call , these hard + references are cleared and then GC is freely able to + reclaim space by objects stored in it. + + + + + Byte[] referencing is used because a new norm object needs + to be created for each clone, and the byte array is all + that is needed for sharing between cloned readers. The + current norm referencing is for sharing between readers + whereas the byte[] referencing is for copy on write which + is independent of reader references (i.e. incRef, decRef). + + + + Used by DocumentsWriter to implemented a StringReader + that can be reset to a new string; we use this when + tokenizing the string value from a Field. + + + + Information about a segment such as it's name, directory, and files related + to the segment. + + *

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + Copy everything from src SegmentInfo into our instance. + + + Construct a new SegmentInfo instance by reading a + previously saved SegmentInfo from input. + + + directory to load from + + format of the segments info file + + input handle to read segment info from + + + + Returns total size in bytes of all of files used by + this segment. + + + + Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX). + + + the field index to check + + + + Returns true if any fields in this segment have separate norms. + + + Increment the generation count for the norms file for + this field. + + + field whose norm file will be rewritten + + + + Get the file name for the norms file for this field. + + + field index + + + + Returns true if this segment is stored as a compound + file; else, false. + + + + Returns true if this segment is stored as a compound + file; else, false. + + + + Save this segment's info. + + + Used for debugging + + + We consider another SegmentInfo instance equal if it + has the same dir and same name. + + + + The SegmentMerger class combines two or more Segments, represented by an IndexReader (, + into a single Segment. After adding the appropriate readers, call the merge method to combine the + segments. +

+ If the compoundFile flag is set, then the segments will be merged into a compound file. + + +

+ + + + +
+ + Maximum number of contiguous documents to bulk-copy + when merging stored fields + + + + norms header placeholder + + + This ctor used only by test code. + + + The Directory to merge the other segments into + + The name of the new segment + + + + Add an IndexReader to the collection of readers that are to be merged + + + + + + The index of the reader to return + + The ith reader to be merged + + + + Merges the readers specified by the method into the directory passed to the constructor + The number of documents that were merged + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Merges the readers specified by the method + into the directory passed to the constructor. + + if false, we will not merge the + stored fields nor vectors files + + The number of documents that were merged + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + close all IndexReaders that have been added. + Should not be called before merge(). + + IOException + + + + The number of documents in all of the readers + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Merge the TermVectors from each of the segments into the new one. + IOException + + + Process postings from multiple segments all positioned on the + same term. Writes out merged entries into freqOutput and + the proxOutput streams. + + + array of segments + + number of cells in the array actually occupied + + number of documents across all segments where this term was found + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Records the fact that roughly units amount of work + have been done since this method was last called. + When adding time-consuming code into SegmentMerger, + you should test different values for units to ensure + that the time in between calls to merge.checkAborted + is up to ~ 1 second. + + + + Increments the enumeration to the next element. True if one exists. + + + Optimized scan, without allocating new terms. + Return number of invocations to next(). + + + + Returns the previous Term enumerated. Initially null. + + + Returns the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Sets the argument to the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Returns the docFreq from the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Closes the enumeration to further activity, freeing resources. + + + Returns the current Term in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Called by super.skipTo(). + + + Provides access to stored term vector of + a document field. The vector consists of the name of the field, an array of the terms tha occur in the field of the + and a parallel array of frequencies. Thus, getTermFrequencies()[5] corresponds with the + frequency of getTerms()[5], assuming there are at least 5 terms in the Document. + + + + An Array of term texts in ascending order. + + + + Array of term frequencies. Locations of the array correspond one to one + to the terms in the array obtained from getTerms + method. Each location in the array contains the number of times this + term occurs in the document or the document field. 
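+ To make the parallel-arrays contract above concrete, here is a minimal C# sketch that dumps a stored term vector. The reader variable, the "body" field name and the exact member casing are assumptions from the Lucene.Net 2.9-era API, not guaranteed by this file:
+
+     // Hypothetical sketch: print each term and its frequency for one document's "body" field.
+     TermFreqVector vector = reader.GetTermFreqVector(docId, "body");
+     if (vector != null)   // null when the field was not indexed with a term vector
+     {
+         string[] terms = vector.GetTerms();           // term texts in ascending order
+         int[] freqs = vector.GetTermFrequencies();    // parallel array of frequencies
+         for (int i = 0; i < terms.Length; i++)
+         {
+             Console.WriteLine("{0}: {1}", terms[i], freqs[i]);
+         }
+     }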
+ + + + Return an index in the term numbers array returned from + getTerms at which the term with the specified + term appears. If this term does not appear in the array, + return -1. + + + + Just like indexOf(int) but searches for a number of terms + at the same time. Returns an array that has the same size as the number + of terms searched for, each slot containing the result of searching for + that term number. + + + array containing terms to look for + + index in the array where the list of terms starts + + the number of terms in the list + + + + The name. + The name of the field this vector is associated with. + + + The number of terms in the term vector. + + + + The number of the field this vector is associated with + + + Extends TermFreqVector to provide additional information about + positions in which each of the terms is found. A TermPositionVector not necessarily + contains both positions and offsets, but at least one of these arrays exists. + + + + Returns an array of positions in which the term is found. + Terms are identified by the index at which its number appears in the + term String array obtained from the indexOf method. + May return null if positions have not been stored. + + + + Returns an array of TermVectorOffsetInfo in which the term is found. + May return null if offsets have not been stored. + + + + + + The position in the array to get the offsets from + + An array of TermVectorOffsetInfo objects or the empty list + + + + Returns an array of TermVectorOffsetInfo in which the term is found. + + + The position in the array to get the offsets from + + An array of TermVectorOffsetInfo objects or the empty list + + + + + + Returns an array of positions in which the term is found. + Terms are identified by the index at which its number appears in the + term String array obtained from the indexOf method. + + + + A that simply does each merge + sequentially, using the current thread. + + + + Just do the merges in sequence. We do this + "synchronized" so that even if the application is using + multiple threads, only one merge may run at a time. + + + + A that wraps around any other + and adds the ability to hold and + later release a single "snapshot" of an index. While + the snapshot is held, the will not + remove any files associated with it even if the index is + otherwise being actively, arbitrarily changed. Because + we wrap another arbitrary , this + gives you the freedom to continue using whatever + you would normally want to use with your + index. Note that you can re-use a single instance of + SnapshotDeletionPolicy across multiple writers as long + as they are against the same index Directory. Any + snapshot held when a writer is closed will "survive" + when the next writer is opened. + +

WARNING: This API is new and experimental and may suddenly change.
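+ A minimal C# sketch of the hold-and-copy backup pattern described above; the member names (Snapshot, Release, GetFileNames) and the IndexWriter constructor are assumptions from the Lucene 2.9-era API and may differ in this port:
+
+     // Hypothetical sketch: hold a commit point while its files are copied to a backup location.
+     var policy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
+     var writer = new IndexWriter(dir, analyzer, policy, IndexWriter.MaxFieldLength.UNLIMITED);
+
+     IndexCommit commit = (IndexCommit) policy.Snapshot();   // files of this commit will not be deleted
+     try
+     {
+         foreach (string fileName in commit.GetFileNames())
+         {
+             CopyToBackup(dir, fileName);                    // hypothetical helper that copies one index file
+         }
+     }
+     finally
+     {
+         policy.Release();                                   // the held files may now be deleted again
+     }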

+

+
+ + Take a snapshot of the most recent commit to the + index. You must call release() to free this snapshot. + Note that while the snapshot is held, the files it + references will not be deleted, which will consume + additional disk space in your index. If you take a + snapshot at a particularly bad time (say just before + you call optimize()) then in the worst case this could + consume an extra 1X of your total index size, until + you release the snapshot. + + + + Release the currently held snapshot. + + + Store a sorted collection of s. Collects all term information + into a single, SortedSet. +
+ NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not + know what Fields they correlate with. +
+ This is not thread-safe +
+
+ + Stand-in name for the field in . + + + + A Comparator for sorting s + + + + + The term to map + + The frequency of the term + + Offset information, may be null + + Position information, may be null + + + + The TermVectorEntrySet. A SortedSet of objects. Sort is by the comparator passed into the constructor. +
+ This set will be empty until after the mapping process takes place. + +
+ The SortedSet of <see cref="TermVectorEntry" />. +
+ + This exception is thrown when an + tries to make changes to the index (via + , + or ) + but changes have already been committed to the index + since this reader was instantiated. When this happens + you must open a new reader on the current index to make + the changes. + + + + This is a DocFieldConsumer that writes stored fields. + + + Fills in any hole in the docIDs + + + A Term represents a word from text. This is the unit of search. It is + composed of two elements, the text of the word, as a string, and the name of + the field that the text occured in, an interned string. + Note that terms may represent more than words from text fields, but also + things like dates, email addresses, urls, etc. + + + + Constructs a Term with the given field and text. +

Note that a null field or null text value results in undefined + behavior for most Lucene APIs that accept a Term parameter. +
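+ A short C# sketch of creating and reusing Terms, assuming the constructor above plus the CreateTerm and CompareTo members documented below:
+
+     // Hypothetical sketch: build two terms for the "body" field and compare their sort order.
+     Term first = new Term("body", "lucene");
+     Term second = first.CreateTerm("search");   // reuses the interned field name of 'first'
+     int order = first.CompareTo(second);        // negative, because "lucene" sorts before "search"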

+
+ + Constructs a Term with the given field and empty text. + This serves two purposes: 1) reuse of a Term with the same field. + 2) pattern for a query. + + + + + + + Optimized construction of new Terms by reusing same field as this Term + - avoids field.intern() overhead + + The text of the new term (field is implicitly same as this Term instance) + + A new Term + + + + Compares two terms, returning a negative integer if this + term belongs before the argument, zero if this term is equal to the + argument, and a positive integer if this term belongs after the argument. + The ordering of terms is first by field, then by text. + + + + Returns the field of this term, an interned string. The field indicates + the part of a document which this term came from. + + + + Returns the text of this term. In the case of words, this is simply the + text of the word. In the case of dates and other types, this is an + encoding of the object as a string. + + + + Call this if the IndexInput passed to + stores terms in the "modified UTF8" (pre LUCENE-510) + format. + + + + A TermInfo is the record of information stored for a term. + + + The number of documents which contain the term. + + + This stores a monotonically increasing set of <Term, TermInfo> pairs in a + Directory. Pairs are accessed either by Term or by ordinal position the + set. + + + + Returns the number of term/value pairs in the set. + + + Returns the offset of the greatest index entry which is less than or equal to term. + + + Returns the TermInfo for a Term in the set, or null. + + + Returns the TermInfo for a Term in the set, or null. + + + Returns the position of a Term in the set or -1. + + + Returns an enumeration of all the Terms and TermInfos in the set. + + + Returns an enumeration of terms starting at or after the named term. + + + Per-thread resources managed by ThreadLocal + + + This stores a monotonically increasing set of <Term, TermInfo> pairs in a + Directory. A TermInfos can be written once, in order. + + + + The file format version, a negative number. + + + Expert: The fraction of terms in the "dictionary" which should be stored + in RAM. Smaller values use more memory, but make searching slightly + faster, while larger values use less memory and make searching slightly + slower. Searching is typically not dominated by dictionary lookup, so + tweaking this is rarely useful. + + + + Expert: The fraction of entries stored in skip tables, + used to accellerate . Larger values result in + smaller indexes, greater acceleration, but fewer accelerable cases, while + smaller values result in bigger indexes, less acceleration and more + accelerable cases. More detailed experiments would be useful here. + + + + Expert: The maximum number of skip levels. Smaller values result in + slightly smaller indexes, but slower skipping in big posting lists. + + + + Adds a new <fieldNumber, termBytes>, TermInfo> pair to the set. + Term must be lexicographically greater than all previous Terms added. + TermInfo pointers must be positive and greater than all previous. + + + + Called to complete TermInfos creation. + + + This class implements , which + is passed each token produced by the analyzer on each + field. It stores these tokens in a hash table, and + allocates separate byte streams per token. Consumers of + this class, eg and + , write their own byte streams + under each term. + + + + Collapse the hash table & sort in-place. + + + Compares term text for two Posting instance and + returns -1 if p1 < p2; 1 if p1 > p2; else 0. 
+ + + + Test whether the text for current RawPostingList p equals + current tokenText. + + + + Called when postings hash is too small (> 50% + occupied) or too large (< 20% occupied). + + + + Convenience class for holding TermVector information. + + + Compares s first by frequency and then by + the term (case-sensitive) + + + + + + The TermVectorOffsetInfo class holds information pertaining to a Term in a 's + offset information. This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the + original content). + + + + Convenience declaration when creating a that stores only position information. + + + The accessor for the ending offset for the term + The offset + + + The accessor for the starting offset of the term. + + + The offset + + + Retrieve the length (in bytes) of the tvd and tvf + entries for the next numDocs starting with + startDocID. This is used for bulk copying when + merging segments, if the field numbers are + congruent. Once this returns, the tvf & tvd streams + are seeked to the startDocID. + + + + + The number of documents in the reader + + + + Retrieve the term vector for the given document and field + The document number to retrieve the vector for + + The field within the document to retrieve + + The TermFreqVector for the document and field or null if there is no termVector for this field. + + IOException if there is an error reading the term vector files + + + Return all term vectors stored for this document or null if the could not be read in. + + + The document number to retrieve the vector for + + All term frequency vectors + + IOException if there is an error reading the term vector files + + + + The field to read in + + The pointer within the tvf file where we should start reading + + The mapper used to map the TermVector + + IOException + + + Models the existing parallel array structure + + + Construct the vector + The based on the mappings. + + + + Fills in no-term-vectors for all docs we haven't seen + since the last doc that had term vectors. + + + + Called once per field per document if term vectors + are enabled, to write the vectors to + RAMOutputStream, which is then quickly flushed to + the real term vectors files in the Directory. + + + + Add a complete document specified by all its term vectors. If document has no + term vectors, add value for tvx. + + + + + IOException + + + Do a bulk copy of numDocs documents from reader to our + streams. This is used to expedite merging, if the + field numbers are congruent. + + + + Close all streams. + + + Lucene's package information, including version. * + + + Message Interface for a lazy loading. + For Native Language Support (NLS), system of software internationalization. + + + + Default implementation of Message interface. + For Native Language Support (NLS), system of software internationalization. + + + + MessageBundles classes extend this class, to implement a bundle. + + For Native Language Support (NLS), system of software internationalization. + + This interface is similar to the NLS class in eclipse.osgi.util.NLS class - + initializeMessages() method resets the values of all static strings, should + only be called by classes that extend from NLS (see TestMessages.java for + reference) - performs validation of all message in a bundle, at class load + time - performs per message validation at runtime - see NLSTest.java for + usage reference + + MessageBundle classes may subclass this type. 
+ + + + Initialize a given class with the message bundle Keys Should be called from + a class that extends NLS in a static block at class load time. + + + Property file with that contains the message bundle + + where constants will reside + + + + + + + + - Message Key + + + + + Performs the priviliged action. + + A value that may represent the result of the action. + + + Interface that exceptions should implement to support lazy loading of messages. + + For Native Language Support (NLS), system of software internationalization. + + This Interface should be implemented by all exceptions that require + translation + + + + + a instance of a class that implements the Message interface + + + This interface describes a character stream that maintains line and + column number positions of the characters. It also has the capability + to backup the stream to some extent. An implementation of this + interface is used in the TokenManager implementation generated by + JavaCCParser. + + All the methods except backup can be implemented in any fashion. backup + needs to be implemented correctly for the correct operation of the lexer. + Rest of the methods are all used to get information like line number, + column number and the String that constitutes a token and are not used + by the lexer. Hence their implementation won't affect the generated lexer's + operation. + + + + Returns the next character from the selected input. The method + of selecting the input is the responsibility of the class + implementing this interface. Can throw any java.io.IOException. + + + + Backs up the input stream by amount steps. Lexer calls this method if it + had already read some characters, but could not use them to match a + (longer) token. So, they will be used again as the prefix of the next + token and it is the implemetation's responsibility to do this right. + + + + Returns the next character that marks the beginning of the next token. + All characters must remain in the buffer between two successive calls + to this method to implement backup correctly. + + + + Returns an array of characters that make up the suffix of length 'len' for + the currently matched token. This is used to build up the matched string + for use in actions in the case of MORE. A simple and inefficient + implementation of this is as follows : + + { + String t = GetImage(); + return t.substring(t.length() - len, t.length()).toCharArray(); + } + + + + The lexer calls this function to indicate that it is done with the stream + and hence implementations can free any resources held by this class. + Again, the body of this function can be just empty and it will not + affect the lexer's operation. + + + + Returns the column position of the character last read. + + + + + + + Returns the line number of the character last read. + + + + + + + Returns the column number of the last character for current token (being + matched after the last call to BeginTOken). + + + + Returns the line number of the last character for current token (being + matched after the last call to BeginTOken). + + + + Returns the column number of the first character for current token (being + matched after the last call to BeginTOken). + + + + Returns the line number of the first character for current token (being + matched after the last call to BeginTOken). + + + + Returns a string made up of characters from the marked token beginning + to the current buffer position. Implementations have the choice of returning + anything that they want to. 
For example, for efficiency, one might decide + to just return null, which is a valid implementation. + + + + An efficient implementation of JavaCC's CharStream interface.

Note that + this does not do line-number counting, but instead keeps track of the + character position of the token in the input, as required by Lucene's + API. + +

+
+ + Constructs from a Reader. + + + A QueryParser which constructs queries to search multiple fields. + + + $Revision: 829231 $ + + + + This class is generated by JavaCC. The most important method is + . + + The syntax for query strings is as follows: + A Query is a series of clauses. + A clause may be prefixed by: + + a plus (+) or a minus (-) sign, indicating + that the clause is required or prohibited respectively; or + a term followed by a colon, indicating the field to be searched. + This enables one to construct queries which search multiple fields. + + + A clause may be either: + + a term, indicating all the documents that contain this term; or + a nested query, enclosed in parentheses. Note that this may be used + with a +/- prefix to require any of a set of + terms. + + + Thus, in BNF, the query grammar is: + + Query ::= ( Clause )* + Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) + + +

+ Examples of appropriately formatted queries can be found in the query syntax + documentation. +

+ +

+ In s, QueryParser tries to detect date values, e.g. + date:[6/1/2005 TO 6/4/2005] produces a range query that searches + for "date" fields between 2005-06-01 and 2005-06-04. Note that the format + of the accepted input depends on the . + By default a date is converted into a search term using the deprecated + for compatibility reasons. + To use the new to convert dates, a + has to be set. +

+

+ The date resolution that shall be used for RangeQueries can be set + using + or . The former + sets the default date resolution for all fields, whereas the latter can + be used to set field specific date resolutions. Field specific date + resolutions take, if set, precedence over the default date resolution. +

+

+ If you use neither nor in your + index, you can create your own + query parser that inherits QueryParser and overwrites + to + use a different method for date conversion. +

+ +

Note that QueryParser is not thread-safe.

+ +

NOTE: there is a new QueryParser in contrib, which matches + the same syntax as this class, but is more modular, + enabling substantial customization to how a query is created. + +

NOTE: there is a new QueryParser in contrib, which matches + the same syntax as this class, but is more modular, + enabling substantial customization to how a query is created. + NOTE: You must specify the required compatibility when + creating QueryParser: + + As of 2.9, is true by default. + +
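+ A minimal C# sketch of the parser described above; the Version constant and StandardAnalyzer are illustrative assumptions (any analyzer and matching compatibility version should work the same way):
+
+     // Hypothetical sketch: parse a query string against the default field "body".
+     var analyzer = new StandardAnalyzer(Version.LUCENE_29);
+     var parser = new QueryParser(Version.LUCENE_29, "body", analyzer);
+     Query query = parser.Parse("+lucene title:\"query parser\" -deprecated");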

+
+ + Token literal values and constants. + Generated by org.javacc.parser.OtherFilesGen#start() + + + + End of File. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + Lexical state. + + + Lexical state. + + + Lexical state. + + + Lexical state. + + + Literal token values. + + + Alternative form of QueryParser.Operator.AND + + + Alternative form of QueryParser.Operator.OR + + + The actual operator that parser uses to combine query terms + + + Parses a query string, returning a {@link Lucene.Net.Search.Query}. + the query string to be parsed. + + ParseException if the parsing fails + + + Sets the default date resolution used by RangeQueries for fields for which no + specific date resolutions has been set. Field specific resolutions can be set + with {@link #SetDateResolution(String, DateTools.Resolution)}. + + + the default date resolution to set + + + + Sets the date resolution used by RangeQueries for a specific field. + + + field for which the date resolution is to be set + + date resolution to set + + + + Returns the date resolution that is used by RangeQueries for the given field. + Returns null, if no default or field specific date resolution has been set + for the given field. + + + + throw in overridden method to disallow + + + + Base implementation delegates to {@link #GetFieldQuery(String,String)}. + This method may be overridden, for example, to return + a SpanNearQuery instead of a PhraseQuery. + + + throw in overridden method to disallow + + + + throw in overridden method to disallow + + + + Builds a new BooleanQuery instance + disable coord + + new BooleanQuery instance + + + + Builds a new BooleanClause instance + sub query + + how this clause should occur when matching documents + + new BooleanClause instance + + + + Builds a new TermQuery instance + term + + new TermQuery instance + + + + Builds a new PhraseQuery instance + new PhraseQuery instance + + + + Builds a new MultiPhraseQuery instance + new MultiPhraseQuery instance + + + + Builds a new PrefixQuery instance + Prefix term + + new PrefixQuery instance + + + + Builds a new FuzzyQuery instance + Term + + minimum similarity + + prefix length + + new FuzzyQuery Instance + + + + Builds a new TermRangeQuery instance + Field + + min + + max + + true if range is inclusive + + new TermRangeQuery instance + + + + Builds a new MatchAllDocsQuery instance + new MatchAllDocsQuery instance + + + + Builds a new WildcardQuery instance + wildcard term + + new WildcardQuery instance + + + + Factory method for generating query, given a set of clauses. + By default creates a boolean query composed of clauses passed in. + + Can be overridden by extending classes, to modify query being + returned. 
+ + + List that contains {@link BooleanClause} instances + to join. + + + Resulting {@link Query} object. + + throw in overridden method to disallow + + + + Factory method for generating query, given a set of clauses. + By default creates a boolean query composed of clauses passed in. + + Can be overridden by extending classes, to modify query being + returned. + + + List that contains {@link BooleanClause} instances + to join. + + true if coord scoring should be disabled. + + + Resulting {@link Query} object. + + throw in overridden method to disallow + + + + Factory method for generating a query. Called when parser + parses an input term token that contains one or more wildcard + characters (? and *), but is not a prefix term token (one + that has just a single * character at the end) +

+ Depending on settings, prefix term may be lower-cased + automatically. It will not go through the default Analyzer, + however, since normal Analyzers are unlikely to work properly + with wildcard templates. +

+ Can be overridden by extending classes, to provide custom handling for + wildcard queries, which may be necessary due to missing analyzer calls. + +

+ Name of the field query will use. + + Term token that contains one or more wild card + characters (? or *), but is not simple prefix term + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + +
+ + Factory method for generating a query (similar to + {@link #getWildcardQuery}). Called when parser parses an input term + token that uses prefix notation; that is, contains a single '*' wildcard + character as its last character. Since this is a special case + of generic wildcard term, and such a query can be optimized easily, + this usually results in a different query object. +

+ Depending on settings, a prefix term may be lower-cased + automatically. It will not go through the default Analyzer, + however, since normal Analyzers are unlikely to work properly + with wildcard templates. +

+ Can be overridden by extending classes, to provide custom handling for + wild card queries, which may be necessary due to missing analyzer calls. + +

+ Name of the field query will use. + + Term token to use for building term for the query + (without trailing '*' character!) + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + +
+ + Factory method for generating a query (similar to + {@link #getWildcardQuery}). Called when parser parses + an input term token that has the fuzzy suffix (~) appended. + + + Name of the field query will use. + + Term token to use for building term for the query + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + + + + Returns a String where the escape char has been + removed, or kept only once if there was a double escape. + + Supports escaped unicode characters, e. g. translates + \\u0041 to A. + + + + + Returns the numeric value of the hexadecimal character + + + Returns a String where those characters that QueryParser + expects to be escaped are escaped by a preceding \. + + + + Command line tool to test QueryParser, using {@link Lucene.Net.Analysis.SimpleAnalyzer}. + Usage:
+ java Lucene.Net.QueryParsers.QueryParser <input> +
+
+ + Constructor with user supplied CharStream. + + + Reinitialise. + + + Constructor with generated Token Manager. + + + Reinitialise. + + + Get the next Token. + + + Get the specific Token. + + + Generate ParseException. + + + Enable tracing. + + + Disable tracing. + + + Returns the analyzer. + + + Returns the field. + + + + Gets or sets the minimal similarity for fuzzy queries. + Default is 0.5f. + + + + Gets or sets the prefix length for fuzzy queries. + Returns the fuzzyPrefixLength. + + + Gets or sets the default slop for phrases. If zero, then exact phrase matches + are required. Default value is zero. + + + + Set to true to allow leading wildcard characters. +

+ When set, * or ? are allowed as + the first character of a PrefixQuery and WildcardQuery. + Note that this can produce very slow + queries on big indexes. +

+ Default: false. +

+
+ + Set to true to enable position increments in result query. +

+ When set, result phrase and multi-phrase queries will + be aware of position increments. + Useful when e.g. a StopFilter increases the position increment of + the token that follows an omitted token. +

+ Default: false. +

+
+ + Gets or sets the boolean operator of the QueryParser. + In default mode (OR_OPERATOR) terms without any modifiers + are considered optional: for example capital of Hungary is equal to + capital OR of OR Hungary.
+ In AND_OPERATOR mode terms are considered to be in conjunction: the + above mentioned query is parsed as capital AND of AND Hungary +
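+ For example, a hedged C# sketch of switching the parser into conjunction mode (the exact property name is an assumption based on the "gets or sets" wording above; QueryParser.Operator.AND is referenced elsewhere in this file):
+
+     // Hypothetical sketch: "capital of Hungary" now parses as capital AND of AND Hungary.
+     parser.DefaultOperator = QueryParser.Operator.AND;
+     Query q = parser.Parse("capital of Hungary");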
+
+ + Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + lower-cased or not. Default is true. + + + + By default QueryParser uses + when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + a) Runs faster b) Does not have the scarcity of terms unduly influence score + c) avoids any "TooManyBooleanClauses" exception. + However, if your application really needs to use the + old-fashioned BooleanQuery expansion rewriting and the above + points are not relevant then use this to change + the rewrite method. + + + + Gets or sets locale used by date range parsing. + + + Gets or sets the collator used to determine index term inclusion in ranges + for RangeQuerys. +

+ WARNING: Setting the rangeCollator to a non-null + collator using this method will cause every single index Term in the + Field referenced by lowerTerm and/or upperTerm to be examined. + Depending on the number of index Terms in this Field, the operation could + be very slow. + +

+ the collator to use when constructing RangeQuerys +
+ + Creates a MultiFieldQueryParser. Allows passing of a map with term to + Boost, and the boost to apply to each term. + +

+ It will, when parse(String query) is called, construct a query like this + (assuming the query consists of two terms and you specify the two fields + title and body): +

+ + + (title:term1 body:term1) (title:term2 body:term2) + + +

+ When setDefaultOperator(AND_OPERATOR) is set, the result will be: +

+ + + +(title:term1 body:term1) +(title:term2 body:term2) + + +

+ When you pass a boost (title=>5 body=>10) you can get +

+ + + +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0) + + +

+ In other words, all the query's terms must appear, but it doesn't matter + in what fields they appear. +

+

+
+ + Creates a MultiFieldQueryParser. + +

+ It will, when parse(String query) is called, construct a query like this + (assuming the query consists of two terms and you specify the two fields + title and body): +

+ + + (title:term1 body:term1) (title:term2 body:term2) + + +

+ When setDefaultOperator(AND_OPERATOR) is set, the result will be: +

+ + + +(title:term1 body:term1) +(title:term2 body:term2) + + +

+ In other words, all the query's terms must appear, but it doesn't matter + in what fields they appear. +
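+ A hedged C# sketch of this constructor, using the two-field example from the text (field names and the Version constant are illustrative):
+
+     // Hypothetical sketch: one query string searched across "title" and "body".
+     var parser = new MultiFieldQueryParser(Version.LUCENE_29,
+                                            new[] { "title", "body" },
+                                            analyzer);
+     Query q = parser.Parse("term1 term2");   // roughly (title:term1 body:term1) (title:term2 body:term2)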

+

+
+ + Parses a query which searches on the fields specified. +

+ If x fields are specified, this effectively constructs: + + + (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx) + + +

+ Lucene version to match; this is passed through to + QueryParser. + + Queries strings to parse + + Fields to search on + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the queries array differs from the length of + the fields array + +
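+ For instance, a minimal C# sketch of this overload; the field names, query strings and Pascal-cased Parse are illustrative assumptions:
+
+     // Hypothetical sketch: a different query string for each field, combined as described above.
+     string[] queries = { "lucene search", "2009" };
+     string[] fields = { "contents", "year" };
+     Query q = MultiFieldQueryParser.Parse(Version.LUCENE_29, queries, fields, analyzer);
+     // effectively (contents:<parsed "lucene search">) (year:2009)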
+ + Parses a query, searching on the fields specified. Use this if you need + to specify certain fields as required, and others as prohibited. +

+ Usage: + + String[] fields = {"filename", "contents", "description"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + BooleanClause.Occur.MUST, + BooleanClause.Occur.MUST_NOT}; + MultiFieldQueryParser.parse("query", fields, flags, analyzer); +

+ The code above would construct a query: + + + (filename:query) +(contents:query) -(description:query) + + +

+ Lucene version to match; this is passed through to + QueryParser. + + Query string to parse + + Fields to search on + + Flags describing the fields + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the fields array differs from the length of + the flags array + +
+ + Parses a query, searching on the fields specified. Use this if you need + to specify certain fields as required, and others as prohibited. +

+ Usage: + + String[] query = {"query1", "query2", "query3"}; + String[] fields = {"filename", "contents", "description"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + BooleanClause.Occur.MUST, + BooleanClause.Occur.MUST_NOT}; + MultiFieldQueryParser.parse(query, fields, flags, analyzer); + +

+ The code above would construct a query: + + + (filename:query1) +(contents:query2) -(description:query3) + + +

+ Lucene version to match; this is passed through to + QueryParser. + + Queries string to parse + + Fields to search on + + Flags describing the fields + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the queries, fields, and flags array differ + +
+ + This exception is thrown when parse errors are encountered. + You can explicitly create objects of this exception type by + calling the method generateParseException in the generated + parser. + + You can modify this class to customize your error reporting + mechanisms so long as you retain the public fields. + + + + This constructor is used by the method "generateParseException" + in the generated parser. Calling this constructor generates + a new object of this type with the fields "currentToken", + "expectedTokenSequences", and "tokenImage" set. The boolean + flag "specialConstructor" is also set to true to indicate that + this constructor was used to create this object. + This constructor calls its super class with the empty string + to force the "toString" method of parent class "Throwable" to + print the error message in the form: + ParseException: <result of getMessage> + + + + The following constructors are for use by you for whatever + purpose you can think of. Constructing the exception in this + manner makes the exception behave in the normal way - i.e., as + documented in the class "Throwable". The fields "errorToken", + "expectedTokenSequences", and "tokenImage" do not contain + relevant information. The JavaCC generated code does not use + these constructors. + + + + Constructor with message. + + + Constructor with message. + + + This variable determines which constructor was used to create + this object and thereby affects the semantics of the + "getMessage" method (see below). + + + + This is the last token that has been consumed successfully. If + this object has been created due to a parse error, the token + followng this token will (therefore) be the first error token. + + + + Each entry in this array is an array of integers. Each array + of integers represents a sequence of tokens (by their ordinal + values) that is expected at this point of the parse. + + + + This is a reference to the "tokenImage" array of the generated + parser within which the parse error occurred. This array is + defined in the generated ...Constants interface. + + + + The end of line string for this machine. + + + Used to convert raw characters to their escaped version + when these raw version cannot be used as part of an ASCII + string literal. + + + + This method has the standard behavior when this object has been + created using the standard constructors. Otherwise, it uses + "currentToken" and "expectedTokenSequences" to generate a parse + error message and returns it. If this object has been created + due to a parse error, and you do not catch it (it gets thrown + from the parser), then this method is called during the printing + of the final stack trace, and hence the correct error message + gets displayed. + + + + Token Manager. + + + Debug output. + + + Set debug output. + + + Token literal values. + + + Lexer state names. + + + Lex State array. + + + Constructor. + + + Constructor. + + + Reinitialise parser. + + + Reinitialise parser. + + + Switch to specified lex state. + + + Get the next Token. + + + Describes the input token stream. + + + An integer that describes the kind of this token. This numbering + system is determined by JavaCCParser, and a table of these numbers is + stored in the file ...Constants.java. + + + + The line number of the first character of this Token. + + + The column number of the first character of this Token. + + + The line number of the last character of this Token. + + + The column number of the last character of this Token. 
+ + + The string image of the token. + + + A reference to the next regular (non-special) token from the input + stream. If this is the last token from the input stream, or if the + token manager has not read tokens beyond this one, this field is + set to null. This is true only if this token is also a regular + token. Otherwise, see below for a description of the contents of + this field. + + + + This field is used to access special tokens that occur prior to this + token, but after the immediately preceding regular (non-special) token. + If there are no such special tokens, this field is set to null. + When there are more than one such special token, this field refers + to the last of these special tokens, which in turn refers to the next + previous special token through its specialToken field, and so on + until the first special token (whose specialToken field is null). + The next fields of special tokens refer to other special tokens that + immediately follow it (without an intervening regular token). If there + is no such token, this field is null. + + + + No-argument constructor + + + Constructs a new token for the specified Image. + + + Constructs a new token for the specified Image and Kind. + + + Returns the image. + + + Returns a new Token object, by default. However, if you want, you + can create and return subclass objects based on the value of ofKind. + Simply add the cases to the switch for all those special cases. + For example, if you have a subclass of Token called IDToken that + you want to create if ofKind is ID, simply add something like : + + case MyParserConstants.ID : return new IDToken(ofKind, image); + + to the following switch statement. Then you can cast matchedToken + variable to the appropriate type and use sit in your lexical actions. + + + + An optional attribute value of the Token. + Tokens which are not used as syntactic sugar will often contain + meaningful values that will be used later on by the compiler or + interpreter. This attribute value is often different from the image. + Any subclass of Token that actually wants to return a non-null value can + override this method as appropriate. + + + + Token Manager Error. + + + Lexical error occurred. + + + An attempt was made to create a second instance of a static token manager. + + + Tried to change to an invalid lexical state. + + + Detected (and bailed out of) an infinite loop in the token manager. + + + Indicates the reason why the exception is thrown. It will have + one of the above 4 values. + + + + Replaces unprintable characters by their escaped (or unicode escaped) + equivalents in the given string + + + + Returns a detailed message for the Error when it is thrown by the + token manager to indicate a lexical error. + Parameters : + EOFSeen : indicates if EOF caused the lexical error + curLexState : lexical state in which this error occurred + errorLine : line number when the error occurred + errorColumn : column number when the error occurred + errorAfter : prefix that was seen before this error occurred + curchar : the offending character + Note: You can customize the lexical error message by modifying this method. + + + + No arg constructor. + + + Constructor with message and reason. + + + Full Constructor. + + + You can also modify the body of this method to customize your error messages. + For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + of end-users concern, so you can return something like : + + "Internal Error : Please file a bug report .... 
" + + from this method for such cases in the release version of your parser. + + + + A clause in a BooleanQuery. + + + Constructs a BooleanClause. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + The query whose matching documents are combined by the boolean query. + + + A Query that matches documents matching boolean combinations of other + queries, e.g. s, s or other + BooleanQuerys. + + + + The abstract base class for queries. +

Instantiable subclasses are: + + + + + + + + + + + + + +

A parser for queries is contained in: + + QueryParser + +
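+ To make the boolean composition of these query types concrete, a short C# sketch (the Occur names match the MultiFieldQueryParser usage shown earlier in this file; everything else is illustrative):
+
+     // Hypothetical sketch: body MUST contain "lucene", SHOULD contain "search", MUST NOT contain "draft".
+     var query = new BooleanQuery();
+     query.Add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.MUST);
+     query.Add(new TermQuery(new Term("body", "search")), BooleanClause.Occur.SHOULD);
+     query.Add(new TermQuery(new Term("body", "draft")), BooleanClause.Occur.MUST_NOT);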

+
+ + Prints a query to a string, with field assumed to be the + default field and omitted. +

The representation used is one that is supposed to be readable + by QueryParser. However, + there are the following limitations: + + If the query was created by the parser, the printed + representation may not be exactly what was parsed. For example, + characters that need to be escaped will be represented without + the required backslash. + Some of the more complicated queries (e.g. span queries) + don't have a representation that can be parsed by QueryParser. + +

+
+ + Prints a query to a string. + + + Expert: Constructs an appropriate Weight implementation for this query. + +

+ Only implemented by primitive queries, which re-write to themselves. +

+
+ + Expert: Constructs and initializes a Weight for a top-level query. + + + Expert: called to re-write queries into primitive queries. For example, + a PrefixQuery will be rewritten into a BooleanQuery that consists + of TermQuerys. + + + + Expert: called when re-writing queries under MultiSearcher. + + Create a single query suitable for use by all subsearchers (in 1-1 + correspondence with queries). This is an optimization of the OR of + all queries. We handle the common optimization cases of equal + queries and overlapping clauses of boolean OR queries (as generated + by MultiTermQuery.rewrite()). + Be careful overriding this method as queries[0] determines which + method will be called and is not necessarily of the same type as + the other queries. + + + + Expert: adds all terms occuring in this query to the terms set. Only + works if this query is in its rewritten form. + + + UnsupportedOperationException if this query is not yet rewritten + + + Expert: merges the clauses of a set of BooleanQuery's into a single + BooleanQuery. + +

A utility for use by implementations. +

+
+ + Expert: Returns the Similarity implementation to be used for this query. + Subclasses may override this method to specify their own Similarity + implementation, perhaps one that delegates through that of the Searcher. + By default the Searcher's Similarity implementation is returned. + + + + Returns a clone of this query. + + + Gets or sets the boost for this query clause to b. Documents + matching this clause will (in addition to the normal weightings) have + their score multiplied by b. The boost is 1.0 by default. + + + + Constructs an empty boolean query. + + + Constructs an empty boolean query. + + may be disabled in scoring, as + appropriate. For example, this score factor does not make sense for most + automatically generated queries, like and + . + + + disables in scoring. + + + + Returns true iff is disabled in + scoring for this query instance. + + + + + + Adds a clause to a boolean query. + + + TooManyClauses if the new number of clauses exceeds the maximum clause number + + + + + Adds a clause to a boolean query. + TooManyClauses if the new number of clauses exceeds the maximum clause number + + + + + Returns the set of clauses in this query. + + + + Returns an iterator on the clauses in this query. + + + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Gets or sets the maximum number of clauses permitted, 1024 by default. + Attempts to add more than the permitted number of clauses cause + to be thrown. + + + + + Specifies a minimum number of the optional BooleanClauses + which must be satisfied. + + By default no optional clauses are necessary for a match + (unless there are no required clauses). If this method is used, + then the specified number of clauses is required. + + + Use of this method is totally independent of specifying that + any specific clauses are required (or prohibited). This number will + only be compared against the number of matching optional clauses. + + + + + Returns the list of clauses in this query. + + + Expert: Delegating scoring implementation. Useful in + implementations, to override only certain + methods of a Searcher's Similiarty implementation.. + + + + Expert: Scoring API. +

Subclasses implement search scoring. + +

+ The score of query q for document d correlates to the cosine-distance or dot-product between
+ document and query vectors in a Vector Space Model (VSM) of Information Retrieval. A document
+ whose vector is closer to the query vector in that model is scored higher. The score is computed
+ as follows:
+
+     score(q,d) = coord(q,d) · queryNorm(q) · Σ ( tf(t in d) · idf(t)² · t.Boost · norm(t,d) ),
+     the sum taken over each term t in q
+
+ where
+
+ tf(t in d) correlates to the term's frequency, defined as the number of times term t appears in
+ the currently scored document d. Documents that have more occurrences of a given term receive a
+ higher score. The default computation for tf(t in d) in DefaultSimilarity is:
+
+     tf(t in d) = frequency½
+
+ idf(t) stands for Inverse Document Frequency. This value correlates to the inverse of docFreq
+ (the number of documents in which the term t appears). This means rarer terms give higher
+ contribution to the total score. The default computation for idf(t) in DefaultSimilarity is:
+
+     idf(t) = 1 + log( numDocs / (docFreq + 1) )
+
+ coord(q,d) is a score factor based on how many of the query terms are found in the specified
+ document. Typically, a document that contains more of the query's terms will receive a higher
+ score than another document with fewer query terms. This is a search time factor computed by
+ the Similarity in effect at search time.
+
+ queryNorm(q) is a normalizing factor used to make scores between queries comparable. This factor
+ does not affect document ranking (since all ranked documents are multiplied by the same factor),
+ but rather just attempts to make scores from different queries (or even different indexes)
+ comparable. This is a search time factor computed by the Similarity in effect at search time.
+ The default computation in DefaultSimilarity is:
+
+     queryNorm(q) = queryNorm(sumOfSquaredWeights) = 1 / sumOfSquaredWeights½
+
+ The sum of squared weights (of the query terms) is computed by the query object. For example, a
+ boolean query computes this value as:
+
+     GetSumOfSquaredWeights = q.Boost² · Σ ( idf(t) · t.Boost )²,
+     the sum taken over each term t in q
+
+ t.Boost is a search time boost of term t in the query q as specified in the query text
+ (see query syntax), or as set programmatically by the application. Notice that there is really no
+ direct API for accessing a boost of one term in a multi term query, but rather multi terms are
+ represented in a query as multi TermQuery objects, and so the boost of a term in the query is
+ accessible on the corresponding sub-query.
+
+ norm(t,d) encapsulates a few (indexing time) boost and length factors:
+
+     Document boost - set by calling doc.Boost before adding the document to the index.
+     Field boost - set by calling field.Boost before adding the field to a document.
+     LengthNorm(field) - computed when the document is added to the index in accordance with the
+     number of tokens of this field in the document, so that shorter fields contribute more to the
+     score. LengthNorm is computed by the Similarity class in effect at indexing.
+
+ When a document is added to the index, all the above factors are multiplied. If the document has
+ multiple fields with the same name, all their boosts are multiplied together:
+
+     norm(t,d) = doc.Boost · LengthNorm(field) · Π field.Boost,
+     the product taken over each field f in d named as t
 
+ However the resulting norm value is encoded as a single byte before being stored. At search time, the norm byte value is read from the index directory and decoded back to a float norm value. This encoding/decoding, while reducing index size, comes with the price of precision loss - it is not guaranteed that decode(encode(x)) = x. For instance, decode(encode(0.89)) = 0.75. Also notice that search time is too late to modify this norm part of scoring, e.g. by using a different Similarity for search. +
 
+ + + +

+ + + + + + +
+ + The Similarity implementation used by default. + + + Cache of decoded bytes. + + + Decodes a normalization factor stored in an index. + + + + + Returns a table for decoding normalization bytes. + + + + + Compute the normalization value for a field, given the accumulated + state of term processing for this field (see ). + +

Implementations should calculate a float value based on the field + state and then return that value. + +

For backward compatibility this method by default calls + passing + as the second argument, and + then multiplies this value by .

+ +

WARNING: This API is new and experimental and may + suddenly change.

+ +

+ field name + + current processing state for this field + + the calculated float norm + +
+ + Computes the normalization value for a field given the total number of + terms contained in a field. These values, together with field boosts, are + stored in an index and multipled into scores for hits on each field by the + search code. + +

Matches in longer fields are less precise, so implementations of this + method usually return smaller values when numTokens is large, + and larger values when numTokens is small. + +

Note that the return values are computed under + + and then stored using + . + Thus they have limited precision, and documents + must be re-indexed if this method is altered. + +

+ the name of the field + + the total number of tokens contained in fields named + fieldName of doc. + + a normalization factor for hits on this field of this document + + + +
+ + Computes the normalization value for a query given the sum of the squared + weights of each of the query terms. This value is then multipled into the + weight of each query term. + +

This does not affect ranking, but rather just attempts to make scores + from different queries comparable. + +

+ the sum of the squares of query term weights + + a normalization factor for query weights + +
+ + Encodes a normalization factor for storage in an index. + +

The encoding uses a three-bit mantissa, a five-bit exponent, and + the zero-exponent point at 15, thus + representing values from around 7x10^9 to 2x10^-9 with about one + significant decimal digit of accuracy. Zero is also represented. + Negative numbers are rounded up to zero. Values too large to represent + are rounded down to the largest representable value. Positive values too + small to represent are rounded up to the smallest positive representable + value. + +
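+ The precision loss described here can be observed directly; a hedged C# sketch, assuming the encode/decode helpers documented in this section are exposed as static EncodeNorm/DecodeNorm methods:
+
+     // Hypothetical sketch: round-trip a field norm through its single-byte encoding.
+     float original = 0.89f;
+     byte encoded = Similarity.EncodeNorm(original);
+     float decoded = Similarity.DecodeNorm(encoded);   // 0.75f - decode(encode(x)) need not equal x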

+ + +
+ + Computes a score factor based on a term or phrase's frequency in a + document. This value is multiplied by the + factor for each term in the query and these products are then summed to + form the initial score for a document. + +

Terms and phrases repeated in a document indicate the topic of the + document, so implementations of this method usually return larger values + when freq is large, and smaller values when freq + is small. + +

The default implementation calls . + +

+ the frequency of a term within a document + + a score factor based on a term's within-document frequency + +
+ + Computes the amount of a sloppy phrase match, based on an edit distance. + This value is summed for each sloppy phrase match in a document to form + the frequency that is passed to . + +

A phrase match with a small edit distance to a document passage more + closely matches the document, so implementations of this method usually + return larger values when the edit distance is small and smaller values + when it is large. + +

+ + the edit distance of this sloppy phrase match + the frequency increment for this match +
+ + Computes a score factor based on a term or phrase's frequency in a + document. This value is multiplied by the + factor for each term in the query and these products are then summed to + form the initial score for a document. + +

Terms and phrases repeated in a document indicate the topic of the + document, so implementations of this method usually return larger values + when freq is large, and smaller values when freq + is small. + +

+ the frequency of a term within a document + + a score factor based on a term's within-document frequency + +
+ + Computes a score factor for a simple term and returns an explanation + for that score factor. + +

+ The default implementation uses: + + + idf(searcher.docFreq(term), searcher.MaxDoc); + + + Note that is used instead of + because it is + proportional to , i.e., when one is + inaccurate, so is the other, and in the same direction. + +

+ the term in question + + the document collection being searched + + an IDFExplain object that includes both an idf score factor + and an explanation for the term. + + IOException +
+ + Computes a score factor for a phrase. + +

+ The default implementation sums the idf factor for + each term in the phrase. + +

+ the terms in the phrase + + the document collection being searched + + an IDFExplain object that includes both an idf + score factor for the phrase and an explanation + for each term. + + IOException +
+ + Computes a score factor based on a term's document frequency (the number + of documents which contain the term). This value is multiplied by the + factor for each term in the query and these products are + then summed to form the initial score for a document. + +

Terms that occur in fewer documents are better indicators of topic, so + implementations of this method usually return larger values for rare terms, + and smaller values for common terms. + +

+ the number of documents which contain the term + + the total number of documents in the collection + + a score factor based on the term's document frequency + +
+ + Computes a score factor based on the fraction of all query terms that a + document contains. This value is multiplied into scores. + +

The presence of a large portion of the query terms indicates a better + match with the query, so implementations of this method usually return + larger values when the ratio between these parameters is large and smaller + values when the ratio between them is small. + +

+ the number of query terms matched in the document + + the total number of terms in the query + + a score factor based on term overlap with the query + +
+ + Calculate a scoring factor based on the data in the payload. Overriding implementations + are responsible for interpreting what is in the payload. Lucene makes no assumptions about + what is in the byte array. +

+ The default implementation returns 1. + +

+ The docId currently being scored. If this value is , then it should be assumed that the PayloadQuery implementation does not provide document information + + The fieldName of the term this payload belongs to + + The start position of the payload + + The end position of the payload + + The payload byte array to be scored + + The offset into the payload array + + The length in the array + + An implementation dependent float to be used as a scoring factor + + +
+ + Gets or sets the default Similarity implementation + used by indexing and search code. +

This is initially an instance of . +

+ + + + +
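+ As a hedged illustration of replacing the default (assuming the Lucene.Net 3.x names
+ Similarity.Default, DefaultSimilarity and LengthNorm(string, int); the subclass below is
+ purely illustrative):
+
+     using Lucene.Net.Search;
+
+     // Disable length normalization so short and long fields score alike.
+     public class NoLengthNormSimilarity : DefaultSimilarity
+     {
+         public override float LengthNorm(string fieldName, int numTerms)
+         {
+             return 1.0f;
+         }
+     }
+
+     // Install it as the global default used by indexing and search code
+     // (assuming the setter is exposed as the Similarity.Default property):
+     // Similarity.Default = new NoLengthNormSimilarity();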
+ + Small Util class used to pass both an idf factor as well as an + explanation for that factor. + + This class will likely be held on a , so be aware + before storing any large or un-serializable fields. + + + + + Expert: Describes the score computation for document and query. + + + The sub-nodes of this explanation node. + + + Adds a sub-node to this explanation node. + + + Render an explanation as text. + + + Render an explanation as HTML. + + + Indicates whether or not this Explanation models a good match. + +

+ By default, an Explanation represents a "match" if the value is positive. +

+

+ + +
+ + The value assigned to this explanation node. + + + A description of this explanation node. + + + A short one line summary which should contain all high level + information about this Explanation, without the "Details" + + + + Small Util class used to pass both an idf factor as well as an + explanation for that factor. + + This class will likely be held on a , so be aware + before storing any large or un-serializable fields. + + + + + This should be calculated lazily if possible. + + + the explanation for the idf factor. + + + + the idf factor + + + Construct a that delegates all methods to another. + the Similarity implementation to delegate to + + + Thrown when an attempt is made to add more than + clauses. This typically happens if + a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery + is expanded to many terms during search. + + + + Expert: the Weight for BooleanQuery, used to + normalize, score and explain these queries. + +

NOTE: this API and implementation is subject to + change suddenly in the next release.

+

+
+ + Expert: Calculate query weights and build query scorers. +

+ The purpose of a Weight is to ensure searching does not modify a Query, so that a Query instance can be reused.
+ Searcher-dependent state of the query should reside in the Weight.
+ IndexReader-dependent state should reside in the Scorer. +

+ A Weight is used in the following way: + + A Weight is constructed by a top-level query, given a + Searcher (). + The method is called on the + Weight to compute the query normalization factor + of the query clauses contained in the + query. + The query normalization factor is passed to . At + this point the weighting is complete. + A Scorer is constructed by . + + +

+ 2.9 + +
+ + An explanation of the score computation for the named document. + + + sub-reader containing the give doc + + + + an Explanation for the score + + IOException + + + Assigns the query normalization factor to this. + + + Returns a which scores documents in/out-of order according + to scoreDocsInOrder. +

+ NOTE: even if scoreDocsInOrder is false, it is + recommended to check whether the returned Scorer indeed scores + documents out of order (i.e., call ), as + some Scorer implementations will always return documents + in-order.
+ NOTE: null can be returned if no documents will be scored by this + query. + +

+ + the for which to return the . + + specifies whether in-order scoring of documents is required. Note + that if set to false (i.e., out-of-order scoring is required), + this method can return whatever scoring mode it supports, as every + in-order scorer is also an out-of-order one. However, an + out-of-order scorer may not support + and/or , therefore it is recommended to + request an in-order scorer if use of these methods is required. + + + if true, will be called; if false, + and/or will + be called. + + a which scores documents in/out-of order. + + IOException +
+ + The sum of squared weights of contained query clauses. + + + Returns true iff this implementation scores docs only out of order. This + method is used in conjunction with 's + AcceptsDocsOutOfOrder and + to + create a matching instance for a given , or + vice versa. +

+ NOTE: the default implementation returns false, i.e. + the Scorer scores documents in-order. +

+
+ + The query that this concerns. + + + The weight for this query. + + + The Similarity implementation. + + + Expert: Common scoring functionality for different types of queries. + +

+ A Scorer iterates over documents matching a + query in increasing order of doc Id. +

+

+ Document scores are computed using a given Similarity + implementation. +

+ +

NOTE: The values Float.NaN, + Float.NEGATIVE_INFINITY and Float.POSITIVE_INFINITY are + not valid scores. Certain collectors (e.g. + ) will not properly collect hits + with these scores. +

+
+ + This abstract class defines methods to iterate over a set of non-decreasing + doc ids. Note that this class assumes it iterates on doc Ids, and therefore + is set to Int32.MaxValue in order to be used as + a sentinel object. Implementations of this class are expected to consider + as an invalid value. + + + + When returned by , and + it means there are no more docs in the iterator. + + + + Returns the following: + + -1 or if or + were not called yet. + if the iterator has exhausted. + Otherwise it should return the doc ID it is currently on. + +

+

+
+ + Advances to the next document in the set and returns the doc it is + currently on, or if there are no more docs in the + set.
+ + NOTE: after the iterator has exhausted you should not call this + method, as it may result in unpredicted behavior. + +
+
+ + Advances to the first beyond the current whose document number is greater + than or equal to target. Returns the current document number or + if there are no more docs in the set. +

+ Behaves as if written: + + + int advance(int target) { + int doc; + while ((doc = nextDoc()) < target) { + } + return doc; + } + + + Some implementations are considerably more efficient than that. +

+ NOTE: certain implementations may return a different value (each + time) if called several times in a row with the same target. +

+ NOTE: this method may be called with for + efficiency by some Scorers. If your implementation cannot efficiently + determine that it should exhaust, it is recommended that you check for that + value in each call to this method. +

+ NOTE: after the iterator has exhausted you should not call this + method, as it may result in unpredicted behavior. +

+ +

+ 2.9 +
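+ A small hedged sketch of the iteration contract just described (assuming the Lucene.Net
+ names DocIdSet.Iterator(), DocIdSetIterator.NextDoc() and DocIdSetIterator.NO_MORE_DOCS;
+ the counting helper itself is illustrative):
+
+     using Lucene.Net.Search;
+
+     static class DocIdSetUtil
+     {
+         // Drain an iterator; NextDoc() returns non-decreasing ids and finally NO_MORE_DOCS.
+         public static int Count(DocIdSet set)
+         {
+             DocIdSetIterator it = set.Iterator();
+             if (it == null) return 0;              // a null iterator means no documents match
+             int count = 0;
+             while (it.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+                 count++;
+             return count;
+         }
+     }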
+ + Constructs a Scorer. + The Similarity implementation used by this scorer. + + + + Scores and collects all matching documents. + The collector to which all matching documents are passed. + + + + Expert: Collects matching documents in a range. Hook for optimization. + Note, is added to ensure that + was called before this method. + + + The collector to which all matching documents are passed. + + Do not score documents past this. + + + The first document ID (ensures is called before + this method. + + true if more matching documents may remain. + + + + Returns the score of the current document matching the query. + Initially invalid, until or + is called the first time, or when called from within + . + + + + Returns the Similarity implementation used by this scorer. + + +

Expert: Collectors are primarily meant to be used to + gather raw results from a search, and implement sorting + or custom result filtering, collation, etc.

+ +

Lucene's core collectors are derived from Collector. + Likely your application can use one of these classes, or + subclass , instead of + implementing Collector directly: + + + + is an abstract base class + that assumes you will retrieve the top N docs, + according to some criteria, after collection is + done. + + is a concrete subclass + and sorts according to score + + docID. This is used internally by the + search methods that do not take an + explicit . It is likely the most frequently + used collector. + + subclasses + and sorts according to a specified + object (sort by field). This is used + internally by the search methods + that take an explicit . + + , which wraps any other + Collector and aborts the search if it's taken too much + time. + + wraps any other + Collector and prevents collection of hits whose score + is <= 0.0 + + + +

Collector decouples the score from the collected doc: + the score computation is skipped entirely if it's not + needed. Collectors that do need the score should + implement the method, to hold onto the + passed instance, and call + within the collect method to compute the + current hit's score. If your collector may request the + score for a single hit multiple times, you should use + .

+ +

NOTE: The doc that is passed to the collect + method is relative to the current reader. If your + collector needs to resolve this to the docID space of the + Multi*Reader, you must re-base it by recording the + docBase from the most recent setNextReader call. Here's + a simple example showing how to collect docIDs into a + BitSet:

+ + + Searcher searcher = new IndexSearcher(indexReader); + final BitSet bits = new BitSet(indexReader.MaxDoc); + searcher.search(query, new Collector() { + private int docBase; + + // ignore scorer + public void setScorer(Scorer scorer) { + } + + // accept docs out of order (for a BitSet it doesn't matter) + public boolean acceptsDocsOutOfOrder() { + return true; + } + + public void collect(int doc) { + bits.set(doc + docBase); + } + + public void setNextReader(IndexReader reader, int docBase) { + this.docBase = docBase; + } + }); + + +

Not all collectors will need to rebase the docID. For + example, a collector that simply counts the total number + of hits would skip it.

+ +

NOTE: Prior to 2.9, Lucene silently filtered + out hits with score <= 0. As of 2.9, the core Collectors + no longer do that. It's very unusual to have such hits + (a negative query boost, or function query returning + negative custom scores, could cause it to happen). If + you need that behavior, use + .

+ +

NOTE: This API is experimental and might change + in incompatible ways in the next release.

+ +

+ 2.9 + +
+ + Called before successive calls to . Implementations + that need the score of the current document (passed-in to + ), should save the passed-in Scorer and call + scorer.score() when needed. + + + + Called once for every document matching a query, with the unbased document + number. + +

+ Note: This is called in an inner search loop. For good search performance, + implementations of this method should not call or + on every hit. + Doing so can slow searches by an order of magnitude or more. +

+
+ + Called before collecting from each IndexReader. All doc ids in + will correspond to reader. + + Add docBase to the current IndexReaders internal document id to re-base ids + in . + + + next IndexReader + + + + + + + Return true if this collector does not + require the matching docIDs to be delivered in int sort + order (smallest to largest) to . +

Most Lucene Query implementations will visit + matching docIDs in order. However, some queries + (currently limited to certain cases of ) + can achieve faster searching if the + Collector allows them to deliver the + docIDs out of order. +

Many collectors don't mind getting docIDs out of + order, so it's important to return true + here. +

+ +
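+ A hedged C# counterpart of the Java-style example above, assuming the Lucene.Net 3.x
+ Collector surface (SetScorer, Collect, SetNextReader and an AcceptsDocsOutOfOrder member,
+ whose exact shape varies between ports); the bit-set collector itself is illustrative:
+
+     using System.Collections;
+     using Lucene.Net.Index;
+     using Lucene.Net.Search;
+
+     public class BitSetCollector : Collector
+     {
+         private readonly BitArray bits;
+         private int docBase;
+
+         public BitSetCollector(int maxDoc) { bits = new BitArray(maxDoc); }
+
+         public BitArray Bits { get { return bits; } }
+
+         public override void SetScorer(Scorer scorer) { /* scores are not needed */ }
+
+         // Docs may arrive out of order; for a bit set that does not matter.
+         public override bool AcceptsDocsOutOfOrder { get { return true; } }
+
+         public override void Collect(int doc)
+         {
+             bits.Set(doc + docBase, true);   // re-base to the top-level docID space
+         }
+
+         public override void SetNextReader(IndexReader reader, int docBase)
+         {
+             this.docBase = docBase;
+         }
+     }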
+ + A simple hash table of document scores within a range. + + + An alternative to BooleanScorer that also allows a minimum number + of optional scorers that should match. +
Implements skipTo(), and has no limitations on the numbers of added scorers. +
Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. +
+
+ + The scorer to which all scoring will be delegated, + except for computing and using the coordination factor. + + + The number of optionalScorers that need to match (if there are any) + + + Creates a with the given similarity and lists of required, + prohibited and optional scorers. If no required scorers are added, at least + one of the optional scorers will have to match during the search. + + + The similarity to be used. + + The minimum number of optional added scorers that should match + during the search. In case no required scorers are added, at least + one of the optional scorers will have to match during the search. + + the list of required scorers. + + the list of prohibited scorers. + + the list of optional scorers. + + + Returns the scorer to be used for match counting and score summing. + Uses requiredScorers, optionalScorers and prohibitedScorers. + + + + Returns the scorer to be used for match counting and score summing. + Uses the given required scorer and the prohibitedScorers. + + A required scorer already built. + + + + Scores and collects all matching documents. + The collector to which all matching documents are passed through. + + + + A Scorer for OR like queries, counterpart of ConjunctionScorer. + This Scorer implements and uses skipTo() on the given Scorers. + + + + The number of subscorers. + + + The subscorers. + + + The minimum number of scorers that should match. + + + The scorerDocQueue contains all subscorers ordered by their current doc(), + with the minimum at the top. +
The scorerDocQueue is initialized the first time next() or skipTo() is called. +
An exhausted scorer is immediately removed from the scorerDocQueue. +
If less than the minimumNrMatchers scorers + remain in the scorerDocQueue next() and skipTo() return false. +

+ After each call to next() or skipTo(), + currentSumScore is the total score of the current matching doc, + nrMatchers is the number of matching scorers, + and all scorers are after the matching doc, or are exhausted. +

+
+ + The document number of the current match. + + + The number of subscorers that provide the current match. + + + Construct a DisjunctionScorer. + A collection of at least two subscorers. + + The positive minimum number of subscorers that should + match to match this query. +
When minimumNrMatchers is bigger than + the number of subScorers, + no matches will be produced. +
When minimumNrMatchers equals the number of subScorers, + it is more efficient to use ConjunctionScorer. +
+ + Construct a DisjunctionScorer, using one as the minimum number + of matching subscorers. + + + + Called the first time next() or skipTo() is called to + initialize scorerDocQueue. + + + + Scores and collects all matching documents. + The collector to which all matching documents are passed through. + + + Expert: Collects matching documents in a range. Hook for optimization. + Note that must be called once before this method is called + for the first time. + + The collector to which all matching documents are passed through. + + Do not score documents past this. + + + true if more matching documents may remain. + + + + Advance all subscorers after the current document determined by the + top of the scorerDocQueue. + Repeat until at least the minimum number of subscorers match on the same + document and all subscorers are after that document or are exhausted. +
On entry the scorerDocQueue has at least minimumNrMatchers + available. At least the scorer with the minimum document number will be advanced. +
+ true iff there is a match. +
In case there is a match, currentDoc, currentSumScore, + and nrMatchers describe the match. + + TODO: Investigate whether it is possible to use skipTo() when + the minimum number of matchers is bigger than one, ie. try and use the + character of ConjunctionScorer for the minimum number of matchers. + Also delay calling score() on the sub scorers until the minimum number of + matchers is reached. +
For this, a Scorer array with minimumNrMatchers elements might + hold Scorers at currentDoc that are temporarily popped from scorerQueue. +
+
+ + Returns the score of the current document matching the query. + Initially invalid, until is called the first time. + + + + Returns the number of subscorers matching the current document. + Initially invalid, until is called the first time. + + + + Advances to the first match beyond the current whose document number is + greater than or equal to a given target.
+ The implementation uses the skipTo() method on the subscorers. + +
+ The target document number. + + the document whose number is greater than or equal to the given + target, or -1 if none exist. + +
+ + Scorer for conjunctions, sets of queries, all of which are required. + + + Count a scorer as a single match. + + + Wraps another SpanFilter's result and caches it. The purpose is to allow + filters to simply filter, and then wrap with this class to add caching. + + + + Abstract base class providing a mechanism to restrict searches to a subset + of an index and also maintains and returns position information. + This is useful if you want to compare the positions from a SpanQuery with the positions of items in + a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, + and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could + then compare position information for post processing. + + + + Abstract base class for restricting which documents may be returned during searching. + + + + Creates a enumerating the documents that should be + permitted in search results. NOTE: null can be + returned if no documents are accepted by this Filter. +

+ Note: This method will be called once per segment in + the index during searching. The returned + must refer to document IDs for that segment, not for + the top-level reader. +

+ a DocIdSet that provides the documents which should be permitted or + prohibited in search results. NOTE: null can be returned if + no documents will be accepted by this Filter. + + + A instance opened on the index currently + searched on. Note, it is likely that the provided reader does not + represent the whole underlying index i.e. if the index has more than + one segment the given reader only represents a single segment. + + + +
+ + Returns a SpanFilterResult with true for documents which should be permitted in + search results, and false for those that should not and Spans for where the true docs match. + + The to load position and DocIdSet information from + + A + + java.io.IOException if there was an issue accessing the necessary information + + + + + A transient Filter cache (internal because of test) + + + + New deletions always result in a cache miss, by default + (. + Filter to cache results of + + + + + New deletions always result in a cache miss, specify the + Filter to cache results of + See + + + Wraps another filter's result and caches it. The purpose is to allow + filters to simply filter, and then wrap with this class to add caching. + + + + + New deletes are ignored by default, which gives higher + cache hit rate on reopened readers. Most of the time + this is safe, because the filter will be AND'd with a + Query that fully enforces deletions. If instead you + need this filter to always enforce deletions, pass + either or + . + + Filter to cache results of + + + + + Expert: by default, the cached filter will be shared + across reopened segments that only had changes to their + deletions. + + Filter to cache results of + See + + + + Provide the DocIdSet to be cached, using the DocIdSet provided + by the wrapped Filter. + This implementation returns the given DocIdSet. + + + + + Expert: Specifies how new deletions against a reopened + reader should be handled. + + The default is IGNORE, which means the cache entry + will be re-used for a given segment, even when that + segment has been reopened due to changes in deletions. + This is a big performance gain, especially with + near-real-timer readers, since you don't hit a cache + miss on every reopened reader for prior segments. + + However, in some cases this can cause invalid query + results, allowing deleted documents to be returned. + This only happens if the main query does not rule out + deleted documents on its own, such as a toplevel + ConstantScoreQuery. To fix this, use RECACHE to + re-create the cached filter (at a higher per-reopen + cost, but at faster subsequent search performance), or + use DYNAMIC to dynamically intersect deleted docs (fast + reopen time but some hit to search performance). + + + + Abstract decorator class for a DocIdSet implementation + that provides on-demand filtering/validation + mechanism on a given DocIdSet. + +

+ + Technically, this same functionality could be achieved + with ChainedFilter (under contrib/misc), however the + benefit of this class is it never materializes the full + bitset for the filter. Instead, the + method is invoked on-demand, per docID visited during + searching. If you know few docIDs will be visited, and + the logic behind is relatively costly, + this may be a better way to filter than ChainedFilter. + +

+ + +
+ + A DocIdSet contains a set of doc ids. Implementing classes must + only implement to provide access to the set. + + + + An empty instance for easy use, e.g. in Filters that hit no documents. + + + Provides a to access the set. + This implementation can return null or + EMPTY_DOCIDSET.Iterator() if there + are no docs that match. + + + + This method is a hint for , if this DocIdSet + should be cached without copying it into a BitSet. The default is to return + false. If you have an own DocIdSet implementation + that does its iteration very effective and fast without doing disk I/O, + override this method and return true. + + + + Constructor. + Underlying DocIdSet + + + + Validation method to determine whether a docid should be in the result set. + docid to be tested + + true if input docid should be in the result set, false otherwise. + + + + Implementation of the contract to build a DocIdSetIterator. + + + + + + + This DocIdSet implementation is cacheable if the inner set is cacheable. + + + Abstract decorator class of a DocIdSetIterator + implementation that provides on-demand filter/validation + mechanism on an underlying DocIdSetIterator. See + . + + + + Constructor. + Underlying DocIdSetIterator. + + + + Validation method to determine whether a docid should be in the result set. + docid to be tested + + true if input docid should be in the result set, false otherwise. + + + + + + Expert: Describes the score computation for document and query, and + can distinguish a match independent of a positive value. + + + + The match status of this explanation node. + May be null if match status is unknown + + + + Indicates whether or not this Explanation models a good match. + +

+ If the match status is explicitly set (i.e.: not null) this method + uses it; otherwise it defers to the superclass. +

+

+
+ + A query that wraps a filter and simply returns a constant score equal to the + query boost for every document in the filter. + + + + Prints a user-readable version of this query. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Returns the encapsulated filter + + + Expert: Default scoring implementation. + + + Implemented as + state.getBoost()*lengthNorm(numTerms), where + numTerms is if + is false, else it's + - + . + +

WARNING: This API is new and experimental, and may suddenly + change.

+

+
+ + Implemented as 1/sqrt(numTerms). + + + Implemented as 1/sqrt(sumOfSquaredWeights). + + + Implemented as sqrt(freq). + + + Implemented as 1 / (distance + 1). + + + Implemented as log(numDocs/(docFreq+1)) + 1. + + + Implemented as overlap / maxOverlap. + + + + + + + Determines whether overlap tokens (Tokens with + 0 position increment) are ignored when computing + norm. By default this is false, meaning overlap + tokens are counted just like non-overlap tokens. + +

WARNING: This API is new and experimental, and may suddenly + change.

+ +

+ + +
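+ The default formulas above can be summarized in a small stand-alone C# sketch
+ (the names are illustrative; this is not the library class itself):
+
+     using System;
+
+     static class DefaultSimilaritySketch
+     {
+         public static float LengthNorm(int numTerms)             { return (float)(1.0 / Math.Sqrt(numTerms)); }
+         public static float QueryNorm(float sumOfSquaredWeights) { return (float)(1.0 / Math.Sqrt(sumOfSquaredWeights)); }
+         public static float Tf(float freq)                       { return (float)Math.Sqrt(freq); }
+         public static float SloppyFreq(int distance)             { return 1.0f / (distance + 1); }
+         public static float Idf(int docFreq, int numDocs)        { return (float)(Math.Log((double)numDocs / (docFreq + 1)) + 1.0); }
+         public static float Coord(int overlap, int maxOverlap)   { return (float)overlap / maxOverlap; }
+
+         static void Main()
+         {
+             // A term occurring 4 times in a 100-term field, in an index of 1000 docs
+             // where 10 docs contain the term.
+             Console.WriteLine(Tf(4) * Idf(10, 1000) * LengthNorm(100));
+         }
+     }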
+ + A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum + score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. + This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be + combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost, + not the sum of the field scores (as BooleanQuery would give). + If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching + another gets a higher score than "albino" matching both fields. + To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in + each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery. + The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that + include this term in only the best of those multiple fields, without confusing this with the better case of two different terms + in the multiple fields. + + + + Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. + the score of each non-maximum disjunct for a document is multiplied by this weight + and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that + 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique + word in the lower scored field (i.e., one that is not in any higher scored field. + + + + Creates a new DisjunctionMaxQuery + a Collection<Query> of all the disjuncts to add + + the weight to give to each matching non-maximum disjunct + + + + Add a subquery to this disjunction + the disjunct added + + + + Add a collection of disjuncts to this disjunction + via Iterable + + + + An Iterator<Query> over the disjuncts + + + Optimize our representation and our subqueries representations + the IndexReader we query + + an optimized copy of us (which may not be a copy if there is nothing to optimize) + + + + Create a shallow copy of us -- used in rewriting if necessary + a copy of us (but reuse, don't copy, our subqueries) + + + + Prettyprint us. + the field to which we are applied + + a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" + + + + Return true iff we represent the same query as o + another object + + true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us + + + + Compute a hash code for hashing us + the hash code + + + + Expert: the Weight for DisjunctionMaxQuery, used to + normalize, score and explain these queries. + +
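+ A hedged sketch of the "albino elephant" construction described above, assuming
+ Lucene.Net 3.x-style names (Term, TermQuery, BooleanQuery, Occur, DisjunctionMaxQuery);
+ the field names are illustrative:
+
+     using Lucene.Net.Index;
+     using Lucene.Net.Search;
+
+     static class DisMaxExample
+     {
+         public static Query Build()
+         {
+             var query = new BooleanQuery();
+             foreach (var word in new[] { "albino", "elephant" })
+             {
+                 // One DisjunctionMaxQuery per word across both fields, so a word matching
+                 // in several fields scores by its best field rather than by the sum.
+                 var perWord = new DisjunctionMaxQuery(0.1f);   // small tie-breaker increment
+                 perWord.Add(new TermQuery(new Term("title", word)));
+                 perWord.Add(new TermQuery(new Term("body", word)));
+                 query.Add(perWord, Occur.SHOULD);
+             }
+             return query;
+         }
+     }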

NOTE: this API and implementation is subject to + change suddenly in the next release.

+

+
+ + The Similarity implementation. + + + The Weights for our subqueries, in 1-1 correspondence with disjuncts + + + The Scorer for DisjunctionMaxQuery's. The union of all documents generated by the subquery scorers + is generated in document number order. The score for each document is the maximum of the scores computed + by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores + for the other subqueries that generate the document. + + + + Creates a new instance of DisjunctionMaxScorer + + + Multiplier applied to non-maximum-scoring subqueries for a + document as they are summed into the result. + + -- not used since our definition involves neither coord nor terms + directly + + The sub scorers this Scorer should iterate on + + The actual number of scorers to iterate on. Note that the array's + length may be larger than the actual number of scorers. + + + + Determine the current document score. Initially invalid, until is called the first time. + the score of the current generated document + + + + Expert: Scoring functionality for phrase queries. +
A document is considered matching if it contains the phrase-query terms + at "valid" positions. What "valid positions" are + depends on the type of the phrase query: for an exact phrase query terms are required + to appear in adjacent locations, while for a sloppy phrase query some distance between + the terms is allowed. The abstract method of extending classes + is invoked for each document containing all the phrase query terms, in order to + compute the frequency of the phrase query in that document. A non-zero frequency + means a match. +
+
+ + + Phrase frequency in current doc as computed by PhraseFreq() + + + + + For a document containing all the phrase query terms, compute the + frequency of the phrase in that document. + A non zero frequency means a match. +
Note, that containing all phrase terms does not guarantee a match - they have to be found in matching locations. +
+ frequency of the phrase in current doc, 0 if not found. + +
+ + Expert: Maintains caches of term values. + +

Created: May 19, 2004 11:13:14 AM + +

+ lucene 1.4 + + $Id: FieldCache.java 807841 2009-08-25 22:27:31Z markrmiller $ + + + +
+ + Expert: Stores term text values and document ordering data. + + + All the term values, in natural order. + + + For each document, an index into the lookup array. + + + Creates one of these objects + + + EXPERT: A unique Identifier/Description for each item in the FieldCache. + Can be useful for logging/debugging. +

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
+ + + + + + Computes (and stores) the estimated size of the cache Value + + + + + The most recently estimated size of the value, null unless + estimateSize has been called. + + + + Indicator for StringIndex values in the cache. + + + Expert: The cache used internally by sorting and range query classes. + + + The default parser for byte values, which are encoded by + + + The default parser for short values, which are encoded by + + + The default parser for int values, which are encoded by + + + The default parser for float values, which are encoded by + + + The default parser for long values, which are encoded by + + + The default parser for double values, which are encoded by + + + A parser instance for int values encoded by , e.g. when indexed + via /. + + + + A parser instance for float values encoded with , e.g. when indexed + via /. + + + + A parser instance for long values encoded by , e.g. when indexed + via /. + + + + A parser instance for double values encoded with , e.g. when indexed + via /. + + + + Interface to parse bytes from document fields. + + + + + Marker interface as super-interface to all parsers. It + is used to specify a custom parser to . + + + + Return a single Byte representation of this field's value. + + + Interface to parse shorts from document fields. + + + + + Return a short representation of this field's value. + + + Interface to parse ints from document fields. + + + + + Return an integer representation of this field's value. + + + Interface to parse floats from document fields. + + + + + Return an float representation of this field's value. + + + Interface to parse long from document fields. + + + Use , this will be removed in Lucene 3.0 + + + + Return an long representation of this field's value. + + + Interface to parse doubles from document fields. + + + Use , this will be removed in Lucene 3.0 + + + + Return an long representation of this field's value. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as a single byte and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the single byte values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as bytes and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + Used to get field values. + + Which field contains the bytes. + + Computes byte for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as shorts and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the shorts. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as shorts and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + Used to get field values. + + Which field contains the shorts. + + Computes short for string values. + + The values in the given field for each document. + + IOException If any error occurs. 
+ + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as integers and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the integers. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as integers and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + Used to get field values. + + Which field contains the integers. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if + none is found, reads the terms in field as floats and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the floats. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if + none is found, reads the terms in field as floats and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the floats. + + Computes float for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as longs and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + + Used to get field values. + + Which field contains the longs. + + The values in the given field for each document. + + java.io.IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as longs and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + + Used to get field values. + + Which field contains the longs. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as integers and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + + Used to get field values. + + Which field contains the doubles. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as doubles and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + + Used to get field values. + + Which field contains the doubles. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none + is found, reads the term values in field and returns an array + of size reader.MaxDoc containing the value each document + has in the given field. + + Used to get field values. + + Which field contains the strings. + + The values in the given field for each document. + + IOException If any error occurs. 
+ + + Checks the internal cache for an appropriate entry, and if none + is found reads the term values in field and returns + an array of them in natural order, along with an array telling + which element in the term array each document uses. + + Used to get field values. + + Which field contains the strings. + + Array of terms and index into the array for each document. + + IOException If any error occurs. + + + EXPERT: Generates an array of CacheEntry objects representing all items + currently in the FieldCache. +

+ NOTE: These CacheEntry objects maintain a strong reference to the + Cached Values. Maintaining references to a CacheEntry after the IndexReader + associated with it has been garbage collected will prevent the Value itself + from being garbage collected when the Cache drops the WeakReference. +

+

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
+ +

+ EXPERT: Instructs the FieldCache to forcibly expunge all entries + from the underlying caches. This is intended only to be used for + test methods as a way to ensure a known base state of the Cache + (without needing to rely on GC to free WeakReferences). + It should not be relied on for "Cache maintenance" in general + application code. +

+

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
+ + + Expert: drops all cache entries associated with this + reader. NOTE: this reader must precisely match the + reader that the cache entry is keyed on. If you pass a + top-level reader, it usually will have no effect as + Lucene now caches at the segment reader level. + + + + Gets or sets the InfoStream for this FieldCache. + If non-null, FieldCacheImpl will warn whenever + entries are created that are not sane according to + . + + + + + Expert: The default cache implementation, storing all values in memory. + A WeakDictionary is used for storage. + +

Created: May 19, 2004 4:40:36 PM + +

+ lucene 1.4 + +
+ + Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops + processing terms and returns the current FieldCache + array. + + + + Expert: Internal cache. + + + Expert: Every composite-key in the internal cache is of this type. + + + Creates one of these objects for a custom comparator/parser. + + + Two of these are equal iff they reference the same field and type. + + + Composes a hashcode based on the field and type. + + + A range filter built on top of a cached single term field (in ). + +

builds a single cache for the field the first time it is used. + Each subsequent on the same field then reuses this cache, + even if the range itself changes. + +

This means that is much faster (sometimes more than 100x as fast) + than building a if using a . However, if the range never changes it + is slower (around 2x as slow) than building a CachingWrapperFilter on top of a single . + + For numeric data types, this filter may be significantly faster than . + Furthermore, it does not need the numeric values encoded by . But + it has the problem that it only works with exactly one value per document (see below). + +

As with all based functionality, is only valid for + fields which contain exactly one term for each document (except for + where 0 terms are also allowed). Due to a restriction of , for numeric ranges + a value of 0 is assumed for all terms that do not have a numeric value. + +

Thus it works on dates, prices and other single value fields but will not work on + regular text fields. It is preferable to use a NOT_ANALYZED field to ensure that + there is only a single term. + +

This class does not have a constructor; use one of the static factory methods available, + which create a correct instance for the different data types supported by . +

+
+ + Creates a string range filter using . This works with all + fields containing zero or one term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range filter using . This works with all + byte fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range filter using . This works with all + byte fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + short fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + short fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + int fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + int fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + long fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + long fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + float fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + float fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + double fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + double fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + This method is implemented for each data type + + + + Returns the field name for this filter + + + + + Returns true if the lower endpoint is inclusive + + + + + Returns true if the upper endpoint is inclusive + + + + + Returns the lower value of the range filter + + + + + Returns the upper value of this range filter + + + + this method checks, if a doc is a hit, should throw AIOBE, when position invalid + + + this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs + + + A that only accepts documents whose single + term value in the specified field is contained in the + provided set of allowed terms. + +

+ + This is the same functionality as TermsFilter (from + contrib/queries), except this filter requires that the + field contains only a single term for all documents. + Because of drastically different implementations, they + also have different performance characteristics, as + described below. + +

+ + The first invocation of this filter on a given field will + be slower, since a must be + created. Subsequent invocations using the same field + will re-use this cache. However, as with all + functionality based on , persistent RAM + is consumed to hold the cache, and is not freed until the + is closed. In contrast, TermsFilter + has no persistent RAM consumption. + + +

+ + With each search, this filter translates the specified + set of Terms into a private keyed by + term number per unique (normally one + reader per segment). Then, during matching, the term + number for each docID is retrieved from the cache and + then checked for inclusion using the . + Since all testing is done using RAM resident data + structures, performance should be very fast, most likely + fast enough to not require further caching of the + DocIdSet for each possible combination of terms. + However, because docIDs are simply scanned linearly, an + index with a great many small documents may find this + linear scan too costly. + +

+ + In contrast, TermsFilter builds up an , + keyed by docID, every time it's created, by enumerating + through all matching docs using to seek + and scan through each term's docID list. While there is + no linear scan of all docIDs, besides the allocation of + the underlying array in the , this + approach requires a number of "disk seeks" in proportion + to the number of terms, which can be exceptionally costly + when there are cache misses in the OS's IO cache. + +

+ + Generally, this filter will be slower on the first + invocation for a given field, but subsequent invocations, + even if you change the allowed set of Terms, should be + faster than TermsFilter, especially as the number of + Terms being matched increases. If you are matching only + a very small number of terms, and those terms in turn + match a very small number of documents, TermsFilter may + perform faster. + +

+ + Which filter is best is very application dependent. +

+
+ + This DocIdSet implementation is cacheable. + + + Expert: a FieldComparator compares hits so as to determine their + sort order when collecting the top results with + . The concrete public FieldComparator + classes here correspond to the SortField types. + +

This API is designed to achieve high performance + sorting, by exposing a tight interaction with + as it visits hits. Whenever a hit is + competitive, it's enrolled into a virtual slot, which is + an int ranging from 0 to numHits-1. The + is made aware of segment transitions + during searching in case any internal state it's tracking + needs to be recomputed during these transitions.

+ +

A comparator must define these functions:

+ + + + Compare a hit at 'slot a' + with hit 'slot b'. + + This method is called by + to notify the + FieldComparator of the current weakest ("bottom") + slot. Note that this slot may not hold the weakest + value according to your comparator, in cases where + your comparator is not the primary one (ie, is only + used to break ties from the comparators before it). + + Compare a new hit (docID) + against the "weakest" (bottom) entry in the queue. + + Installs a new hit into the + priority queue. The + calls this method when a new hit is competitive. + + Invoked + when the search is switching to the next segment. + You may need to update internal state of the + comparator, for example retrieving new values from + the . + + Return the sort value stored in + the specified slot. This is only called at the end + of the search, in order to populate + when returning the top results. + + + NOTE: This API is experimental and might change in + incompatible ways in the next release. +

+
+ + Compare hit at slot1 with hit at slot2. + + + first slot to compare + + second slot to compare + + any N < 0 if slot2's value is sorted after + slot1, any N > 0 if the slot2's value is sorted before + slot1 and 0 if they are equal + + + + Set the bottom slot, ie the "weakest" (sorted last) + entry in the queue. When is + called, you should compare against this slot. This + will always be called before . + + + the currently weakest (sorted last) slot in the queue + + + + Compare the bottom of the queue with doc. This will + only invoked after setBottom has been called. This + should return the same result as + } as if bottom were slot1 and the new + document were slot 2. + +

For a search that hits many results, this method + will be the hotspot (invoked by far the most + frequently).

+ +

+ that was hit + + any N < 0 if the doc's value is sorted after + the bottom entry (not competitive), any N > 0 if the + doc's value is sorted before the bottom entry and 0 if + they are equal. + +
+ + This method is called when a new hit is competitive. + You should copy any state associated with this document + that will be required for future comparisons, into the + specified slot. + + + which slot to copy the hit to + + docID relative to current reader + + + + Set a new Reader. All doc correspond to the current Reader. + + + current reader + + docBase of this reader + + IOException + IOException + + + Sets the Scorer to use in case a document's score is + needed. + + + Scorer instance that you should use to + obtain the current hit's score, if necessary. + + + + Return the actual value in the slot. + + + the value + + value in this slot upgraded to Comparable + + + + Parses field's values as byte (using + and sorts by ascending value + + + + Sorts by ascending docID + + + Parses field's values as double (using + and sorts by ascending value + + + + Parses field's values as float (using + and sorts by ascending value + + + + Parses field's values as int (using + and sorts by ascending value + + + + Parses field's values as long (using + and sorts by ascending value + + + + Sorts by descending relevance. NOTE: if you are + sorting only by descending relevance and then + secondarily by ascending docID, peformance is faster + using directly (which + uses when no is + specified). + + + + Parses field's values as short (using ) + and sorts by ascending value + + + + Sorts by a field's value using the Collator for a + given Locale. + + + + Sorts by field's natural String sort order, using + ordinals. This is functionally equivalent to + , but it first resolves the string + to their relative ordinal positions (using the index + returned by ), and + does most comparisons using the ordinals. For medium + to large results, this comparator will be much faster + than . For very small + result sets it may be slower. + + + + Sorts by field's natural String sort order. All + comparisons are done using String.compareTo, which is + slow for medium to large result sets but possibly + very fast for very small results sets. + + + + Provides a for custom field sorting. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + + + Creates a comparator for the field in the given index. + + + Name of the field to create comparator for. + + FieldComparator. + + IOException + If an error occurs reading the index. + + + + Expert: A ScoreDoc which also contains information about + how to sort the referenced document. In addition to the + document number and score, this object contains an array + of values for the document from the field(s) used to sort. + For example, if the sort criteria was to sort by fields + "a", "b" then "c", the fields object array + will have three elements, corresponding respectively to + the term values for the document in fields "a", "b" and "c". + The class of each element in the array will be either + Integer, Float or String depending on the type of values + in the terms of each field. + +

Created: Feb 11, 2004 1:23:38 PM + +

+ + +
+ + Expert: Returned by low-level search implementations. + + + + + Expert: Constructs a ScoreDoc. + + + Expert: The score of this document for the query. + + + Expert: A hit document's number. + + + + + Expert: The values which are used to sort the referenced document. + The order of these will match the original sort criteria given by a + Sort object. Each Object will be either an Integer, Float or String, + depending on the type of values in the terms of the original field. + + + + + + + + Expert: Creates one of these objects with empty sort information. + + + Expert: Creates one of these objects with the given sort information. + + + Expert: Collects sorted results from Searchable's and collates them. + The elements put into this queue must be of type FieldDoc. + +

Created: Feb 11, 2004 2:04:21 PM + +

+ lucene 1.4 + +
+ + Creates a hit queue sorted by the given list of fields. + The number of hits to retain. Must be greater than zero. + + + Allows redefinition of sort fields if they are null. + This is to handle the case using ParallelMultiSearcher where the + original list contains AUTO and we don't know the actual sort + type until the values come back. The fields can only be set once. + This method is thread safe. + + + + + Returns the fields being used to sort. + + + Returns an array of collators, possibly null. The collators + correspond to any SortFields which were given a specific locale. + + Array of sort fields. + Array, possibly null. + + + Returns whether a is less relevant than b. + ScoreDoc + ScoreDoc + true if document a should be sorted after document b. + + + Expert: A hit queue for sorting by hits by terms in more than one field. + Uses FieldCache.DEFAULT for maintaining + internal term lookup tables. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + + + + + Creates a hit queue sorted by the given list of fields. + +

NOTE: The instances returned by this method + pre-allocate a full array of length numHits. + +

+ SortField array we are sorting by in priority order (highest + priority first); cannot be null or empty + + The number of hits to retain. Must be greater than zero. + + IOException +
+ + Stores the sort criteria being used. + + + Given a queue Entry, creates a corresponding FieldDoc + that contains the values used to sort the given document. + These values are not the raw values out of the index, but the internal + representation of them. This is so the given search hit can be collated by + a MultiSearcher with other search hits. + + + The Entry used to create a FieldDoc + + The newly created FieldDoc + + + + + + Returns the SortFields being used by this hit queue. + + + An implementation of which is optimized in case + there is just one comparator. + + + + Returns whether a is less relevant than b. + ScoreDoc + ScoreDoc + true if document a should be sorted after document b. + + + An implementation of which is optimized in case + there is more than one comparator. + + + + A query that applies a filter to the results of another query. + +

Note: the bits are retrieved from the filter each time this + query is used in a search - use a CachingWrapperFilter to avoid + regenerating the bits every time. + +

Created: Apr 20, 2004 8:58:29 AM + +

+ 1.4 + +
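+ 
+ A minimal sketch of the pattern described above (Lucene.Net API names are assumed; the field names
+ and the minTicks/maxTicks bounds are hypothetical), using the CachingWrapperFilter suggested in the
+ note so the filter bits are not regenerated on every search:
+ 
+     Filter dateFilter = new CachingWrapperFilter(
+         NumericRangeFilter.NewLongRange("timestamp", minTicks, maxTicks, true, true));
+     Query filtered = new FilteredQuery(new TermQuery(new Term("body", "lucene")), dateFilter);
+ 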
+ + Constructs a new query which applies a filter to the results of the original query. + Filter.getDocIdSet() will be called every time this query is used in a search. + + Query to be filtered, cannot be null. + + Filter to apply to query results, cannot be null. + + + + Returns a Weight that applies the filter to the enclosed query's Weight. + This is accomplished by overriding the Scorer returned by the Weight. + + + + Rewrites the wrapped query. + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Abstract class for enumerating a subset of all terms. +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + the current term + + + the delegate enum - to set this member use + + + Equality compare on the term + + + Equality measure on the term + + + Indicates the end of the enumeration has been reached + + + use this method to set the actual TermEnum (e.g. in ctor), + it will be automatically positioned on the first matching term. + + + + Returns the docFreq of the current Term in the enumeration. + Returns -1 if no Term matches or all terms have been enumerated. + + + + Increments the enumeration to the next element. True if one exists. + + + Returns the current Term in the enumeration. + Returns null if no Term matches or all terms have been enumerated. + + + + Filter caching singleton. It can be used + to save filters locally for reuse. + This class makes it possble to cache Filters even when using RMI, as it + keeps the cache on the seaercher side of the RMI connection. + + Also could be used as a persistent storage for any filter as long as the + filter provides a proper hashCode(), as that is used as the key in the cache. + + The cache is periodically cleaned up from a separate thread to ensure the + cache doesn't exceed the maximum size. + + + + The default maximum number of Filters in the cache + + + The default frequency of cache clenup + + + The cache itself + + + Maximum allowed cache size + + + Cache cleaning frequency + + + Cache cleaner that runs in a separate thread + + + Sets up the FilterManager singleton. + + + Sets the max size that cache should reach before it is cleaned up + maximum allowed cache size + + + Sets the cache cleaning frequency in milliseconds. + cleaning frequency in millioseconds + + + Returns the cached version of the filter. Allows the caller to pass up + a small filter but this will keep a persistent version around and allow + the caching filter to do its job. + + + The input filter + + The cached version of the filter + + + + Holds the filter and the last time the filter was used, to make LRU-based + cache cleaning possible. + TODO: Clean this up when we switch to Java 1.5 + + + + Keeps the cache from getting too big. + If we were using Java 1.5, we could use LinkedHashMap and we would not need this thread + to clean out the cache. + + The SortedSet sortedFilterItems is used only to sort the items from the cache, + so when it's time to clean up we have the TreeSet sort the FilterItems by + timestamp. + + Removes 1.5 * the numbers of items to make the cache smaller. + For example: + If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 round up to 8. + This way we clean the cache a bit more, and avoid having the cache cleaner having to do it frequently. + + + + Expert: obtains single byte field values from the + FieldCache + using getBytes() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements" + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+ + + +

Expert: A base class for ValueSource implementations that retrieve values for + a single field from the FieldCache. +

+ Fields used herein must be indexed (it does not matter whether these fields are stored or not).

+ It is assumed that each such indexed field is untokenized, or at least has a single token in a document. + For documents with multiple tokens of the same field, behavior is undefined (It is likely that current + code would use the value of one of these tokens, but this is not guaranteed). +

+ Documents with no tokens in this field are assigned the Zero value.

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+

+
+ + Expert: source of values for basic function queries. +

At its default/simplest form, values - one per doc - are used as the score of that doc. +

Values are instantiated as + DocValues for a particular reader. +

ValueSource implementations differ in RAM requirements: it would always be a factor + of the number of documents, but for each document the number of bytes can be 1, 2, 4, or 8. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + + +

+
+ + Return the DocValues used by the function query. + the IndexReader used to read these values. + If any caching is involved, that caching would also be IndexReader based. + + IOException for any error. + + + description of field, used in explain() + + + Needed for possible caching of query results - used by . + + + + + Needed for possible caching of query results - used by . + + + + + Create a cached field source for the input field. + + + Return cached DocValues for input field and reader. + FieldCache so that values of a field are loaded once per reader (RAM allowing) + + Field for which values are required. + + + + + + Check if equals to another , already knowing that cache and field are equal. + + + + + Return a hash code of a , without the hash-codes of the field + and the cache (those are taken care of elsewhere). + + + + + + Create a cached byte field source with default string-to-byte parser. + + + Create a cached byte field source with a specific string-to-byte parser. + + + Expert: represents field values as different types. + Normally created via a + ValueSuorce + for a particular field and reader. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + + +

+
+ + Return doc value as a float. +

Mandatory: every DocValues implementation must implement at least this method. +

+ document whose float value is requested. + +
+ + Return doc value as an int. +

Optional: DocValues implementations can (but don't have to) override this method.

+ document whose int value is requested. + +
+ + Return doc value as a long. +

Optional: DocValues implementations can (but don't have to) override this method.

+ document whose long value is requested. + +
+ + Return doc value as a double. +

Optional: DocValues implementations can (but don't have to) override this method.

+ document whose double value is requested. + +
+ + Return doc value as a string. +

Optional: DocValues implementations can (but don't have to) override this method.

+ document whose string value is requested. + +
+ 
+ Return a string representation of a doc value, as required for Explanations.
+ 
+ Explain the scoring value for the input doc.
+ 
+ Returns the minimum of all values or Float.NaN if this
+ DocValues instance does not contain any value.

+ This operation is optional +

+ +

+ the minimum of all values or Float.NaN if this + DocValues instance does not contain any value. + +
+ + Returns the maximum of all values or Float.NaN if this + DocValues instance does not contain any value. +

+ This operation is optional +

+ +

+ the maximum of all values or Float.NaN if this + DocValues instance does not contain any value. + +
+ + Returns the average of all values or Float.NaN if this + DocValues instance does not contain any value. * +

+ This operation is optional +

+ +

+ the average of all values or Float.NaN if this + DocValues instance does not contain any value + +
+ + Expert: for test purposes only, return the inner array of values, or null if not applicable. +

+ Allows tests to verify that loaded values are:
+ 
+ indeed cached/reused.
+ stored in the expected size/type (byte/short/int/float).
+ 
+ Note: implementations of DocValues must override this method for
+ these test elements to be tested; otherwise the test would not fail, just
+ print a warning.

+
+ + + An instance of this subclass should be returned by + , if you want + to modify the custom score calculation of a . + Since Lucene 2.9, queries operate on each segment of an Index separately, + so overriding the similar (now deprecated) methods in + is no longer suitable, as the supplied doc ID is per-segment + and without knowledge of the IndexReader you cannot access the + document or . + + @lucene.experimental + @since 2.9.2 + + + + + Creates a new instance of the provider class for the given IndexReader. + + + + + * Compute a custom score by the subQuery score and a number of + ValueSourceQuery scores. +

+ Subclasses can override this method to modify the custom score. +

+ If your custom scoring is different than the default herein you + should override at least one of the two customScore() methods. + If the number of ValueSourceQueries is always < 2 it is + sufficient to override the other + CustomScore() + method, which is simpler. +

+ The default computation herein is a multiplication of given scores: +

+                ModifiedScore = subQueryScore * valSrcScores[0] * valSrcScores[1] * ...
+            
+
+ id of scored doc + score of that doc by the subQuery + scores of that doc by the ValueSourceQuery + custom score +
+ + + Compute a custom score by the subQuery score and the ValueSourceQuery score. +

+ Subclasses can override this method to modify the custom score. +

+ If your custom scoring is different than the default herein you + should override at least one of the two customScore() methods. + If the number of ValueSourceQueries is always < 2 it is + sufficient to override this customScore() method, which is simpler. +

+ The default computation herein is a multiplication of the two scores: +

+                ModifiedScore = subQueryScore * valSrcScore
+            
+
+ id of scored doc + score of that doc by the subQuery + score of that doc by the ValueSourceQuery + custom score +
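+ 
+ A sketch of overriding the single-score variant described above (Lucene.Net 2.9+ API names are
+ assumed, with the usual Lucene.Net.Index and Lucene.Net.Search.Function namespaces imported; the
+ logarithmic dampening is purely illustrative):
+ 
+     public class LogBoostProvider : CustomScoreProvider
+     {
+         public LogBoostProvider(IndexReader reader) : base(reader) { }
+ 
+         public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
+         {
+             // the default would be subQueryScore * valSrcScore; dampen the field value instead
+             return subQueryScore * (1.0f + (float) System.Math.Log(1.0 + valSrcScore));
+         }
+     }
+ 
+ The provider is hooked in by overriding the factory method on a CustomScoreQuery subclass
+ (described further below) that returns a CustomScoreProvider for a given IndexReader.
+ 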
+ + + Explain the custom score. + Whenever overriding , + this method should also be overridden to provide the correct explanation + for the part of the custom scoring. + + doc being explained + explanation for the sub-query part + explanation for the value source part + an explanation for the custom score + + + + Explain the custom score. + Whenever overriding , + this method should also be overridden to provide the correct explanation + for the part of the custom scoring. + + + doc being explained + explanation for the sub-query part + explanation for the value source part + an explanation for the custom score + + + Query that sets document score as a programmatic function of several (sub) scores: + + the score of its subQuery (any query) + (optional) the score of its ValueSourceQuery (or queries). + For most simple/convenient use cases this query is likely to be a + FieldScoreQuery + + Subclasses can modify the computation by overriding . + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
+ + Create a CustomScoreQuery over input subQuery. + the sub query whose scored is being customed. Must not be null. + + + + Create a CustomScoreQuery over input subQuery and a . + the sub query whose score is being customed. Must not be null. + + a value source query whose scores are used in the custom score + computation. For most simple/convineient use case this would be a + FieldScoreQuery. + This parameter is optional - it can be null or even an empty array. + + + + Create a CustomScoreQuery over input subQuery and a . + the sub query whose score is being customized. Must not be null. + + value source queries whose scores are used in the custom score + computation. For most simple/convenient use case these would be + FieldScoreQueries. + This parameter is optional - it can be null or even an empty array. + + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + + Returns a that calculates the custom scores + for the given . The default implementation returns a default + implementation as specified in the docs of . + + + + + Compute a custom score by the subQuery score and a number of + ValueSourceQuery scores. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override and return a subclass + of for the given . + see CustomScoreProvider#customScore(int,float,float[]) + + + + Compute a custom score by the subQuery score and the ValueSourceQuery score. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override and return a subclass + of for the given . + + + + + Explain the custom score. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override and return a subclass + of for the given . + + + + Explain the custom score. + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override and return a subclass + of for the given . + + + + Checks if this is strict custom scoring. + In strict custom scoring, the ValueSource part does not participate in weight normalization. + This may be useful when one wants full control over how scores are modified, and does + not care about normalizing by the ValueSource part. + One particular case where this is useful if for testing this query. +

+ Note: only has effect when the ValueSource part is not null. +

+
+ + Set the strict mode of this query. + The strict mode to set. + + + + + + A short name of this query, used in . + + + A scorer that applies a (callback) function on scores of the subQuery. + + + A query that scores each document as the value of the numeric input field. +

+ The query matches all documents, and scores each document according to the numeric + value of that field. +

+ It is assumed, and expected, that: + + The field used here is indexed, and has exactly + one token in every scored document. + Best if this field is un_tokenized. + That token is parsable to the selected type. + +

+ Combining this query in a FunctionQuery allows much freedom in affecting document scores.
+ Note that with this freedom comes responsibility: it is more than likely that the
+ default Lucene scoring is superior in quality to scoring modified as explained here.
+ However, in some cases, and certainly for research experiments, this capability may turn out to be useful.

+ When constructing this query, select the appropriate type. That type should match the data stored in the
+ field. So in fact the "right" type should be selected before indexing. Type selection
+ has an effect on the RAM usage:
+ 
+ consumes 1 * maxDocs bytes.
+ consumes 2 * maxDocs bytes.
+ consumes 4 * maxDocs bytes.
+ consumes 8 * maxDocs bytes.
+ 

+ Caching: + Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader. + To take advantage of this, it is extremely important to reuse index-readers or index-searchers, + otherwise, for instance if for each query a new index reader is opened, large penalties would be + paid for loading the field values into memory over and over again! + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
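+ 
+ For example (a sketch; the "popularity" field is hypothetical and the type constant name is assumed
+ from the Lucene.Net port), scoring every document by an un-tokenized numeric int field:
+ 
+     Query scoreByPopularity = new FieldScoreQuery("popularity", FieldScoreQuery.Type.INT);
+ 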
+ + Expert: A Query that sets the scores of document to the + values obtained from a ValueSource. +

+ This query provides a score for each and every undeleted document in the index. +

+ The value source can be based on a (cached) value of an indexed field, but it + can also be based on an external source, e.g. values read from an external database. +

+ Score is set as: Score(doc,query) = query.getBoost()^2 * valueSource(doc).

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
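+ 
+ As a sketch of a non-FieldScoreQuery value source (Lucene.Net names are assumed; "category" is a
+ hypothetical indexed, un-tokenized field), an ordinal-based source can be wrapped directly:
+ 
+     Query byOrdinal = new ValueSourceQuery(new OrdFieldSource("category"));
+ 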
+ + Create a value source query + provides the values defines the function to be used for scoring + + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + A scorer that (simply) matches all documents, and scores each document with + the value of the value soure in effect. As an example, if the value source + is a (cached) field source, then value of that field in that document will + be used. (assuming field is indexed for this doc, with a single token.) + + + + Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field. +

+ The type param tells how to parse the field string values into a numeric score value. +

+ the numeric field to be used. + + the type of the field: either + , , , or . + +
+ + Type of score field, indicating how field values are interpreted/parsed. +

The type selected at search time should match the data stored in the field.
+ Different types have different RAM requirements:
+ 
+ consumes 1 * maxDocs bytes.
+ consumes 2 * maxDocs bytes.
+ consumes 4 * maxDocs bytes.
+ consumes 8 * maxDocs bytes.
+ 

+
+ + field values are interpreted as numeric byte values. + + + field values are interpreted as numeric short values. + + + field values are interpreted as numeric int values. + + + field values are interpreted as numeric float values. + + + Expert: obtains float field values from the + FieldCache + using getFloats() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements" + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+ + + +

Create a cached float field source with default string-to-float parser. +
+ + Create a cached float field source with a specific string-to-float parser. + + + Expert: obtains int field values from the + FieldCache + using getInts() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+ + + +

Create a cached int field source with default string-to-int parser. +
+ + Create a cached int field source with a specific string-to-int parser. + + + Expert: obtains the ordinal of the field value from the default Lucene + Fieldcache using getStringIndex(). +

+ The native lucene index order is used to assign an ordinal value for each field value. +

+ Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. +

+ Example: +
If there were only three field values: "apple","banana","pear" +
then ord("apple")=1, ord("banana")=2, ord("pear")=3 +

+ WARNING: + ord() depends on the position in an index and can thus change + when other documents are inserted or deleted, + or if a MultiSearcher is used. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+

+
+ + Constructor for a certain field. + field whose values order is used. + + + + Expert: obtains the ordinal of the field value from the default Lucene + FieldCache using getStringIndex() + and reverses the order. +

+ The native lucene index order is used to assign an ordinal value for each field value. +

+ Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. +
+ Example of reverse ordinal (rord): +
If there were only three field values: "apple","banana","pear" +
then rord("apple")=3, rord("banana")=2, rord("pear")=1

+ WARNING: + rord() depends on the position in an index and can thus change + when other documents are inserted or deleted, + or if a MultiSearcher is used. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+

+
+ 
+ Constructor for a certain field.
+ field whose values reverse order is used.
+ 
+ 
+ 
+ Expert: obtains short field values from the
+ FieldCache
+ using getShorts() and makes those values
+ available as other numeric types, casting as needed.
+ 

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+ + + +

Create a cached short field source with default string-to-short parser. +
+ + Create a cached short field source with a specific string-to-short parser. + + + Implements the fuzzy search query. The similarity measurement + is based on the Levenshtein (edit distance) algorithm. + + Warning: this query is not very scalable with its default prefix + length of 0 - in this case, *every* term will be enumerated and + cause an edit score calculation. + + + + + An abstract that matches documents + containing a subset of terms provided by a + enumeration. + +

This query cannot be used directly; you must subclass + it and define to provide a + that iterates through the terms to be + matched. + +

NOTE: if is either + or + , you may encounter a + exception during + searching, which happens when the number of terms to be + searched exceeds + . Setting + to + prevents this. + +

The recommended rewrite method is + : it doesn't spend CPU + computing unhelpful scores, and it tries to pick the most + performant rewrite method given the query. + + Note that produces + MultiTermQueries using + by default. +

+
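+ 
+ A sketch of selecting a rewrite method explicitly (the RewriteMethod property name is assumed from
+ the Lucene.Net port, which in older versions exposes a SetRewriteMethod method instead; PrefixQuery
+ stands in for any MultiTermQuery subclass):
+ 
+     var prefix = new PrefixQuery(new Term("path", "src/"));
+     prefix.RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;   // never hits TooManyClauses
+ 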
+ + A rewrite method that first creates a private Filter, + by visiting each term in sequence and marking all docs + for that term. Matching documents are assigned a + constant score equal to the query's boost. + +

This method is faster than the BooleanQuery + rewrite methods when the number of matched terms or + matched documents is non-trivial. Also, it will never + hit an errant + exception. + +

+ + +
+ + A rewrite method that first translates each term into + clause in a + BooleanQuery, and keeps the scores as computed by the + query. Note that typically such scores are + meaningless to the user, and require non-trivial CPU + to compute, so it's almost always better to use + instead. + +

NOTE: This rewrite method will hit + if the number of terms + exceeds . + +

+ + +
+ + Like except + scores are not computed. Instead, each matching + document receives a constant score equal to the + query's boost. + +

NOTE: This rewrite method will hit + if the number of terms + exceeds . + +

+ + +
+ + Read-only default instance of + , with + set to + + + and + set to + + . + Note that you cannot alter the configuration of this + instance; you'll need to create a private instance + instead. + + + + Constructs a query matching terms that cannot be represented with a single + Term. + + + + Construct the enumeration to be used, expanding the pattern term. + + + Expert: Resets the counting of unique terms. + Do this before executing the query/filter. + + + + + + Expert: Return the number of unique terms visited during execution of the query. + If there are many of them, you may consider using another query type + or optimize your total term count in index. +

This method is not thread safe, be sure to only call it when no query is running! + If you re-use the same query instance for another + search, be sure to first reset the term counter + with . +

On optimized indexes / no MultiReaders, you get the correct number of
+ unique terms for the whole index. Use this number to compare different queries.
+ For non-optimized indexes this number can also be achieved in
+ non-constant-score mode. In constant-score mode you get the total number of
+ terms sought for all segments / sub-readers.

+ + +
+ + Sets the rewrite method to be used when executing the + query. You can use one of the four core methods, or + implement your own subclass of . + + + + A rewrite method that tries to pick the best + constant-score rewrite method based on term and + document counts from the query. If both the number of + terms and documents is small enough, then + is used. + Otherwise, is + used. + + + + Abstract class that defines how the query is rewritten. + + + If the number of terms in this query is equal to or + larger than this setting then + is used. + + + + If the number of documents to be visited in the + postings exceeds this specified percentage of the + MaxDoc for the index, then + is used. + + 0.0 to 100.0 + + + Create a new FuzzyQuery that will match terms with a similarity + of at least minimumSimilarity to term. + If a prefixLength > 0 is specified, a common prefix + of that length is also required. + + + the term to search for + + a value between 0 and 1 to set the required similarity + between the query term and the matching terms. For example, for a + minimumSimilarity of 0.5 a term of the same length + as the query term is considered similar to the query term if the edit distance + between both terms is less than length(term)*0.5 + + length of common (non-fuzzy) prefix + + IllegalArgumentException if minimumSimilarity is >= 1 or < 0 + or if prefixLength < 0 + + + + Calls FuzzyQuery(term, minimumSimilarity, 0). + + + Calls FuzzyQuery(term, 0.5f, 0). + + + Returns the pattern term. + + + Returns the minimum similarity that is required for this query to match. + float value between 0.0 and 1.0 + + + Returns the non-fuzzy prefix length. This is the number of characters at the start + of a term that must be identical (not fuzzy) to the query term if the query + is to match that term. + + + + Subclass of FilteredTermEnum for enumerating all terms that are similiar + to the specified filter term. + +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ + + + + IOException + + +
+ + Creates a FuzzyTermEnum with an empty prefix. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ + + + + + + IOException + + +
+ + Constructor for enumeration of all terms from specified reader which share a prefix of + length prefixLength with term and which have a fuzzy similarity > + minSimilarity. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ Delivers terms. + + Pattern term. + + Minimum required similarity for terms from the reader. Default value is 0.5f. + + Length of required common prefix. Default value is 0. + + IOException +
+ + The termCompare method in FuzzyTermEnum uses Levenshtein distance to + calculate the distance between the given term and the comparing term. + + + +

Similarity returns a number that is 1.0f or less (including negative numbers)
+ based on how similar the Term is compared to a target term. It returns
+ exactly 0.0f when
+ 
+ editDistance > maximumEditDistance
+ Otherwise it returns:
+ 
+ 1 - (editDistance / length)
+ where length is the length of the shortest term (text or target), including any
+ prefix that is identical, and editDistance is the Levenshtein distance for
+ the two words.

+ +

Embedded within this algorithm is a fail-fast Levenshtein distance
+ algorithm. The fail-fast algorithm differs from the standard Levenshtein
+ distance algorithm in that it is aborted if it is discovered that the
+ minimum distance between the words is greater than some threshold.

To calculate the maximum distance threshold we use the following formula:
+ 
+ (1 - minimumSimilarity) * length
+ where length is the shortest term including any prefix that is not part of the
+ similarity comparison. This formula was derived by solving for what maximum value
+ of distance returns false for the following statements:
+ 
+ similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+ return (similarity > minimumSimilarity);
+ where distance is the Levenshtein distance for the two words.

+

Levenshtein distance (also known as edit distance) is a measure of similarity
+ between two strings where the distance is measured as the number of character
+ deletions, insertions or substitutions required to transform one string to
+ the other string.

+ the target word or phrase + + the similarity, 0.0 or less indicates that it matches less than the required + threshold and 1.0 indicates that the text and target are identical + +
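+ 
+ A worked example of the formula above (values chosen purely for illustration): with prefixLength 0
+ and the default minimumSimilarity of 0.5f, the target "lucent" matches the query term "lucene",
+ since editDistance = 1, the shortest length is 6, and 1 - 1/6 is roughly 0.83, which exceeds 0.5.
+ In code (Lucene.Net names assumed):
+ 
+     var fuzzy = new FuzzyQuery(new Term("title", "lucene"), 0.5f, 0);   // minimumSimilarity, prefixLength
+ 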
+ + The max Distance is the maximum Levenshtein distance for the text + compared to some other value that results in score that is + better than the minimum similarity. + + the length of the "other value" + + the maximum levenshtein distance that we care about + + + + Creates a new instance with size elements. If + prePopulate is set to true, the queue will pre-populate itself + with sentinel objects and set its to size. In + that case, you should not rely on to get the number of + actual elements that were added to the queue, but keep track yourself.
+ NOTE: in case prePopulate is true, you should pop + elements from the queue using the following code example: + + + PriorityQueue pq = new HitQueue(10, true); // pre-populate. + ScoreDoc top = pq.top(); + + // Add/Update one element. + top.score = 1.0f; + top.doc = 0; + top = (ScoreDoc) pq.updateTop(); + int totalHits = 1; + + // Now pop only the elements that were *truly* inserted. + // First, pop all the sentinel elements (there are pq.size() - totalHits). + for (int i = pq.size() - totalHits; i > 0; i--) pq.pop(); + + // Now pop the truly added elements. + ScoreDoc[] results = new ScoreDoc[totalHits]; + for (int i = totalHits - 1; i >= 0; i--) { + results[i] = (ScoreDoc) pq.pop(); + } + + +

NOTE: This class pre-allocates a full array of
+ length size.
+ 

+ the requested size of this queue. + + specifies whether to pre-populate the queue with sentinel values. + + + +
+ + Implements search over a single IndexReader. + +

Applications usually need only call the inherited + or methods. For performance reasons it is + recommended to open only one IndexSearcher and use it for all of your searches. + +

NOTE: + instances are completely + thread safe, meaning multiple threads can call any of its + methods, concurrently. If your application requires + external synchronization, you should not + synchronize on the IndexSearcher instance; + use your own (non-Lucene) objects instead.

+

+
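+ 
+ A minimal usage sketch (Lucene.Net API names are assumed; the index path, field and term are
+ hypothetical), following the advice above to open one read-only searcher and reuse it:
+ 
+     var dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
+     var searcher = new IndexSearcher(dir, true);        // readOnly = true, see the constructor notes below
+     try
+     {
+         TopDocs hits = searcher.Search(new TermQuery(new Term("body", "lucene")), 10);
+         foreach (ScoreDoc sd in hits.ScoreDocs)
+         {
+             Document d = searcher.Doc(sd.Doc);           // stored fields of the hit
+         }
+     }
+     finally
+     {
+         searcher.Close();
+     }
+ 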
+ + An abstract base class for search implementations. Implements the main search + methods. + +

+ Note that you can only access hits from a Searcher as long as it is not yet + closed, otherwise an IOException will be thrown. +

+
+ + The interface for search implementations. + +

+ Searchable is the abstract network protocol for searching. Implementations + provide search over a single index, over multiple indices, and over indices + on remote servers. + +

+ Queries, filters and sort criteria are designed to be compact so that they + may be efficiently passed to a remote index, with only the top-scoring hits + being returned, rather than every matching hit. + + NOTE: this interface is kept public for convenience. Since it is not + expected to be implemented directly, it may be changed unexpectedly between + releases. +

+
+ + Lower-level search API. + +

+ is called for every document.
+ Collector-based access to remote indexes is discouraged. + +

+ Applications should only use this if they need all of the matching + documents. The high-level search API () is + usually more efficient, as it skips non-high-scoring hits. + +

+ to match documents + + if non-null, used to permit documents to be collected. + + to receive hits + + BooleanQuery.TooManyClauses +
+ + Frees resources associated with this Searcher. + Be careful not to call this method while you are still using objects + that reference this searchable + + + + Expert: Returns the number of documents containing term. + Called by search code to compute term weights. + + + + + + Expert: For each term in the terms array, calculates the number of + documents containing term. Returns an array with these + document frequencies. Used to minimize number of remote calls. + + + + + Expert: Low-level search implementation. Finds the top n + hits for query, applying filter if non-null. + +

Applications should usually call or + instead. +

+ BooleanQuery.TooManyClauses +
+ + Expert: Returns the stored fields of document i. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Get the at the nth position. The + may be used to determine what s to load and how they should be loaded. + + NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the lazy is + loaded an exception may be thrown. If you want the value of a lazy to be available after closing you must + explicitly load it or fetch the Document again with a new loader. + + + + Get the document at the nth position + + The to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. + + The stored fields of the at the nth position + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + + + + + + + + + + + + + Expert: called to re-write queries into primitive queries. + BooleanQuery.TooManyClauses + + + Expert: low-level implementation method + Returns an Explanation that describes how doc scored against + weight. + +

This is intended to be used in developing Similarity implementations, + and, for good performance, should not be displayed with every hit. + Computing an explanation is as expensive as executing the query over the + entire index. +

Applications should call . +

+ BooleanQuery.TooManyClauses +
+ + Expert: Low-level search implementation with arbitrary sorting. Finds + the top n hits for query, applying + filter if non-null, and sorting the hits by the criteria in + sort. + +

Applications should usually call + instead. + +

+ BooleanQuery.TooManyClauses +
+ + Expert: Returns one greater than the largest possible document number. + Called by search code to compute term weights. + + + + + + Search implementation with arbitrary sorting. Finds + the top n hits for query, applying + filter if non-null, and sorting the hits by the criteria in + sort. + +

NOTE: this does not compute scores by default; use + to enable scoring. + +

+ BooleanQuery.TooManyClauses +
+ + Lower-level search API. + +

is called for every matching document. + +

Applications should only use this if they need all of the matching + documents. The high-level search API ( + ) is usually more efficient, as it skips non-high-scoring hits. +

Note: The score passed to this method is a raw score. + In other words, the score will not necessarily be a float whose value is + between 0 and 1. +

+ BooleanQuery.TooManyClauses +
+ + Lower-level search API. + +

is called for every matching + document. +
Collector-based access to remote indexes is discouraged. + +

Applications should only use this if they need all of the + matching documents. The high-level search API () + is usually more efficient, as it skips + non-high-scoring hits. + +

+ to match documents + + if non-null, used to permit documents to be collected. + + to receive hits + + BooleanQuery.TooManyClauses +
+ + Finds the top n + hits for query, applying filter if non-null. + + + BooleanQuery.TooManyClauses + + + Finds the top n + hits for query. + + + BooleanQuery.TooManyClauses + + + Returns an Explanation that describes how doc scored against + query. + +

This is intended to be used in developing Similarity implementations, + and, for good performance, should not be displayed with every hit. + Computing an explanation is as expensive as executing the query over the + entire index. +

+
+ + The Similarity implementation used by this searcher. + + + creates a weight for query + new weight + + + + Expert: Gets or Sets the Similarity implementation used by this Searcher. + + + + + + + Creates a searcher searching the index in the named + directory, with readOnly=true + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Creates a searcher searching the index in the named + directory. You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the underlying IndexReader. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + directory where IndexReader will be opened + + if true, the underlying IndexReader + will be opened readOnly + + + + Creates a searcher searching the provided index + + Note that the underlying IndexReader is not closed, if + IndexSearcher was constructed with IndexSearcher(IndexReader r). + If the IndexReader was supplied implicitly by specifying a directory, then + the IndexReader gets closed. + + + + + + Expert: directly specify the reader, subReaders and their + DocID starts +

+ NOTE: This API is experimental and + might change in incompatible ways in the next + release

+

+
+ + Just like , but you choose + whether or not the fields in the returned instances + should be set by specifying fillFields. +

+ NOTE: this does not compute scores by default. If you need scores, create + a instance by calling + and then pass that to + . +

+

+
+ + By default, no scores are computed when sorting by field (using + ). You can change that, per + IndexSearcher instance, by calling this method. Note that this will incur + a CPU cost. + + + If true, then scores are returned for every matching document + in . + + + If true, then the max score for all matching docs is computed. + + + + Return the this searches. + + + A query that matches all documents. + + + + + Field used for normalization factor (document boost). Null if nothing. + + + + MultiPhraseQuery is a generalized version of PhraseQuery, with an added + method . + To use this class, to search for the phrase "Microsoft app*" first use + add(Term) on the term "Microsoft", then find all terms that have "app" as + prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[] + terms) to add them to the query. + + + 1.0 + + + + Add a single term at the next position in the phrase. + + + + + Add multiple terms at the next position in the phrase. Any of the terms + may match. + + + + + + + Allows to specify the relative position of terms within the phrase. + + + + + + + + + + + Returns a List<Term[]> of the terms in the multiphrase. + Do not modify the List or its contents. + + + + Returns the relative positions of terms in this phrase. + + + Prints a user-readable version of this query. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Gets or sets the phrase slop for this query. + + + + + Implements search over a set of Searchables. + +

Applications usually need only call the inherited + or methods. +

+
+ + Creates a searcher which searches searchers. + + + Return the array of s this searches. + + + Returns index of the searcher for document n in the array + used to construct this searcher. + + + + Returns the document number of document n within its + sub-index. + + + + + + + Create weight in multiple index scenario. + + Distributed query processing is done in the following steps: + 1. rewrite query + 2. extract necessary terms + 3. collect dfs for these terms from the Searchables + 4. create query weight using aggregate dfs. + 5. distribute that weight to Searchables + 6. merge results + + Steps 1-4 are done here, 5+6 in the search() methods + + + rewritten queries + + + + Document Frequency cache acting as a Dummy-Searcher. This class is no + full-fledged Searcher, but only supports the methods necessary to + initialize Weights. + + + + A wrapper for , that exposes its + functionality as a . +

+ MultiTermQueryWrapperFilter is not designed to + be used by itself. Normally you subclass it to provide a Filter + counterpart for a subclass. +

+ For example, and extend + MultiTermQueryWrapperFilter. + This class also provides the functionality behind + ; + this is why it is not abstract. +

+
+ + Wrap a as a Filter. + + + Expert: Resets the counting of unique terms. + Do this before executing the filter. + + + + + + Expert: Return the number of unique terms visited during execution of the filter. + If there are many of them, you may consider using another filter type + or optimize your total term count in index. +

This method is not thread safe, be sure to only call it when no filter is running! + If you re-use the same filter instance for another + search, be sure to first reset the term counter + with . +

+ + +
+ + A that only accepts numeric values within + a specified range. To use this, you must first index the + numeric values using (expert: + ). + +

You create a new NumericRangeFilter with the static + factory methods, eg: + + + Filter f = NumericRangeFilter.newFloatRange("weight", + new Float(0.3f), new Float(0.10f), + true, true); + + + accepts all documents whose float valued "weight" field + ranges from 0.3 to 0.10, inclusive. + See for details on how Lucene + indexes and searches numeric valued fields. + +

NOTE: This API is experimental and + might change in incompatible ways in the next + release. + +

+ 2.9 + + +
+ + Returns the field name for this filter + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the lower value of this range filter + + + Returns the upper value of this range filter + + + Factory that creates a NumericRangeFilter, that filters a long + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a long + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a int + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a int + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a double + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a double + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a float + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a float + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + +

A that matches numeric values within a + specified range. To use this, you must first index the + numeric values using (expert: + ). If your terms are instead textual, + you should use . + is the filter equivalent of this + query.

+ +

You create a new NumericRangeQuery with the static + factory methods, eg: + + + Query q = NumericRangeQuery.newFloatRange("weight", + new Float(0.3f), new Float(0.10f), + true, true); + + + matches all documents whose float valued "weight" field + ranges from 0.3 to 0.10, inclusive. + +

The performance of NumericRangeQuery is much better + than the corresponding because the + number of terms that must be searched is usually far + fewer, thanks to trie indexing, described below.

+ +

You can optionally specify a precisionStep + when creating this query. This is necessary if you've + changed this configuration from its default (4) during + indexing. Lower values consume more disk space but speed + up searching. Suitable values are between 1 and + 8. A good starting point to test is 4, + which is the default value for all Numeric* + classes. See below for + details. + +

This query defaults to + for + 32 bit (int/float) ranges with precisionStep <8 and 64 + bit (long/double) ranges with precisionStep <6. + Otherwise it uses + as the + number of terms is likely to be high. With precision + steps of <4, this query can be run with one of the + BooleanQuery rewrite methods without changing + BooleanQuery's default max clause count. + +

NOTE: This API is experimental and + might change in incompatible ways in the next release. + +

How it works

+ +

See the publication about panFMP, + where this algorithm was described (referred to as TrieRangeQuery): + +

Schindler, U, Diepenbroek, M, 2008. + Generic XML-based Framework for Metadata Portals. + Computers & Geosciences 34 (12), 1947-1955. + doi:10.1016/j.cageo.2008.02.023
+ +

A quote from this paper: Because Apache Lucene is a full-text + search engine and not a conventional database, it cannot handle numerical ranges + (e.g., field value is inside user defined bounds, even dates are numerical values). + We have developed an extension to Apache Lucene that stores + the numerical values in a special string-encoded format with variable precision + (all numerical values like doubles, longs, floats, and ints are converted to + lexicographic sortable string representations and stored with different precisions + (for a more detailed description of how the values are stored, + see ). A range is then divided recursively into multiple intervals for searching: + The center of the range is searched only with the lowest possible precision in the trie, + while the boundaries are matched more exactly. This reduces the number of terms dramatically.

+ +

For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that + uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the + lowest precision. Overall, a range could consist of a theoretical maximum of + 7*255*2 + 255 = 3825 distinct terms (when there is a term for every distinct value of an + 8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used + because it would always be possible to reduce the full 256 values to one term with degraded precision). + In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records + and a uniform value distribution).

+ +

Precision Step

+

You can choose any precisionStep when encoding values.
+ Lower step values mean more precisions and so more terms in the index (and the index gets larger).
+ On the other hand, the maximum number of terms to match is reduced, which optimizes query speed.
+ The formula to calculate the maximum term count is:
+ 
+ n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 )
+ 

(this formula is only correct, when bitsPerValue/precisionStep is an integer; + in other cases, the value must be rounded up and the last summand must contain the modulo of the division as + precision step). + For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465, and for a precision + step of 2, n = 31*3*2 + 3 = 189. But the faster search speed is reduced by more seeking + in the term enum of the index. Because of this, the ideal precisionStep value can only + be found out by testing. Important: You can index with a lower precision step value and test search speed + using a multiple of the original step value.

+ +

Good values for precisionStep depend on usage and data type:
+ 
+ The default for all data types is 4, which is used when no precisionStep is given.
+ The ideal value in most cases for 64 bit data types (long, double) is 6 or 8.
+ The ideal value in most cases for 32 bit data types (int, float) is 4.
+ Steps >64 for long/double and >32 for int/float produce one token
+ per value in the index and querying is as slow as a conventional . But it can be used
+ to produce fields that are solely used for sorting (in this case simply use as
+ precisionStep). Using NumericFields for sorting
+ is ideal, because building the field cache is much faster than with text-only numbers.
+ Sorting is also possible with range query optimized fields using one of the above precisionSteps.
+ 

Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed + that in boolean rewrite mode (with raised clause count) + took about 30-40 secs to complete, in constant score filter rewrite mode took 5 secs + and executing this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit + precision step). This query type was developed for a geographic portal, where the performance for + e.g. bounding boxes or exact date/time stamps is important.

+ +

+ 2.9 + + +
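+ 
+ A sketch of the index/search pairing described above (Lucene.Net names are assumed; the "timestamp"
+ field and the nowTicks/weekAgoTicks values are hypothetical), keeping the default precisionStep of 4
+ on both the indexing and the query side:
+ 
+     var doc = new Document();
+     doc.Add(new NumericField("timestamp", Field.Store.YES, true).SetLongValue(nowTicks));
+ 
+     Query lastWeek = NumericRangeQuery.NewLongRange(
+         "timestamp", weekAgoTicks, null,   // null upper bound makes the range half-open
+         true, true);                       // lower/upper bounds inclusive
+ 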
+ + Returns the field name for this query + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the lower value of this range query + + + Returns the upper value of this range query + + + Subclass of FilteredTermEnum for enumerating all terms that match the + sub-ranges for trie range queries. +

+ WARNING: This term enumeration is not guaranteed to be always ordered by + . + The ordering depends on how and + generates the sub-ranges. For + ordering is not relevant. +

+
+ + this is a dummy, it is not used by this class. + + + this is a dummy, it is not used by this class. + + + Compares if current upper bound is reached, + this also updates the term count for statistics. + In contrast to , a return value + of false ends iterating the current enum + and forwards to the next sub-range. + + + + Increments the enumeration to the next element. True if one exists. + + + Closes the enumeration to further activity, freeing resources. + + + Expert: Callback for . + You need to overwrite only one of the methods. +

NOTE: This is a very low-level interface, + the method signatures may change in later versions. +

+
+ + This is a helper class to generate prefix-encoded representations for numerical values + and supplies converters to represent float/double values as sortable integers/longs. + +

To quickly execute range queries in Apache Lucene, a range is divided recursively + into multiple intervals for searching: The center of the range is searched only with + the lowest possible precision in the trie, while the boundaries are matched + more exactly. This reduces the number of terms dramatically. + +

This class generates terms to achieve this: First the numerical integer values need to
+ be converted to strings. For that, integer values (32 bit or 64 bit) are made unsigned
+ and the bits are converted to ASCII chars, 7 bits per char. The resulting string is
+ sortable like the original integer value. Each value is also prefixed
+ (in the first char) by the shift value (number of bits removed) used
+ during encoding.
+ 

To also index floating point numbers, this class supplies two methods that convert them to integer values by changing their bit layout: DoubleToSortableLong and FloatToSortableInt. There is no precision loss when converting floating point numbers to integers and back (only that the integer form is not directly usable as a number). Other data types like dates can easily be converted to longs or ints (e.g. date to long).
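For intuition, a C# sketch of the kind of bit manipulation described; this mirrors the common IEEE 754 sortable-bits trick and is illustrative rather than the library's exact implementation:

    // Map a double onto a long whose signed ordering matches the double ordering.
    static long ToSortableLong(double value)
    {
        long bits = BitConverter.DoubleToInt64Bits(value);
        // For negative doubles the raw bits sort in reverse; flipping the 63
        // non-sign bits restores a total order across negative and positive values.
        return bits < 0 ? bits ^ 0x7FFFFFFFFFFFFFFF : bits;
    }

    static double FromSortableLong(long sortable)
    {
        long bits = sortable < 0 ? sortable ^ 0x7FFFFFFFFFFFFFFF : sortable;
        return BitConverter.Int64BitsToDouble(bits);
    }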

For easy usage, the trie algorithm is implemented for indexing inside + that can index int, long, + float, and double. For querying, + and implement the query part + for the same data types. + +

This class can also be used to generate lexicographically sortable representations of numeric data types for other usages (e.g. sorting).

NOTE: This API is experimental and + might change in incompatible ways in the next release. + +

+ 2.9 + +
+ + The default precision step used by , , + , and as default + + + + Expert: The maximum term length (used for char[] buffer size) + for encoding long values. + + + + + + Expert: The maximum term length (used for char[] buffer size) + for encoding int values. + + + + + + Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is + stored as SHIFT_START_LONG+shift in the first character + + + + Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is + stored as SHIFT_START_INT+shift in the first character + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by . + + the numeric value + + how many bits to strip from the right + + that will contain the encoded chars, must be at least of + length + + number of chars written to buffer + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by . + + the numeric value + + how many bits to strip from the right + + + + This is a convenience method, that returns prefix coded bits of a long without + reducing the precision. It can be used to store the full precision value as a + stored field in index. +

To decode, use . +
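A small round-trip sketch in C# (assuming the Lucene.Net NumericUtils method names mirror their Java counterparts):

    // Encode with no precision reduction, e.g. for a stored field, then decode it again.
    string coded = NumericUtils.LongToPrefixCoded(42L);
    long decoded = NumericUtils.PrefixCodedToLong(coded);   // 42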

+
+ + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by . + + the numeric value + + how many bits to strip from the right + + that will contain the encoded chars, must be at least of + length + + number of chars written to buffer + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by . + + the numeric value + + how many bits to strip from the right + + + + This is a convenience method, that returns prefix coded bits of an int without + reducing the precision. It can be used to store the full precision value as a + stored field in index. +

To decode, use . +

+
+ + Returns a long from prefixCoded characters. + Rightmost bits will be zero for lower precision codes. + This method can be used to decode e.g. a stored field. + + NumberFormatException if the supplied string is + not correctly prefix encoded. + + + + + + Returns an int from prefixCoded characters. + Rightmost bits will be zero for lower precision codes. + This method can be used to decode e.g. a stored field. + + NumberFormatException if the supplied string is + not correctly prefix encoded. + + + + + + Converts a double value to a sortable signed long. + The value is converted by getting their IEEE 754 floating-point "double format" + bit layout and then some bits are swapped, to be able to compare the result as long. + By this the precision is not reduced, but the value can easily used as a long. + + + + + + Convenience method: this just returns: + longToPrefixCoded(doubleToSortableLong(val)) + + + + Converts a sortable long back to a double. + + + + + Convenience method: this just returns: + sortableLongToDouble(prefixCodedToLong(val)) + + + + Converts a float value to a sortable signed int. + The value is converted by getting their IEEE 754 floating-point "float format" + bit layout and then some bits are swapped, to be able to compare the result as int. + By this the precision is not reduced, but the value can easily used as an int. + + + + + + Convenience method: this just returns: + intToPrefixCoded(floatToSortableInt(val)) + + + + Converts a sortable int back to a float. + + + + + Convenience method: this just returns: + sortableIntToFloat(prefixCodedToInt(val)) + + + + Expert: Splits a long range recursively. + You may implement a builder that adds clauses to a + for each call to its + + method. +

This method is used by . +

+
+ + Expert: Splits an int range recursively. + You may implement a builder that adds clauses to a + for each call to its + + method. +

This method is used by . +

+
+ + This helper does the splitting for both 32 and 64 bit. + + + Helper that delegates to correct range builder + + + Expert: Callback for . + You need to overwrite only one of the methods. +

NOTE: This is a very low-level interface, + the method signatures may change in later versions. +

+
+ + Overwrite this method, if you like to receive the already prefix encoded range bounds. + You can directly build classical (inclusive) range queries from them. + + + + Overwrite this method, if you like to receive the raw long range bounds. + You can use this for e.g. debugging purposes (print out range bounds). + + + + Expert: Callback for . + You need to overwrite only one of the methods. +

NOTE: This is a very low-level interface, + the method signatures may change in later versions. +

+
+ + Overwrite this method, if you like to receive the already prefix encoded range bounds. + You can directly build classical range (inclusive) queries from them. + + + + Overwrite this method, if you like to receive the raw int range bounds. + You can use this for e.g. debugging purposes (print out range bounds). + + + +
Factory that creates a NumericRangeQuery, that queries a long + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a long + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a int + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a int + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a double + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a double + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a float + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a float + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Calculate the final score as the average score of all payloads seen. +

+ Is thread safe and completely reusable. + + +

+
+ + An abstract class that defines a way for Payload*Query instances + to transform the cumulative effects of payload scores for a document. + + + for more information + +

+ This class and its derivations are experimental and subject to change + + + + + +

Calculate the score up to this point for this doc and field + The current doc + + The field + + The start position of the matching Span + + The end position of the matching Span + + The number of payloads seen so far + + The current score so far + + The score for the current payload + + The new current Score + + + + +
+ + Calculate the final score for all the payloads seen so far for this doc/field + The current doc + + The current field + + The total number of payloads seen on this document + + The raw score for those payloads + + The final score for the payloads + + + + Returns the maximum payload score seen, else 1 if there are no payloads on the doc. +

+ Is thread safe and completely reusable. + + +

+
+ + Calculates the minimum payload seen + + + + + + This class is very similar to + except that it factors + in the value of the payloads located at each of the positions where the + occurs. +

+ In order to take advantage of this, you must override + + which returns 1 by default. +

+ Payload scores are aggregated using a pluggable . + +

+ + +
+ + Matches spans which are near one another. One can specify slop, the + maximum number of intervening unmatched positions, as well as whether + matches are required to be in-order. + + + + Base class for span-based queries. + + + Expert: Returns the matches for this query in an index. Used internally + to search for spans. + + + + Returns the name of the field matched by this query. + + + Construct a SpanNearQuery. Matches spans matching a span from each + clause, with up to slop total unmatched positions between + them. * When inOrder is true, the spans from each clause + must be * ordered as in clauses. + + + + Return the clauses whose spans are matched. + + + Returns true iff o is equal to this. + + + Return the maximum number of intervening unmatched positions permitted. + + + Return true if matches are required to be in-order. + + + Expert-only. Public for use by other weight implementations + + + Public for extension only. + + + + This method is no longer an official member of + but it is needed by SpanWeight to build an explanation. + + + + By default, uses the to score the payloads, but + can be overridden to do other things. + + + The payloads + + The start position of the span being scored + + The end position of the span being scored + + + + + + + Experimental class to get set of payloads for most standard Lucene queries. + Operates like Highlighter - IndexReader should only contain doc of interest, + best to use MemoryIndex. + +

+ + WARNING: The status of the Payloads feature is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+
+ + that contains doc with payloads to extract + + + + Query should be rewritten for wild/fuzzy support. + + + + + payloads Collection + + IOException + + + This class is very similar to + except that it factors + in the value of the payload located at each of the positions where the + occurs. +

+ In order to take advantage of this, you must override + + which returns 1 by default. +

+ Payload scores are aggregated using a pluggable . + +

+
+ + Matches spans containing a term. + + + Construct a SpanTermQuery matching the named term's spans. + + + Return the term whose spans are matched. + + + + * + + IOException + + + Returns the SpanScorer score only. +

Should not be overridden without good cause!

+ the score for just the Span part w/o the payload + + IOException + + + + +
+ + The score for the payload + + + The score, as calculated by + + + + + Position of a term in a document that takes into account the term offset within the phrase. + + + Go to next location of this term current document, and set + position as location - offset, so that a + matching exact phrase is easily identified when all PhrasePositions + have exactly the same position. + + + + A Query that matches documents containing a particular sequence of terms. + A PhraseQuery is built by QueryParser for input like "new york". + +

This query may be combined with other terms or queries with a . +

+
+ + Constructs an empty phrase query. + + + Adds a term to the end of the query phrase. + The relative position of the term is the one immediately after the last term added. + + + + Adds a term to the end of the query phrase. + The relative position of the term within the phrase is specified explicitly. + This allows e.g. phrases with more than one term at the same position + or phrases with gaps (e.g. in connection with stopwords). + + + + + + + + + Returns the set of terms in this phrase. + + + Returns the relative positions of terms in this phrase. + + + + + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Sets the number of other words permitted between words in query phrase. + If zero, then this is an exact phrase search. For larger values this works + like a WITHIN or NEAR operator. +

The slop is in fact an edit-distance, where the units correspond to + moves of terms in the query phrase out of position. For example, to switch + the order of two words requires two moves (the first move places the words + atop one another), so to permit re-orderings of phrases, the slop must be + at least two. +

More exact matches are scored higher than sloppier matches, thus search + results are sorted by exactness. +

The slop is zero by default, requiring exact matches. +
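A hedged C# sketch (the field name and terms are placeholders; depending on the Lucene.Net version the slop is set via a Slop property or a SetSlop method):

    // "new york" with up to two edit-distance moves permitted between the terms.
    var phrase = new PhraseQuery();
    phrase.Add(new Term("body", "new"));
    phrase.Add(new Term("body", "york"));
    phrase.Slop = 2;   // or phrase.SetSlop(2) on older versions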

+
+ + A implementation which wraps another + and makes sure only documents with + scores > 0 are collected. + + + + A Filter that restricts search results to values that have a matching prefix in a given + field. + + + + Prints a user-readable version of this query. + + + A Query that matches documents containing terms with a specified prefix. A PrefixQuery + is built by QueryParser for input like app*. + +

This query uses the + + rewrite method. +

+
+ + Constructs a query for terms starting with prefix. + + + Prints a user-readable version of this query. + + + Returns the prefix of this query. + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified prefix filter term. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. + +

+
+ + + + + + + + + The original list of terms from the query, can contain duplicates + + + + Constrains search results to only match those which also match a provided + query. + +

This could be used, for example, with a on a suitably + formatted date field to implement date filtering. One could re-use a single + QueryFilter that matches, e.g., only documents modified within the last + week. The QueryFilter and TermRangeQuery would only need to be reconstructed + once per day. + +

+ $Id:$ + +
+ + Constructs a filter which only matches documents matching + query. + + + + A Scorer for queries with a required subscorer + and an excluding (prohibited) sub DocIdSetIterator. +
+ This Scorer implements , + and it uses the skipTo() on the given scorers. +
+
+ + Construct a ReqExclScorer. + The scorer that must match, except where + + indicates exclusion. + + + + Advance to non excluded doc. +
On entry: + + reqScorer != null, + exclScorer != null, + reqScorer was advanced once via next() or skipTo() + and reqScorer.doc() may still be excluded. + + Advances reqScorer a non excluded required doc, if any. +
+ true iff there is a non excluded required doc. + +
+ + Returns the score of the current document matching the query. + Initially invalid, until is called the first time. + + The score of the required scorer. + + + + A Scorer for queries with a required part and an optional part. + Delays skipTo() on the optional part until a score() is needed. +
+ This Scorer implements . +
+
+ + The scorers passed from the constructor. + These are set to null as soon as their next() or skipTo() returns false. + + + + Construct a ReqOptScorer. + The required scorer. This must match. + + The optional scorer. This is used for scoring only. + + + + Returns the score of the current document matching the query. + Initially invalid, until is called the first time. + + The score of the required scorer, eventually increased by the score + of the optional scorer when it also matches the current document. + + + + A which wraps another scorer and caches the score of the + current document. Successive calls to will return the same + result and will not invoke the wrapped Scorer's score() method, unless the + current document has changed.
+ This class might be useful due to the changes done to the + interface, in which the score is not computed for a document by default, only + if the collector requests it. Some collectors may need to use the score in + several places, however all they have in hand is a object, and + might end up computing the score of a document more than once. +
+
+ + Creates a new instance by wrapping the given scorer. + + + + Subclass of FilteredTermEnum for enumerating a single term. +

+ This can be used by s that need only visit one term, + but want to preserve MultiTermQuery semantics such as + . +

+
+ + + Creates a new SingleTermEnum. +

+ After calling the constructor the enumeration is already pointing to the term, + if it exists. +

+
+ + Score a candidate doc for all slop-valid position-combinations (matches) + encountered while traversing/hopping the PhrasePositions. +
The score contribution of a match depends on the distance: +
- highest score for distance=0 (exact match). +
- score gets lower as distance gets higher. +
Example: for query "a b"~2, a document "x a b a y" can be scored twice: + once for "a b" (distance=0), and once for "b a" (distance=2). +
Possibly not all valid combinations are encountered, because for efficiency we always propagate the least PhrasePosition. This allows the implementation to be based on a PriorityQueue and to move forward faster. As a result, for example, the document "a b c b a" would score differently for the queries "a b c"~4 and "c b a"~4, although they really are equivalent. Similarly, for the doc "a b c b a f g", the query "c b"~2 would get the same score as "g f"~2, although "c b"~2 could be matched twice. We may want to fix this in the future (currently not, for performance reasons).
+
+ + Init PhrasePositions in place. + There is a one time initialization for this scorer: +
- Put in repeats[] each pp that has another pp with same position in the doc. +
- Also mark each such pp by pp.repeats = true. +
Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient. + In particular, this allows to score queries with no repetitions with no overhead due to this computation. +
- Example 1 - query with no repetitions: "ho my"~2 +
- Example 2 - query with repetitions: "ho my my"~2 +
- Example 3 - query with repetitions: "my ho my"~2 +
Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection. +
+ end (max position), or -1 if any term ran out (i.e. done) + + IOException +
+ + We disallow two pp's to have the same TermPosition, thereby verifying multiple occurrences + in the query of the same word would go elsewhere in the matched doc. + + null if differ (i.e. valid) otherwise return the higher offset PhrasePositions + out of the first two PPs found to not differ. + + + + Encapsulates sort criteria for returned hits. + +

The fields used to determine sort order must be carefully chosen. + Documents must contain a single term in such a field, + and the value of the term should indicate the document's relative position in + a given sort order. The field must be indexed, but should not be tokenized, + and does not need to be stored (unless you happen to want it back with the + rest of your document data). In other words: + +

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.NOT_ANALYZED));

+ + +

Valid Types of Values

+ +

There are four possible kinds of term values which may be put into + sorting fields: Integers, Longs, Floats, or Strings. Unless + SortField objects are specified, the type of value + in the field is determined by parsing the first term in the field. + +

Integer term values should contain only digits and an optional + preceding negative sign. Values must be base 10 and in the range + Integer.MIN_VALUE and Integer.MAX_VALUE inclusive. + Documents which should appear first in the sort + should have low value integers, later documents high values + (i.e. the documents should be numbered 1..n where + 1 is the first and n the last). + +

Long term values should contain only digits and an optional + preceding negative sign. Values must be base 10 and in the range + Long.MIN_VALUE and Long.MAX_VALUE inclusive. + Documents which should appear first in the sort + should have low value integers, later documents high values. + +

Float term values should conform to values accepted by + (except that NaN + and Infinity are not supported). + Documents which should appear first in the sort + should have low values, later documents high values. + +

String term values can contain any valid String, but should + not be tokenized. The values are sorted according to their + natural order. Note that using this type + of term value has higher memory requirements than the other + two types. + +

Object Reuse

+ +

One of these objects can be + used multiple times and the sort order changed between usages. + +

This class is thread safe. + +

Memory Usage

+ +

Sorting uses caches of term values maintained by the internal HitQueue(s). The cache is static and contains an integer or float array of length IndexReader.MaxDoc for each field name for which a sort is performed. In other words, the size of the cache in bytes is:

4 * IndexReader.MaxDoc * (# of different fields actually used to sort) + +

For String fields, the cache is larger: in addition to the + above array, the value of every term in the field is kept in memory. + If there are many unique terms in the field, this could + be quite large. + +

Note that the size of the cache is not affected by how many + fields are in the index and might be used to sort - only by + the ones actually used to sort a result set. + +

Created: Feb 12, 2004 10:53:57 AM + +
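A hedged usage sketch in C# (searcher, query and the field name are assumptions; the field must be indexed as a single untokenized term as described above):

    // Sort results by the "byNumber" field, ascending by integer value.
    var sort = new Sort(new SortField("byNumber", SortField.INT));
    TopDocs hits = searcher.Search(query, null, 20, sort);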

+
+ + Represents sorting by computed relevance. Using this sort criteria returns + the same results as calling + Searcher#search()without a sort criteria, + only with slightly more overhead. + + + + Represents sorting by index order. + + + Sorts by computed relevance. This is the same sort criteria as calling + without a sort criteria, + only with slightly more overhead. + + + + Sorts by the criteria in the given SortField. + + + Sorts in succession by the criteria in each SortField. + + + Sets the sort to the given criteria. + + + Sets the sort to the given criteria in succession. + + + Representation of the sort criteria. + Array of SortField objects used in this sort criteria + + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Stores information about how to sort documents by terms in an individual + field. Fields must be indexed in order to sort by them. + +

Created: Feb 11, 2004 1:25:29 PM +

+ +
+ + Sort by document score (relevancy). Sort values are Float and higher + values are at the front. + + + + Sort by document number (index order). Sort values are Integer and lower + values are at the front. + + + + Sort using term values as Strings. Sort values are String and lower + values are at the front. + + + + Sort using term values as encoded Integers. Sort values are Integer and + lower values are at the front. + + + + Sort using term values as encoded Floats. Sort values are Float and + lower values are at the front. + + + + Sort using term values as encoded Longs. Sort values are Long and + lower values are at the front. + + + + Sort using term values as encoded Doubles. Sort values are Double and + lower values are at the front. + + + + Sort using term values as encoded Shorts. Sort values are Short and + lower values are at the front. + + + + Sort using a custom Comparator. Sort values are any Comparable and + sorting is done according to natural order. + + + + Sort using term values as encoded Bytes. Sort values are Byte and + lower values are at the front. + + + + Sort using term values as Strings, but comparing by + value (using String.compareTo) for all comparisons. + This is typically slower than , which + uses ordinals to do the sorting. + + + + Represents sorting by document score (relevancy). + + + Represents sorting by document number (index order). + + + Creates a sort by terms in the given field with the type of term + values explicitly given. + + Name of field to sort by. Can be null if + type is SCORE or DOC. + + Type of values in the terms. + + + + Creates a sort, possibly in reverse, by terms in the given field with the + type of term values explicitly given. + + Name of field to sort by. Can be null if + type is SCORE or DOC. + + Type of values in the terms. + + True if natural order should be reversed. + + + + Creates a sort by terms in the given field, parsed + to numeric values using a custom . + + Name of field to sort by. Must not be null. + + Instance of a , + which must subclass one of the existing numeric + parsers from . Sort type is inferred + by testing which numeric parser the parser subclasses. + + IllegalArgumentException if the parser fails to + subclass an existing numeric parser, or field is null + + + + Creates a sort, possibly in reverse, by terms in the given field, parsed + to numeric values using a custom . + + Name of field to sort by. Must not be null. + + Instance of a , + which must subclass one of the existing numeric + parsers from . Sort type is inferred + by testing which numeric parser the parser subclasses. + + True if natural order should be reversed. + + IllegalArgumentException if the parser fails to + subclass an existing numeric parser, or field is null + + + + Creates a sort by terms in the given field sorted + according to the given locale. + + Name of field to sort by, cannot be null. + + Locale of values in the field. + + + + Creates a sort, possibly in reverse, by terms in the given field sorted + according to the given locale. + + Name of field to sort by, cannot be null. + + Locale of values in the field. + + + + Creates a sort with a custom comparison function. + Name of field to sort by; cannot be null. + + Returns a comparator for sorting hits. + + + + Creates a sort, possibly in reverse, with a custom comparison function. + Name of field to sort by; cannot be null. + + Returns a comparator for sorting hits. + + True if natural order should be reversed. + + + + Returns true if o is equal to this. 
If a + or + was provided, it must properly + implement equals (unless a singleton is always used). + + + + Returns true if o is equal to this. If a + (deprecated) or + was provided, it must properly + implement hashCode (unless a singleton is always + used). + + + + Returns the to use for + sorting. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + number of top hits the queue will store + + position of this SortField within + . The comparator is primary if sortPos==0, + secondary if sortPos==1, etc. Some comparators can + optimize themselves when they are the primary sort. + + to use when sorting + + + + Returns the name of the field. Could return null + if the sort is by SCORE or DOC. + + Name of field, possibly <c>null</c>. + + + Returns the type of contents in the field. + One of the constants SCORE, DOC, STRING, INT or FLOAT. + + + Returns the Locale by which term values are interpreted. + May return null if no Locale was specified. + + Locale, or <c>null</c>. + + + Returns the instance of a parser that fits to the given sort type. + May return null if no parser was specified. Sorting is using the default parser then. + + An instance of a <see cref="FieldCache" /> parser, or <c>null</c>. + + + Returns whether the sort should be reversed. + True if natural order should be reversed. + + + + Returns the used for + custom sorting + + + + The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery + +

+ NOTE: This API is still experimental and subject to change. +

+
+ + + The DocIdSet for the Filter + + A List of objects + + + + The first entry in the array corresponds to the first "on" bit. + Entries are increasing by document order + + A List of PositionInfo objects + + + Returns the docIdSet + + + + A List of <see cref="Lucene.Net.Search.SpanFilterResult.StartEnd" /> objects + + + + The end position of this match + + + The Start position + The start position of this match + + + Constrains search results to only match those which also match a provided + query. Also provides position information about where each document matches + at the cost of extra space compared with the QueryWrapperFilter. + There is an added cost to this above what is stored in a . Namely, + the position information for each matching document is stored. +

+ This filter does not cache. See the for a wrapper that + caches. + + +

+ $Id:$ + +
+ + Constructs a filter which only matches documents matching + query. + + The to use as the basis for the Filter. + + + +

Wrapper to allow SpanQuery objects to participate in composite single-field SpanQueries by 'lying' about their search field. That is, the masked SpanQuery will function as normal, but simply hands back the field value supplied in this class's constructor.

+ +

This can be used to support Queries like or + across different fields, which is not ordinarily + permitted.

+ +

This can be useful for denormalized relational data: for example, when + indexing a document with conceptually many 'children':

+ +

+            teacherid: 1
+            studentfirstname: james
+            studentsurname: jones
+            
+            teacherid: 2
+            studentfirstname: james
+            studentsurname: smith
+            studentfirstname: sally
+            studentsurname: jones
+            
+ +

a SpanNearQuery with a slop of 0 can be applied across two SpanTermQuery objects as follows:

            SpanQuery q1 = new SpanTermQuery(new Term("studentfirstname", "james"));
            SpanQuery q2 = new SpanTermQuery(new Term("studentsurname", "jones"));
            SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
            Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false);

to search for 'studentfirstname:james studentsurname:jones' and find teacherid 1 without matching teacherid 2 (which has a 'james' in position 0 and 'jones' in position 1).

+ +

Note: as returns the masked field, scoring will be + done using the norms of the field name supplied. This may lead to unexpected + scoring behaviour.

+

+
+ + A Spans that is formed from the ordered subspans of a SpanNearQuery + where the subspans do not overlap and have a maximum slop between them. +

The formed spans only contain minimum slop matches.
+ The matching slop is computed from the distance(s) between + the non overlapping matching Spans.
+ Successive matches are always formed from the successive Spans + of the SpanNearQuery. +

+ The formed spans may contain overlaps when the slop is at least 1. + For example, when querying using + t1 t2 t3 + with slop at least 1, the fragment: + t1 t2 t1 t3 t2 t3 + matches twice: + t1 t2 .. t3 + t1 .. t2 t3 + + + Expert: + Only public for subclassing. Most implementations should not need this class +

+
+ + Expert: an enumeration of span matches. Used to implement span searching. + Each span represents a range of term positions within a document. Matches + are enumerated in order, by increasing document number, within that by + increasing start position and finally by increasing end position. + + + + Move to the next match, returning true iff any such exists. + + + Skips to the first match beyond the current, whose document number is + greater than or equal to target.

Returns true iff there is such + a match.

Behaves as if written:

            boolean skipTo(int target) {
              do {
                if (!next())
                  return false;
              } while (target > doc());
              return true;
            }

Most implementations are considerably more efficient than that.

+
+ + Returns the document number of the current match. Initially invalid. + + + Returns the start position of the current match. Initially invalid. + + + Returns the end position of the current match. Initially invalid. + + + Returns the payload data for the current span. + This is invalid until is called for + the first time. + This method must not be called more than once after each call + of . However, most payloads are loaded lazily, + so if the payload data for the current position is not needed, + this method may not be called at all for performance reasons. An ordered + SpanQuery does not lazy load, so if you have payloads in your index and + you do not want ordered SpanNearQuerys to collect payloads, you can + disable collection with a constructor option.
+ + Note that the return type is a collection, thus the ordering should not be relied upon. +
+

+ WARNING: The status of the Payloads feature is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case.

+ +

+ a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false + java.io.IOException +
+ + Checks if a payload can be loaded at this position. +

+ Payloads can only be loaded once per call to + . + +

+ true if there is a payload available at this position that can be loaded +
+ + The spans in the same order as the SpanNearQuery + + + Indicates that all subSpans have same doc() + + + Advances the subSpans to just after an ordered match with a minimum slop + that is smaller than the slop allowed by the SpanNearQuery. + + true iff there is such a match. + + + + Advance the subSpans to the same document + + + Check whether two Spans in the same document are ordered. + + + + + true iff spans1 starts before spans2 + or the spans start at the same position, + and spans1 ends before spans2. + + + + Like , but use the spans + starts and ends as parameters. + + + + Order the subSpans within the same document by advancing all later spans + after the previous one. + + + + The subSpans are ordered in the same doc, so there is a possible match. + Compute the slop while making the match as short as possible by advancing + all subSpans except the last one in reverse order. + + + + Similar to , but for the unordered case. + + Expert: + Only public for subclassing. Most implementations should not need this class + + + + WARNING: The List is not necessarily in order of the the positions + Collection of &lt;c&gt;byte[]&lt;/c&gt; payloads + IOException + + + Wraps a Spans, and can be used to form a linked list. + + + Matches spans near the beginning of a field. + + + Construct a SpanFirstQuery matching spans in match whose end + position is less than or equal to end. + + + + Return the SpanQuery whose matches are filtered. + + + Return the maximum end position permitted in a match. + + + Removes matches which overlap with another SpanQuery. + + + Construct a SpanNotQuery matching spans from include which + have no overlap with spans from exclude. + + + + Returns true iff o is equal to this. + + + Return the SpanQuery whose matches are filtered. + + + Return the SpanQuery whose matches must not overlap those returned. + + + Matches the union of its clauses. + + + Construct a SpanOrQuery merging the provided clauses. + + + Return the clauses whose spans are matched. + + + Expert: + Public for extension only + + + + A Query that matches documents containing a term. + This may be combined with other terms with a . + + + + Constructs a query for the term t. + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Returns the term of this query. + + + A Filter that restricts search results to a range of values in a given + field. + +

This filter matches the documents looking for terms that fall into the + supplied range according to . It is not intended + for numerical ranges, use instead. + +

If you construct a large number of range filters with different ranges but on the + same field, may have significantly better performance. +

+ 2.9 + +
+ + The field this range applies to + + The lower bound on this range + + The upper bound on this range + + Does this range include the lower bound? + + Does this range include the upper bound? + + IllegalArgumentException if both terms are null or if + lowerTerm is null and includeLower is true (similar for upperTerm + and includeUpper) + + + + WARNING: Using this constructor and supplying a non-null + value in the collator parameter will cause every single + index Term in the Field referenced by lowerTerm and/or upperTerm to be + examined. Depending on the number of index Terms in this Field, the + operation could be very slow. + + + + The lower bound on this range + + The upper bound on this range + + Does this range include the lower bound? + + Does this range include the upper bound? + + The collator to use when determining range inclusion; set + to null to use Unicode code point ordering instead of collation. + + IllegalArgumentException if both terms are null or if + lowerTerm is null and includeLower is true (similar for upperTerm + and includeUpper) + + + + Constructs a filter for field fieldName matching + less than or equal to upperTerm. + + + + Constructs a filter for field fieldName matching + greater than or equal to lowerTerm. + + + + Returns the field name for this filter + + + Returns the lower value of this range filter + + + Returns the upper value of this range filter + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the collator used to determine range inclusion, if any. + + + A Query that matches documents within an exclusive range of terms. + +

This query matches the documents looking for terms that fall into the + supplied range according to . It is not intended + for numerical ranges, use instead. + +

This query uses the + + rewrite method. +

+ 2.9 + +
+ + Constructs a query selecting all terms greater/equal than lowerTerm + but less/equal than upperTerm. + +

+ If an endpoint is null, it is said + to be "open". Either or both endpoints may be open. Open endpoints may not + be exclusive (you can't select all but the first or last term without + explicitly specifying the term to exclude.) + +

+ The field that holds both lower and upper terms. + + The term text at the lower end of the range + + The term text at the upper end of the range + + If true, the lowerTerm is + included in the range. + + If true, the upperTerm is + included in the range. + +
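A hedged C# sketch of this constructor (field and term texts are placeholders):

    // All "surname" terms from "adams" (inclusive) up to, but excluding, "brown".
    var range = new TermRangeQuery("surname", "adams", "brown", true, false);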
+ + Constructs a query selecting all terms greater/equal than + lowerTerm but less/equal than upperTerm. +

+ If an endpoint is null, it is said + to be "open". Either or both endpoints may be open. Open endpoints may not + be exclusive (you can't select all but the first or last term without + explicitly specifying the term to exclude.) +

+ If collator is not null, it will be used to decide whether + index terms are within the given range, rather than using the Unicode code + point order in which index terms are stored. +

+ WARNING: Using this constructor and supplying a non-null + value in the collator parameter will cause every single + index Term in the Field referenced by lowerTerm and/or upperTerm to be + examined. Depending on the number of index Terms in this Field, the + operation could be very slow. + +

+ + The Term text at the lower end of the range + + The Term text at the upper end of the range + + If true, the lowerTerm is + included in the range. + + If true, the upperTerm is + included in the range. + + The collator to use to collate index Terms, to determine + their membership in the range bounded by lowerTerm and + upperTerm. + +
+ + Prints a user-readable version of this query. + + + Returns the field name for this query + + + Returns the lower value of this range query + + + Returns the upper value of this range query + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the collator used to determine range inclusion, if any. + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified range parameters. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+ 2.9 + +
+ + Enumerates all terms greater/equal than lowerTerm + but less/equal than upperTerm. + + If an endpoint is null, it is said to be "open". Either or both + endpoints may be open. Open endpoints may not be exclusive + (you can't select all but the first or last term without + explicitly specifying the term to exclude.) + + + + + An interned field that holds both lower and upper terms. + + The term text at the lower end of the range + + The term text at the upper end of the range + + If true, the lowerTerm is included in the range. + + If true, the upperTerm is included in the range. + + The collator to use to collate index Terms, to determine their + membership in the range bounded by lowerTerm and + upperTerm. + + + IOException + + + Expert: A Scorer for documents matching a Term. + + + Construct a TermScorer. + + + The weight of the Term in the query. + + An iterator over the documents matching the Term. + + The Similarity implementation to be used for score + computations. + + The field norms of the document fields for the Term. + + + + Advances to the next document matching the query.
+ The iterator over the matching documents is buffered using + . + +
+ the document matching the query or -1 if there are no more documents. + +
+ + Advances to the first match beyond the current whose document number is + greater than or equal to a given target.
+ The implementation uses . + +
+ The target document number. + + the matching document or -1 if none exist. + +
+ + Returns a string representation of this TermScorer. + + + The is used to timeout search requests that + take longer than the maximum allowed search time limit. After this time is + exceeded, the search thread is stopped by throwing a + . + + + + Default timer resolution. + + + + + Default for . + + + + + Create a TimeLimitedCollector wrapper over another with a specified timeout. + the wrapped + + max time allowed for collecting hits after which is thrown + + + + Calls on the decorated + unless the allowed time has passed, in which case it throws an exception. + + + TimeExceededException + if the time allowed has exceeded. + + + + + Gets or sets the timer resolution. + The default timer resolution is 20 milliseconds. + This means that a search required to take no longer than + 800 milliseconds may be stopped after 780 to 820 milliseconds. +
Note that: finer (smaller) resolution is more accurate but less efficient; setting the resolution to less than 5 milliseconds will be silently raised to 5 milliseconds; and setting the resolution smaller than the current resolution might take effect only after the current resolution interval has elapsed (if the current resolution of 20 milliseconds is changed to 5 milliseconds, it can take up to 20 milliseconds for the change to take effect).
+
+ + Checks if this time limited collector is greedy in collecting the last hit. + A non greedy collector, upon a timeout, would throw a + without allowing the wrapped collector to collect current doc. A greedy one would + first allow the wrapped hit collector to collect current doc and only then + throw a . + + + + TimerThread provides a pseudo-clock service to all searching + threads, so that they can count elapsed time with less overhead + than repeatedly calling System.currentTimeMillis. A single + thread should be created to be used for all searches. + + + + Get the timer value in milliseconds. + + + Thrown when elapsed search time exceeds allowed search time. + + + Returns allowed time (milliseconds). + + + Returns elapsed time (milliseconds). + + + Returns last doc(absolute doc id) that was collected when the search time exceeded. + + + Represents hits returned by + and + + + + Constructs a TopDocs with a default maxScore=Float.NaN. + + + + + + The total number of hits for the query. + + + The top hits for the query. + + + + Gets or sets the maximum score value encountered, needed for normalizing. + Note that in case scores are not tracked, this returns . + + + + A base class for all collectors that return a output. This + collector allows easy extension by providing a single constructor which + accepts a as well as protected members for that + priority queue and a counter of the number of total hits.
+ Extending classes can override and + in order to provide their own implementation. +
+
+ + The priority queue which holds the top documents. Note that different + implementations of PriorityQueue give different meaning to 'top documents'. + HitQueue for example aggregates the top scoring documents, while other PQ + implementations may hold documents sorted by other criteria. + + + + The total number of documents that the collector encountered. + + + Populates the results array with the ScoreDoc instaces. This can be + overridden in case a different ScoreDoc type should be returned. + + + + Returns a instance containing the given results. If + results is null it means there are no results to return, + either because there were 0 calls to collect() or because the arguments to + topDocs were invalid. + + + + Returns the top docs that were collected by this collector. + + + Returns the documents in the rage [start .. pq.size()) that were collected + by this collector. Note that if start >= pq.size(), an empty TopDocs is + returned.
This method is convenient to call if the application always asks for the last results, starting from the last 'page'.
+ NOTE: you cannot call this method more than once for each search + execution. If you need to call it more than once, passing each time a + different start, you should call and work + with the returned object, which will contain all the + results this search execution collected. +
+
+ + Returns the documents in the rage [start .. start+howMany) that were + collected by this collector. Note that if start >= pq.size(), an empty + TopDocs is returned, and if pq.size() - start < howMany, then only the + available documents in [start .. pq.size()) are returned.
+ This method is useful to call in case pagination of search results is + allowed by the search application, as well as it attempts to optimize the + memory used by allocating only as much as requested by howMany.
+ NOTE: you cannot call this method more than once for each search + execution. If you need to call it more than once, passing each time a + different range, you should call and work with the + returned object, which will contain all the results this + search execution collected. +
+
+ + The total number of documents that matched this query. + + + A that sorts by using + s. +

+ See the method + for instantiating a TopFieldCollector. + +

NOTE: This API is experimental and might change in + incompatible ways in the next release.

+

+
+ + Creates a new from the given + arguments. + +

NOTE: The instances returned by this method + pre-allocate a full array of length + numHits. + +

+ the sort criteria (SortFields). + + the number of results to collect. + + specifies whether the actual field values should be returned on + the results (FieldDoc). + + specifies whether document scores should be tracked and set on the + results. Note that if set to false, then the results' scores will + be set to Float.NaN. Setting this to true affects performance, as + it incurs the score computation on each competitive result. + Therefore if document scores are not required by the application, + it is recommended to set it to false. + + specifies whether the query's maxScore should be tracked and set + on the resulting . Note that if set to false, + returns Float.NaN. Setting this to + true affects performance as it incurs the score computation on + each result. Also, setting this true automatically sets + trackDocScores to true as well. + + specifies whether documents are scored in doc Id order or not by + the given in . + + a instance which will sort the results by + the sort criteria. + + IOException +
+ + + Represents hits returned by . + + + + The fields which were used to sort results by. + + + Creates one of these objects. + Total number of hits for the query. + + The top hits for the query. + + The sort criteria used to find the top hits. + + The maximum score encountered. + + + + A implementation that collects the top-scoring hits, + returning them as a . This is used by to + implement -based search. Hits are sorted by score descending + and then (when the scores are tied) docID ascending. When you create an + instance of this collector you should know in advance whether documents are + going to be collected in doc Id order or not. + +

NOTE: The values and + are not valid scores. This + collector will not properly collect hits with such + scores. +

+
+ + Creates a new given the number of hits to + collect and whether documents are scored in order by the input + to . + +

NOTE: The instances returned by this method + pre-allocate a full array of length + numHits, and fill the array with sentinel + objects. +

+
+ + Implements the wildcard search query. Supported wildcards are *, which + matches any character sequence (including the empty one), and ?, + which matches any single character. Note this query can be slow, as it + needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, + a Wildcard term should not start with one of the wildcards * or + ?. + +

This query uses the + + rewrite method. + +
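A hedged C# sketch (field and pattern are placeholders):

    // Matches "lucene", "lucenenet", etc.; note the pattern does not start with a wildcard.
    var wildcard = new WildcardQuery(new Term("body", "lucene*"));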

+ + +
+ + Prints a user-readable version of this query. + + + Returns the pattern term. + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified wildcard filter term. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + ***************************************** + String equality with support for wildcards + ****************************************** + + + + Creates a new WildcardTermEnum. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. +

+
+ + Determines if a word matches a wildcard pattern. + Work released by Granta Design Ltd after originally being done on + company time. + + + + This exception is thrown when there is an attempt to + access something that has already been closed. + + + + Base implementation class for buffered . + + + Abstract base class for output to a file in a Directory. A random-access + output stream. Used for all Lucene index output operations. + + + + + + + + Writes a single byte. + + + + + Writes an array of bytes. + the bytes to write + + the number of bytes to write + + + + + + Writes an array of bytes. + the bytes to write + + the offset in the byte array + + the number of bytes to write + + + + + + Writes an int as four bytes. + + + + + Writes an int in a variable-length format. Writes between one and + five bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Writes a long as eight bytes. + + + + + Writes an long in a variable-length format. Writes between one and five + bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Writes a string. + + + + + Writes a sub sequence of characters from s as the old + format (modified UTF-8 encoded bytes). + + the source of the characters + + the first character in the sequence + + the number of characters in the sequence + + -- please pre-convert to utf8 bytes + instead or use + + + + Writes a sub sequence of characters from char[] as + the old format (modified UTF-8 encoded bytes). + + the source of the characters + + the first character in the sequence + + the number of characters in the sequence + + -- please pre-convert to utf8 bytes instead or use + + + + Copy numBytes bytes from input to ourself. + + + Forces any buffered output to be written. + + + Closes this stream to further operations. + + + Closes this stream to further operations. + + + Sets current position in this file, where the next write will occur. + + + + + Set the file length. By default, this method does + nothing (it's optional for a Directory to implement + it). But, certain Directory implementations (for + + can use this to inform the + underlying IO system to pre-allocate the file to the + specified size. If the length is longer than the + current file length, the bytes added to the file are + undefined. Otherwise the file is truncated. + + file length + + + + Returns the current position in this file, where the next write will + occur. + + + + + + The number of bytes in the file. + + + Writes a single byte. + + + + + Writes an array of bytes. + the bytes to write + + the number of bytes to write + + + + + + Forces any buffered output to be written. + + + Expert: implements buffer write. Writes bytes at the current position in + the output. + + the bytes to write + + the number of bytes to write + + + + Expert: implements buffer write. Writes bytes at the current position in + the output. + + the bytes to write + + the offset in the byte array + + the number of bytes to write + + + + Closes this stream to further operations. + + + Sets current position in this file, where the next write will occur. + + + + + Returns the current position in this file, where the next write will + occur. + + + + + + The number of bytes in the file. + + + Writes bytes through to a primary IndexOutput, computing + checksum as it goes. Note that you cannot use seek(). + + + + Writes bytes through to a primary IndexOutput, computing + checksum. Note that you cannot use seek(). 
+ + + + Starts but does not complete the commit of this file (= + writing of the final checksum at the end). After this + is called must call and the + to complete the commit. + + + + See + + + Expert: A Directory instance that switches files between + two other Directory instances. +

Files with the specified extensions are placed in the + primary directory; others are placed in the secondary + directory. The provided Set must not change once passed + to this class, and must allow multiple threads to call + contains at once.

+ +

NOTE: this API is new and experimental and is + subject to suddenly change in the next release. +

+
+ + Utility method to return a file's extension. + + + Return the primary directory + + + Return the secondary directory + + + + Base class for Directory implementations that store index + files in the file system. There are currently three core + subclasses: + + + + is a straightforward + implementation using java.io.RandomAccessFile. + However, it has poor concurrent performance + (multiple threads will bottleneck) as it + synchronizes when multiple threads read from the + same file. + + uses java.nio's + FileChannel's positional io when reading to avoid + synchronization when reading from the same file. + Unfortunately, due to a Windows-only Sun + JRE bug this is a poor choice for Windows, but + on all other platforms this is the preferred + choice. Applications using or + Future#cancel(boolean) (on Java 1.5) should use + instead. See java doc + for details. + + + + uses memory-mapped IO when + reading. This is a good choice if you have plenty + of virtual memory relative to your index size, eg + if you are running on a 64 bit JRE, or you are + running on a 32 bit JRE but your index sizes are + small enough to fit into the virtual memory space. + Java has currently the limitation of not being able to + unmap files from user code. The files are unmapped, when GC + releases the byte buffers. Due to + + this bug in Sun's JRE, MMapDirectory's + is unable to close the underlying OS file handle. Only when + GC finally collects the underlying objects, which could be + quite some time later, will the file handle be closed. + This will consume additional transient disk usage: on Windows, + attempts to delete or overwrite the files will result in an + exception; on other platforms, which typically have a "delete on + last close" semantics, while such operations will succeed, the bytes + are still consuming space on disk. For many applications this + limitation is not a problem (e.g. if you have plenty of disk space, + and you don't rely on overwriting files on Windows) but it's still + an important limitation to be aware of. This class supplies a + (possibly dangerous) workaround mentioned in the bug report, + which may fail on non-Sun JVMs. + + Applications using or + Future#cancel(boolean) (on Java 1.5) should use + instead. See + java doc for details. + + + Unfortunately, because of system peculiarities, there is + no single overall best implementation. Therefore, we've + added the method, to allow Lucene to choose + the best FSDirectory implementation given your + environment, and the known limitations of each + implementation. For users who have no reason to prefer a + specific implementation, it's best to simply use + . For all others, you should instantiate the + desired implementation directly. + +

+ The locking implementation is by default NativeFSLockFactory,
+ but can be changed by passing in a custom LockFactory instance.
+
+ + Initializes the directory to create a new file with the given name. + This method should be used in . + + + + The underlying filesystem directory + + + Create a new FSDirectory for the named location (ctor for subclasses). + the path of the directory + + the lock factory to use, or null for the default + (); + + IOException + + + Creates an FSDirectory instance, trying to pick the + best implementation given the current environment. + The directory returned uses the . + +

+ Currently this returns SimpleFSDirectory, as
+ NIOFSDirectory is currently not supported.
+
+ NOTE: this method may suddenly change which
+ implementation is returned from release to release, in
+ the event that higher performance defaults become
+ possible; if the precise implementation is important to
+ your application, please instantiate it directly
+ instead. On 64 bit systems it may also be good to
+ return MMapDirectory, but this is disabled
+ because of officially missing unmap support in Java.
+ For optimal performance you should consider using
+ this implementation on 64 bit JVMs.
+
+ See the class description above.
+
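+
+ For illustration, a minimal C# sketch of letting Lucene pick the directory
+ implementation via FSDirectory.Open; the index path and the DirectoryInfo
+ overload used here are assumptions, not part of this package's files:
+
+   using System.IO;
+   using Lucene.Net.Store;
+   using LuceneDirectory = Lucene.Net.Store.Directory;
+
+   // Let Lucene choose the best FSDirectory implementation for this platform.
+   LuceneDirectory dir = FSDirectory.Open(new DirectoryInfo(@"C:\indexes\docs"));
+   // ... open an IndexWriter or IndexReader against "dir" ...
+   dir.Close();
+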
+ Creates an FSDirectory instance, trying to pick the
+ best implementation given the current environment.
+ The directory returned uses the NativeFSLockFactory.
+
+ Currently this returns SimpleFSDirectory, as
+ NIOFSDirectory is currently not supported.
+
+ NOTE: this method may suddenly change which
+ implementation is returned from release to release, in
+ the event that higher performance defaults become
+ possible; if the precise implementation is important to
+ your application, please instantiate it directly
+ instead. On 64 bit systems it may also be good to
+ return MMapDirectory, but this is disabled
+ because of officially missing unmap support in Java.
+ For optimal performance you should consider using
+ this implementation on 64 bit JVMs.
+
+ See the class description above.
+
+ + Just like , but allows you to + also specify a custom . + + + + Lists all files (not subdirectories) in the + directory. This method never returns null (throws + instead). + + + NoSuchDirectoryException if the directory + does not exist, or does exist but is not a + directory. + + IOException if list() returns null + + + Lists all files (not subdirectories) in the + directory. + + + + + + Returns true iff a file with the given name exists. + + + Returns the time the named file was last modified. + + + Returns the time the named file was last modified. + + + Set the modified time of an existing file to now. + + + Returns the length in bytes of a file in the directory. + + + Removes an existing file in the directory. + + + So we can do some byte-to-hexchar conversion below + + + For debug output. + + + Default read chunk size. This is a conditional + default: on 32bit JVMs, it defaults to 100 MB. On + 64bit JVMs, it's Integer.MAX_VALUE. + + + + + + The maximum number of bytes to read at once from the + underlying file during . + + + + + + Base class for file system based locking implementation. + + +

+ Base class for Locking implementation. A Directory uses
+ instances of this class to implement locking.
+
+ Note that there are some useful tools to verify that
+ your LockFactory is working correctly: VerifyingLockFactory,
+ LockStressTest and LockVerifyServer.
+
+ + Return a new Lock instance identified by lockName. + name of the lock to be created. + + + + Attempt to clear (forcefully unlock and remove) the + specified lock. Only call this at a time when you are + certain this lock is no longer in use. + + name of the lock to be cleared. + + + + Gets or sets the prefix in use for all locks created in this + LockFactory. This is normally called once, when a + Directory gets this LockFactory instance. However, you + can also call this (after this instance is assigned to + a Directory) to override the prefix in use. This + is helpful if you're running Lucene on machines that + have different mount points for the same shared + directory. + + + + Directory for the lock files. + + + Gets the lock directory. + Subclasses can use this to set the lock directory. + This method can be only called + once to initialize the lock directory. It is used by + to set the lock directory to itsself. + Subclasses can also use this method to set the directory + in the constructor. + + + + + An interprocess mutex lock. +

+ Typical use might look like:
+
+   new Lock.With(directory.makeLock("my.lock")) {
+     public Object doBody() {
+       ... code to execute while locked ...
+     }
+   }.run();
+
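+
+ A rough C# equivalent of the Java-flavoured snippet above, assuming an
+ existing Directory instance named "directory" (the lock name is arbitrary):
+
+   Lock myLock = directory.MakeLock("my.lock");
+   if (myLock.Obtain())
+   {
+       try
+       {
+           // ... code to execute while locked ...
+       }
+       finally
+       {
+           myLock.Release();
+       }
+   }
+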
+ + Pass this value to to try + forever to obtain the lock. + + + + How long waits, in milliseconds, + in between attempts to acquire the lock. + + + + Attempts to obtain exclusive access and immediately return + upon success or failure. + + true iff exclusive access is obtained + + + + If a lock obtain called, this failureReason may be set + with the "root cause" Exception as to why the lock was + not obtained. + + + + Attempts to obtain an exclusive lock within amount of + time given. Polls once per + (currently 1000) milliseconds until lockWaitTimeout is + passed. + + length of time to wait in + milliseconds or + to retry forever + + true if lock was obtained + + LockObtainFailedException if lock wait times out + IllegalArgumentException if lockWaitTimeout is + out of bounds + + IOException if obtain() throws IOException + + + Releases exclusive access. + + + Returns true if the resource is currently locked. Note that one must + still call before using the resource. + + + + Utility class for executing code with exclusive access. + + + Constructs an executor that will grab the named lock. + + + Code to execute with exclusive access. + + + Calls while lock is obtained. Blocks if lock + cannot be obtained immediately. Retries to obtain lock once per second + until it is obtained, or until it has tried ten times. Lock is released when + exits. + + LockObtainFailedException if lock could not + be obtained + + IOException if throws IOException + + + This exception is thrown when the write.lock + could not be acquired. This + happens when a writer tries to open an index + that another writer already has open. + + + + + + This exception is thrown when the write.lock + could not be released. + + + + + + Simple standalone tool that forever acquires & releases a + lock using a specific LockFactory. Run without any args + to see usage. + + + + + + + + + Simple standalone server that must be running when you + use . This server simply + verifies at most one process holds the lock at a time. + Run without any args to see usage. + + + + + + + + + File-based implementation that uses + mmap for reading, and + for writing. + +

+ NOTE: memory mapping uses up a portion of the
+ virtual memory address space in your process equal to the
+ size of the file being mapped. Before using this class,
+ be sure you have plenty of virtual address space, e.g. by
+ using a 64 bit JRE, or a 32 bit JRE with indexes that are
+ guaranteed to fit within the address space.
+ On 32 bit platforms also consult MaxChunkSize
+ if you have problems with mmap failing because of fragmented
+ address space. If you get an OutOfMemoryException, it is recommended
+ to reduce the chunk size until it works.
+
+ Due to this bug in Sun's JRE, MMapDirectory's IndexInput.Close()
+ is unable to close the underlying OS file handle. Only when GC
+ finally collects the underlying objects, which could be quite
+ some time later, will the file handle be closed.
+
+ This will consume additional transient disk usage: on Windows,
+ attempts to delete or overwrite the files will result in an
+ exception; on other platforms, which typically have a "delete on
+ last close" semantics, while such operations will succeed, the bytes
+ are still consuming space on disk. For many applications this
+ limitation is not a problem (e.g. if you have plenty of disk space,
+ and you don't rely on overwriting files on Windows) but it's still
+ an important limitation to be aware of.
+
+ This class supplies the workaround mentioned in the bug report
+ (disabled by default, see UseUnmap), which may fail on
+ non-Sun JVMs. It forcefully unmaps the buffer on close by using
+ an undocumented internal cleanup functionality.
+ UNMAP_SUPPORTED is true if the workaround
+ can be enabled (with no guarantees).
+
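+
+ A minimal sketch of constructing an MMapDirectory, assuming a hypothetical
+ index path and the default lock factory overload described below:
+
+   using System.IO;
+   using Lucene.Net.Store;
+
+   var mmapDir = new MMapDirectory(new DirectoryInfo(@"C:\indexes\docs"));
+   // ... hand mmapDir to an IndexReader / IndexSearcher ...
+   mmapDir.Close();
+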
+ + Create a new MMapDirectory for the named location. + + + the path of the directory + + the lock factory to use, or null for the default. + + IOException + + + Create a new MMapDirectory for the named location and the default lock factory. + + + the path of the directory + + IOException + + + true, if this platform supports unmapping mmaped files. + + + Try to unmap the buffer, this method silently fails if no support + for that in the JVM. On Windows, this leads to the fact, + that mmapped files cannot be modified or deleted. + + + + Creates an IndexInput for the file with the given name. + + + Creates an IndexOutput for the file with the given name. + + + Enables or disables the workaround for unmapping the buffers + from address space after closing , that is + mentioned in the bug report. This hack may fail on non-Sun JVMs. + It forcefully unmaps the buffer on close by using + an undocumented internal cleanup functionality. +

+ NOTE: Enabling this is completely unsupported
+ by Java and may lead to JVM crashes if an IndexInput
+ is closed while another thread is still accessing it (SIGSEGV).
+
+ Throws IllegalArgumentException if UNMAP_SUPPORTED
+ is false and the workaround cannot be enabled.
+
+ + Gets or sets the maximum chunk size (default is for + 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping. + Especially on 32 bit platform, the address space can be very fragmented, + so large index files cannot be mapped. + Using a lower chunk size makes the directory implementation a little + bit slower (as the correct chunk must be resolved on each seek) + but the chance is higher that mmap does not fail. On 64 bit + Java platforms, this parameter should always be , + as the adress space is big enough. + + + +

+ Implements LockFactory using native OS file
+ locks. Note that because this LockFactory relies on
+ java.nio.* APIs for locking, any problems with those APIs
+ will cause locking to fail. Specifically, on certain NFS
+ environments the java.nio.* locks will fail (the lock can
+ incorrectly be double acquired) whereas
+ SimpleFSLockFactory worked perfectly in those same
+ environments. For NFS based access to an index, it's
+ recommended that you try SimpleFSLockFactory
+ first and work around the one limitation that a lock file
+ could be left when the JVM exits abnormally.
+
+ The primary benefit of NativeFSLockFactory is
+ that lock files will be properly removed (by the OS) if
+ the JVM has an abnormal exit.
+
+ Note that, unlike SimpleFSLockFactory, the existence of
+ leftover lock files in the filesystem on exiting the JVM
+ is fine because the OS will free the locks held against
+ these files even though the files still remain.
+
+ If you suspect that this or any other LockFactory is
+ not working properly in your environment, you can easily
+ test it by using VerifyingLockFactory,
+ LockVerifyServer and LockStressTest.
+
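+
+ A short sketch of pairing a directory with this lock factory; the path is a
+ placeholder, and the parameterless constructor relies on the directory
+ setting the lock dir to itself, as described below:
+
+   using System.IO;
+   using Lucene.Net.Store;
+
+   var lockFactory = new NativeFSLockFactory();   // lock dir will be set by the Directory
+   var dir = FSDirectory.Open(new DirectoryInfo(@"C:\indexes\docs"), lockFactory);
+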
+ + Create a NativeFSLockFactory instance, with null (unset) + lock directory. When you pass this factory to a + subclass, the lock directory is automatically set to the + directory itsself. Be sure to create one instance for each directory + your create! + + + + Create a NativeFSLockFactory instance, storing lock + files into the specified lockDirName: + + + where lock files are created. + + + + Create a NativeFSLockFactory instance, storing lock + files into the specified lockDir: + + + where lock files are created. + + + + + Not implemented. Waiting for volunteers. + + + + + Not implemented. Waiting for volunteers. + + + + Use this to disable locking entirely. + Only one instance of this lock is created. You should call + to get the instance. + + + + + + + This exception is thrown when you try to list a + non-existent directory. + + + + A memory-resident implementation. Locking + implementation is by default the + but can be changed with . + + + + Constructs an empty . + + + Creates a new RAMDirectory instance from a different + Directory implementation. This can be used to load + a disk-based index into memory. +

+ This should be used only with indices that can fit into memory.
+
+ Note that the resulting RAMDirectory instance is fully
+ independent from the original Directory (it is a
+ complete copy). Any subsequent changes to the
+ original Directory will not be visible in the
+ RAMDirectory instance.
+
+ Parameter: a Directory value. Throws an exception if an error occurs.
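+
+ For example, loading an existing on-disk index into memory as an independent
+ copy (paths are placeholders):
+
+   using System.IO;
+   using Lucene.Net.Store;
+
+   var onDisk = FSDirectory.Open(new DirectoryInfo(@"C:\indexes\docs"));
+   var inMemory = new RAMDirectory(onDisk);   // complete, independent copy
+   onDisk.Close();
+   // ... search against inMemory; later changes to onDisk are not reflected ...
+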
+ + Returns true iff the named file exists in this directory. + + + Returns the time the named file was last modified. + IOException if the file does not exist + + + Set the modified time of an existing file to now. + IOException if the file does not exist + + + Returns the length in bytes of a file in the directory. + IOException if the file does not exist + + + Return total size in bytes of all files in this + directory. This is currently quantized to + RAMOutputStream.BUFFER_SIZE. + + + + Removes an existing file in the directory. + IOException if the file does not exist + + + Creates a new, empty file in the directory with the given name. Returns a stream writing this file. + + + Returns a stream reading an existing file. + + + Closes the store to future operations, releasing associated memory. + + + A memory-resident implementation. + + + + + A memory-resident implementation. + For lucene internal use. + + + + Construct an empty output buffer. + + + Copy the current contents of this buffer to the named output. + + + Resets this to an empty buffer. + + + Returns byte usage of all buffers. + + + A straightforward implementation of + using java.io.RandomAccessFile. However, this class has + poor concurrent performance (multiple threads will + bottleneck) as it synchronizes when multiple threads + read from the same file. It's usually better to use + or instead. + + + + Create a new SimpleFSDirectory for the named location. + + + the path of the directory + + the lock factory to use, or null for the default. + + IOException + + + Create a new SimpleFSDirectory for the named location and the default lock factory. + + + the path of the directory + + IOException + + + Creates an IndexOutput for the file with the given name. + + + Creates an IndexInput for the file with the given name. + + + IndexInput methods + + + Method used for testing. Returns true if the underlying + file descriptor is valid. + + + + output methods: + + + Random-access methods + + +

+ Implements LockFactory using File.createNewFile().
+
+ NOTE: the javadocs
+ for File.createNewFile contain a vague
+ yet spooky warning about not using the API for file
+ locking. This warning was added due to this
+ bug, and in fact the only known problem with using
+ this API for locking is that the Lucene write lock may
+ not be released when the JVM exits abnormally.
+
+ When this happens, a LockObtainFailedException
+ is hit when trying to create a writer, in which case you
+ need to explicitly clear the lock file first. You can
+ either manually remove the file, or use the
+ IndexWriter.Unlock API. But, first be certain that no writer is in fact
+ writing to the index, otherwise you can easily corrupt
+ your index.
+
+ If you suspect that this or any other LockFactory is
+ not working properly in your environment, you can easily
+ test it by using VerifyingLockFactory,
+ LockVerifyServer and LockStressTest.
+
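+
+ A hedged sketch of clearing a leftover write lock, assuming the static
+ IndexWriter helpers behave as in the Java original; only do this when no
+ writer is actually open against the index:
+
+   using Lucene.Net.Index;
+
+   if (IndexWriter.IsLocked(dir))
+   {
+       IndexWriter.Unlock(dir);   // removes the stale write.lock
+   }
+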
+ + Create a SimpleFSLockFactory instance, with null (unset) + lock directory. When you pass this factory to a + subclass, the lock directory is automatically set to the + directory itsself. Be sure to create one instance for each directory + your create! + + + + Instantiate using the provided directory (as a File instance). + where lock files should be created. + + + + Instantiate using the provided directory name (String). + where lock files should be created. + + + + Implements for a single in-process instance, + meaning all locking will take place through this one instance. + Only use this when you are certain all + IndexReaders and IndexWriters for a given index are running + against a single shared in-process Directory instance. This is + currently the default locking for RAMDirectory. + + + + + + + A that wraps another + and verifies that each lock obtain/release + is "correct" (never results in two processes holding the + lock at the same time). It does this by contacting an + external server () to assert that + at most one process holds the lock at a time. To use + this, you should also run on the + host & port matching what you pass to the constructor. + + + + + + + + + should be a unique id across all clients + + the LockFactory that we are testing + + host or IP where + is running + + the port is + listening on + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A simple wrapper to allow for the use of the GeneralKeyedCollection. The + wrapper is required as there can be several keys for an object depending + on how many interfaces it implements. + + + + + This class provides supporting methods of java.util.BitSet + that are not present in System.Collections.BitArray. + + + + + Returns the next set bit at or after index, or -1 if no such bit exists. + + + the index of bit array at which to start checking + the next set bit or -1 + + + + Returns the next un-set bit at or after index, or -1 if no such bit exists. + + + the index of bit array at which to start checking + the next set bit or -1 + + + + Returns the number of bits set to true in this BitSet. + + The BitArray object. + The number of bits set to true in this BitSet. + + + + Mimics Java's Character class. + + + + + + + + + + + + + + + + + + + + + For Debuging purposes. + + + + + Support class used to handle Hashtable addition, which does a check + first to make sure the added item is unique in the hash. + + + + + Converts the specified collection to its string representation. + + The collection to convert to string. + A string representation of the specified collection. + + + + Compares two string arrays for equality. + + First string array list to compare + Second string array list to compare + true if the strings are equal in both arrays, false otherwise + + + + Sorts an IList collections + + The System.Collections.IList instance that will be sorted + The Comparator criteria, null to use natural comparator. + + + + Fills the array with an specific value from an specific index to an specific index. + + The array to be filled. + The first index to be filled. + The last index to be filled. + The value to fill the array with. + + + + Fills the array with an specific value. + + The array to be filled. + The value to fill the array with. + + + + Compares the entire members of one array whith the other one. + + The array to be compared. + The array to be compared with. + Returns true if the two specified arrays of Objects are equal + to one another. 
The two arrays are considered equal if both arrays + contain the same number of elements, and all corresponding pairs of + elements in the two arrays are equal. Two objects e1 and e2 are + considered equal if (e1==null ? e2==null : e1.equals(e2)). In other + words, the two arrays are equal if they contain the same elements in + the same order. Also, two array references are considered equal if + both are null. + + + + Summary description for TestSupportClass. + + + + + Compares two Term arrays for equality. + + First Term array to compare + Second Term array to compare + true if the Terms are equal in both arrays, false otherwise + + + + Support class that emulates the behavior of the ConcurrentDictionary + from .NET 4.0. This class will, in most cases, perform slightly slower + than the 4.0 equivalent. Note that all behavior is emulated, which means + that , , and + all return a snapshot of the data at the time it was called. + + + + + Removes all items from the . + + The is read-only. + + + + + Contains conversion support elements such as classes, interfaces and static methods. + + + + + + + + + Represents a strongly typed list of objects that can be accessed by index. + Provides methods to search, sort, and manipulate lists. Also provides functionality + to compare lists against each other through an implementations of + . + The type of elements in the list. + + + Initializes a new instance of the + class that is empty and has the + default initial capacity. + + + Initializes a new instance of the + class that contains elements copied from the specified collection and has + sufficient capacity to accommodate the number of elements copied. + The collection whose elements are copied to the new list. + + + Initializes a new instance of the + class that is empty and has the specified initial capacity. + The number of elements that the new list can initially store. + + + Adds a range of objects represented by the + implementation. + The + implementation to add to this list. + + + Compares the counts of two + implementations. + This uses a trick in LINQ, sniffing types for implementations + of interfaces that might supply shortcuts when trying to make comparisons. + In this case, that is the and + interfaces, either of which can provide a count + which can be used in determining the equality of sequences (if they don't have + the same count, then they can't be equal). + The from the left hand side of the + comparison to check the count of. + The from the right hand side of the + comparison to check the count of. + Null if the result is indeterminate. This occurs when either + or doesn't implement or . + Otherwise, it will get the count from each and return true if they are equal, false otherwise. + + + Compares the contents of a + implementation to another one to determine equality. + Thinking of the implementation as + a string with any number of characters, the algorithm checks + each item in each list. If any item of the list is not equal (or + one list contains all the elements of another list), then that list + element is compared to the other list element to see which + list is greater. + The implementation + that is considered the left hand side. + The implementation + that is considered the right hand side. + True if the items are equal, false otherwise. + + + Compares this sequence to another + implementation, returning true if they are equal, false otherwise. + The other implementation + to compare against. + True if the sequence in + is the same as this one. 
+ + + Compares this object for equality against other. + The other object to compare this object against. + True if this object and are equal, false + otherwise. + + + Gets the hash code for the list. + The hash code value. + + + Gets the hash code for the list. + The + implementation which will have all the contents hashed. + The hash code value. + + + Clones the . + This is a shallow clone. + A new shallow clone of this + . + + + + Represents the methods to support some operations over files. + + + + + Returns an array of abstract pathnames representing the files and directories of the specified path. + + The abstract pathname to list it childs. + An array of abstract pathnames childs of the path specified or null if the path is not a directory + + + + Returns a list of files in a give directory. + + The full path name to the directory. + + An array containing the files. + + + + Flushes the specified file stream. Ensures that all buffered + data is actually written to the file system. + + The file stream. + + + A collection of which can be + looked up by instances of . + The type of the items contains in this + collection. + The type of the keys that can be used to look + up the items. + + + Creates a new instance of the + class. + The which will convert + instances of to + when the override of is called. + + + The which will convert + instances of to + when the override of is called. + + + Converts an item that is added to the collection to + a key. + The instance of + to convert into an instance of . + The instance of which is the + key for this item. + + + Determines if a key for an item exists in this + collection. + The instance of + to see if it exists in this collection. + True if the key exists in the collection, false otherwise. + + + + A C# emulation of the Java Hashmap + + A is a close equivalent to the Java + Hashmap. One difference java implementation of the class is that + the Hashmap supports both null keys and values, where the C# Dictionary + only supports null values not keys. Also, V Get(TKey) + method in Java returns null if the key doesn't exist, instead of throwing + an exception. This implementation doesn't throw an exception when a key + doesn't exist, it will return null. This class is slower than using a + , because of extra checks that have to be + done on each access, to check for null. + + + NOTE: This class works best with nullable types. default(T) is returned + when a key doesn't exist in the collection (this being similar to how Java returns + null). Therefore, if the expected behavior of the java code is to execute code + based on if the key exists, when the key is an integer type, it will return 0 instead of null. + + + Consider also implementing IDictionary, IEnumerable, and ICollection + like does, so HashMap can be + used in substituted in place for the same interfaces it implements. + + + The type of keys in the dictionary + The type of values in the dictionary + + + + Wraps a dictionary and adds the value + represented by the null key + + + + + Wraps a dictionary's collection, adding in a + null key. + + + + + A simple class for number conversions. + + + + + Min radix value. + + + + + Max radix value. + + + + + Converts a number to System.String. + + + + + + + Converts a number to System.String. + + + + + + + Converts a number to System.String in the specified radix. + + A number to be converted. + A radix. + A System.String representation of the number in the specified redix. + + + + Parses a number in the specified radix. 
+ + An input System.String. + A radix. + The parsed number in the specified radix. + + + + Performs an unsigned bitwise right shift with the specified number + + Number to operate on + Ammount of bits to shift + The resulting number from the shift operation + + + + Performs an unsigned bitwise right shift with the specified number + + Number to operate on + Ammount of bits to shift + The resulting number from the shift operation + + + + Returns the index of the first bit that is set to true that occurs + on or after the specified starting index. If no such bit exists + then -1 is returned. + + The BitArray object. + The index to start checking from (inclusive). + The index of the next set bit. + + + + Converts a System.String number to long. + + + + + + + Provides platform infos. + + + + + Whether we run under a Unix platform. + + + + + Whether we run under a supported Windows platform. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Copies an array of chars obtained from a String into a specified array of chars + + The String to get the chars from + Position of the String to start getting the chars + Position of the String to end getting the chars + Array to return the chars + Position of the destination array of chars to start storing the chars + An array of chars + + + + Abstract base class that provides a synchronization interface + for derived lock types + + + + + A ThreadLock class that actually does no locking + Used in ParallelMultiSearcher/MultiSearcher + + + + + Wrapper class for the Monitor Enter/Exit methods + using the interface + + + + + A weak reference wrapper for the hashtable keys. Whenever a key\value pair + is added to the hashtable, the key is wrapped using a WeakKey. WeakKey saves the + value of the original object hashcode for fast comparison. + + + + Methods for manipulating arrays. + + + Parses the string argument as if it was an int value and returns the + result. Throws NumberFormatException if the string does not represent an + int quantity. + + + a string representation of an int quantity. + + int the value represented by the argument + + NumberFormatException if the argument could not be parsed as an int quantity. + + + Parses a char array into an int. + the character array + + The offset into the array + + The length + + the int + + NumberFormatException if it can't parse + + + Parses the string argument as if it was an int value and returns the + result. Throws NumberFormatException if the string does not represent an + int quantity. The second argument specifies the radix to use when parsing + the value. + + + a string representation of an int quantity. + + + + the base to use for conversion. + + int the value represented by the argument + + NumberFormatException if the argument could not be parsed as an int quantity. + + + Returns hash of chars in range start (inclusive) to + end (inclusive) + + + + Returns hash of chars in range start (inclusive) to + end (inclusive) + + + + An average, best guess, MemoryModel that should work okay on most systems. + + + + + Returns primitive memory sizes for estimating RAM usage. + + + + + a primitive Class - bool, byte, char, short, long, float, + short, double, int + + the size in bytes of given primitive Class + + + + size of array beyond contents + + + Class size overhead + + + size of reference + + + A variety of high efficiencly bit twiddling routines. 
+ + + $Id$ + + + + Returns the number of bits set in the long + + + Returns the number of set bits in an array of longs. + + + Returns the popcount or cardinality of the two sets after an intersection. + Neither array is modified. + + + + Returns the popcount or cardinality of the union of two sets. + Neither array is modified. + + + + Returns the popcount or cardinality of A & ~B + Neither array is modified. + + + + table of number of trailing zeros in a byte + + + Returns number of trailing zeros in a 64 bit long value. + + + Returns number of trailing zeros in a 32 bit int value. + + + returns 0 based index of first set bit + (only works for x!=0) +
+ This is an alternate implementation of ntz().
+
+ Returns the 0-based index of the first set bit.
+ This is an alternate implementation of ntz().
+
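+
+ As an illustration of what ntz() computes (not the library's optimized
+ implementation), a naive C# version might look like this:
+
+   // Counts trailing zero bits; the optimized variants above assume x != 0.
+   static int Ntz(long x)
+   {
+       if (x == 0) return 64;
+       int n = 0;
+       while ((x & 1L) == 0)
+       {
+           x = (long)((ulong)x >> 1);   // unsigned right shift
+           n++;
+       }
+       return n;
+   }
+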
+ + returns true if v is a power of two or zero + + + returns true if v is a power of two or zero + + + returns the next highest power of two, or the current value if it's already a power of two or zero + + + returns the next highest power of two, or the current value if it's already a power of two or zero + + + Optimized implementation of a vector of bits. This is more-or-less like + java.util.BitSet, but also includes the following: + + a count() method, which efficiently computes the number of one bits; + optimized read from and write to disk; + inlinable get() method; + store and load, as bit set or d-gaps, depending on sparseness; + + + + + Constructs a vector capable of holding n bits. + + + Sets the value of bit to one. + + + Sets the value of bit to true, and + returns true if bit was already set + + + + Sets the value of bit to zero. + + + Returns true if bit is one and + false if it is zero. + + + + Returns the number of bits in this vector. This is also one greater than + the number of the largest valid bit number. + + + + Returns the total number of one bits in this vector. This is efficiently + computed and cached, so that, if the vector is not changed, no + recomputation is done for repeated calls. + + + + + For testing + + + + Writes this vector to the file name in Directory + d, in a format that can be read by the constructor + . + + + + Write as a bit set + + + Write as a d-gaps list + + + Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. + + + Constructs a bit vector from the file name in Directory + d, as written by the method. + + + + Read as a bit set + + + read as a d-gaps list + + + Retrieve a subset of this BitVector. + + + starting index, inclusive + + ending index, exclusive + + subset + + + + Some useful constants. + + + The value of System.getProperty("java.version"). * + + + True iff this is Java version 1.1. + + + True iff this is Java version 1.2. + + + True iff this is Java version 1.3. + + + The value of System.getProperty("os.name"). * + + + True iff running on Linux. + + + True iff running on Windows. + + + True iff running on SunOS. + + + Simple DocIdSet and DocIdSetIterator backed by a BitSet + + + This DocIdSet implementation is cacheable. + + + Returns the underlying BitSet. + + + Provides methods for sanity checking that entries in the FieldCache + are not wasteful or inconsistent. +

+

+ Lucene 2.9 introduced numerous enhancements into how the FieldCache
+ is used by the low levels of Lucene searching (for Sorting and
+ ValueSourceQueries) to improve both the speed for Sorting, as well
+ as reopening of IndexReaders. But these changes have shifted the
+ usage of FieldCache from "top level" IndexReaders (frequently a
+ MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
+ As a result, existing applications that directly access the FieldCache
+ may find RAM usage increase significantly when upgrading to 2.9 or
+ later. This class provides an API for these applications (or their
+ unit tests) to check at run time if the FieldCache contains "insane"
+ usages of the FieldCache.
+
+ EXPERIMENTAL API: This API is considered extremely advanced and
+ experimental. It may be removed or altered w/o warning in future releases
+ of Lucene.
+
+ + If set, will be used to estimate size for all CacheEntry objects + dealt with. + + + + Quick and dirty convenience method + + + + + Quick and dirty convenience method that instantiates an instance with + "good defaults" and uses it to test the CacheEntrys + + + + + + Tests a CacheEntry[] for indication of "insane" cache usage. +

+ NOTE: FieldCache CreationPlaceholder objects are ignored.
+ (TODO: is this a bad idea? are we masking a real problem?)
+
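+
+ A sketch of running the sanity check over the current FieldCache contents;
+ the member names follow the Java original and the .NET port may expose them
+ slightly differently:
+
+   using Lucene.Net.Search;
+   using Lucene.Net.Util;
+
+   var entries = FieldCache_Fields.DEFAULT.GetCacheEntries();
+   var problems = FieldCacheSanityChecker.CheckSanity(entries);
+   foreach (var insanity in problems)
+   {
+       System.Console.WriteLine(insanity);   // multi-line report per problem
+   }
+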
+ + Internal helper method used by check that iterates over + valMismatchKeys and generates a Collection of Insanity + instances accordingly. The MapOfSets are used to populate + the Insantiy objects. + + + + + + Internal helper method used by check that iterates over + the keys of readerFieldToValIds and generates a Collection + of Insanity instances whenever two (or more) ReaderField instances are + found that have an ancestery relationships. + + + + + + + Checks if the seed is an IndexReader, and if so will walk + the hierarchy of subReaders building up a list of the objects + returned by obj.getFieldCacheKey() + + + + Simple pair object for using "readerKey + fieldName" a Map key + + + Simple container for a collection of related CacheEntry objects that + in conjunction with eachother represent some "insane" usage of the + FieldCache. + + + + CacheEntry objects which suggest a problem + + + Multi-Line representation of this Insanity object, starting with + the Type and Msg, followed by each CacheEntry.toString() on it's + own line prefaced by a tab character + + + + Type of insane behavior this object represents + + + Description of hte insane behavior + + + An Enumaration of the differnet types of "insane" behavior that + may be detected in a FieldCache. + + + + + + + + + + + Indicates an overlap in cache usage on a given field + in sub/super readers. + + + +

+ Indicates entries have the same reader+fieldname but
+ different cached values. This can happen if different datatypes,
+ or parsers, are used -- and while it's not necessarily a bug
+ it's typically an indication of a possible problem.
+
+ NOTE: Only the reader, fieldname, and cached value are actually
+ tested -- if two cache entries have different parsers or datatypes but
+ the cached values are the same Object (== not just equal()) this method
+ does not consider that a red flag. This allows for subtle variations
+ in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
+
+ + Indicates an expected bit of "insanity". This may be useful for + clients that wish to preserve/log information about insane usage + but indicate that it was expected. + + + + + A class that mimics Java's IdentityHashMap in that it determines + object equality solely on ReferenceEquals rather than (possibly overloaded) + object.Equals(). + + NOTE: Java's documentation on IdentityHashMap says that it also uses + ReferenceEquals on it's Values as well. This class does not follow this behavior + + The type of the keys in the dictionary + The type of the values in the dictionary + + + Provides support for converting byte sequences to Strings and back again. + The resulting Strings preserve the original byte sequences' sort order. + + The Strings are constructed using a Base 8000h encoding of the original + binary data - each char of an encoded String represents a 15-bit chunk + from the byte sequence. Base 8000h was chosen because it allows for all + lower 15 bits of char to be used without restriction; the surrogate range + [U+D8000-U+DFFF] does not represent valid chars, and would require + complicated handling to avoid them and allow use of char's high bit. + + Although unset bits are used as padding in the final char, the original + byte sequence could contain trailing bytes with no set bits (null bytes): + padding is indistinguishable from valid information. To overcome this + problem, a char is appended, indicating the number of encoded bytes in the + final content char. + + This class's operations are defined over CharBuffers and ByteBuffers, to + allow for wrapped arrays to be reused, reducing memory allocation costs for + repeated operations. Note that this class calls array() and arrayOffset() + on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be + used. This class interprets the arrayOffset() and limit() values returned by + its input buffers as beginning and end+1 positions on the wrapped array, + resprectively; similarly, on the output buffer, arrayOffset() is the first + position written to, and limit() is set to one past the final output array + position. + + + + Returns the number of chars required to encode the given byte sequence. + + + The byte sequence to be encoded. Must be backed by an array. + + The number of chars required to encode the given byte sequence + + IllegalArgumentException If the given ByteBuffer is not backed by an array + + + Returns the number of bytes required to decode the given char sequence. + + + The char sequence to be encoded. Must be backed by an array. + + The number of bytes required to decode the given char sequence + + IllegalArgumentException If the given CharBuffer is not backed by an array + + + Encodes the input byte sequence into the output char sequence. Before + calling this method, ensure that the output CharBuffer has sufficient + capacity by calling . + + + The byte sequence to encode + + Where the char sequence encoding result will go. The limit + is set to one past the position of the final char. + + IllegalArgumentException If either the input or the output buffer + is not backed by an array + + + + Decodes the input char sequence into the output byte sequence. Before + calling this method, ensure that the output ByteBuffer has sufficient + capacity by calling . + + + The char sequence to decode + + Where the byte sequence decoding result will go. The limit + is set to one past the position of the final char. 
+ + IllegalArgumentException If either the input or the output buffer + is not backed by an array + + + + Decodes the given char sequence, which must have been encoded by + or + . + + + The char sequence to decode + + A byte sequence containing the decoding result. The limit + is set to one past the position of the final char. + + IllegalArgumentException If the input buffer is not backed by an + array + + + + Encodes the input byte sequence. + + + The byte sequence to encode + + A char sequence containing the encoding result. The limit is set + to one past the position of the final char. + + IllegalArgumentException If the input buffer is not backed by an + array + + + + Helper class for keeping Listss of Objects associated with keys. WARNING: THIS CLASS IS NOT THREAD SAFE + + + the backing store for this object + + + + Adds val to the Set associated with key in the Map. If key is not + already in the map, a new Set will first be created. + + the size of the Set associated with key once val is added to it. + + + + Adds multiple vals to the Set associated with key in the Map. + If key is not + already in the map, a new Set will first be created. + + the size of the Set associated with key once val is added to it. + + + + direct access to the map backing this object. + + + An "open" BitSet implementation that allows direct access to the array of words + storing the bits. +

+ Unlike java.util.BitSet, the fact that bits are packed into an array of longs
+ is part of the interface. This allows efficient implementation of other algorithms
+ by someone other than the author. It also allows one to efficiently implement
+ alternate serialization or interchange formats.
+
+ OpenBitSet is faster than java.util.BitSet in most operations
+ and *much* faster at calculating cardinality of sets and results of set operations.
+ It can also handle sets of larger cardinality (up to 64 * 2**32-1).
+
+ The goals of OpenBitSet are the fastest implementation possible, and
+ maximum code reuse. Extra safety and encapsulation
+ may always be built on top, but if that's built in, the cost can never be removed (and
+ hence people re-implement their own version in order to get better performance).
+ If you want a "safe", totally encapsulated (and slower and limited) BitSet
+ class, use java.util.BitSet.
+
+ Performance Results
+
+ Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+ BitSet size = 1,000,000
+ Results are java.util.BitSet time divided by OpenBitSet time.
+
+            cardinality  intersect_count  union  nextSetBit  get   iterator
+ 50% full   3.36         3.96             1.44   1.46        1.99  1.58
+ 1% full    3.31         3.90                    1.04              0.99
+
+ Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+ BitSet size = 1,000,000
+ Results are java.util.BitSet time divided by OpenBitSet time.
+
+            cardinality  intersect_count  union  nextSetBit  get   iterator
+ 50% full   2.50         3.50             1.00   1.03        1.12  1.25
+ 1% full    2.51         3.49                    1.00              1.02
+
+ $Id$ + +
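+
+ A small usage sketch of OpenBitSet, using the member names documented below:
+
+   using Lucene.Net.Util;
+
+   var bits = new OpenBitSet(1000);      // capacity of 1000 bits
+   bits.Set(3);
+   bits.Set(64);
+   bool isSet = bits.Get(64);            // true
+   long count = bits.Cardinality();      // 2
+   long first = bits.NextSetBit(0);      // 3
+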
+ + Constructs an OpenBitSet large enough to hold numBits. + + + + + + + Constructs an OpenBitSet from an existing long[]. +
+ The first 64 bits are in long[0],
+ with bit index 0 at the least significant bit, and bit index 63 at the most significant.
+ Given a bit index,
+ the word containing it is long[index/64], and it is at bit number index%64 within that word.
+
+ numWords is the number of elements in the array that contain
+ set bits (non-zero longs).
+ numWords should be <= bits.length, and
+ any existing words in the array at position >= numWords should be zero.
+
+ + Returns the current capacity in bits (1 greater than the index of the last bit) + + + Returns the current capacity of this set. Included for + compatibility. This is *not* equal to + + + + Returns true if there are no set bits + + + Returns true or false for the specified bit index. + + + Returns true or false for the specified bit index. + The index should be less than the OpenBitSet size + + + + Returns true or false for the specified bit index + + + Returns true or false for the specified bit index. + The index should be less than the OpenBitSet size. + + + + returns 1 if the bit is set, 0 if not. + The index should be less than the OpenBitSet size + + + + sets a bit, expanding the set size if necessary + + + Sets the bit at the specified index. + The index should be less than the OpenBitSet size. + + + + Sets the bit at the specified index. + The index should be less than the OpenBitSet size. + + + + Sets a range of bits, expanding the set size if necessary + + + lower index + + one-past the last bit to set + + + + clears a bit. + The index should be less than the OpenBitSet size. + + + + clears a bit. + The index should be less than the OpenBitSet size. + + + + clears a bit, allowing access beyond the current set size without changing the size. + + + Clears a range of bits. Clearing past the end does not change the size of the set. + + + lower index + + one-past the last bit to clear + + + + Clears a range of bits. Clearing past the end does not change the size of the set. + + + lower index + + one-past the last bit to clear + + + + Sets a bit and returns the previous value. + The index should be less than the OpenBitSet size. + + + + Sets a bit and returns the previous value. + The index should be less than the OpenBitSet size. + + + + flips a bit. + The index should be less than the OpenBitSet size. + + + + flips a bit. + The index should be less than the OpenBitSet size. + + + + flips a bit, expanding the set size if necessary + + + flips a bit and returns the resulting bit value. + The index should be less than the OpenBitSet size. + + + + flips a bit and returns the resulting bit value. + The index should be less than the OpenBitSet size. + + + + Flips a range of bits, expanding the set size if necessary + + + lower index + + one-past the last bit to flip + + + + the number of set bits + + + + Returns the popcount or cardinality of the intersection of the two sets. + Neither set is modified. + + + + Returns the popcount or cardinality of the union of the two sets. + Neither set is modified. + + + + Returns the popcount or cardinality of "a and not b" + or "intersection(a, not(b))". + Neither set is modified. + + + + Returns the popcount or cardinality of the exclusive-or of the two sets. + Neither set is modified. + + + + Returns the index of the first set bit starting at the index specified. + -1 is returned if there are no more set bits. + + + + Returns the index of the first set bit starting at the index specified. + -1 is returned if there are no more set bits. + + + + this = this AND other + + + this = this OR other + + + Remove all elements set in other. this = this AND_NOT other + + + this = this XOR other + + + returns true if the sets have any elements in common + + + Expand the long[] with the size given as a number of words (64 bit longs). + getNumWords() is unchanged by this call. + + + + Ensure that the long[] is big enough to hold numBits, expanding it if necessary. + getNumWords() is unchanged by this call. 
+ + + + Lowers numWords, the number of words in use, + by checking for trailing zero words. + + + + returns the number of 64 bit words it would take to hold numBits + + + returns true if both sets have the same bits set + + + This DocIdSet implementation is cacheable. + + + Expert: Gets or sets the long[] storing the bits + + + Expert: gets or sets the number of longs in the array that are in use + + + Construct an OpenBitSetDISI with its bits set + from the doc ids of the given DocIdSetIterator. + Also give a maximum size one larger than the largest doc id for which a + bit may ever be set on this OpenBitSetDISI. + + + + Construct an OpenBitSetDISI with no bits set, and a given maximum size + one larger than the largest doc id for which a bit may ever be set + on this OpenBitSetDISI. + + + + Perform an inplace OR with the doc ids from a given DocIdSetIterator, + setting the bit for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace AND with the doc ids from a given DocIdSetIterator, + leaving only the bits set for which the doc ids are in common. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace NOT with the doc ids from a given DocIdSetIterator, + clearing all the bits for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace XOR with the doc ids from a given DocIdSetIterator, + flipping all the bits for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + An iterator to iterate over set bits in an OpenBitSet. + This is faster than nextSetBit() for iterating over the complete set of bits, + especially when the density of the bits set is high. + + + $Id$ + + + + ** the python code that generated bitlist + def bits2int(val): + arr=0 + for shift in range(8,0,-1): + if val & 0x80: + arr = (arr << 4) | shift + val = val << 1 + return arr + def int_table(): + tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ] + return ','.join(tbl) + **** + + + + Base class for cache implementations. + + + Returns a thread-safe cache backed by the specified cache. + In order to guarantee thread-safety, all access to the backed cache must + be accomplished through the returned cache. + + + + Called by . This method + returns a instance that wraps + this instance by default and can be overridden to return + e. g. subclasses of or this + in case this cache is already synchronized. + + + + Puts a (key, value)-pair into the cache. + + + Returns the value for the given key. + + + Returns whether the given key is in this cache. + + + Closes the cache. + + + Simple Cache wrapper that synchronizes all + calls that access the cache. + + + + Simple cache implementation that uses a HashMap to store (key, value) pairs. + This cache is not synchronized, use + if needed. + + + + Returns a Set containing all keys in this cache. + + + + The maximum number of items to cache. + + + + + The list to efficiently maintain the LRU state. + + + + + The dictionary to hash into any location in the list. + + + + + The node instance to use/re-use when adding an item to the cache. + + + + + Container to hold the key and value to aid in removal from + the dictionary when an item is removed from cache. + + + + Estimates the size of a given Object using a given MemoryModel for primitive + size information. 
+ + Resource Usage: + + Internally uses a Map to temporally hold a reference to every + object seen. + + If checkIntered, all Strings checked will be interned, but those + that were not already interned will be released for GC when the + estimate is complete. + + + + Constructs this object with an AverageGuessMemoryModel and + checkInterned = true. + + + + check if Strings are interned and don't add to size + if they are. Defaults to true but if you know the objects you are checking + won't likely contain many interned Strings, it will be faster to turn off + intern checking. + + + + MemoryModel to use for primitive object sizes. + + + + MemoryModel to use for primitive object sizes. + + check if Strings are interned and don't add to size + if they are. Defaults to true but if you know the objects you are checking + won't likely contain many interned Strings, it will be faster to turn off + intern checking. + + + + Return good default units based on byte size. + + + + Common util methods for dealing with s. + + + + Gathers sub-readers from reader into a List. + + + + + Returns sub IndexReader that contains the given document id. + + + id of document + + parent reader + + sub reader of parent which contains the specified doc id + + + + Returns sub-reader subIndex from reader. + + + parent reader + + index of desired sub reader + + the subreader at subINdex + + + + Returns index of the searcher/reader for document n in the + array used to construct this searcher/reader. + + + + A ScorerDocQueue maintains a partial ordering of its Scorers such that the + least Scorer can always be found in constant time. Put()'s and pop()'s + require log(size) time. The ordering is by Scorer.doc(). + + + + Create a ScorerDocQueue with a maximum size. + + + Adds a Scorer to a ScorerDocQueue in log(size) time. + If one tries to add more Scorers than maxSize + a RuntimeException (ArrayIndexOutOfBound) is thrown. + + + + Adds a Scorer to the ScorerDocQueue in log(size) time if either + the ScorerDocQueue is not full, or not lessThan(scorer, top()). + + + + true if scorer is added, false otherwise. + + + + Returns the least Scorer of the ScorerDocQueue in constant time. + Should not be used when the queue is empty. + + + + Returns document number of the least Scorer of the ScorerDocQueue + in constant time. + Should not be used when the queue is empty. + + + + Removes and returns the least scorer of the ScorerDocQueue in log(size) + time. + Should not be used when the queue is empty. + + + + Removes the least scorer of the ScorerDocQueue in log(size) time. + Should not be used when the queue is empty. + + + + Should be called when the scorer at top changes doc() value. + Still log(n) worst case, but it's at least twice as fast to + { pq.top().change(); pq.adjustTop(); } + instead of + { o = pq.pop(); o.change(); pq.push(o); } + + + + + Returns the number of scorers currently stored in the ScorerDocQueue. + + + Removes all entries from the ScorerDocQueue. + + + Simple lockless and memory barrier free String intern cache that is guaranteed + to return the same String instance as String.intern() does. + + + + Subclasses of StringInterner are required to + return the same single String object for all equal strings. + Depending on the implementation, this may not be + the same object returned as String.intern(). + + This StringInterner base class simply delegates to String.intern(). + + + + Returns a single object instance for each equal string. + + + Returns a single object instance for each equal string. 
+ + + Size of the hash table, should be a power of two. + + Maximum length of each bucket, after which the oldest item inserted is dropped. + + + + Floating point numbers smaller than 32 bits. + + + $Id$ + + + + Converts a 32 bit float to an 8 bit float. +
Values less than zero are all mapped to zero. +
Values are truncated (rounded down) to the nearest 8 bit value. +
Values between zero and the smallest representable value + are rounded up. + +
+ the 32 bit float to be converted to an 8 bit float (byte) + + the number of mantissa bits to use in the byte, with the remainder to be used in the exponent + + the zero-point in the range of exponent values + + the 8 bit float representation + +
+ + Converts an 8 bit float to a 32 bit float. + + + floatToByte(b, mantissaBits=3, zeroExponent=15) +
smallest non-zero value = 5.820766E-10 +
largest value = 7.5161928E9 +
epsilon = 0.125 +
+
+ + byteToFloat(b, mantissaBits=3, zeroExponent=15) + + + floatToByte(b, mantissaBits=5, zeroExponent=2) +
smallest nonzero value = 0.033203125 +
largest value = 1984.0 +
epsilon = 0.03125 +
+
+ + byteToFloat(b, mantissaBits=5, zeroExponent=2) + + + Stores and iterate on sorted integers in compressed form in RAM.
+ The code for compressing the differences between ascending integers was + borrowed from and + .

+ NOTE: this class assumes the stored integers are doc Ids (hence why it + extends ). Therefore its assumes + can be used as sentinel. If you intent to use + this value, then make sure it's not used during search flow. +

+
+ + When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, + a SortedVIntList representing the index numbers of the set bits + will be smaller than that BitSet. + + + + Create a SortedVIntList from all elements of an array of integers. + + + A sorted array of non negative integers. + + + + Create a SortedVIntList from an array of integers. + An array of sorted non negative integers. + + The number of integers to be used from the array. + + + + Create a SortedVIntList from a BitSet. + A bit set representing a set of integers. + + + + Create a SortedVIntList from an OpenBitSet. + A bit set representing a set of integers. + + + + Create a SortedVIntList. + An iterator providing document numbers as a set of integers. + This DocIdSetIterator is iterated completely when this constructor + is called and it must provide the integers in non + decreasing order. + + + + An iterator over the sorted integers. + + + + The total number of sorted integers. + + + The size of the byte array storing the compressed sorted integers. + + + This DocIdSet implementation is cacheable. + + + Borrowed from Cglib. Allows custom swap so that two arrays can be sorted + at the same time. + + + + Methods for manipulating strings. + + + Expert: + The StringInterner implementation used by Lucene. + This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used. + + + + Return the same string object for all equal strings + + + Compares two byte[] arrays, element by element, and returns the + number of elements common to both arrays. + + + The first byte[] to compare + + + The second byte[] to compare + + + The number of common elements. + + + + Compares two strings, character by character, and returns the + first position where the two strings differ from one another. + + + The first string to compare + + The second string to compare + + The first position where the two strings differ. + + + + Helper methods to ease implementing . + + + for printing boost only if not 1.0 + + + Class to encode java's UTF16 char[] into UTF8 byte[] + without always allocating a new byte[] as + String.getBytes("UTF-8") does. + +

WARNING: This API is new and experimental and + may suddenly change.

+

+
+ + Encode characters from a char[] source, starting at + offset and stopping when the character 0xffff is seen. + Returns the number of bytes written to bytesOut. + + + + Encode characters from a char[] source, starting at + offset for length chars. Returns the number of bytes + written to bytesOut. + + + + Encode characters from this String, starting at offset + for length characters. Returns the number of bytes + written to bytesOut. + + + + Convert UTF8 bytes into UTF16 characters. If offset + is non-zero, conversion starts at that starting point + in utf8, re-using the results from the previous call + up until offset. + + + + Use by certain classes to match version compatibility + across releases of Lucene. +

+ WARNING: When changing the version parameter + that you supply to components in Lucene, do not simply + change the version at search-time, but instead also adjust + your indexing code to match, and re-index. +

+
+ + Match settings and bugs in Lucene's 2.0 release. + + + Match settings and bugs in Lucene's 2.1 release. + + + Match settings and bugs in Lucene's 2.2 release. + + + Match settings and bugs in Lucene's 2.3 release. + + + Match settings and bugs in Lucene's 2.4 release. + + + Match settings and bugs in Lucene's 2.9 release. + + + + Match settings and bugs in Lucene's 3.0 release. + + Use this to get the latest and greatest settings, bug fixes, + etc, for Lucene. + + + + + +

WARNING: if you use this setting, and then + upgrade to a newer release of Lucene, sizable changes + may happen. If precise back compatibility is important + then you should instead explicitly specify an actual + version. + If you use this constant then you may need to + re-index all of your documents when upgrading + Lucene, as the way text is indexed may have changed. + Additionally, you may need to re-test your entire + application to ensure it behaves as expected, as + some defaults may have changed and may break functionality + in your application. +

+
+
+
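As a hedged sketch of the versioning guidance above (assuming the Lucene.Net 3.0.3 API shipped in this package; the variable names are illustrative), pinning an explicit version rather than relying on the "latest" constant might look like this:

    // Pin an explicit matchVersion so index-time and search-time behaviour
    // stay in step across Lucene upgrades (names assume Lucene.Net 3.0.3).
    using Lucene.Net.Analysis.Standard;
    using Version = Lucene.Net.Util.Version;

    var analyzer = new StandardAnalyzer(Version.LUCENE_30);
    // Reuse the same Version value wherever a component asks for one
    // (e.g. a QueryParser), and re-index if you ever change it.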
diff --git a/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.dll b/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.dll new file mode 100644 index 0000000..713ba19 Binary files /dev/null and b/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.dll differ diff --git a/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.pdb b/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.pdb new file mode 100644 index 0000000..8ee3dea Binary files /dev/null and b/packages/Lucene.Net.3.0.3/lib/NET35/Lucene.Net.pdb differ diff --git a/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.XML b/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.XML new file mode 100644 index 0000000..1041be3 --- /dev/null +++ b/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.XML @@ -0,0 +1,21777 @@ + + + + Lucene.Net + + + + An Analyzer builds TokenStreams, which analyze text. It thus represents a + policy for extracting index terms from text. +

+ Typical implementations first build a Tokenizer, which breaks the stream of + characters from the Reader into raw Tokens. One or more TokenFilters may + then be applied to the output of the Tokenizer. +

+
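A minimal, hedged sketch of that "Tokenizer plus TokenFilters" composition (type names assume the Lucene.Net 3.0.3 API in this package; the analyzer name is illustrative):

    using System.IO;
    using Lucene.Net.Analysis;

    public class LowercasingWhitespaceAnalyzer : Analyzer
    {
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // Break the reader into whitespace-separated tokens, then lower-case them.
            return new LowerCaseFilter(new WhitespaceTokenizer(reader));
        }
    }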
+ + Creates a TokenStream which tokenizes all the text in the provided + Reader. Must be able to handle null field name for + backward compatibility. + + + + Creates a TokenStream that is allowed to be re-used + from the previous time that the same thread called + this method. Callers that do not need to use more + than one TokenStream at the same time from this + analyzer should use this method for better + performance. + + + + This is only present to preserve + back-compat of classes that subclass a core analyzer + and override tokenStream but not reusableTokenStream + + + Java uses Class<? extends Analyer> to constrain to + only Types that inherit from Analyzer. C# does not have a generic type class, + ie Type<t>. The method signature stays the same, and an exception may + still be thrown, if the method doesn't exist. + + + + Invoked before indexing a Fieldable instance if + terms have already been added to that field. This allows custom + analyzers to place an automatic position increment gap between + Fieldable instances using the same field name. The default value + position increment gap is 0. With a 0 position increment gap and + the typical default token position increment of 1, all terms in a field, + including across Fieldable instances, are in successive positions, allowing + exact PhraseQuery matches, for instance, across Fieldable instance boundaries. + + + Fieldable name being indexed. + + position increment gap, added to the next token emitted from + + + + Just like , except for + Token offsets instead. By default this returns 1 for + tokenized fields and, as if the fields were joined + with an extra space character, and 0 for un-tokenized + fields. This method is only called if the field + produced at least one token for indexing. + + + the field just indexed + + offset gap, added to the next token emitted from + + + + Frees persistent resources used by this Analyzer + + + Used by Analyzers that implement reusableTokenStream + to retrieve previously saved TokenStreams for re-use + by the same thread. + + + + This class converts alphabetic, numeric, and symbolic Unicode characters + which are not in the first 127 ASCII characters (the "Basic Latin" Unicode + block) into their ASCII equivalents, if one exists. 
+ + Characters from the following Unicode blocks are converted; however, only + those characters with reasonable ASCII alternatives are converted: + + + C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf + Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf + Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf + Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf + Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf + Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf + IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf + Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf + Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf + General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf + Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf + Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf + Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf + Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf + Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf + Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf + + + See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode + + The set of character conversions supported by this class is a superset of + those supported by Lucene's which strips + accents from Latin1 characters. For example, 'À' will be replaced by + 'a'. + + + + A TokenFilter is a TokenStream whose input is another TokenStream. +

+ This is an abstract class; subclasses must override . + +

+ + +
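A hedged sketch of such a subclass (constructor and attribute names assume Lucene.Net 3.0.3; the upper-casing behaviour is purely illustrative):

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    // Illustrative TokenFilter that upper-cases each term produced by its input.
    public class UpperCaseFilter : TokenFilter
    {
        private readonly ITermAttribute termAtt;

        public UpperCaseFilter(TokenStream input) : base(input)
        {
            termAtt = AddAttribute<ITermAttribute>();
        }

        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
                return false;               // end of stream
            termAtt.SetTermBuffer(termAtt.Term.ToUpperInvariant());
            return true;
        }
    }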
+ + A TokenStream enumerates the sequence of tokens, either from + s of a or from query text. +

+ This is an abstract class. Concrete subclasses are: + + , a TokenStream whose input is a Reader; and + , a TokenStream whose input is another + TokenStream. + + A new TokenStream API has been introduced with Lucene 2.9. This API + has moved from being based to based. While + still exists in 2.9 as a convenience class, the preferred way + to store the information of a is to use s. +

+ TokenStream now extends , which provides + access to all of the token s for the TokenStream. + Note that only one instance per is created and reused + for every token. This approach reduces object creation and allows local + caching of references to the s. See + for further details. +

+ The workflow of the new TokenStream API is as follows: + + Instantiation of TokenStream/s which add/get + attributes to/from the . + The consumer calls . + The consumer retrieves attributes from the stream and stores local + references to all attributes it wants to access + The consumer calls until it returns false and + consumes the attributes after each call. + The consumer calls so that any end-of-stream operations + can be performed. + The consumer calls to release any resource when finished + using the TokenStream + + To make sure that filters and consumers know which attributes are available, + the attributes must be added during instantiation. Filters and consumers are + not required to check for availability of attributes in + . +

+ You can find some example code for the new API in the analysis package level + Javadoc. +

+ Sometimes it is desirable to capture the current state of a TokenStream, + e.g. for buffering purposes (see , + ). For this use case + and + can be used. +

+
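A hedged sketch of that consumer workflow (member names assume Lucene.Net 3.0.3; the analyzer and text variables are assumed to exist):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    TokenStream stream = analyzer.TokenStream("body", new StringReader(text));
    // Retrieve attribute references once, before consuming the stream.
    ITermAttribute termAtt = stream.AddAttribute<ITermAttribute>();
    IOffsetAttribute offsetAtt = stream.AddAttribute<IOffsetAttribute>();
    while (stream.IncrementToken())
    {
        Console.WriteLine("{0} [{1},{2}]", termAtt.Term, offsetAtt.StartOffset, offsetAtt.EndOffset);
    }
    stream.End();    // end-of-stream operations, e.g. the final offset
    stream.Close();  // release resources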
+ + An AttributeSource contains a list of different s, + and methods to add and get them. There can only be a single instance + of an attribute in the same AttributeSource instance. This is ensured + by passing in the actual type of the Attribute (Class<Attribute>) to + the , which then checks if an instance of + that type is already present. If yes, it returns the instance, otherwise + it creates a new instance and returns it. + + + + An AttributeSource using the default attribute factory . + + + An AttributeSource that uses the same attributes as the supplied one. + + + An AttributeSource using the supplied for creating new instances. + + + Returns a new iterator that iterates the attribute classes + in the same order they were added in. + Signature for Java 1.5: public Iterator<Class<? extends Attribute>> getAttributeClassesIterator() + + Note that this return value is different from Java in that it enumerates over the values + and not the keys + + + + Returns a new iterator that iterates all unique Attribute implementations. + This iterator may contain less entries that , + if one instance implements more than one Attribute interface. + Signature for Java 1.5: public Iterator<AttributeImpl> getAttributeImplsIterator() + + + + a cache that stores all interfaces for known implementation classes for performance (slow reflection) + + + + Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. +

Please note: It is not guaranteed that att is added to + the AttributeSource, because the provided attributes may already exist. + You should always retrieve the wanted attributes using after adding + with this method and cast to your class. + The recommended way to use custom implementations is using an +

+
+
+ + The caller must pass in a Class<? extends Attribute> value. + This method first checks if an instance of that class is + already in this AttributeSource and returns it. Otherwise a + new instance is created, added to this AttributeSource and returned. + + + + The caller must pass in a Class<? extends Attribute> value. + Returns true, iff this AttributeSource contains the passed-in Attribute. + \ + + + + The caller must pass in a Class<? extends Attribute> value. + Returns the instance of the passed in Attribute contained in this AttributeSource + + + IllegalArgumentException if this AttributeSource does not contain the Attribute. + It is recommended to always use even in consumers + of TokenStreams, because you cannot know if a specific TokenStream really uses + a specific Attribute. will automatically make the attribute + available. If you want to only use the attribute, if it is available (to optimize + consuming), use . + + + + Resets all Attributes in this AttributeSource by calling + on each Attribute implementation. + + + + Captures the state of all Attributes. The return value can be passed to + to restore the state of this or another AttributeSource. + + + + Restores this state by copying the values of all attribute implementations + that this state contains into the attributes implementations of the targetStream. + The targetStream must contain a corresponding instance for each argument + contained in this state (e.g. it is not possible to restore the state of + an AttributeSource containing a TermAttribute into a AttributeSource using + a Token instance as implementation). + + Note that this method does not affect attributes of the targetStream + that are not contained in this state. In other words, if for example + the targetStream contains an OffsetAttribute, but this state doesn't, then + the value of the OffsetAttribute remains unchanged. It might be desirable to + reset its value to the default, in which case the caller should first + call on the targetStream. + + + + Performs a clone of all instances returned in a new + AttributeSource instance. This method can be used to e.g. create another TokenStream + with exactly the same attributes (using ) + + + + Returns the used AttributeFactory. + + + Returns true, iff this AttributeSource has any attributes + + + An AttributeFactory creates instances of s. + + + returns an for the supplied interface class. + + + This is the default factory that creates s using the + class name of the supplied interface class by appending Impl to it. + + + + This class holds the state of an AttributeSource. + + + + + + + A TokenStream using the default attribute factory. + + + A TokenStream that uses the same attributes as the supplied one. + + + A TokenStream using the supplied AttributeFactory for creating new instances. + + + Consumers (i.e., ) use this method to advance the stream to + the next token. Implementing classes must implement this method and update + the appropriate s with the attributes of the next + token. + + The producer must make no assumptions about the attributes after the + method has been returned: the caller may arbitrarily change it. If the + producer needs to preserve the state for subsequent calls, it can use + to create a copy of the current attribute state. + + This method is called for every token of a document, so an efficient + implementation is crucial for good performance. To avoid calls to + and , + references to all s that this stream uses should be + retrieved during instantiation. 
+ + To ensure that filters and consumers know which attributes are available, + the attributes must be added during instantiation. Filters and consumers + are not required to check for availability of attributes in + . + + + false for end of stream; true otherwise + + + This method is called by the consumer after the last token has been + consumed, after returned false + (using the new TokenStream API). Streams implementing the old API + should upgrade to use this feature. +

+ This method can be used to perform any end-of-stream operations, such as + setting the final offset of a stream. The final offset of a stream might + differ from the offset of the last token eg in case one or more whitespaces + followed after the last token, but a was used. + +

+ IOException +
+ + Resets this stream to the beginning. This is an optional operation, so + subclasses may or may not implement this method. is not needed for + the standard indexing process. However, if the tokens of a + TokenStream are intended to be consumed more than once, it is + necessary to implement . Note that if your TokenStream + caches tokens and feeds them back again after a reset, it is imperative + that you clone the tokens when you store them away (on the first pass) as + well as when you return them (on future passes after ). + + + + Releases resources associated with this stream. + + + The source of tokens for this filter. + + + Construct a token stream filtering the given input. + + + Performs end-of-stream operations, if any, and calls then end() on the + input TokenStream.

+ NOTE: Be sure to call super.end() first when overriding this method. +

+
+ + Reset the filter as well as the input TokenStream. + + + Converts characters above ASCII to their ASCII equivalents. For example, + accents are removed from accented characters. + + The string to fold + + The number of characters in the input string + + + + + * Base utility class for implementing a . + * You subclass this, and then record mappings by calling + * , and then invoke the correct + * method to correct an offset. + + + + Subclasses of CharFilter can be chained to filter CharStream. + They can be used as with additional offset + correction. s will automatically use + if a CharFilter/CharStream subclass is used. + + + $Id$ + + + + + CharStream adds + functionality over . All Tokenizers accept a + CharStream instead of as input, which enables + arbitrary character based filtering before tokenization. + The method fixed offsets to account for + removal or insertion of characters, so that the offsets + reported in the tokens match the character offsets of the + original Reader. + + + + Called by CharFilter(s) and Tokenizer to correct token offset. + + + offset as seen in the output + + corrected offset based on the input + + + + Subclass may want to override to correct the current offset. + current offset + corrected offset + + + Chains the corrected offset through the input + CharFilter. + + + + This class can be used if the token attributes of a TokenStream + are intended to be consumed more than once. It caches + all token attribute states locally in a List. + +

CachingTokenFilter implements the optional method + Reset(), which repositions the + stream to the first Token. +

+
+ + A simple class that stores Strings as char[]'s in a + hash table. Note that this is not a general purpose + class. For example, it cannot remove items from the + set, nor does it resize its hash table to be smaller, + etc. It is designed to be quick to test if a char[] + is in the set without the necessity of converting it + to a String first. +

+ Please note: This class implements but + does not behave like it should in all cases. The generic type is + , because you can add any object to it + that has a string representation. The add methods will use + and store the result using a + buffer. The Contains methods behave the same way. + The method returns an IEnumerable. + For type safety, stringIterator() is also provided. +

+
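A brief hedged sketch of typical use (the constructor shape is assumed from the member descriptions below):

    using Lucene.Net.Analysis;

    var terms = new CharArraySet(16, true);   // startSize, ignoreCase (assumed order)
    terms.Add("foo");
    bool found = terms.Contains("FOO");       // true, because ignoreCase was requested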
+ + Create set with enough capacity to hold startSize + terms + + + + Create set from a Collection of char[] or String + + + Create set from entries + + + true if the len chars of text starting at off + are in the set + + + + Returns true if the String is in the set + + + Add this char[] directly to the set. + If ignoreCase is true for this Set, the text array will be directly modified. + The user should never modify this text array after calling this method. + + + + + Returns an unmodifiable . This allows to provide + unmodifiable views of internal sets for "read-only" use + + A Set for which the unmodifiable set it returns. + A new unmodifiable + ArgumentNullException of the given set is null + + + + returns a copy of the given set as a . If the given set + is a the ignoreCase property will be preserved. + + A set to copy + a copy of the given set as a . If the given set + is a the ignoreCase property will be preserved. + + + Adds all of the elements in the specified collection to this collection + + + Wrapper that calls UnionWith + + + + The IEnumerator<String> for this set. Strings are constructed on the fly, + so use nextCharArray for more efficient access + + + + do not modify the returned char[] + + + CharReader is a Reader wrapper. It reads chars from + Reader and outputs , defining an + identify function method that + simply returns the provided offset. + + + + An abstract base class for simple, character-oriented tokenizers. + + + A Tokenizer is a TokenStream whose input is a Reader. +

+ This is an abstract class; subclasses must override +

+ NOTE: Subclasses overriding must call + before setting attributes. +

+
+ + The text source for this Tokenizer. + + + Construct a tokenizer with null input. + + + Construct a token stream processing the given input. + + + Construct a tokenizer with null input using the given AttributeFactory. + + + Construct a token stream processing the given input using the given AttributeFactory. + + + Construct a token stream processing the given input using the given AttributeSource. + + + Construct a token stream processing the given input using the given AttributeSource. + + + Return the corrected offset. If is a subclass + this method calls , else returns currentOff. + + offset as seen in the output + + corrected offset based on the input + + + + + + Expert: Reset the tokenizer to a new reader. Typically, an + analyzer (in its reusableTokenStream method) will use + this to re-use a previously created tokenizer. + + + + Returns true iff a character should be included in a token. This + tokenizer generates as tokens adjacent sequences of characters which + satisfy this predicate. Characters for which this is false are used to + define token boundaries and are not included in tokens. + + + + Called on each token character to normalize it before it is added to the + token. The default implementation does nothing. Subclasses may use this + to, e.g., lowercase tokens. + + + + A filter that replaces accented characters in the ISO Latin 1 character set + (ISO-8859-1) by their unaccented equivalent. The case will not be altered. +

+ For instance, 'À' will be replaced by 'a'. +

+ +

+ If you build a new index, use + which covers a superset of Latin 1. + This class is included for use with existing indexes and will be removed + in a future release (possible Lucene 4.0) + +
+ + To replace accented characters in a String by unaccented equivalents. + + + "Tokenizes" the entire stream as a single token. This is useful + for data like zip codes, ids, and some product names. + + + + Emits the entire input as a single token. + + + Removes words that are too long or too short from the stream. + + + Build a filter that removes words that are too long or too + short from the text. + + + + Returns the next input Token whose term() is the right len + + + A LetterTokenizer is a tokenizer that divides text at non-letters. That's + to say, it defines tokens as maximal strings of adjacent letters, as defined + by java.lang.Character.isLetter() predicate. + Note: this does a decent job for most European languages, but does a terrible + job for some Asian languages, where words are not separated by spaces. + + + + Construct a new LetterTokenizer. + + + Construct a new LetterTokenizer using a given . + + + Construct a new LetterTokenizer using a given . + + + Collects only characters which satisfy + . + + + + Normalizes token text to lower case. + + + LowerCaseTokenizer performs the function of LetterTokenizer + and LowerCaseFilter together. It divides text at non-letters and converts + them to lower case. While it is functionally equivalent to the combination + of LetterTokenizer and LowerCaseFilter, there is a performance advantage + to doing the two tasks at once, hence this (redundant) implementation. +

+ Note: this does a decent job for most European languages, but does a terrible + job for some Asian languages, where words are not separated by spaces. +

+
+ + Construct a new LowerCaseTokenizer. + + + Construct a new LowerCaseTokenizer using a given . + + + Construct a new LowerCaseTokenizer using a given . + + + Converts char to lower case + . + + + + Simplistic that applies the mappings + contained in a to the character + stream, and correcting the resulting changes to the + offsets. + + + + Default constructor that takes a . + + + Easy-use constructor that takes a . + + + Holds a map of String input to String output, to be used + with . + + + + Records a replacement to be applied to the inputs + stream. Whenever singleMatch occurs in + the input, it will be replaced with + replacement. + + + input String to be replaced + + output String + + + + Expert: This class provides a + for indexing numeric values that can be used by + or . + +

Note that for simple usage, is + recommended. disables norms and + term freqs, as they are not usually needed during + searching. If you need to change these settings, you + should use this class. + +

See for capabilities of fields + indexed numerically.

+ +

Here's an example usage, for an int field: + + + Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)); + field.setOmitNorms(true); + field.setOmitTermFreqAndPositions(true); + document.add(field); + + +

For optimal performance, re-use the TokenStream and Field instance + for more than one document: + + + NumericTokenStream stream = new NumericTokenStream(precisionStep); + Field field = new Field(name, stream); + field.setOmitNorms(true); + field.setOmitTermFreqAndPositions(true); + Document document = new Document(); + document.add(field); + + for(all documents) { + stream.setIntValue(value) + writer.addDocument(document); + } + + +

This stream is not intended to be used in analyzers; + it's more for iterating the different precisions during + indexing a specific numeric value.

+ +

NOTE: as token streams are only consumed once + the document is added to the index, if you index more + than one numeric field, use a separate NumericTokenStream + instance for each.

+ +

See for more details on the + precisionStep + parameter as well as how numeric fields work under the hood.

+ +

NOTE: This API is experimental and + might change in incompatible ways in the next release. + Since 2.9 +

+
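The snippets above keep the Java-style setters from the original Javadoc; in this .NET port the equivalents are properties, so a hedged C# adaptation of the int-field example might read (precisionStep, value and name are assumed to exist):

    using Lucene.Net.Analysis;
    using Lucene.Net.Documents;

    var stream = new NumericTokenStream(precisionStep).SetIntValue(value);
    var field = new Field(name, stream);
    field.OmitNorms = true;                  // property form of setOmitNorms(true)
    field.OmitTermFreqAndPositions = true;   // property form of setOmitTermFreqAndPositions(true)
    var document = new Document();
    document.Add(field);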
+ + The full precision token gets this token type assigned. + + + The lower precision tokens gets this token type assigned. + + + Creates a token stream for numeric values using the default precisionStep + (4). The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Creates a token stream for numeric values with the specified + precisionStep. The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Expert: Creates a token stream for numeric values with the specified + precisionStep using the given . + The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Expert: Creates a token stream for numeric values with the specified + precisionStep using the given + . + The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Initializes the token stream with the supplied long value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value)) + + + + Initializes the token stream with the supplied int value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value)) + + + + Initializes the token stream with the supplied double value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value)) + + + + Initializes the token stream with the supplied float value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value)) + + + + This analyzer is used to facilitate scenarios where different + fields require different analysis techniques. Use + to add a non-default analyzer on a field name basis. + +

Example usage: + + + PerFieldAnalyzerWrapper aWrapper = + new PerFieldAnalyzerWrapper(new StandardAnalyzer()); + aWrapper.addAnalyzer("firstname", new KeywordAnalyzer()); + aWrapper.addAnalyzer("lastname", new KeywordAnalyzer()); + + +

In this example, StandardAnalyzer will be used for all fields except "firstname" + and "lastname", for which KeywordAnalyzer will be used. + +

A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing + and query parsing. +

+
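A hedged C# rendering of the example above (method casing follows the .NET port; the explicit Version argument is an assumption of the Lucene.Net 3.0.3 API):

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Version = Lucene.Net.Util.Version;

    var wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_30));
    wrapper.AddAnalyzer("firstname", new KeywordAnalyzer());
    wrapper.AddAnalyzer("lastname", new KeywordAnalyzer());
    // Every other field falls back to the StandardAnalyzer passed to the constructor.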
+ + Constructs with default analyzer. + + + Any fields not specifically + defined to use a different analyzer will use the one provided here. + + + + Constructs with default analyzer and a map of analyzers to use for + specific fields. + + + Any fields not specifically + defined to use a different analyzer will use the one provided here. + + a Map (String field name to the Analyzer) to be + used for those fields + + + + Defines an analyzer to use for the specified field. + + + field name requiring a non-default analyzer + + non-default analyzer to use for field + + + + Return the positionIncrementGap from the analyzer assigned to fieldName + + + Return the offsetGap from the analyzer assigned to field + + + Transforms the token stream as per the Porter stemming algorithm. + Note: the input to the stemming filter must already be in lower case, + so you will need to use LowerCaseFilter or LowerCaseTokenizer farther + down the Tokenizer chain in order for this to work properly! +

+ To use this filter with other analyzers, you'll want to write an + Analyzer class that sets up the TokenStream chain as you want it. + To use this with LowerCaseTokenizer, for example, you'd write an + analyzer like this: +

+ + class MyAnalyzer extends Analyzer { + public final TokenStream tokenStream(String fieldName, Reader reader) { + return new PorterStemFilter(new LowerCaseTokenizer(reader)); + } + } + +

+
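A hedged C# counterpart of the Java example above (signatures assume the Lucene.Net 3.0.3 API in this package):

    using System.IO;
    using Lucene.Net.Analysis;

    public class MyAnalyzer : Analyzer
    {
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // Lower-case first, then stem, as the note above requires.
            return new PorterStemFilter(new LowerCaseTokenizer(reader));
        }
    }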
+ + + Stemmer, implementing the Porter Stemming Algorithm + + The Stemmer class transforms a word into its root form. The input + word can be provided a character at time (by calling add()), or at once + by calling one of the various stem(something) methods. + + + + reset() resets the stemmer so it can stem another word. If you invoke + the stemmer by calling add(char) and then stem(), you must call reset() + before starting another word. + + + + Add a character to the word being stemmed. When you are finished + adding characters, you can call stem(void) to process the word. + + + + After a word has been stemmed, it can be retrieved by toString(), + or a reference to the internal buffer can be retrieved by getResultBuffer + and getResultLength (which is generally more efficient.) + + + + Stem a word provided as a String. Returns the result as a String. + + + Stem a word contained in a char[]. Returns true if the stemming process + resulted in a word different from the input. You can retrieve the + result with getResultLength()/getResultBuffer() or toString(). + + + + Stem a word contained in a portion of a char[] array. Returns + true if the stemming process resulted in a word different from + the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Stem a word contained in a leading portion of a char[] array. + Returns true if the stemming process resulted in a word different + from the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Stem the word placed into the Stemmer buffer through calls to add(). + Returns true if the stemming process resulted in a word different + from the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Test program for demonstrating the Stemmer. It reads a file and + stems each word, writing the result to standard out. + Usage: Stemmer file-name + + + + Returns the length of the word resulting from the stemming process. + + + Returns a reference to a character buffer containing the results of + the stemming process. You also need to consult getResultLength() + to determine the length of the result. + + + + An that filters + with + + + + Filters with , + and , using a list of English stop + words. + + +

+ You must specify the required compatibility when creating + StandardAnalyzer: + + As of 2.9, StopFilter preserves position increments + As of 2.4, Tokens incorrectly identified as acronyms are corrected (see + LUCENE-1608) + +

+
+ + Default maximum allowed token length + + + Specifies whether deprecated acronyms should be replaced with HOST type. + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + + Specifies whether deprecated acronyms should be replaced with HOST type. + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + + An unmodifiable set containing some common English words that are usually not + useful for searching. + + + + Builds an analyzer with the default stop words (). + + Lucene version to match see above + + + Builds an analyzer with the given stop words. + Lucene version to match See above /> + + + stop words + + + + Builds an analyzer with the stop words from the given file. + + + Lucene version to match See above /> + + + File to read stop words from + + + + Builds an analyzer with the stop words from the given reader. + + + Lucene version to match See above /> + + + Reader to read stop words from + + + + Constructs a filtered by a + , a and a . + + + + Set maximum allowed token length. If a token is seen + that exceeds this length then it is discarded. This + setting only takes effect the next time tokenStream or + reusableTokenStream is called. + + + + Normalizes tokens extracted with . + + + Construct filtering in. + + + Returns the next token in the stream, or null at EOS. +

Removes 's from the end of words. +

Removes dots from acronyms. +

+
+ + A grammar-based tokenizer constructed with JFlex + +

This should be a good tokenizer for most European-language documents: + + + Splits words at punctuation characters, removing punctuation. However, a + dot that's not followed by whitespace is considered part of a token. + Splits words at hyphens, unless there's a number in the token, in which case + the whole token is interpreted as a product number and is not split. + Recognizes email addresses and internet hostnames as one token. + + +

Many applications have specific tokenizer needs. If this tokenizer does + not suit your application, please consider copying this source code + directory to your project and maintaining your own grammar-based tokenizer. + + +

+ You must specify the required compatibility when creating + StandardAnalyzer: + + As of 2.4, Tokens incorrectly identified as acronyms are corrected (see + LUCENE-1608 + +

+
+ + this solves a bug where HOSTs that end with '.' are identified + as ACRONYMs. + + + + A private instance of the JFlex-constructed scanner + + + String token types that correspond to token type int constants + + + Creates a new instance of the + . Attaches + the input to the newly created JFlex scanner. + + + + The input reader + + See http://issues.apache.org/jira/browse/LUCENE-1068 + + + + Creates a new StandardTokenizer with a given . + + + Creates a new StandardTokenizer with a given + + + + + + (non-Javadoc) + + + + + + Remove in 3.X and make true the only valid value + See https://issues.apache.org/jira/browse/LUCENE-1068 + + Set to true to replace mischaracterized acronyms as HOST. + + + + Set the max allowed token length. Any token longer + than this is skipped. + + + + This class is a scanner generated by + JFlex 1.4.1 + on 9/4/08 6:49 PM from the specification file + /tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex + + + + This character denotes the end of file + + + initial size of the lookahead buffer + + + lexical states + + + Translates characters to character classes + + + Translates characters to character classes + + + Translates DFA states to action switch labels. + + + Translates a state to a row index in the transition table + + + The transition table of the DFA + + + ZZ_ATTRIBUTE[aState] contains the attributes of state aState + + + the input device + + + the current state of the DFA + + + the current lexical state + + + this buffer contains the current text to be matched and is + the source of the yytext() string + + + + the textposition at the last accepting state + + + the textposition at the last state to be included in yytext + + + the current text position in the buffer + + + startRead marks the beginning of the yytext() string in the buffer + + + endRead marks the last character in the buffer, that has been read + from input + + + + number of newlines encountered up to the start of the matched text + + + the number of characters up to the start of the matched text + + + the number of characters from the last newline up to the start of the + matched text + + + + zzAtBOL == true <=> the scanner is currently at the beginning of a line + + + zzAtEOF == true <=> the scanner is at the EOF + + + this solves a bug where HOSTs that end with '.' are identified + as ACRONYMs. + + + + Fills Lucene token with the current token text. + + + Fills TermAttribute with the current token text. + + + Creates a new scanner + There is also a java.io.InputStream version of this constructor. + + + the java.io.Reader to read input from. + + + + Creates a new scanner. + There is also java.io.Reader version of this constructor. + + + the java.io.Inputstream to read input from. + + + + Unpacks the compressed character translation table. + + + the packed character translation table + + the unpacked character translation table + + + + Refills the input buffer. + + false, iff there was new input. + + + if any I/O-Error occurs + + + + Closes the input stream. + + + Resets the scanner to read from a new input stream. + Does not close the old reader. + + All internal variables are reset, the old input stream + cannot be reused (internal buffer is discarded and lost). + Lexical state is set to ZZ_INITIAL. + + + the new input stream + + + + Returns the current lexical state. + + + Enters a new lexical state + + + the new lexical state + + + + Returns the text matched by the current regular expression. 
+ + + Returns the character at position pos from the + matched text. + + It is equivalent to yytext().charAt(pos), but faster + + + the position of the character to fetch. + A value from 0 to yylength()-1. + + + the character at position pos + + + + Returns the length of the matched text region. + + + Reports an error that occured while scanning. + + In a wellformed scanner (no or only correct usage of + yypushback(int) and a match-all fallback rule) this method + will only be called with things that "Can't Possibly Happen". + If this method is called, something is seriously wrong + (e.g. a JFlex bug producing a faulty scanner etc.). + + Usual syntax/scanner level error handling should be done + in error fallback rules. + + + the code of the errormessage to display + + + + Pushes the specified amount of characters back into the input stream. + + They will be read again by then next call of the scanning method + + + the number of characters to be read again. + This number must not be greater than yylength()! + + + + Resumes scanning until the next regular expression is matched, + the end of input is encountered or an I/O-Error occurs. + + + the next token + + if any I/O-Error occurs + + + + Filters with and + . + + +

+ You must specify the required compatibility when creating + StopAnalyzer: + + As of 2.9, position increments are preserved + +

+
+ + An unmodifiable set containing some common English words that are not usually useful + for searching. + + + + Builds an analyzer which removes words in ENGLISH_STOP_WORDS. + + + Builds an analyzer with the stop words from the given set. + + + Builds an analyzer with the stop words from the given file. + + + + + See above + + File to load stop words from + + + + Builds an analyzer with the stop words from the given reader. + + + See above + + Reader to load stop words from + + + + Filters LowerCaseTokenizer with StopFilter. + + + Filters LowerCaseTokenizer with StopFilter. + + + Removes stop words from a token stream. + + + Construct a token stream filtering the given input. + If stopWords is an instance of (true if + makeStopSet() was used to construct the set) it will be directly used + and ignoreCase will be ignored since CharArraySet + directly controls case sensitivity. +

+ If stopWords is not an instance of , + a new CharArraySet will be constructed and ignoreCase will be + used to specify the case sensitivity of that set. +

+ true if token positions should record the removed stop words + Input TokenStream + A Set of strings or strings or char[] or any other ToString()-able set representing the stopwords + if true, all words are lower cased first +
+ + Constructs a filter which removes words from the input + TokenStream that are named in the Set. + + true if token positions should record the removed stop words + Input stream + A Set of strings or char[] or any other ToString()-able set representing the stopwords + + + + Builds a Set from an array of stop words, + appropriate for passing into the StopFilter constructor. + This permits this stopWords construction to be cached once when + an Analyzer is constructed. + + + passing false to ignoreCase + + + Builds a Set from an array of stop words, + appropriate for passing into the StopFilter constructor. + This permits this stopWords construction to be cached once when + an Analyzer is constructed. + + A list of strings or char[] or any other ToString()-able list representing the stop words + passing false to ignoreCase + + + + An array of stopwords + If true, all words are lower cased first. + a Set containing the words + + + + A List of Strings or char[] or any other toString()-able list representing the stopwords + if true, all words are lower cased first + A Set ()containing the words + + + Returns the next input Token whose term() is not a stop word. + + + Returns version-dependent default for enablePositionIncrements. Analyzers + that embed StopFilter use this method when creating the StopFilter. Prior + to 2.9, this returns false. On 2.9 or later, it returns true. + + + + If true, this StopFilter will preserve + positions of the incoming tokens (ie, accumulate and + set position increments of the removed stop tokens). + Generally, true is best as it does not + lose information (positions of the original tokens) + during indexing. + +

When enabled, if a token is stopped + (omitted), the position increment of the following + token is incremented. + +

NOTE: be sure to also + set if + you use QueryParser to create queries. +

+
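A hedged sketch tying these pieces together (MakeStopSet and the constructor shape are assumed from the descriptions above; the sample text is illustrative):

    using System.IO;
    using Lucene.Net.Analysis;

    var stopWords = StopFilter.MakeStopSet("the", "a", "an");
    TokenStream tokens = new StopFilter(
        true,                                              // enablePositionIncrements
        new WhitespaceTokenizer(new StringReader("the quick fox")),
        stopWords);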
+ + This TokenFilter provides the ability to set aside attribute states + that have already been analyzed. This is useful in situations where multiple fields share + many common analysis steps and then go their separate ways. +

+ It is also useful for doing things like entity extraction or proper noun analysis as + part of the analysis workflow and saving off those tokens for use in another field. + + + TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1)); + TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream(); + TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream(); + TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2)); + source2.addSinkTokenStream(sink1); + source2.addSinkTokenStream(sink2); + TokenStream final1 = new LowerCaseFilter(source1); + TokenStream final2 = source2; + TokenStream final3 = new EntityDetect(sink1); + TokenStream final4 = new URLDetect(sink2); + d.add(new Field("f1", final1)); + d.add(new Field("f2", final2)); + d.add(new Field("f3", final3)); + d.add(new Field("f4", final4)); + + In this example, sink1 and sink2 will both get tokens from both + reader1 and reader2 after whitespace tokenizer + and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired. + It is important, that tees are consumed before sinks (in the above example, the field names must be + less the sink's field names). If you are not sure, which stream is consumed first, you can simply + add another sink and then pass all tokens to the sinks at once using . + This TokenFilter is exhausted after this. In the above example, change + the example above to: + + ... + TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream()); + TokenStream final2 = source2.newSinkTokenStream(); + sink1.consumeAllTokens(); + sink2.consumeAllTokens(); + ... + + In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready. +

Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. +

+
+ + Instantiates a new TeeSinkTokenFilter. + + + Returns a new that receives all tokens consumed by this stream. + + + Returns a new that receives all tokens consumed by this stream + that pass the supplied filter. + + + + + + Adds a created by another TeeSinkTokenFilter + to this one. The supplied stream will also receive all consumed tokens. + This method can be used to pass tokens from two different tees to one sink. + + + + TeeSinkTokenFilter passes all tokens to the added sinks + when itself is consumed. To be sure, that all tokens from the input + stream are passed to the sinks, you can call this methods. + This instance is exhausted after this, but all sinks are instant available. + + + + A filter that decides which states to store in the sink. + + + Returns true, iff the current state of the passed-in shall be stored + in the sink. + + + + Called by . This method does nothing by default + and can optionally be overridden. + + + + A Token is an occurrence of a term from the text of a field. It consists of + a term's text, the start and end offset of the term in the text of the field, + and a type string. +

+ The start and end offsets permit applications to re-associate a token with + its source text, e.g., to display highlighted query terms in a document + browser, or to show matching text fragments in a KWIC display, etc. +

+ The type is a string, assigned by a lexical analyzer + (a.k.a. tokenizer), naming the lexical or syntactic class that the token + belongs to. For example an end of sentence marker token might be implemented + with type "eos". The default token type is "word". +

+ A Token can optionally have metadata (a.k.a. Payload) in the form of a variable + length byte array. Use and + to retrieve the payloads from the index. +

+

+
+

NOTE: As of 2.9, Token implements all interfaces + that are part of core Lucene and can be found in the namespace. + Even though it is not necessary to use Token anymore, with the new TokenStream API it can + be used as a convenience class that implements all s, which is especially useful + to easily switch from the old to the new TokenStream API. +

+

Tokenizers and TokenFilters should try to re-use a Token instance when + possible for best performance, by implementing the + API. + Failing that, to create a new Token you should first use + one of the constructors that starts with null text. To load + the token from a char[] use . + To load from a String use or . + Alternatively you can get the Token's termBuffer by calling either , + if you know that your text is shorter than the capacity of the termBuffer + or , if there is any possibility + that you may need to grow the buffer. Fill in the characters of your term into this + buffer, with if loading from a string, + or with , and finally call to + set the length of the term text. See LUCENE-969 + for details.

+

Typical Token reuse patterns: + + Copying text from a string (type is reset to if not + specified):
+ + return reusableToken.reinit(string, startOffset, endOffset[, type]); + +
+ Copying some text from a string (type is reset to + if not specified):
+ + return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]); + +
+ Copying text from char[] buffer (type is reset to + if not specified):
+ + return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]); + +
+ Copying some text from a char[] buffer (type is reset to + if not specified):
+ + return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]); + +
+ Copying from one Token to another (type is reset to + if not specified):
+ + return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]); + +
+
+ A few things to note: + + clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one. + Because TokenStreams can be chained, one cannot assume that the Token's current type is correct. + The startOffset and endOffset represent the start and offset in the + source text, so be careful in adjusting them. + When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again. + +

+

+ + +
+ + Base class for Attributes that can be added to a + . +

+ Attributes are used to add data in a dynamic, yet type-safe way to a source + of usually streamed objects, e. g. a . +

+
+ + Base interface for attributes. + + + Clears the values in this AttributeImpl and resets it to its + default value. If this implementation implements more than one Attribute interface + it clears all. + + + + The default implementation of this method accesses all declared + fields of this object and prints the values in the following syntax: + + + public String toString() { + return "start=" + startOffset + ",end=" + endOffset; + } + + + This method may be overridden by subclasses. + + + + Subclasses must implement this method and should compute + a hashCode similar to this: + + public int hashCode() { + int code = startOffset; + code = code * 31 + endOffset; + return code; + } + + + see also + + + + All values used for computation of + should be checked here for equality. + + see also + + + + Copies the values from this Attribute into the passed-in + target attribute. The target implementation must support all the + Attributes this implementation supports. + + + + Shallow clone. Subclasses must override this if they + need to clone any members deeply, + + + + The term text of a Token. + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use + to increase it. After + altering the buffer be sure to call + to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + , + , or + + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use first. + + the truncated length + + + + Returns the Token's term text. + + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use and + directly instead. If you really need a + String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + A Token's lexical type. The Default value is "word". + + + Gets or sets this Token's lexical type. Defaults to "word". + + + The positionIncrement determines the position of this token + relative to the previous Token in a TokenStream, used in phrase + searching. + +

The default value is one. + +

Some common uses for this are: + + Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token. + + Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words. + + + +

+ + +
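A hedged illustration of the first case above, injecting a synonym at the same position as the original term (Token member names assume Lucene.Net 3.0.3):

    using Lucene.Net.Analysis;

    var original = new Token("quick", 4, 9);   // term text plus start/end offsets
    var synonym  = new Token("fast", 4, 9);
    synonym.PositionIncrement = 0;             // occupies the same position as "quick"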
+ + Gets or sets the position increment. The default value is one. + + + the distance from the prior term + + + This attribute can be used to pass different flags down the chain, + eg from one TokenFilter to another one. + + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from , although they do share similar purposes. + The flags can be used to encode information about the token for use by other s. + + +

+ The bits +
+ + The start and end character offset of a Token. + + + Set the starting and ending offset. + See StartOffset() and EndOffset() + + + + Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. + + + + Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + The payload of a Token. See also . + + + Returns this Token's payload. + + + Constructs a Token will null text. + + + Constructs a Token with null text and start & end + offsets. + + start offset in the source text + end offset in the source text + + + Constructs a Token with null text and start & end + offsets plus the Token type. + + start offset in the source text + end offset in the source text + the lexical type of this Token + + + Constructs a Token with null text and start & end + offsets plus flags. NOTE: flags is EXPERIMENTAL. + + start offset in the source text + end offset in the source text + The bits to set for this token + + + Constructs a Token with the given term text, and start + & end offsets. The type defaults to "word." + NOTE: for better indexing speed you should + instead use the char[] termBuffer methods to set the + term text. + + term text + start offset + end offset + + + Constructs a Token with the given text, start and end + offsets, & type. NOTE: for better indexing + speed you should instead use the char[] termBuffer + methods to set the term text. + + term text + start offset + end offset + token type + + + Constructs a Token with the given text, start and end + offsets, & type. NOTE: for better indexing + speed you should instead use the char[] termBuffer + methods to set the term text. + + + + + token type bits + + + Constructs a Token with the given term buffer (offset + & length), start and end + offsets + + + + + + + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + the index in the buffer of the first character to copy + the number of characters to copy + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use + to increase it. After + altering the buffer be sure to call + to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + , + , or + + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Allocates a buffer char[] of at least newSize, without preserving the existing content. 
+ its always used in places that set the content + + minimum size of the buffer + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use first. + + the truncated length + + + + Set the starting and ending offset. + See StartOffset() and EndOffset() + + + + Resets the term text, payload, flags, and positionIncrement, + startOffset, endOffset and token type to default. + + + + Makes a clone, but replaces the term buffer & + start/end offset in the process. This is more + efficient than doing a full clone (and then calling + setTermBuffer) because it saves a wasted copy of the old + termBuffer. + + + + Shorthand for calling , + , + , + , + + + this Token instance + + + + Shorthand for calling , + , + , + + on Token.DEFAULT_TYPE + + this Token instance + + + + Shorthand for calling , + , + , + + + + this Token instance + + + + Shorthand for calling , + , + , + + + + this Token instance + + + + Shorthand for calling , + , + , + + on Token.DEFAULT_TYPE + + this Token instance + + + + Shorthand for calling , + , + , + + on Token.DEFAULT_TYPE + + this Token instance + + + + Copy the prototype token's fields into this one. Note: Payloads are shared. + + + + + Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + + + + + + + Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + + + + + + + + + + + + Convenience factory that returns Token as implementation for the basic + attributes and return the default impl (with "Impl" appended) for all other + attributes. + @since 3.0 + + + + Set the position increment. This determines the position of this token + relative to the previous Token in a , used in phrase + searching. + +

The default value is one. + +

Some common uses for this are: + + Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token. + + Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words. + + +

+ the distance from the prior term + + +
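To make the stemming case above concrete, here is a minimal sketch (an illustrative assumption, not part of the shipped documentation; it assumes the 3.x .NET port exposes the increment as a PositionIncrement property, while older ports use SetPositionIncrement):

    // assumes: using Lucene.Net.Analysis;
    Token original = new Token("running", 0, 7);   // position increment defaults to 1
    Token stem = new Token("run", 0, 7);
    stem.PositionIncrement = 0;                    // occupies the same position as "running"
    // A TokenFilter that emits both tokens lets a phrase query containing either
    // "running" or "run" match the same document at the same position.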
+ + Returns the Token's term text. + + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use and + directly instead. If you really need a + String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + Gets or sets this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to , as the term text may have been altered by a + stemmer or some other filter. + + + + Gets or sets this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + Returns this Token's lexical type. Defaults to "word". + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from , although they do share similar purposes. + The flags can be used to encode information about the token for use by other s. + + +

+ The bits +
+ + Returns this Token's payload. + + + Expert: Creates an AttributeFactory returning Token as instance for the basic attributes + and for all other attributes calls the given delegate factory. + + + + Expert: Creates an AttributeFactory returning Token as instance for the basic attributes + and for all other attributes calls the given delegate factory. + + + This attribute can be used to pass different flags down the tokenizer chain, + e.g., from one TokenFilter to another. + + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from , although they do share similar purposes. + The flags can be used to encode information about the token for use by other s. + + +

+ The bits +
+ + The start and end character offset of a Token. + + + Set the starting and ending offset. + See StartOffset() and EndOffset() + + + + Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. + + + + Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + The payload of a Token. See also . + + + Initialize this attribute with no payload. + + + Initialize this attribute with the given payload. + + + Returns this Token's payload. + + + The positionIncrement determines the position of this token + relative to the previous Token in a , used in phrase + searching. + +

The default value is one. + +

Some common uses for this are: + + Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token. + + Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words. + + +

+
+ + Set the position increment. The default value is one. + + + the distance from the prior term + + + The term text of a Token. + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use + to increase it. After + altering the buffer be sure to call + to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + , + , or + + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Allocates a buffer char[] of at least newSize, without preserving the existing content. + its always used in places that set the content + + minimum size of the buffer + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use first. + + the truncated length + + + + Returns the Token's term text. + + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use and + directly instead. If you + really need a String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + A Token's lexical type. The Default value is "word". + + + Returns this Token's lexical type. Defaults to "word". + + + An Analyzer that uses . + + + A WhitespaceTokenizer is a tokenizer that divides text at whitespace. + Adjacent sequences of non-Whitespace characters form tokens. + + + + Construct a new WhitespaceTokenizer. + + + Construct a new WhitespaceTokenizer using a given . + + + Construct a new WhitespaceTokenizer using a given . + + + Collects only characters which do not satisfy + . + + + + Loader for text files that represent a list of stopwords. + + + Loads a text file and adds every line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the file should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + File containing the wordlist + A HashSet with the file's words + + + Loads a text file and adds every non-comment line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the file should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). 
+ + File containing the wordlist + The comment string to ignore + A HashSet with the file's words + + Reads lines from a Reader and adds every line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the Reader should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + Reader containing the wordlist + A HashSet with the reader's words + + Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the Reader should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + + Reader containing the wordlist + + The string representing a comment. + + A HashSet with the reader's words + + + + Reads a stem dictionary. Each line contains: + word\tstem + (i.e. two tab-separated words) + + + stem dictionary that overrules the stemming algorithm + + IOException + + + + + + + + + Synonymous with . + +

WARNING: This interface may change within minor versions, despite Lucene's backward compatibility requirements. + This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards + compatibility promises remain intact. For example, Lucene can still + read and write indices created within the same major version. +

+ + +

+
+ + Return the raw byte[] for the binary field. Note that + you must also call and + to know which range of bytes in this + returned array belong to the field. + + reference to the Field value as byte[]. + + + Return the raw byte[] for the binary field. Note that + you must also call and + to know which range of bytes in this + returned array belong to the field.

+ About reuse: if you pass in the result byte[] and it is + used, likely the underlying implementation will hold + onto this byte[] and return it in future calls to + or . + So if you subsequently re-use the same byte[] elsewhere + it will alter this Fieldable's value. +

+ User defined buffer that will be used if + possible. If this is null or not large enough, a new + buffer is allocated + + reference to the Field value as byte[]. + +
+ + Gets or sets the boost factor for hits for this field. This value will be + multiplied into the score of all hits on this field of this + document. + +

The boost is multiplied by of the document + containing this field. If a document has multiple fields with the same + name, all such values are multiplied together. This product is then + used to compute the norm factor for the field. By + default, in the + method, the boost value is multiplied + by the + and then rounded by before it is stored in the + index. One should attempt to ensure that this product does not overflow + the range of that encoding. + +

The default value is 1.0. + +

Note: this value is not stored directly with the document in the index. + Documents returned from and + may thus not have the same value present as when + this field was indexed. + +

+ + + + + + +
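For example (a hedged sketch; it assumes the .NET port exposes the boost as a property, and the field name is made up):

    // assumes: using Lucene.Net.Documents; 'doc' is the Document being built
    Field title = new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.ANALYZED);
    title.Boost = 2.0f;   // folded into the field's norm at indexing time
    doc.Add(title);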
+ + Returns the name of the field as an interned string. + For example "date", "title", "body", ... + + + + The value of the field as a String, or null. +

+ For indexing, if isStored()==true, the stringValue() will be used as the stored field value + unless isBinary()==true, in which case GetBinaryValue() will be used. + + If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. + If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null, + else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens. +

+
+ + The value of the field as a Reader, which can be used at index time to generate indexed tokens. + + + + + The TokenStream for this field to be used when indexing, or null. + + + + + True if the value of the field is to be stored in the index for return + with search hits. + + + + True if the value of the field is to be indexed, so that it may be + searched on. + + + + True if the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. + + + + True if the term or terms used to index this field are stored as a term + vector, available from . + These methods do not provide access to the original content of the field, + only to terms used to index it. If the original content must be + preserved, use the stored attribute instead. + + + + + + + True if terms are stored as term vector together with their offsets + (start and end positon in source text). + + + + True if terms are stored as term vector together with their token positions. + + + True if the value of the field is stored as binary + + + + True if norms are omitted for this indexed field. + + Expert: + If set, omit normalization factors associated with this indexed field. + This effectively disables indexing boosts and length normalization for this field. + + + + + Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving + it's values via or is only valid as long as the that + retrieved the is still open. + + + true if this field can be loaded lazily + + + Returns offset into byte[] segment that is used as value, if Field is not binary + returned value is undefined + + index of the first character in byte[] segment that represents this Field value + + + Returns length of byte[] segment that is used as value, if Field is not binary + returned value is undefined + + length of byte[] segment that represents this Field value + + + Expert: + + If set, omit term freq, positions and payloads from + postings for this field. + + + NOTE: While this option reduces storage space + required in the index, it also means any query + requiring positional information, such as + or + + subclasses will silently fail to find results. + + + + Return the raw byte[] for the binary field. Note that + you must also call and + to know which range of bytes in this + returned array belong to the field. + + reference to the Field value as byte[]. + + + Prints a Field for human consumption. + + + Gets or sets the boost factor for hits for this field. + +

The default value is 1.0. + +

Note: this value is not stored directly with the document in the index. + Documents returned from and + may thus not have the same value present as when + this field was indexed. +

+
+ + Returns the name of the field as an interned string. + For example "date", "title", "body", ... + + + True iff the value of the field is to be stored in the index for return + with search hits. It is an error for this to be true if a field is + Reader-valued. + + + True iff the value of the field is to be indexed, so that it may be + searched on. + + + True iff the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. + + + True iff the term or terms used to index this field are stored as a term + vector, available from . + These methods do not provide access to the original content of the field, + only to terms used to index it. If the original content must be + preserved, use the stored attribute instead. + + + + + + + True iff terms are stored as term vector together with their offsets + (start and end position in source text). + + + + True iff terms are stored as term vector together with their token positions. + + + True iff the value of the field is stored as binary + + + Returns length of byte[] segment that is used as value, if Field is not binary + returned value is undefined + + length of byte[] segment that represents this Field value + + + Returns offset into byte[] segment that is used as value, if Field is not binary + returned value is undefined + + index of the first character in byte[] segment that represents this Field value + + + True if norms are omitted for this indexed field + + + Expert: + + If set, omit term freq, positions and payloads from + postings for this field. + +

NOTE: While this option reduces storage space + required in the index, it also means any query + requiring positional information, such as + or subclasses will + silently fail to find results. +

+
+ + Simple utility class providing static methods to + compress and decompress binary data for stored fields. + This class uses java.util.zip.Deflater and Inflater + classes to compress and decompress. + + + + Compresses the specified byte range using the + specified compressionLevel (constants are defined in + java.util.zip.Deflater). + + + + Compresses the specified byte range, with default BEST_COMPRESSION level + + + Compresses all bytes in the array, with default BEST_COMPRESSION level + + + Compresses the String value, with default BEST_COMPRESSION level + + + Compresses the String value using the specified + compressionLevel (constants are defined in + java.util.zip.Deflater). + + + + Decompress the byte array previously returned by + compress + + + + Decompress the byte array previously returned by + compressString back into a String + + + + Provides support for converting dates to strings and vice-versa. + The strings are structured so that lexicographic sorting orders by date, + which makes them suitable for use as field values and search terms. + +

Note that this class saves dates with millisecond granularity, + which is bad for and , as those + queries are expanded to a BooleanQuery with a potentially large number + of terms when searching. Thus you might want to use + instead. + +

+ Note: dates before 1970 cannot be used, and therefore cannot be + indexed when using this class. See for an + alternative without such a limitation. + +

+ Another approach is , which provides + a sortable binary representation (prefix encoded) of numeric values, which + date/time are. + For indexing a , convert it to unix timestamp as + long and + index this as a numeric value with + and use to query it. + +

+ If you build a new index, use or + instead. + This class is included for use with existing + indices and will be removed in a future (possibly Lucene 4.0) + +
+ + Converts a Date to a string suitable for indexing. + RuntimeException if the date specified in the + method argument is before 1970 + + + + Converts a millisecond time to a string suitable for indexing. + RuntimeException if the time specified in the + method argument is negative, that is, before 1970 + + + + Converts a string-encoded date into a millisecond time. + + + Converts a string-encoded date into a Date object. + + + Provides support for converting dates to strings and vice-versa. + The strings are structured so that lexicographic sorting orders + them by date, which makes them suitable for use as field values + and search terms. + +

This class also helps you to limit the resolution of your dates. Do not + save dates with a finer resolution than you really need, as then + RangeQuery and PrefixQuery will require more memory and become slower. + +

Compared to the strings generated by the methods + in this class take slightly more space, unless your selected resolution + is set to Resolution.DAY or lower. + +

+ Another approach is , which provides + a sortable binary representation (prefix encoded) of numeric values, which + date/time are. + For indexing a , convert it to unix timestamp as + long and + index this as a numeric value with + and use to query it. +

+
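A brief illustrative sketch of the round trip (the field name and the choice of day resolution are assumptions for the example):

    // assumes: using System; using Lucene.Net.Documents; 'doc' is an existing Document
    string encoded = DateTools.DateToString(DateTime.UtcNow, DateTools.Resolution.DAY);
    // e.g. "20130217" - lexicographic order matches chronological order
    doc.Add(new Field("published", encoded, Field.Store.YES, Field.Index.NOT_ANALYZED));
    DateTime roundTripped = DateTools.StringToDate(encoded);   // parse it back when reading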
+ + Converts a Date to a string suitable for indexing. + + + the date to be converted + + the desired resolution, see + + + a string in format yyyyMMddHHmmssSSS or shorter, + depending on resolution; using GMT as timezone + + + + Converts a millisecond time to a string suitable for indexing. + + + the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT + + the desired resolution, see + + + a string in format yyyyMMddHHmmssSSS or shorter, + depending on resolution; using GMT as timezone + + + + Converts a string produced by timeToString or + DateToString back to a time, represented as the + number of milliseconds since January 1, 1970, 00:00:00 GMT. + + + the date string to be converted + + the number of milliseconds since January 1, 1970, 00:00:00 GMT + + ParseException if dateString is not in the + expected format + + + + Converts a string produced by timeToString or + DateToString back to a time, represented as a + Date object. + + + the date string to be converted + + the parsed time as a Date object + + ParseException if dateString is not in the + expected format + + + + Limit a date's resolution. For example, the date 2004-09-21 13:50:11 + will be changed to 2004-09-01 00:00:00 when using + Resolution.MONTH. + + + + The desired resolution of the date to be returned + + the date with all values more precise than resolution + set to 0 or 1 + + + + Limit a date's resolution. For example, the date 1095767411000 + (which represents 2004-09-21 13:50:11) will be changed to + 1093989600000 (2004-09-01 00:00:00) when using + Resolution.MONTH. + + + The time in milliseconds (not ticks). + The desired resolution of the date to be returned + + the date with all values more precise than resolution + set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT + + + + Specifies the time granularity. + + + Documents are the unit of indexing and search. + + A Document is a set of fields. Each field has a name and a textual value. + A field may be stored with the document, in which + case it is returned with search hits on the document. Thus each document + should typically contain one or more stored fields which uniquely identify + it. + +

Note that fields which are not stored are + not available in documents retrieved from the index, e.g. with , + or . +

+
+ + Constructs a new document with no fields. + + +

Adds a field to a document. Several fields may be added with + the same name. In this case, if the fields are indexed, their text is + treated as though appended for the purposes of search.

+

Note that add like the removeField(s) methods only makes sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new changed version of that + document has to be added.

+

+
+ +

Removes field with the specified name from the document. + If multiple fields exist with this name, this method removes the first field that has been added. + If there is no field with the specified name, the document remains unchanged.

+

Note that the removeField(s) methods like the add method only make sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new changed version of that + document has to be added.

+

+
+ +

Removes all fields with the given name from the document. + If there is no field with the specified name, the document remains unchanged.

+

Note that the removeField(s) methods like the add method only make sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new changed version of that + document has to be added.

+

+
+ + Returns a field with the given name if any exist in this document, or + null. If multiple fields exists with this name, this method returns the + first value added. + Do not use this method with lazy loaded fields. + + + + Returns a field with the given name if any exist in this document, or + null. If multiple fields exists with this name, this method returns the + first value added. + + + + Returns the string value of the field with the given name if any exist in + this document, or null. If multiple fields exist with this name, this + method returns the first value added. If only binary fields with this name + exist, returns null. + + + + Returns a List of all the fields in a document. +

Note that fields which are not stored are + not available in documents retrieved from the + index, e.g. or . +

+
+ + Returns an array of s with the given name. + Do not use with lazy loaded fields. + This method returns an empty array when there are no + matching fields. It never returns null. + + + the name of the field + + a Field[] array + + + + Returns an array of s with the given name. + This method returns an empty array when there are no + matching fields. It never returns null. + + + the name of the field + + a Fieldable[] array + + + + Returns an array of values of the field specified as the method parameter. + This method returns an empty array when there are no + matching fields. It never returns null. + + the name of the field + + a String[] of field values + + + + Returns an array of byte arrays for of the fields that have the name specified + as the method parameter. This method returns an empty + array when there are no matching fields. It never + returns null. + + + the name of the field + + a byte[][] of binary field values + + + + Returns an array of bytes for the first (or only) field that has the name + specified as the method parameter. This method will return null + if no binary fields with the specified name are available. + There may be non-binary fields with the same name. + + + the name of the field. + + a byte[] containing the binary field value or null + + + + Prints the fields of a document for human consumption. + + + Gets or sets, at indexing time, the boost factor. + + The default is 1.0 + +

Note that once a document is indexed this value is no longer available + from the index. At search time, for retrieved documents, this method always + returns 1. This however does not mean that the boost value set at indexing + time was ignored - it was just combined with other indexing time factors and + stored elsewhere, for better indexing and search performance. (For more + information see the "norm(t,d)" part of the scoring formula in + Similarity.) +

+
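A short hedged sketch of the add/get behaviour described above (field names are made up for the example):

    // assumes: using Lucene.Net.Documents;
    Document doc = new Document();
    doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "first paragraph", Field.Store.NO, Field.Index.ANALYZED));
    doc.Add(new Field("body", "second paragraph", Field.Store.NO, Field.Index.ANALYZED));

    string id = doc.Get("id");       // "42"
    string body = doc.Get("body");   // "first paragraph" - the first value added wins
    // After the document is retrieved from an index, Get("body") returns null,
    // because the "body" fields were not stored; only stored fields come back with hits.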
+ + A field is a section of a Document. Each field has two parts, a name and a + value. Values may be free text, provided as a String or as a Reader, or they + may be atomic keywords, which are not further processed. Such keywords may + be used to represent dates, urls, etc. Fields are optionally stored in the + index, so that they may be returned with hits on the document. + + + +

Expert: change the value of this field. This can + be used during indexing to re-use a single Field + instance to improve indexing speed by avoiding GC cost + of new'ing and reclaiming Field instances. Typically + a single instance is re-used as + well. This helps most on small documents.

+ +

Each Field instance should only be used once + within a single instance. See ImproveIndexingSpeed + for details.

+

+
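A hedged sketch of the re-use pattern described above ('records', 'record.Id', 'record.Text' and 'writer' are placeholders for your own data source and an open IndexWriter):

    // assumes: using Lucene.Net.Documents;
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    Field bodyField = new Field("body", "", Field.Store.NO, Field.Index.ANALYZED);
    Document doc = new Document();
    doc.Add(idField);
    doc.Add(bodyField);

    foreach (var record in records)
    {
        idField.SetValue(record.Id);       // re-use the same Field/Document instances
        bodyField.SetValue(record.Text);   // instead of allocating new ones per document
        writer.AddDocument(doc);
    }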
+ + Expert: change the value of this field. See setValue(String). + + + Expert: change the value of this field. See setValue(String). + + + Expert: change the value of this field. See setValue(String). + + + Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true. + May be combined with stored values from stringValue() or GetBinaryValue() + + + + Create a field by specifying its name, value and how it will + be saved in the index. Term vectors will not be stored in the index. + + + The name of the field + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + NullPointerException if name or value is null + IllegalArgumentException if the field is neither stored nor indexed + + + Create a field by specifying its name, value and how it will + be saved in the index. + + + The name of the field + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + Whether term vector should be stored + + NullPointerException if name or value is null + IllegalArgumentException in any of the following situations: + + the field is neither stored nor indexed + the field is not indexed but termVector is TermVector.YES + + + + + Create a field by specifying its name, value and how it will + be saved in the index. + + + The name of the field + + Whether to .intern() name or not + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + Whether term vector should be stored + + NullPointerException if name or value is null + IllegalArgumentException in any of the following situations: + + the field is neither stored nor indexed + the field is not indexed but termVector is TermVector.YES + + + + + Create a tokenized and indexed field that is not stored. Term vectors will + not be stored. The Reader is read only when the Document is added to the index, + i.e. you may not close the Reader until + has been called. + + + The name of the field + + The reader with the content + + NullPointerException if name or reader is null + + + Create a tokenized and indexed field that is not stored, optionally with + storing term vectors. The Reader is read only when the Document is added to the index, + i.e. you may not close the Reader until + has been called. + + + The name of the field + + The reader with the content + + Whether term vector should be stored + + NullPointerException if name or reader is null + + + Create a tokenized and indexed field that is not stored. Term vectors will + not be stored. This is useful for pre-analyzed fields. + The TokenStream is read only when the Document is added to the index, + i.e. you may not close the TokenStream until + has been called. + + + The name of the field + + The TokenStream with the content + + NullPointerException if name or tokenStream is null + + + Create a tokenized and indexed field that is not stored, optionally with + storing term vectors. This is useful for pre-analyzed fields. + The TokenStream is read only when the Document is added to the index, + i.e. you may not close the TokenStream until + has been called. 
+ + + The name of the field + + The TokenStream with the content + + Whether term vector should be stored + + NullPointerException if name or tokenStream is null + + + Create a stored field with binary value. Optionally the value may be compressed. + + + The name of the field + + The binary value + + How value should be stored (compressed or not) + + IllegalArgumentException if store is Store.NO + + + Create a stored field with binary value. Optionally the value may be compressed. + + + The name of the field + + The binary value + + Starting offset in value where this Field's bytes are + + Number of bytes to use for this Field, starting at offset + + How value should be stored (compressed or not) + + IllegalArgumentException if store is Store.NO + + + The value of the field as a String, or null. If null, the Reader value or + binary value is used. Exactly one of stringValue(), + readerValue(), and getBinaryValue() must be set. + + + + The value of the field as a Reader, or null. If null, the String value or + binary value is used. Exactly one of stringValue(), + readerValue(), and getBinaryValue() must be set. + + + + The TokesStream for this field to be used when indexing, or null. If null, the Reader value + or String value is analyzed to produce the indexed tokens. + + + + Specifies whether and how a field should be stored. + + + Store the original field value in the index. This is useful for short texts + like a document's title which should be displayed with the results. The + value is stored in its original form, i.e. no analyzer is used before it is + stored. + + + + Do not store the field value in the index. + + + Specifies whether and how a field should be indexed. + + + Do not index the field value. This field can thus not be searched, + but one can still access its contents provided it is + stored. + + + + Index the tokens produced by running the field's + value through an Analyzer. This is useful for + common text. + + + + Index the field's value without using an Analyzer, so it can be searched. + As no analyzer is used the value will be stored as a single term. This is + useful for unique Ids like product numbers. + + + + Expert: Index the field's value without an Analyzer, + and also disable the storing of norms. Note that you + can also separately enable/disable norms by setting + . No norms means that + index-time field and document boosting and field + length normalization are disabled. The benefit is + less memory usage as norms take up one byte of RAM + per indexed field for every document in the index, + during searching. Note that once you index a given + field with norms enabled, disabling norms will + have no effect. In other words, for this to have the + above described effect on a field, all instances of + that field must be indexed with NOT_ANALYZED_NO_NORMS + from the beginning. + + + + Expert: Index the tokens produced by running the + field's value through an Analyzer, and also + separately disable the storing of norms. See + for what norms are + and why you may want to disable them. + + + + Specifies whether and how a field should have term vectors. + + + Do not store term vectors. + + + Store the term vectors of each document. A term vector is a list + of the document's terms and their number of occurrences in that document. 
+ + + + Store the term vector + token position information + + + + + + + Store the term vector + Token offset information + + + + + + + Store the term vector + Token position and offset information + + + + + + + + + + + + Get the best representation of a TermVector given the flags. + + + + Similar to a + java.io.FileFilter, the FieldSelector allows one to make decisions about + what Fields get loaded on a by + + + + + the field to accept or reject + + an instance of + if the named fieldName should be loaded. + + + + Provides information about what should be done with this Field + + + + + + + + Load this every time the is loaded, reading in the data as it is encountered. + and should not return null. +

+ should be called by the Reader. +

+
+ + Lazily load this . This means the is valid, but it may not actually contain its data until + invoked. SHOULD NOT BE USED. is safe to use and should + return a valid instance of a . +

+ should be called by the Reader. +

+
+ + Do not load the . and should return null. + is not called. +

+ should not be called by the Reader. +

+
+ + Load this field as in the case, but immediately return from loading for the . Thus, the + Document may not have its complete set of Fields. and should + both be valid for this +

+ should be called by the Reader. +

+
+ + Expert: Load the size of this rather than its value. + Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value. + The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0] + + + + Expert: Like but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded + + + Load the First field and break. +

+ See +

+
+ + A based on a Map of field names to s + + Create a MapFieldSelector + maps from field names (String) to s + + + Create a MapFieldSelector + fields to LOAD. List of Strings. All other fields are NO_LOAD. + + + Create a MapFieldSelector + fields to LOAD. All other fields are NO_LOAD. + + + Load field according to its associated value in fieldSelections + a field name + + the fieldSelections value that field maps to or NO_LOAD if none. + + + + Provides support for converting longs to Strings, and back again. The strings + are structured so that lexicographic sorting order is preserved. + +

+ That is, if l1 is less than l2 for any two longs l1 and l2, then + NumberTools.longToString(l1) is lexicographically less than + NumberTools.longToString(l2). (Similarly for "greater than" and "equals".) + +

+ This class handles all long values (unlike + ). + +

+ For new indexes use instead, which + provides a sortable binary representation (prefix encoded) of numeric + values. + To index and efficiently query numeric values use + and . + This class is included for use with existing + indices and will be removed in a future release (possibly Lucene 4.0). + +
+ + Equivalent to longToString(Long.MIN_VALUE) + + + Equivalent to longToString(Long.MAX_VALUE) + + + The length of (all) strings returned by + + + Converts a long to a String suitable for indexing. + + + Converts a String that was returned by back to a + long. + + + IllegalArgumentException + if the input is null + + NumberFormatException + if the input does not parse (it was not a String returned by + longToString()). + + + +
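A hedged sketch of the round trip (PascalCase method names per the .NET port are assumed):

    // assumes: using Lucene.Net.Documents;
    string encoded = NumberTools.LongToString(42L);     // fixed width, sorts like the number
    long decoded = NumberTools.StringToLong(encoded);   // 42
    // Because ordering is preserved, a term range over the encoded strings behaves
    // like a numeric range over the original long values.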

This class provides a that enables indexing + of numeric values for efficient range filtering and + sorting. Here's an example usage, adding an int value: + + document.add(new NumericField(name).setIntValue(value)); + + + For optimal performance, re-use the + NumericField and instance for more than + one document: + + + NumericField field = new NumericField(name); + Document document = new Document(); + document.add(field); + + for(all documents) { + ... + field.setIntValue(value) + writer.addDocument(document); + ... + } + + +

The .Net native types int, long, + float and double are + directly supported. However, any value that can be + converted into these native types can also be indexed. + For example, date/time values represented by a + can be translated into a long + value using the java.util.Date.getTime method. If you + don't need millisecond precision, you can quantize the + value, either by dividing the result of + java.util.Date.getTime or using the separate getters + (for year, month, etc.) to construct an int or + long value.

+ +

To perform range querying or filtering against a + NumericField, use or + . To sort according to a + NumericField, use the normal numeric sort types, eg + NumericField values + can also be loaded directly from .

+ +

By default, a NumericField's value is not stored but + is indexed for range filtering and sorting. You can use + the + constructor if you need to change these defaults.

+ +

You may add the same field name as a NumericField to + the same document more than once. Range querying and + filtering will be the logical OR of all values; so a range query + will hit all documents that have at least one value in + the range. However sort behavior is not defined. If you need to sort, + you should separately index a single-valued NumericField.

+ +

A NumericField will consume somewhat more disk space + in the index than an ordinary single-valued field. + However, for a typical index that includes substantial + textual content per document, this increase will likely + be in the noise.

+ +

Within Lucene, each numeric value is indexed as a + trie structure, where each term is logically + assigned to larger and larger pre-defined brackets (which + are simply lower-precision representations of the value). + The step size between each successive bracket is called the + precisionStep, measured in bits. Smaller + precisionStep values result in larger number + of brackets, which consumes more disk space in the index + but may result in faster range search performance. The + default value, 4, was selected for a reasonable tradeoff + of disk space consumption versus performance. You can + use the expert constructor + if you'd + like to change the value. Note that you must also + specify a congruent value when creating + or . + For low cardinality fields larger precision steps are good. + If the cardinality is < 100, it is fair + to use , which produces one + term per value. + +

For more information on the internals of numeric trie + indexing, including the precisionStep + configuration, see . The format of + indexed values is described in . + +

If you only need to sort by numeric value, and never + run range querying/filtering, you can index using a + precisionStep of . + This will minimize disk space consumed.

+ +

More advanced users can instead use + directly, when indexing numbers. This + class is a wrapper around this token stream type for + easier, more intuitive usage.

+ +

NOTE: This class is only used during + indexing. When retrieving the stored field value from a + instance after search, you will get a + conventional instance where the numeric + values are returned as s (according to + toString(value) of the used data type). + +

NOTE: This API is + experimental and might change in incompatible ways in the + next release. + +

+ 2.9 + +
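An end-to-end hedged sketch of the indexing and range-filtering workflow described above ('writer' and 'searcher' are assumed to be an open IndexWriter and IndexSearcher; the factory method names follow the 2.9+ API):

    // assumes: using Lucene.Net.Documents; using Lucene.Net.Search;
    NumericField price = new NumericField("price", Field.Store.YES, true);
    Document doc = new Document();
    doc.Add(price);
    price.SetDoubleValue(9.99);
    writer.AddDocument(doc);

    // Query the same field with a congruent (default precisionStep) range query:
    Query inBudget = NumericRangeQuery.NewDoubleRange("price", 5.0, 15.0, true, true);
    TopDocs hits = searcher.Search(inBudget, 10);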
+ + Creates a field for numeric values using the default precisionStep + (4). The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + This constructor creates an indexed, but not stored field. + + the field name + + + + Creates a field for numeric values using the default precisionStep + (4). The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + + the field name + + if the field should be stored in plain text form + (according to toString(value) of the used data type) + + if the field should be indexed using + + + + Creates a field for numeric values with the specified + precisionStep. The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + This constructor creates an indexed, but not stored field. + + the field name + + the used precision step + + + + Creates a field for numeric values with the specified + precisionStep. The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + + the field name + + the used precision step + + if the field should be stored in plain text form + (according to toString(value) of the used data type) + + if the field should be indexed using + + + + Returns always null for numeric fields + + + Initializes the field with the supplied long value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).SetLongValue(value)) + + + + Initializes the field with the supplied int value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setIntValue(value)) + + + + Initializes the field with the supplied double value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setDoubleValue(value)) + + + + Initializes the field with the supplied float value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setFloatValue(value)) + + + + Returns a for indexing the numeric value. + + + Returns always null for numeric fields + + + Returns the numeric value as a string (how it is stored, when is chosen). + + + Returns the current numeric value as a subclass of , null if not yet initialized. + + + Declare what fields to load normally and what fields to load lazily + + + + + + Pass in the Set of names to load and the Set of names to load lazily. If both are null, the + Document will not have any on it. + + A Set of field names to load. May be empty, but not null + + A Set of field names to load lazily. May be empty, but not null + + + + Indicate whether to load the field with the given name or not. If the is not in either of the + initializing Sets, then is returned. If a Field name + is in both fieldsToLoad and lazyFieldsToLoad, lazy has precedence. + + + The name to check + + The + + + + + Base class for enumerating all but deleted docs. + +
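A hedged usage sketch for the selectors described above ('reader' and 'docId' are assumed to be an open IndexReader and a document number; the fields-to-load constructor is the one documented for MapFieldSelector):

    // assumes: using Lucene.Net.Documents; using Lucene.Net.Index;
    FieldSelector onlyTitle = new MapFieldSelector(new[] { "title" });
    Document partial = reader.Document(docId, onlyTitle);
    string title = partial.Get("title");
    // partial.Get("body") is null even if "body" was stored, because the
    // selector answered NO_LOAD for every field other than "title".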

NOTE: this class is meant only to be used internally + by Lucene; it's only public so it can be shared across + packages. This means the API is freely subject to + change, and the class could be removed entirely, in any + Lucene release. Use directly at your own risk!

+
+ + TermDocs provides an interface for enumerating <document, frequency> + pairs for a term.

The document portion names each document containing + the term. Documents are indicated by number. The frequency portion gives + the number of times the term occurred in each document.

The pairs are + ordered by document number. +

+ +
+ + Sets this to the data for a term. + The enumeration is reset to the start of the data for this term. + + + + Sets this to the data for the current term in a . + This may be optimized in some implementations. + + + + Moves to the next pair in the enumeration.

Returns true iff there is + such a next pair in the enumeration. +

+
+ + Attempts to read multiple entries from the enumeration, up to length of + docs. Document numbers are stored in docs, and term + frequencies are stored in freqs. The freqs array must be as + long as the docs array. + +

Returns the number of entries read. Zero is only returned when the + stream has been exhausted. +

+
+ + Skips entries to the first beyond the current whose document number is + greater than or equal to target.

Returns true iff there is such + an entry.

Behaves as if written:

    boolean skipTo(int target) {
      do {
        if (!next())
          return false;
      } while (target > doc());
      return true;
    }

Some implementations are considerably more efficient than that.

+
+ + Frees associated resources. + + + Returns the current document number.

This is invalid until + is called for the first time. +

+
+ + Returns the frequency of the term within the current document.

This + is invalid until is called for the first time. +

+
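A hedged sketch of the enumeration contract described above ('reader' is an open IndexReader; Doc and Freq are assumed to be properties in the .NET port):

    // assumes: using Lucene.Net.Index;
    TermDocs termDocs = reader.TermDocs(new Term("body", "lucene"));
    try
    {
        while (termDocs.Next())
        {
            int docId = termDocs.Doc;    // document containing the term
            int freq = termDocs.Freq;    // occurrences of the term in that document
        }
    }
    finally
    {
        termDocs.Close();                // frees associated resources
    }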
+ + Holds buffered deletes, by docID, term or query. We + hold two instances of this class: one for the deletes + prior to the last flush, the other for deletes after + the last flush. This is so if we need to abort + (discard all buffered docs) we can also discard the + buffered deletes yet keep the deletes done during + previously flushed segments. + + + + Abstract base class for input from a file in a . A + random-access input stream. Used for all Lucene index input operations. + + + + + + Reads and returns a single byte. + + + + + Reads a specified number of bytes into an array at the specified offset. + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + + + Reads a specified number of bytes into an array at the + specified offset with control over whether the read + should be buffered (callers who have their own buffer + should pass in "false" for useBuffer). Currently only + respects this parameter. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + set to false if the caller will handle + buffering. + + + + + + Reads four bytes and returns an int. + + + + + Reads an int stored in variable-length format. Reads between one and + five bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Reads eight bytes and returns a long. + + + + + Reads a long stored in variable-length format. Reads between one and + nine bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + Call this if readString should read characters stored + in the old modified UTF8 format (length in java chars + and java's modified UTF8 encoding). This is used for + indices written pre-2.4 See LUCENE-510 for details. + + + + Reads a string. + + + + + Reads Lucene's old "modified UTF-8" encoded + characters into an array. + + the array to read characters into + + the offset in the array to start storing characters + + the number of characters to read + + + + -- please use readString or readBytes + instead, and construct the string + from those utf8 bytes + + + + Expert + + Similar to but does not do any conversion operations on the bytes it is reading in. It still + has to invoke just as does, but it does not need a buffer to store anything + and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine + how many more bytes to read + + The number of chars to read + + this method operates on old "modified utf8" encoded + strings + + + + Closes the stream to futher operations. + + + Sets current position in this file, where the next read will occur. + + + + + The number of bytes in the file. + + + Returns a clone of this stream. + +

Clones of a stream access the same data, and are positioned at the same + point as the stream they were cloned from. + +

Expert: Subclasses must ensure that clones may be positioned at + different points in the input from each other and from the stream they + were cloned from. +

+
+ + Returns the current position in this file, where the next read will + occur. + + + + + + Class to write byte streams into slices of shared + byte[]. This is used by DocumentsWriter to hold the + posting list for many terms in RAM. + + + + Set up the writer to write at address. + + + Write byte into byte slice stream + + + Basic tool and API to check the health of an index and + write a new segments file that removes reference to + problematic segments. + +

As this tool checks every byte in the index, on a large + index it can take quite a long time to run. + +

WARNING: this tool and API are new and + experimental and subject to sudden change in the + next release. Please make a complete backup of your + index before using this to fix your index!

+
+ + Create a new CheckIndex on the directory. + + + Set infoStream where messages should go. If null, no + messages are printed + + + + Returns a instance detailing + the state of the index. + +

As this method checks every byte in the index, on a large + index it can take quite a long time to run. + +

WARNING: make sure + you only call this when the index is not opened by any + writer. +

+
+ + Returns a instance detailing + the state of the index. + + + list of specific segment names to check + +

As this method checks every byte in the specified + segments, on a large index it can take quite a long + time to run. + +

WARNING: make sure + you only call this when the index is not opened by any + writer. + + + +

Test field norms. +
+ + Test the term index. + + + Test stored fields for a segment. + + + Test term vectors for a segment. + + + Repairs the index using previously returned result + from . Note that this does not + remove any of the unreferenced files after it's done; + you must separately open an , which + deletes unreferenced files when it's created. + +

WARNING: this writes a + new segments file into the index, effectively removing + all documents in broken segments from the index. + BE CAREFUL. + +

WARNING: Make sure you only call this when the + index is not opened by any writer. +

+
+ + Command-line interface to check and fix an index. +

+ Run it like this: + + java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] + + + -fix: actually write a new segments_N file, removing any problematic segments + -segment X: only check the specified + segment(s). This can be specified multiple times, + to check more than one segment, eg -segment _2 + -segment _a. You can't use this with the -fix + option. + +

WARNING: -fix should only be used on an emergency basis as it will cause + documents (perhaps many) to be permanently removed from the index. Always make + a backup copy of your index before running this! Do not run this tool on an index + that is actively being written to. You have been warned! +

Run without -fix, this tool will open the index, report version information + and report any exceptions it hits and what action it would take if -fix were + specified. With -fix, this tool will remove any segments that have issues and + write a new segments_N file. This means all documents contained in the affected + segments will be removed. +

+ This tool exits with exit code 1 if the index cannot be opened or has any + corruption, else 0. +

+
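The same check can be run programmatically; a hedged sketch (the renamed check method is the one referenced elsewhere in these docs, and the 'clean' member may be a field or a property depending on the port):

    // assumes: using Lucene.Net.Index; 'dir' is the index Directory
    CheckIndex checker = new CheckIndex(dir);
    CheckIndex.Status status = checker.CheckIndex_Renamed_Method();
    if (!status.clean)
    {
        // Destructive, same caveat as -fix: every document in a broken segment is lost.
        // checker.FixIndex(status);
    }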
+ + Returned from detailing the health and status of the index. + +

WARNING: this API is new and experimental and is + subject to sudden change in the next release. +

+
+ + True if no problems were found with the index. + + + True if we were unable to locate and load the segments_N file. + + + True if we were unable to open the segments_N file. + + + True if we were unable to read the version number from segments_N file. + + + Name of latest segments_N file in the index. + + + Number of segments in the index. + + + String description of the version of the index. + + + Empty unless you passed specific segments list to check as optional 3rd argument. + + CheckIndex.CheckIndex_Renamed_Method(System.Collections.IList) + + + + True if the index was created with a newer version of Lucene than the CheckIndex tool. + + + List of instances, detailing status of each segment. + + + Directory index is in. + + + SegmentInfos instance containing only segments that + had no problems (this is used with the + method to repair the index. + + + + How many documents will be lost to bad segments. + + + How many bad segments were found. + + + True if we checked only specific segments () + was called with non-null + argument). + + + + Holds the userData of the last commit in the index + + + Holds the status of each segment in the index. + See . + +

WARNING: this API is new and experimental and is + subject to sudden change in the next release. +

+
+ + Name of the segment. + + + Document count (does not take deletions into account). + + + True if segment is compound file format. + + + Number of files referenced by this segment. + + + Net size (MB) of the files referenced by this + segment. + + + + Doc store offset, if this segment shares the doc + store files (stored fields and term vectors) with + other segments. This is -1 if it does not share. + + + + String of the shared doc store segment, or null if + this segment does not share the doc store files. + + + + True if the shared doc store files are compound file + format. + + + + True if this segment has pending deletions. + + + Name of the current deletions file name. + + + Number of deleted documents. + + + True if we were able to open a SegmentReader on this + segment. + + + + Number of fields in this segment. + + + True if at least one of the fields in this segment + does not omitTermFreqAndPositions. + + + + + + Map<String, String> that includes certain + debugging details that IndexWriter records into + each segment it creates + + + + Status for testing of field norms (null if field norms could not be tested). + + + Status for testing of indexed terms (null if indexed terms could not be tested). + + + Status for testing of stored fields (null if stored fields could not be tested). + + + Status for testing of term vectors (null if term vectors could not be tested). + + + Status from testing field norms. + + + Number of fields successfully tested + + + Exception thrown during term index test (null on success) + + + Status from testing term index. + + + Total term count + + + Total frequency across all terms. + + + Total number of positions. + + + Exception thrown during term index test (null on success) + + + Status from testing stored fields. + + + Number of documents tested. + + + Total number of stored fields tested. + + + Exception thrown during stored fields test (null on success) + + + Status from testing stored fields. + + + Number of documents tested. + + + Total number of term vectors tested. + + + Exception thrown during term vector test (null on success) + + + Optimized implementation. + + + Overridden by SegmentTermPositions to skip in prox stream. + + + Optimized implementation. + + + Class for accessing a compound stream. + This class implements a directory, but is limited to only read operations. + Directory methods that would normally modify data throw an exception. + + + + A Directory is a flat list of files. Files may be written once, when they + are created. Once a file is created it may only be opened for read, or + deleted. Random access is permitted both when reading and writing. + +

Java's i/o APIs are not used directly; rather, all i/o goes + through this API. This permits things such as: + implementation of RAM-based indices; + implementation of indices stored in a database, via JDBC; + implementation of an index as a single file; + + Directory locking is implemented by an instance of + , and can be changed for each Directory + instance using . +

+
+ + Holds the LockFactory instance (implements locking for + this Directory instance). + + + + Returns an array of strings, one for each file in the directory. + + + + Returns true iff a file with the given name exists. + + + Returns the time the named file was last modified. + + + Set the modified time of an existing file to now. + + + Removes an existing file in the directory. + + + Returns the length of a file in the directory. + + + Creates a new, empty file in the directory with the given name. + Returns a stream writing this file. + + + + Ensure that any writes to this file are moved to + stable storage. Lucene uses this to properly commit + changes to the index, to prevent a machine/OS crash + from corrupting the index. + + + + Returns a stream reading an existing file. + + + Returns a stream reading an existing file, with the + specified read buffer size. The particular Directory + implementation may ignore the buffer size. Currently + the only Directory implementations that respect this + parameter are and + . + + + + Construct a . + the name of the lock file + + + + Attempt to clear (forcefully unlock and remove) the + specified lock. Only call this at a time when you are + certain this lock is no longer in use. + + name of the lock to be cleared. + + + + Closes the store. + + + Set the LockFactory that this Directory instance should + use for its locking implementation. Each * instance of + LockFactory should only be used for one directory (ie, + do not share a single instance across multiple + Directories). + + + instance of . + + + + Return a string identifier that uniquely differentiates + this Directory instance from other Directory instances. + This ID should be the same if two Directory instances + (even in different JVMs and/or on different machines) + are considered "the same index". This is how locking + "scopes" to the right index. + + + + Copy contents of a directory src to a directory dest. + If a file in src already exists in dest then the + one in dest will be blindly overwritten. + +

NOTE: the source directory cannot change + while this method is running. Otherwise the results + are undefined and you could easily hit a + FileNotFoundException. + +

NOTE: this method only copies files that look + like index files (ie, have extensions matching the + known extensions of index files). + +

+ source directory + + destination directory + + if true, call method on source directory + + IOException +
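For instance, the copy helper can be used to clone an on-disk index into a RAMDirectory. This is a minimal sketch assuming the static Lucene.Net signature Directory.Copy(Directory src, Directory dest, bool closeDirSrc) described above:

    using Lucene.Net.Store;

    var source = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
    var dest = new RAMDirectory();
    Directory.Copy(source, dest, true); // true: also close the source directory when the copy completes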
+ + AlreadyClosedException if this Directory is closed + + + Get the LockFactory that this Directory instance is + using for its locking implementation. Note that this + may be null for Directory implementations that provide + their own locking implementation. + + + + Returns an array of strings, one for each file in the directory. + + + Returns true iff a file with the given name exists. + + + Returns the time the compound file was last modified. + + + Set the modified time of the compound file to now. + + + Not implemented + UnsupportedOperationException + + + Not implemented + UnsupportedOperationException + + + Returns the length of a file in the directory. + IOException if the file does not exist + + + Not implemented + UnsupportedOperationException + + + Not implemented + UnsupportedOperationException + + + Implementation of an IndexInput that reads from a portion of the + compound file. The visibility is left as "package" *only* because + this helps with testing since JUnit test cases in a different class + can then access package fields of this class. + + + + Base implementation class for buffered . + + + Default buffer size + + + Inits BufferedIndexInput with a specific bufferSize + + + Change the buffer size used by this IndexInput + + + Expert: implements buffer refill. Reads bytes from the current position + in the input. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + Expert: implements seek. Sets current position in this file, where the + next will occur. + + + + + + + + + + Expert: implements buffer refill. Reads bytes from the current + position in the input. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + Expert: implements seek. Sets current position in this file, where + the next will occur. + + + + + + Combines multiple files into a single compound file. + The file format:
+ VInt fileCount
+ {Directory}
+     fileCount entries with the following structure:
+         long dataOffset
+         String fileName
+ {File Data}
+     fileCount entries with the raw data of the corresponding file
+
+ The fileCount integer indicates how many files are contained in this compound
+ file. The {directory} that follows has that many entries. Each directory entry
+ contains a long pointer to the start of this file's data section, and a String
+ with that file's name.
+
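A rough usage sketch of the writer described next. This is an internal index component, so the constructor and method names below are assumptions based on this description rather than a supported public API, and the file names are hypothetical:

    var cfsWriter = new Lucene.Net.Index.CompoundFileWriter(dir, "_1.cfs");
    cfsWriter.AddFile("_1.fdt"); // stored fields data
    cfsWriter.AddFile("_1.fdx"); // stored fields index
    cfsWriter.Close();           // writes the entry table and copies each added file into the compound stream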
+ + Create the compound stream in the specified file. The file name is the + entire name (no extensions are added). + + NullPointerException if dir or name is null + + + Add a source stream. file is the string by which the + sub-stream will be known in the compound stream. + + + IllegalStateException if this writer is closed + NullPointerException if file is null + IllegalArgumentException if a file with the same name + has been added already + + + + Merge files with the extensions added up to now. + All files with these extensions are combined sequentially into the + compound stream. After successful merge, the source files + are deleted. + + IllegalStateException if close() had been called before or + if no file has been added to this object + + + + Copy the contents of the file with specified extension into the + provided output stream. Use the provided buffer for moving data + to reduce memory allocation. + + + + Returns the directory of the compound file. + + + Returns the name of the compound file. + + + source file + + + temporary holder for the start of directory entry for this file + + + temporary holder for the start of this file's data section + + + A that runs each merge using a + separate thread, up until a maximum number of threads + () at which when a merge is + needed, the thread(s) that are updating the index will + pause until one or more merges completes. This is a + simple way to use concurrency in the indexing process + without having to create and manage application level + threads. + + + +

Expert: uses an instance + implementing this interface to execute the merges + selected by a . The default + MergeScheduler is .

+ +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+ +

NOTE: This class typically requires access to + package-private APIs (eg, SegmentInfos) to do its job; + if you implement your own MergePolicy, you'll need to put + it in package Lucene.Net.Index in order to use + these APIs. +

+
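A minimal configuration sketch in C#. The writer setup is illustrative; MaxThreadCount is assumed from the "gets or sets the max # simultaneous threads" member documented below, and older ports expose a SetMaxThreadCount method instead:

    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Index;
    using Lucene.Net.Store;

    var dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
    var writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29),
                                 IndexWriter.MaxFieldLength.UNLIMITED);
    var cms = new ConcurrentMergeScheduler();
    cms.MaxThreadCount = 2;        // cap the number of background merge threads (member name assumed)
    writer.SetMergeScheduler(cms); // merges selected by the MergePolicy now run on these threads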
+ + Run the merges provided by . + + + Close this MergeScheduler. + + + Return the priority that merge threads run at. By + default the priority is 1 plus the priority of (ie, + slightly higher priority than) the first thread that + calls merge. + + + + Set the priority that merge threads run at. + + + Does the actual merge, by calling + + + Create and return a new MergeThread + + + Called when an exception is hit in a background merge + thread + + + + Used for testing + + + Used for testing + + + Used for testing + + + Used for testing + + + Used for testing + + + Gets or sets the max # simultaneous threads that may be + running. If a merge is necessary yet we already have + this many threads running, the incoming thread (that + is calling add/updateDocument) will block until + a merge thread has completed. + + + + + Support class used to handle threads + + + + + This interface should be implemented by any class whose instances are intended + to be executed by a thread. + + + + + This method has to be implemented in order that starting of the thread causes the object's + run method to be called in that separately executing thread. + + + + + The instance of System.Threading.Thread + + + + + Initializes a new instance of the ThreadClass class + + + + + Initializes a new instance of the Thread class. + + The name of the thread + + + + Initializes a new instance of the Thread class. + + A ThreadStart delegate that references the methods to be invoked when this thread begins executing + + + + Initializes a new instance of the Thread class. + + A ThreadStart delegate that references the methods to be invoked when this thread begins executing + The name of the thread + + + + This method has no functionality unless the method is overridden + + + + + Causes the operating system to change the state of the current thread instance to ThreadState.Running + + + + + Interrupts a thread that is in the WaitSleepJoin thread state + + + + + Blocks the calling thread until a thread terminates + + + + + Blocks the calling thread until a thread terminates or the specified time elapses + + Time of wait in milliseconds + + + + Blocks the calling thread until a thread terminates or the specified time elapses + + Time of wait in milliseconds + Time of wait in nanoseconds + + + + Resumes a thread that has been suspended + + + + + Raises a ThreadAbortException in the thread on which it is invoked, + to begin the process of terminating the thread. Calling this method + usually terminates the thread + + + + + Raises a ThreadAbortException in the thread on which it is invoked, + to begin the process of terminating the thread while also providing + exception information about the thread termination. + Calling this method usually terminates the thread. + + An object that contains application-specific information, such as state, which can be used by the thread being aborted + + + + Suspends the thread, if the thread is already suspended it has no effect + + + + + Obtain a String that represents the current object + + A String that represents the current object + + + + Gets the currently running thread + + The currently running thread + + + + Gets the current thread instance + + + + + Gets or sets the name of the thread + + + + + Gets or sets a value indicating the scheduling priority of a thread + + + + + Gets a value indicating the execution status of the current thread + + + + + Gets or sets a value indicating whether or not a thread is a background thread. 
+ + + + This exception is thrown when Lucene detects + an inconsistency in the index. + + + + Implements the skip list reader for the default posting list format + that stores positions and payloads. + + + + + This abstract class reads skip lists with multiple levels. + + See for the information about the encoding + of the multi level skip lists. + + Subclasses must implement the abstract method + which defines the actual format of the skip data. + + + + Returns the id of the doc to which the last call of + has skipped. + + + + Skips entries to the first beyond the current whose document number is + greater than or equal to target. Returns the current doc count. + + + + Seeks the skip entry on the given level + + + initializes the reader + + + Loads the skip levels + + + Subclasses must implement the actual skip data encoding in this method. + + + the level skip data shall be read from + + the skip stream to read from + + + + Copies the values of the last read skip entry on this level + + + used to buffer the top skip levels + + + Returns the freq pointer of the doc to which the last call of + has skipped. + + + + Returns the prox pointer of the doc to which the last call of + has skipped. + + + + Returns the payload length of the payload stored just before + the doc to which the last call of + has skipped. + + + + Implements the skip list writer for the default posting list format + that stores positions and payloads. + + + + + This abstract class writes skip lists with multiple levels. + + Example for skipInterval = 3: + c (skip level 2) + c c c (skip level 1) + x x x x x x x x x x (skip level 0) + d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) + 3 6 9 12 15 18 21 24 27 30 (df) + + d - document + x - skip data + c - skip data with child pointer + + Skip level i contains every skipInterval-th entry from skip level i-1. + Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))). + + Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1. + This guarantess a logarithmic amount of skips to find the target document. + + While this class takes care of writing the different skip levels, + subclasses must define the actual format of the skip data. + + + + + Subclasses must implement the actual skip data encoding in this method. + + + the level skip data shall be writting for + + the skip buffer to write to + + + + Writes the current skip data to the buffers. The current document frequency determines + the max level is skip data is to be written to. + + + the current document frequency + + IOException + + + Writes the buffered skip lists to the given output. + + + the IndexOutput the skip lists shall be written to + + the pointer the skip list starts + + + + Sets the values for the current skip data. + + + An IndexReader which reads indexes with multiple segments. + + + IndexReader is an abstract class, providing an interface for accessing an + index. Search of an index is done entirely through this abstract interface, + so that any subclass which implements it is searchable. +

Concrete subclasses of IndexReader are usually constructed with a call to + one of the static open() methods, e.g. + . +

For efficiency, in this API documents are often referred to via + document numbers, non-negative integers which each name a unique + document in the index. These document numbers are ephemeral--they may change + as documents are added to and deleted from an index. Clients should thus not + rely on a given document having the same number between sessions. +

An IndexReader can be opened on a directory for which an IndexWriter is + opened already, but it cannot be used to delete documents from the index then. +

+ NOTE: for backwards API compatibility, several methods are not listed + as abstract, but have no useful implementations in this base class and + instead always throw UnsupportedOperationException. Subclasses are + strongly encouraged to override these methods, but in many cases may not + need to. +

+

+ NOTE: as of 2.4, it's possible to open a read-only + IndexReader using the static open methods that accept the + boolean readOnly parameter. Such a reader has + better concurrency as it's not necessary to synchronize on the + isDeleted method. You must explicitly specify false + if you want to make changes with the resulting IndexReader.

+

NOTE: + instances are completely thread + safe, meaning multiple threads can call any of its methods, + concurrently. If your application requires external + synchronization, you should not synchronize on the + IndexReader instance; use your own + (non-Lucene) objects instead. +

+
+ + Expert: increments the refCount of this IndexReader + instance. RefCounts are used to determine when a + reader can be closed safely, i.e. as soon as there are + no more references. Be sure to always call a + corresponding , in a finally clause; + otherwise the reader may never be closed. Note that + simply calls decRef(), which means that + the IndexReader will not really be closed until + has been called for all outstanding + references. + + + + + + + Expert: decreases the refCount of this IndexReader + instance. If the refCount drops to 0, then pending + changes (if any) are committed to the index and this + reader is closed. + + + IOException in case an IOException occurs in commit() or doClose() + + + + + + + AlreadyClosedException if this IndexReader is closed + + + Returns an IndexReader reading the index in the given + Directory. You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the reader. + + the index directory + true if no changes (deletions, norms) will be made with this IndexReader + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns an IndexReader reading the index in the given + . You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the reader. + + the commit point to open + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, with a custom + . You should pass readOnly=true, + since it gives much better concurrent performance, + unless you intend to do write operations (delete + documents or change norms) with the reader. + + the index directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, with a custom + . You should pass readOnly=true, + since it gives much better concurrent performance, + unless you intend to do write operations (delete + documents or change norms) with the reader. + + the index directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + Subsamples which indexed + terms are loaded into RAM. This has the same effect as + IndexWriter.SetTermIndexInterval + except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1. Set this + to -1 to skip loading the terms index entirely. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, using a specific commit and with + a custom . 
You should pass + readOnly=true, since it gives much better concurrent + performance, unless you intend to do write operations + (delete documents or change norms) with the reader. + + the specific to open; + see to list all commits + in a directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, using a specific commit and with + a custom . You should pass + readOnly=true, since it gives much better concurrent + performance, unless you intend to do write operations + (delete documents or change norms) with the reader. + + the specific to open; + see to list all commits + in a directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + Subsambles which indexed + terms are loaded into RAM. This has the same effect as + IndexWriter.SetTermIndexInterval + except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1. Set this + to -1 to skip loading the terms index entirely. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Refreshes an IndexReader if the index has changed since this instance + was (re)opened. +

+ Opening an IndexReader is an expensive operation. This method can be used + to refresh an existing IndexReader to reduce these costs. This method + tries to only load segments that have changed or were created after the + IndexReader was (re)opened. +

+ If the index has not changed since this instance was (re)opened, then this + call is a NOOP and returns this instance. Otherwise, a new instance is + returned. The old instance is not closed and remains usable.
+

+ If the reader is reopened, even though they share + resources internally, it's safe to make changes + (deletions, norms) with the new reader. All shared + mutable state obeys "copy on write" semantics to ensure + the changes are not seen by other readers. +

+ You can determine whether a reader was actually reopened by comparing the + old instance with the instance returned by this method: + + IndexReader reader = ... + ... + IndexReader newReader = r.reopen(); + if (newReader != reader) { + ... // reader was reopened + reader.close(); + } + reader = newReader; + ... + + + Be sure to synchronize that code so that other threads, + if present, can never use reader after it has been + closed and before it's switched to newReader. + +

NOTE: If this reader is a near real-time + reader (obtained from ), + reopen() will simply call writer.getReader() again for + you, though this may change in the future. +

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error +
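The same reopen pattern expressed in C# (a sketch; Open, Reopen and Close are the Lucene.Net names for the calls discussed above, and IsCurrent() is used so the reader is only refreshed when the index has actually changed):

    using Lucene.Net.Index;
    using Lucene.Net.Store;

    IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo("index")), true);
    // ... later, when fresher results are wanted:
    if (!reader.IsCurrent())
    {
        IndexReader newReader = reader.Reopen(); // shares unchanged segments with the old reader
        if (newReader != reader)
        {
            reader.Close(); // only once no other thread can still be using the old instance
            reader = newReader;
        }
    }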
+ + Just like , except you can change the + readOnly of the original reader. If the index is + unchanged but readOnly is different then a new reader + will be returned. + + + + Expert: reopen this reader on a specific commit point. + This always returns a readOnly reader. If the + specified commit point matches what this reader is + already on, and this reader is already readOnly, then + this same instance is returned; if it is not already + readOnly, a readOnly clone is returned. + + + + Efficiently clones the IndexReader (sharing most + internal state). +

+ On cloning a reader with pending changes (deletions, + norms), the original reader transfers its write lock to + the cloned reader. This means only the cloned reader + may make further changes to the index, and commit the + changes to the index on close, but the old reader still + reflects all changes made up until it was cloned. +

+ Like , it's safe to make changes to + either the original or the cloned reader: all shared + mutable state obeys "copy on write" semantics to ensure + the changes are not seen by other readers. +

+

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error +
+ + Clones the IndexReader and optionally changes readOnly. A readOnly + reader cannot open a writeable reader. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Returns the directory associated with this index. The Default + implementation returns the directory specified by subclasses when + delegating to the IndexReader(Directory) constructor, or throws an + UnsupportedOperationException if one was not specified. + + UnsupportedOperationException if no directory + + + Returns the time the index in the named directory was last modified. + Do not use this to check whether the reader is still up-to-date, use + instead. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Reads version number from segments files. The version number is + initialized with a timestamp and then increased by one for each change of + the index. + + + where the index resides. + + version number. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + Reads commitUserData, previously passed to + , + from current index segments file. This will return null if + + has never been called for this index. + + where the index resides. + + commit userData. + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + + + + + Check whether any new changes have occurred to the index since this + reader was opened. + +

+ If this reader is based on a Directory (ie, was created by calling + Open(Store.Directory), or on a reader based on a Directory), then + this method checks if any further commits (see ) + have occurred in that directory.

+ +

+ If instead this reader is a near real-time reader (ie, obtained by a call + to , or by calling on a near + real-time reader), then this method checks if either a new commit has + occurred, or any new uncommitted changes have taken place via the writer. + Note that even if the writer has only performed merging, this method will + still return false.

+ +

+ In any event, if this returns false, you should call to + get a new reader that sees the changes. +

+ +

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error + UnsupportedOperationException unless overridden in subclass +
+ + Checks if the index is optimized (if it has a single segment and + no deletions). Not implemented in the IndexReader base class. + true if the index is optimized; false otherwise + UnsupportedOperationException unless overridden in subclass

+ NOTE: for performance reasons, this method does not check if the + requested document is deleted, and therefore asking for a deleted document + may yield unspecified results. Usually this is not required, however you + can call with the requested document ID to verify + the document is not deleted. + +

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error +
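For example, a scan over all stored documents that honours the note above by skipping deleted slots (a sketch; MaxDoc and IsDeleted appear as methods here but may be properties depending on the Lucene.Net version, and the field name is hypothetical):

    for (int docId = 0; docId < reader.MaxDoc(); docId++)
    {
        if (reader.IsDeleted(docId))
            continue; // deleted documents yield unspecified results
        Lucene.Net.Documents.Document doc = reader.Document(docId);
        string title = doc.Get("title"); // "title" is a hypothetical stored field
    }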
+ + Get the at the n + th position. The may be used to determine + what s to load and how they should + be loaded. NOTE: If this Reader (more specifically, the underlying + FieldsReader) is closed before the lazy + is loaded an exception may be + thrown. If you want the value of a lazy + to be available after closing you + must explicitly load it or fetch the Document again with a new loader. +

+ NOTE: for performance reasons, this method does not check if the + requested document is deleted, and therefore asking for a deleted document + may yield unspecified results. Usually this is not required, however you + can call with the requested document ID to verify + the document is not deleted. + +

+ Get the document at the nth position + + The to use to determine what + Fields should be loaded on the Document. May be null, in which case + all Fields will be loaded. + + The stored fields of the + at the nth position + + CorruptIndexException if the index is corrupt + If there is a low-level IO error + + + + + + + + +
+ + Returns true if document n has been deleted + + + Returns true if there are norms stored for this field. + + + + Returns the byte-encoded normalization factor for the named field of + every document. This is used by the search code to score documents. + + + + + + Reads the byte-encoded normalization factor for the named field of every + document. This is used by the search code to score documents. + + + + + Expert: Resets the normalization factor for the named field of the named + document. The norm represents the product of the field's boost + and its length normalization. Thus, to preserve the length normalization + values when resetting this, one should base the new value upon the old. + + NOTE: If this field does not store norms, then + this method call will silently do nothing. + + + + + If the index has changed since this reader was opened + + + If the index is corrupt + + + If another writer has this index open (write.lock could not be obtained) + + + If there is a low-level IO error + + + + Implements setNorm in subclass. + + + + Expert: Resets the normalization factor for the named field of the named document. + + + + + If the index has changed since this reader was opened + + + If the index is corrupt + + + If another writer has this index open (write.lock could not be obtained) + + + If there is a low-level IO error + + + + Returns an enumeration of all the terms in the index. The + enumeration is ordered by Term.compareTo(). Each term is greater + than all that precede it in the enumeration. Note that after + calling terms(), must be called + on the resulting enumeration before calling other methods such as + . + + + If there is a low-level IO error + + + + Returns an enumeration of all terms starting at a given term. If + the given term does not exist, the enumeration is positioned at the + first term greater than the supplied term. The enumeration is + ordered by Term.compareTo(). Each term is greater than all that + precede it in the enumeration. + + + If there is a low-level IO error + + + + Returns the number of documents containing the term t. + If there is a low-level IO error + + + Returns an enumeration of all the documents which contain + term. For each document, the document number, the frequency of + the term in that document is also provided, for use in + search scoring. If term is null, then all non-deleted + docs are returned with freq=1. + Thus, this method implements the mapping: +

+ Term    =>    <docNum, freq>* + +

The enumeration is ordered by document number. Each document number + is greater than all that precede it in the enumeration. +

+ If there is a low-level IO error +
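A sketch of consuming this enumeration for a single term. Doc and Freq are shown as properties, which matches later Lucene.Net versions; earlier ports expose Doc() and Freq() methods, and the field and term are hypothetical:

    TermDocs td = reader.TermDocs(new Term("contents", "lucene"));
    try
    {
        while (td.Next())
        {
            int docId = td.Doc; // document numbers come back in increasing order
            int freq = td.Freq; // occurrences of the term within that document
        }
    }
    finally
    {
        td.Close(); // or Dispose(), depending on the version
    }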
+ + Returns an unpositioned enumerator. + If there is a low-level IO error + + + Returns an enumeration of all the documents which contain + term. For each document, in addition to the document number + and frequency of the term in that document, a list of all of the ordinal + positions of the term in the document is available. Thus, this method + implements the mapping: + +

+ Term    =>    <docNum, freq, + <pos1, pos2, ... + posfreq-1> + >* + +

This positional information facilitates phrase and proximity searching. +

The enumeration is ordered by document number. Each document number is + greater than all that precede it in the enumeration. +

+ If there is a low-level IO error +
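The positional variant, under the same assumptions as the TermDocs sketch above:

    TermPositions tp = reader.TermPositions(new Term("contents", "lucene"));
    try
    {
        while (tp.Next())
        {
            for (int i = 0; i < tp.Freq; i++)
            {
                int position = tp.NextPosition(); // ordinal position of the i-th occurrence
            }
        }
    }
    finally
    {
        tp.Close();
    }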
+ + Returns an unpositioned enumerator. + If there is a low-level IO error + + + + Deletes the document numbered docNum. Once a document is + deleted it will not appear in TermDocs or TermPostitions enumerations. + Attempts to read its field with the + method will result in an error. The presence of this document may still be + reflected in the statistic, though + this will be corrected eventually as the index is further modified. + + + If the index has changed since this reader was opened + + If the index is corrupt + + If another writer has this index open (write.lock could not be obtained) + + If there is a low-level IO error + + + Implements deletion of the document numbered docNum. + Applications should call or . + + + + + Deletes all documents that have a given term indexed. + This is useful if one uses a document field to hold a unique ID string for + the document. Then to delete such a document, one merely constructs a + term with the appropriate field and the unique ID string as its text and + passes it to this method. + See for information about when this deletion will + become effective. + + The number of documents deleted + + If the index has changed since this reader was opened + + If the index is corrupt + + If another writer has this index open (write.lock could not be obtained) + + If there is a low-level IO error + + + Undeletes all documents currently marked as deleted in this index. + + + + If the index has changed since this reader was opened + + If the index is corrupt + + If another writer has this index open (write.lock could not be obtained) + + If there is a low-level IO error + + + Implements actual undeleteAll() in subclass. + + + + Does nothing by default. Subclasses that require a write lock for + index modifications must implement this method. + + + + + + + + Opaque Map (String -> String) + that's recorded into the segments file in the index, + and retrievable by + + + + + Commit changes resulting from delete, undeleteAll, or + setNorm operations + + If an exception is hit, then either no changes or all + changes will have been committed to the index + (transactional semantics). + + If there is a low-level IO error + + + Commit changes resulting from delete, undeleteAll, or + setNorm operations + + If an exception is hit, then either no changes or all + changes will have been committed to the index + (transactional semantics). + + If there is a low-level IO error + + + Implements commit. + + + Closes files associated with this index. + Also saves any new deletions to disk. + No other methods should be called after this has been called. + + If there is a low-level IO error + + + Implements close. + + + Get a list of unique field names that exist in this index and have the specified + field option information. + + specifies which field option should be available for the returned fields + + Collection of Strings indicating the names of the fields. + + + + + + Prints the filename and size of each file within a given compound file. + Add the -extract flag to extract files to the current working directory. + In order to make the extracted version of the index work, you have to copy + the segments file from the compound index into the directory where the extracted files are stored. + + Usage: Lucene.Net.Index.IndexReader [-extract] <cfsfile> + + + + Returns all commit points that exist in the Directory. + Normally, because the default is + , there would be only + one commit point. But if you're using a custom + then there could be many commits. 
+ Once you have a given commit, you can open a reader on + it by calling + There must be at least one commit in + the Directory, else this method throws . + Note that if a commit is in + progress while this method is running, that commit + may or may not be included in the returned array. + + Expert: returns the sequential sub readers that this + reader is logically composed of. For example, + IndexSearcher uses this API to drive searching by one + sub reader at a time. If this reader is not composed + of sequential child readers, it should return null. + If this method returns an empty array, that means this + reader is a null reader (for example a MultiReader + that has no sub readers).

+ NOTE: You should not try using sub-readers returned by + this method to make any changes (setNorm, deleteDocument, + etc.). While this might succeed for one composite reader + (like MultiReader), it will most likely lead to index + corruption for other readers (like DirectoryReader obtained + through ). Use the parent reader directly.

+
+ + Expert: returns the current refCount for this reader + + + Version number when this IndexReader was opened. Not implemented in the + IndexReader base class. + +

+ If this reader is based on a Directory (ie, was created by calling + , or + on a reader based on a Directory), then + this method returns the version recorded in the commit that the reader + opened. This version is advanced every time is + called. +

+ +

+ If instead this reader is a near real-time reader (ie, obtained by a call + to , or by calling on a near + real-time reader), then this method returns the version of the last + commit done by the writer. Note that even as further changes are made + with the writer, the version will not change until a commit is + completed. Thus, you should not rely on this method to determine when a + near real-time reader should be opened. Use instead.

+ +

+ UnsupportedOperationException + unless overridden in subclass + +
+ + Retrieve the String userData optionally passed to + . + This will return null if + + has never been called for this index. + + + + + + Returns one greater than the largest possible document number. + This may be used to, e.g., determine how big to allocate an array which + will have an element for every document number in an index. + + + + Returns the number of deleted documents. + + + Returns the stored fields of the nth + Document in this index. +

+ NOTE: for performance reasons, this method does not check if the + requested document is deleted, and therefore asking for a deleted document + may yield unspecified results. Usually this is not required, however you + can call with the requested document ID to verify + the document is not deleted. + +

+ CorruptIndexException if the index is corrupt + If there is a low-level IO error +
+ + Returns true if any documents have been deleted + + + Expert: return the IndexCommit that this reader has + opened. This method is only implemented by those + readers that correspond to a Directory with its own + segments_N file. + +

WARNING: this API is new and experimental and + may suddenly change.

+

+
+ + Expert + + + Returns the number of unique terms (across all fields) + in this reader. + + This method returns long, even though internally + Lucene cannot handle more than 2^31 unique terms, for + a possible future when this limitation is removed. + + + UnsupportedOperationException if this count + cannot be easily determined (eg Multi*Readers). + Instead, you should call + and ask each sub reader for + its unique term count. + + + + + For IndexReader implementations that use + TermInfosReader to read terms, this returns the + current indexDivisor as specified when the reader was + opened. + + + + Utility class for executing code that needs to do + something with the current segments file. This is + necessary with lock-less commits because from the time + you locate the current segments file name, until you + actually open it, read its contents, or check modified + time, etc., it could have been deleted due to a writer + commit finishing. + + + + A collection of segmentInfo objects with methods for operating on + those segments in relation to the file system. + +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + The file format version, a negative number. + + + This format adds details used for lockless commits. It differs + slightly from the previous format in that file names + are never re-used (write once). Instead, each file is + written to the next generation. For example, + segments_1, segments_2, etc. This allows us to not use + a commit lock. See file + formats for details. + + + + This format adds a "hasSingleNormFile" flag into each segment info. + See LUCENE-756 + for details. + + + + This format allows multiple segments to share a single + vectors and stored fields file. + + + + This format adds a checksum at the end of the file to + ensure all bytes were successfully written. + + + + This format adds the deletion count for each segment. + This way IndexWriter can efficiently report numDocs(). + + + + This format adds the boolean hasProx to record if any + fields in the segment store prox information (ie, have + omitTermFreqAndPositions==false) + + + + This format adds optional commit userData (String) storage. + + + This format adds optional per-segment String + dianostics storage, and switches userData to Map + + + + counts how often the index has been changed by adding or deleting docs. + starting with the current time in milliseconds forces to create unique version numbers. + + + + If non-null, information about loading segments_N files + + + + + Get the generation (N) of the current segments_N file + from a list of files. + + + -- array of file names to check + + + + Get the generation (N) of the current segments_N file + in the directory. + + + -- directory to search for the latest segments_N file + + + + Get the filename of the current segments_N file + from a list of files. + + + -- array of file names to check + + + + Get the filename of the current segments_N file + in the directory. + + + -- directory to search for the latest segments_N file + + + + Get the segments_N filename in use by this segment infos. + + + Parse the generation off the segments file name and + return it. + + + + Get the next segments_N filename that will be written. + + + Read a particular segmentFileName. Note that this may + throw an IOException if a commit is in process. + + + -- directory containing the segments file + + -- segment file to load + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + This version of read uses the retry logic (for lock-less + commits) to find the right segments file to load. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns a copy of this instance, also copying each + SegmentInfo. + + + + Current version number from segments file. + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns userData from latest segments file + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + If non-null, information about retries when loading + the segments file will be printed to this. + + + + Returns a new SegmentInfos containg the SegmentInfo + instances in the specified range first (inclusive) to + last (exclusive), so total number of segments returned + is last-first. + + + + Call this to start a commit. This writes the new + segments file, but writes an invalid checksum at the + end, so that it is not visible to readers. Once this + is called you must call to complete + the commit or to abort it. 
+ + + + Returns all file names referenced by SegmentInfo + instances matching the provided Directory (ie files + associated with any "external" segments are skipped). + The returned collection is recomputed on each + invocation. + + + + Writes & syncs to the Directory dir, taking care to + remove the segments file on exception + + + + Replaces all segments in this instance, but keeps + generation, version, counter so that future commits + remain write once. + + + + + Simple brute force implementation. + If size is equal, compare items one by one. + + SegmentInfos object to check equality for + true if lists are equal, false otherwise + + + + Calculate hash code of SegmentInfos + + hash code as in java version of ArrayList + + + version number when this SegmentInfos was generated. + + + Advanced: Gets or sets how many times to try loading the + segments.gen file contents to determine current segment + generation. This file is only referenced when the + primary method (listing the directory) fails. + + + + Advanced: set how many times to try incrementing the + gen when loading the segments file. This only runs if + the primary (listing directory) and secondary (opening + segments.gen file) methods fail to find the segments + file. + + + + + + + + Utility class for executing code that needs to do + something with the current segments file. This is + necessary with lock-less commits because from the time + you locate the current segments file name, until you + actually open it, read its contents, or check modified + time, etc., it could have been deleted due to a writer + commit finishing. + + + + Subclass must implement this. The assumption is an + IOException will be thrown if something goes wrong + during the processing that could have been caused by + a writer committing. + + + + Constants describing field properties, for example used for + . + + + + All fields + + + All indexed fields + + + All fields that store payloads + + + All fields that omit tf + + + All fields which are not indexed + + + All fields which are indexed with termvectors enabled + + + All fields which are indexed but don't have termvectors enabled + + + All fields with termvectors enabled. Please note that only standard termvector fields are returned + + + All fields with termvectors with position values enabled + + + All fields with termvectors with offset values enabled + + + All fields with termvectors with offset values and position values enabled + + + Construct reading the named set of readers. + + + This constructor is only used for + + + Checks is the index is optimized (if it has a single segment and no deletions) + &lt;c&gt;true&lt;/c&gt; if the index is optimized; &lt;c&gt;false&lt;/c&gt; otherwise + + + Tries to acquire the WriteLock on this directory. this method is only valid if this IndexReader is directory + owner. + + + StaleReaderException if the index has changed since this reader was opened + CorruptIndexException if the index is corrupt + Lucene.Net.Store.LockObtainFailedException + if another writer has this index open (write.lock could not be + obtained) + + IOException if there is a low-level IO error + + + Commit changes resulting from delete, undeleteAll, or setNorm operations +

+ If an exception is hit, then either no changes or all changes will have been committed to the index (transactional + semantics). + +

+ IOException if there is a low-level IO error +
+ + Returns the directory this index resides in. + + + + + + + Version number when this IndexReader was opened. + + + Expert: return the IndexCommit that this reader has opened. +

+

WARNING: this API is new and experimental and may suddenly change.

+

+
+ +

Expert: represents a single commit into an index as seen by the + or .

+ +

Changes to the content of an index are made visible + only after the writer who made that change commits by + writing a new segments file + (segments_N). This point in time, when the + act of writing a new segments file to the directory + is completed, is an index commit.

+ +

Each index commit point has a unique segments file + associated with it. The segments file associated with a + later index commit point would have a larger N.

+ +

WARNING: This API is new and experimental and + may suddenly change.

+

+
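For example, the commit points currently present in a directory can be enumerated and inspected roughly like this (a sketch; ListCommits is the static helper referred to above, and the member used to read the segments file name is an assumption to verify against this version):

    foreach (IndexCommit commit in IndexReader.ListCommits(dir))
    {
        // Each commit corresponds to one segments_N file (member name assumed).
        System.Console.WriteLine(commit.SegmentsFileName);
    }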
+ + Delete this commit point. This only applies when using + the commit point in the context of IndexWriter's + IndexDeletionPolicy. +

+ Upon calling this, the writer is notified that this commit + point should be deleted. +

The decision that a commit point should be deleted is taken by the in effect, + and therefore this should only be called by its or + methods.

+
+ + Two IndexCommits are equal if both their Directory and versions are equal. + + + Get the segments file (segments_N) associated + with this commit point. + + + + Returns all index files referenced by this commit point. + + + Returns the for the index. + + + Returns true if this commit is an optimized index. + + + Returns the version for this IndexCommit. This is the + same value that would + return if it were opened on this commit. + + + + Returns the generation (the _N in segments_N) for this + IndexCommit + + + + Convenience method that returns the last modified time + of the segments_N file corresponding to this index + commit, equivalent to + getDirectory().fileModified(getSegmentsFileName()). + + + + Returns userData, previously passed to + + for this commit. IDictionary is String -> String. + + + + Abstract class for enumerating terms. +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + Increments the enumeration to the next element. True if one exists. + + + Returns the docFreq of the current Term in the enumeration. + + + Closes the enumeration to further activity, freeing resources. + + + Closes the enumeration to further activity, freeing resources. + + + Returns the current Term in the enumeration. + + + Optimized implementation. + + + TermPositions provides an interface for enumerating the <document, + frequency, <position>* > tuples for a term.

The document and + frequency are the same as for a TermDocs. The positions portion lists the ordinal + positions of each occurrence of a term in a document. + +

+ + +
+ + Returns next position in the current document. It is an error to call + this more than times + without calling

This is + invalid until is called for + the first time. +

+
+ + Returns the payload data at the current term position. + This is invalid until is called for + the first time. + This method must not be called more than once after each call + of . However, payloads are loaded lazily, + so if the payload data for the current position is not needed, + this method may not be called at all for performance reasons.
+ +
+ the array into which the data of this payload is to be + stored, if it is big enough; otherwise, a new byte[] array + is allocated for this purpose. + + the offset in the array into which the data of this payload + is to be stored. + + a byte[] array containing the data of this payload + + IOException +
+ + Returns the length of the payload at the current term position. + This is invalid until is called for + the first time.
+
+ length of the current payload in number of bytes +
+ + Checks if a payload can be loaded at this position. +

+ Payloads can only be loaded once per call to + . + +

+ true if there is a payload available at this position that can be loaded +
+ + Process the document. If there is + something for this document to be done in docID order, + you should encapsulate that as a + DocumentsWriter.DocWriter and return it. + DocumentsWriter then calls finish() on this object + when it's its turn. + + + + Called when DocumentsWriter decides to create a new + segment + + + + Called when DocumentsWriter decides to close the doc + stores + + + + Called when an aborting exception is hit + + + Add a new thread + + + Called when DocumentsWriter is using too much RAM. + The consumer should free RAM, if possible, returning + true if any RAM was in fact freed. + + + + Processes all occurrences of a single field + + + This is just a "splitter" class: it lets you wrap two + DocFieldConsumer instances as a single consumer. + + + + Consumer returns this on each doc. This holds any + state that must be flushed synchronized "in docID + order". We gather these and flush them in order. + + + + This class accepts multiple added documents and directly + writes a single segment file. It does this more + efficiently than creating a single segment per document + (with DocumentWriter) and doing standard merges on those + segments. + + Each added document is passed to the , + which in turn processes the document and interacts with + other consumers in the indexing chain. Certain + consumers, like and + , digest a document and + immediately write bytes to the "doc store" files (ie, + they do not consume RAM per document, except while they + are processing the document). + + Other consumers, eg and + , buffer bytes in RAM and flush only + when a new segment is produced. + Once we have used our allowed RAM buffer, or the number + of added docs is large enough (in the case we are + flushing by doc count instead of RAM usage), we create a + real segment and flush it to the Directory. + + Threads: + + Multiple threads are allowed into addDocument at once. + There is an initial synchronized call to getThreadState + which allocates a ThreadState for this thread. The same + thread will get the same ThreadState over time (thread + affinity) so that if there are consistent patterns (for + example each thread is indexing a different content + source) then we make better use of RAM. Then + processDocument is called on that ThreadState without + synchronization (most of the "heavy lifting" is in this + call). Finally the synchronized "finishDocument" is + called to flush changes to the directory. + + When flush is called by IndexWriter we forcefully idle + all threads and flush only once they are all idle. This + means you can call flush with a given thread even while + other threads are actively adding/deleting documents. + + + Exceptions: + + Because this class directly updates in-memory posting + lists, and flushes stored fields and term vectors + directly to files in the directory, there are certain + limited times when an exception can corrupt this state. + For example, a disk full while flushing stored fields + leaves this file in a corrupt state. Or, an OOM + exception while appending to the in-memory posting lists + can corrupt that posting list. We call such exceptions + "aborting exceptions". In these cases we must call + abort() to discard all docs added since the last flush. + + All other exceptions ("non-aborting exceptions") can + still partially update the index structures. These + updates are consistent, but, they represent only a part + of the document seen up until the exception was hit. 
+ When this happens, we immediately mark the document as + deleted so that the document is always atomically ("all + or none") added to the index. + + + + Returns true if any of the fields in the current + buffered docs have omitTermFreqAndPositions==false + + + + If non-null, various details of indexing are printed + here. + + + + Set how much RAM we can use before flushing. + + + Closes the current open doc stores an returns the doc + store segment name. This returns null if there are * + no buffered documents. + + + + Called if we hit an exception at a bad time (when + updating the index files) and must discard all + currently buffered docs. This resets our state, + discarding any docs added since last flush. + + + + Reset after a flush + + + Flush all pending docs to a new segment + + + Build compound file for the segment we just flushed + + + Set flushPending if it is not already set and returns + whether it was set. This is used by IndexWriter to + trigger a single flush even when multiple threads are + trying to do so. + + + + Returns a free (idle) ThreadState that may be used for + indexing this one document. This call also pauses if a + flush is pending. If delTerm is non-null then we + buffer this deleted term after the thread state has + been acquired. + + + + Returns true if the caller (IndexWriter) should now + flush. + + + + Called whenever a merge has completed and the merged segments had deletions + + + Does the synchronized work to finish/flush the + inverted document. + + + + Gets or sets max buffered docs, which means we will flush by + doc count instead of by RAM usage. + + + + Get current segment name we are writing. + + + Returns how many docs are currently buffered in RAM. + + + Returns the current doc store segment we are writing + to. + + + + Returns the doc offset into the shared doc store for + the current buffered docs. + + + + The IndexingChain must define the method + which returns the DocConsumer that the DocumentsWriter calls to process the + documents. + + + + Consumer returns this on each doc. This holds any + state that must be flushed synchronized "in docID + order". We gather these and flush them in order. + + + + Expert: allocate a new buffer. + Subclasses can allocate differently. + + size of allocated buffer. + + allocated buffer. + + + + This is a DocConsumer that gathers all fields under the + same name, and calls per-field consumers to process field + by field. This class doesn't doesn't do any "real" work + of its own: it just forwards the fields to a + DocFieldConsumer. + + + + Holds all per thread, per field state. + + + Gathers all Fieldables for a document under the same + name, updates FieldInfos, and calls per-field consumers + to process field by field. + + Currently, only a single thread visits the fields, + sequentially, for processing. + + + + If there are fields we've seen but did not see again + in the last run, then free them up. + + + + This is a DocFieldConsumer that inverts each field, + separately, from a Document, and accepts a + InvertedTermsConsumer to process those terms. + + + + Holds state for inverting all occurrences of a single + field in the document. This class doesn't do anything + itself; instead, it forwards the tokens produced by + analysis to its own consumer + (InvertedDocConsumerPerField). It also interacts with an + endConsumer (InvertedDocEndConsumerPerField). + + + + This is a DocFieldConsumer that inverts each field, + separately, from a Document, and accepts a + InvertedTermsConsumer to process those terms. 
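The RAM-versus-doc-count flush triggers described above are driven from the public IndexWriter API rather than from DocumentsWriter directly; a minimal sketch (the values are illustrative, and writer is an existing Lucene.Net.Index.IndexWriter):

    writer.SetRAMBufferSizeMB(32.0); // flush a segment once buffered state reaches roughly 32 MB
    writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); // rely on the RAM trigger alone (constant name assumed)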
+ + + + Used by DocumentsWriter to maintain per-thread state. + We keep a separate Posting hash and other state for each + thread and then merge postings hashes from all threads + when writing the segment. + + + + Access to the Fieldable Info file that describes document fields and whether or + not they are indexed. Each segment has a separate Fieldable Info file. Objects + of this class are thread-safe for multiple readers, but only one thread can + be adding documents at a time, with no other reader or writer threads + accessing this object. + + + + Construct a FieldInfos object using the directory and the name of the file + IndexInput + + The directory to open the IndexInput from + + The name of the file to open the IndexInput from in the Directory + + IOException + + + Returns a deep clone of this FieldInfos instance. + + + Adds field info for a Document. + + + Returns true if any fields do not omitTermFreqAndPositions + + + Add fields that are indexed. Whether they have termvectors has to be specified. + + + The names of the fields + + Whether the fields store term vectors or not + + true if positions should be stored. + + true if offsets should be stored + + + + Assumes the fields are not storing term vectors. + + + The names of the fields + + Whether the fields are indexed or not + + + + + + + Calls 5 parameter add with false for all TermVector parameters. + + + The name of the Fieldable + + true if the field is indexed + + + + + + Calls 5 parameter add with false for term vector positions and offsets. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + true if the norms for the indexed field should be omitted + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + true if the norms for the indexed field should be omitted + + true if payloads should be stored for this field + + true if term freqs should be omitted for this field + + + + Return the fieldName identified by its number. + + + + + the fieldName or an empty string when the field + with the given number doesn't exist. + + + + Return the fieldinfo object referenced by the fieldNumber. 
+ + + the FieldInfo object or null when the given fieldNumber + doesn't exist. + + + + This class tracks the number and position / offset parameters of terms + being added to the index. The information collected in this class is + also used to calculate the normalization factor for a field. + +

WARNING: This API is new and experimental, and may suddenly change.
+ + Re-initialize the state, using this boost value. + boost value to use. + + + + Get the last processed term position. + the position + + + Get total number of terms in this field. + the length + + + Get the number of terms with positionIncrement == 0. + the numOverlap + + + Get end offset of the last processed term. + the offset + + + Get boost value. This is the cumulative product of + document boost and field boost for all field instances + sharing the same field name. + + the boost + + + + + + + + + Constructs a new runtime exception with null as its + detail message. The cause is not initialized, and may subsequently be + initialized by a call to . + + + + Constructs a new runtime exception with the specified cause and a + detail message of (cause==null ? null : cause.toString()) + (which typically contains the class and detail message of + cause). +

+ This constructor is useful for runtime exceptions + that are little more than wrappers for other throwables. + +

+ the cause (which is saved for later retrieval by the + ). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.) + + 1.4 + +
+ + Constructs a new runtime exception with the specified detail message. + The cause is not initialized, and may subsequently be initialized by a + call to . + + + the detail message. The detail message is saved for + later retrieval by the method. + + + + Constructs a new runtime exception with the specified detail message and + cause.

Note that the detail message associated with + cause is not automatically incorporated in + this runtime exception's detail message. + +

+ the detail message (which is saved for later retrieval + by the method). + + the cause (which is saved for later retrieval by the + method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.) + + 1.4 + +
+ + For each Field, store a sorted collection of s +

+ This is not thread-safe. +

+
+ + The TermVectorMapper can be used to map Term Vectors into your own + structure instead of the parallel array structure used by + . +

+ It is up to the implementation to make sure it is thread-safe. + + + +

+
+ + + true if this mapper should tell Lucene to ignore positions even if they are stored + + similar to ignoringPositions + + + + Tell the mapper what to expect in regards to field, number of terms, offset and position storage. + This method will be called once before retrieving the vector for a field. + + This method will be called before . + + The field the vector is for + + The number of terms that need to be mapped + + true if the mapper should expect offset information + + true if the mapper should expect positions info + + + + Map the Term Vector information into your own structure + The term to add to the vector + + The frequency of the term in the document + + null if the offset is not specified, otherwise the offset into the field of the term + + null if the position is not specified, otherwise the position in the field of the term + + + + Passes down the index of the document whose term vector is currently being mapped, + once for each top level call to a term vector reader. +

+ Default implementation IGNORES the document number. Override if your implementation needs the document number. +

+ NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations. + +

+ index of document currently being mapped + +
+ + Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they + can be skipped over. Derived classes should set this to true if they want to ignore positions. The default + is false, meaning positions will be loaded if they are stored. + + false + + + + Same principal as , but applied to offsets. false by default. + + false + + + + A Comparator for sorting s + + + + Get the mapping between fields and terms, sorted by the comparator + + + A map between field names and <see cref="System.Collections.Generic.SortedDictionary{Object,Object}" />s per field. SortedSet entries are <see cref="TermVectorEntry" /> + + + Class responsible for access to stored document fields. +

It uses <segment>.fdt and <segment>.fdx files.

+
+ + Returns a cloned FieldsReader that shares open + IndexInputs with the original one. It is the caller's + job not to close the original FieldsReader until all + clones are called (eg, currently SegmentReader manages + this logic). + + + + AlreadyClosedException if this FieldsReader is closed + + + Closes the underlying streams, including any ones associated with a + lazy implementation of a Field. This means that the Fields values will not be accessible. + + + IOException + + + Returns the length in bytes of each raw document in a + contiguous range of length numDocs starting with + startDocID. Returns the IndexInput (the fieldStream), + already seeked to the starting point for startDocID. + + + + Skip the field. We still have to read some of the information about the field, but can skip past the actual content. + This will have the most payoff on large fields. + + + + A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is + loaded. + + + + The value of the field as a Reader, or null. If null, the String value, + binary value, or TokenStream value is used. Exactly one of StringValue(), + ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. + + + + The value of the field as a TokenStream, or null. If null, the Reader value, + String value, or binary value is used. Exactly one of StringValue(), + ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. + + + + The value of the field as a String, or null. If null, the Reader value, + binary value, or TokenStream value is used. Exactly one of StringValue(), + ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. + + + + Bulk write a contiguous series of documents. The + lengths array is the length (in bytes) of each raw + document. The stream IndexInput is the + fieldsStream from which we should bulk-copy all + bytes. + + + + A FilterIndexReader contains another IndexReader, which it + uses as its basic source of data, possibly transforming the data along the + way or providing additional functionality. The class + FilterIndexReader itself simply implements all abstract methods + of IndexReader with versions that pass all requests to the + contained index reader. Subclasses of FilterIndexReader may + further override some of these methods and may also provide additional + methods and fields. + + + +

Construct a FilterIndexReader based on the specified base reader. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the base reader.

+

Note that base reader is closed if this FilterIndexReader is closed.

+

+ specified base reader. + +
+ + + If the subclass of FilteredIndexReader modifies the + contents of the FieldCache, you must override this + method to provide a different key */ + + + + + If the subclass of FilteredIndexReader modifies the + deleted docs, you must override this method to provide + a different key */ + + + + Base class for filtering implementations. + + + Base class for filtering implementations. + + + Base class for filtering implementations. + + + NOTE: this API is experimental and will likely change + + + Adds a new doc in this term. If this returns null + then we just skip consuming positions/payloads. + + + + Called when we are done adding docs to this term + + + Consumes doc and freq, writing them using the current + index file format + + + + Adds a new doc in this term. If this returns null + then we just skip consuming positions/payloads. + + + + Called when we are done adding docs to this term + + + Abstract API that consumes terms, doc, freq, prox and + payloads postings. Concrete implementations of this + actually do "something" with the postings (write it into + the index in a specific format). + + NOTE: this API is experimental and will likely change + + + + Add a new field + + + Called when we are done adding everything. + + + Add a new field + + + Called when we are done adding everything. + + + Add a new position & payload. If payloadLength > 0 + you must read those bytes from the IndexInput. + + + + Called when we are done adding positions & payloads + + + Add a new position & payload + + + Called when we are done adding positions & payloads + + + NOTE: this API is experimental and will likely change + + + Adds a new term in this field; term ends with U+FFFF + char + + + + Called when we are done adding terms to this field + + + Adds a new term in this field + + + Called when we are done adding terms to this field + + + Used by DocumentsWriter to merge the postings from + multiple ThreadStates when creating a segment + + + + This is the base class for an in-memory posting list, + keyed by a Token. maintains a hash + table holding one instance of this per unique Token. + Consumers of TermsHash () must + subclass this class with its own concrete class. + FreqProxTermsWriter.PostingList is a private inner class used + for the freq/prox postings, and + TermVectorsTermsWriter.PostingList is a private inner class + used to hold TermVectors postings. + + + + Implement this class to plug into the TermsHash + processor, which inverts and stores Tokens into a hash + table and provides an API for writing bytes into + multiple streams for each unique Token. + + + +

Expert: policy for deletion of stale index commits. + +

Implement this interface, and pass it to one + of the or + constructors, to customize when older + point-in-time commits + are deleted from the index directory. The default deletion policy + is , which always + removes old commits as soon as a new commit is done (this + matches the behavior before 2.2).

+ +

One expected use case for this (and the reason why it + was first created) is to work around problems with an + index directory accessed via filesystems like NFS because + NFS does not provide the "delete on last close" semantics + that Lucene's "point in time" search normally relies on. + By implementing a custom deletion policy, such as "a + commit is only removed once it has been stale for more + than X minutes", you can give your readers time to + refresh to the new commit before + removes the old commits. Note that doing so will + increase the storage requirements of the index. See LUCENE-710 + for details.

+

+
+ +

This is called once when a writer is first + instantiated to give the policy a chance to remove old + commit points.

+ +

The writer locates all index commits present in the + index directory and calls this method. The policy may + choose to delete some of the commit points, doing so by + calling method + of .

+ +

Note: the last CommitPoint is the most recent one, + i.e. the "front index state". Be careful not to delete it, + unless you know for sure what you are doing, and unless + you can afford to lose the index content while doing that. + +

+ List of current + point-in-time commits, + sorted by age (the 0th one is the oldest commit). + +
+ + +

This is called each time the writer completes a commit. This gives the policy a chance to remove old commit points with each commit.

+ +

The policy may now choose to delete old commit points + by calling method + of .

+ +

This method is only called when + or is called, or possibly not at + all if the is called.

+ +

Note: the last CommitPoint is the most recent one, + i.e. the "front index state". Be careful not to delete it, + unless you know for sure what you are doing, and unless + you can afford to lose the index content while doing that.

+
+ + List of , sorted by age (the 0th one is the oldest commit). + +
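For illustration, a minimal sketch of a custom policy that keeps the two most recent commits instead of only the last one. It assumes the generic OnInit<T>/OnCommit<T> signatures used by Lucene.Net 3.0.3; older ports declare these with a plain IList<IndexCommit> parameter, so adjust the signatures to match your version.

    using System.Collections.Generic;
    using Lucene.Net.Index;

    // Example policy (hypothetical name): keep the newest two commits, delete anything older.
    public class KeepLastTwoCommitsDeletionPolicy : IndexDeletionPolicy
    {
        // Called once when the writer is first opened; commits are sorted oldest-first.
        public void OnInit<T>(IList<T> commits) where T : IndexCommit
        {
            Prune(commits);
        }

        // Called after each commit; prune again so only the last two commits survive.
        public void OnCommit<T>(IList<T> commits) where T : IndexCommit
        {
            Prune(commits);
        }

        private static void Prune<T>(IList<T> commits) where T : IndexCommit
        {
            // The last entry is the "front index state" and must never be deleted.
            for (int i = 0; i < commits.Count - 2; i++)
            {
                commits[i].Delete();
            }
        }
    }

Such a policy is passed to one of the IndexWriter constructors that accept an IndexDeletionPolicy, described further below.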
+ + + This class keeps track of each SegmentInfos instance that + is still "live", either because it corresponds to a + segments_N file in the Directory (a "commit", i.e. a + committed SegmentInfos) or because it's an in-memory + SegmentInfos that a writer is actively updating but has + not yet committed. This class uses simple reference + counting to map the live SegmentInfos instances to + individual files in the Directory. + + The same directory file may be referenced by more than + one IndexCommit, i.e. more than one SegmentInfos. + Therefore we count how many commits reference each file. + When all the commits referencing a certain file have been + deleted, the refcount for that file becomes zero, and the + file is deleted. + + A separate deletion policy interface + (IndexDeletionPolicy) is consulted on creation (onInit) + and once per commit (onCommit), to decide when a commit + should be removed. + + It is the business of the IndexDeletionPolicy to choose + when to delete commit points. The actual mechanics of + file deletion, retrying, etc, derived from the deletion + of commit points is the business of the IndexFileDeleter. + + The current default deletion policy is + , which removes all + prior commits when a new commit has completed. This + matches the behavior before 2.2. + + Note that you must hold the write.lock before + instantiating this class. It opens segments_N file(s) + directly with no retry logic. + + + + because they are open and we are running on Windows), + so we will retry them again later: //// + + + Counts how many existing commits reference a file. + Maps String to RefCount (class below) instances: //// + + + This will have just 1 commit if you are using the + default delete policy (KeepOnlyLastCommitDeletionPolicy). + Other policies may leave commit points live for longer + in which case this list would be longer than 1: //// + + + non-commit checkpoint: //// + + + Change to true to see details of reference counts when + infoStream != null + + + + Initialize the deleter: find all previous commits in + the Directory, incref the files they reference, call + the policy to let it delete commits. This will remove + any files not referenced by any of the commits. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Remove the CommitPoints in the commitsToDelete List by + DecRef'ing all files from each SegmentInfos. + + + + Writer calls this when it has hit an error and had to + roll back, to tell us that there may now be + unreferenced files in the filesystem. So we re-list + the filesystem and delete such files. If segmentName + is non-null, we will only delete files corresponding to + that segment. + + + + For definition of "check point" see IndexWriter comments: + "Clarification: Check Points (and commits)". + + Writer calls this when it has made a "consistent + change" to the index, meaning new files are written to + the index and the in-memory SegmentInfos have been + modified to point to those files. + + This may or may not be a commit (segments_N may or may + not have been written). + + We simply incref the files referenced by the new + SegmentInfos and decref the files we had previously + seen (if any). + + If this is a commit, we also call the policy to give it + a chance to remove other commits. If any commits are + removed, we decref their files as well. + + + + Deletes the specified files, but only if they are new + (have not yet been incref'd). 
+ + + + Tracks the reference count for a single index file: + + + Holds details for each commit point. This class is + also passed to the deletion policy. Note: this class + has a natural ordering that is inconsistent with + equals. + + + + Called only be the deletion policy, to remove this + commit point from the index. + + + + Filename filter that accept filenames and extensions only created by Lucene. + + + Returns true if this is a file that would be contained + in a CFS file. This function should only be called on + files that pass the above "accept" (ie, are already + known to be a Lucene index file). + + + + Useful constants representing filenames and extensions used by lucene + + + Name of the index segment file + + + Name of the generation reference file name + + + Name of the index deletable file (only used in + pre-lockless indices) + + + + Extension of norms file + + + Extension of freq postings file + + + Extension of prox postings file + + + Extension of terms file + + + Extension of terms index file + + + Extension of stored fields index file + + + Extension of stored fields file + + + Extension of vectors fields file + + + Extension of vectors documents file + + + Extension of vectors index file + + + Extension of compound file + + + Extension of compound file for doc store files + + + Extension of deletes + + + Extension of field infos + + + Extension of plain norms + + + Extension of separate norms + + + Extension of gen file + + + This array contains all filename extensions used by + Lucene's index files, with two exceptions, namely the + extension made up from .f + a number and + from .s + a number. Also note that + Lucene's segments_N files do not have any + filename extension. + + + + File extensions that are added to a compound file + (same as above, minus "del", "gen", "cfs"). + + + + File extensions of old-style index files + + + File extensions for term vector support + + + Computes the full file name from base, extension and + generation. If the generation is -1, the file name is + null. If it's 0, the file name is + If it's > 0, the file name is + + + -- main part of the file name + + -- extension of the filename (including .) + + -- generation + + + + Returns true if the provided filename is one of the doc + store files (ends with an extension in + STORE_INDEX_EXTENSIONS). + + + + An IndexWriter creates and maintains an index. +

The create argument to the + constructor determines + whether a new index is created, or whether an existing index is + opened. Note that you can open an index with create=true + even while readers are using the index. The old readers will + continue to search the "point in time" snapshot they had opened, + and won't see the newly created index until they re-open. There are + also constructors + with no create argument which will create a new index + if there is not already an index at the provided path and otherwise + open the existing index.

+

In either case, documents are added with + and removed with or + . A document can be updated with + (which just deletes + and then adds the entire document). When finished adding, deleting + and updating documents, should be called.

+ +

These changes are buffered in memory and periodically + flushed to the (during the above method + calls). A flush is triggered when there are enough + buffered deletes (see ) + or enough added documents since the last flush, whichever + is sooner. For the added documents, flushing is triggered + either by RAM usage of the documents (see + ) or the number of added documents. + The default is to flush when RAM usage hits 16 MB. For + best indexing speed you should flush by RAM usage with a + large RAM buffer. Note that flushing just moves the + internal buffered state in IndexWriter into the index, but + these changes are not visible to IndexReader until either + or is called. A flush may + also trigger one or more segment merges which by default + run with a background thread so as not to block the + addDocument calls (see below + for changing the ). +

+ If an index will not have more documents added for a while and optimal search + performance is desired, then either the full + method or partial method should be + called before the index is closed. +

+ Opening an IndexWriter creates a lock file for the directory in use. Trying to open + another IndexWriter on the same directory will lead to a + . The + is also thrown if an IndexReader on the same directory is used to delete documents + from the index.

+

+ +

Expert: IndexWriter allows an optional + implementation to be + specified. You can use this to control when prior commits + are deleted from the index. The default policy is + which removes all prior + commits as soon as a new commit is done (this matches + behavior before 2.2). Creating your own policy can allow + you to explicitly keep previous "point in time" commits + alive in the index for some time, to allow readers to + refresh to the new commit without having the old commit + deleted out from under them. This is necessary on + filesystems like NFS that do not support "delete on last + close" semantics, which Lucene's "point in time" search + normally relies on.

+

Expert: + IndexWriter allows you to separately change + the and the . + The is invoked whenever there are + changes to the segments in the index. Its role is to + select which merges to do, if any, and return a + describing the merges. It + also selects merges to do for optimize(). (The default is + . Then, the + is invoked with the requested merges and + it decides when and how to run the merges. The default is + .

+

NOTE: if you hit an + OutOfMemoryError then IndexWriter will quietly record this + fact and block all future segment commits. This is a + defensive measure in case any internal state (buffered + documents and deletions) were corrupted. Any subsequent + calls to will throw an + IllegalStateException. The only course of action is to + call , which internally will call + , to undo any changes to the index since the + last commit. You can also just call + directly.

+

NOTE: + instances are completely thread + safe, meaning multiple threads can call any of its + methods, concurrently. If your application requires + external synchronization, you should not + synchronize on the IndexWriter instance as + this may cause deadlock; use your own (non-Lucene) objects + instead.

+ NOTE: if you call + Thread.Interrupt() on a thread that's within + IndexWriter, IndexWriter will try to catch this (eg, if + it's in a Wait() or Thread.Sleep()), and will then throw + the unchecked exception + and clear the interrupt status on the thread

+

+
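A minimal usage sketch tying the above together, assuming the Lucene.Net 3.0 API; the index path, analyzer choice and field names are placeholder assumptions:

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Store;
    using Version = Lucene.Net.Util.Version;

    class IndexWriterExample
    {
        static void Main()
        {
            var dir = FSDirectory.Open(new DirectoryInfo("example-index")); // placeholder path
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

            // create=true replaces any existing index; already-open readers keep their snapshot.
            var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                var doc = new Document();
                doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("body", "hello lucene", Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(doc);
                writer.Commit(); // make the change visible to newly opened readers
            }
            finally
            {
                writer.Close(); // flushes pending changes and releases the write lock
            }
        }
    }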
+ + Name of the write lock in the index. + + + Value to denote a flush trigger is disabled + + + Default value is 16 MB (which means flush when buffered + docs consume 16 MB RAM). Change using . + + + + Default value is 10,000. Change using . + + + Default value is 128. Change using . + + + Default value for the write lock timeout (1,000). + + + + + Disabled by default (because IndexWriter flushes by RAM usage + by default). Change using . + + + + Disabled by default (because IndexWriter flushes by RAM usage + by default). Change using . + + + + Absolute hard maximum length for a term. If a term + arrives from the analyzer longer than this length, it + is skipped and a message is printed to infoStream, if + set (see ). + + + + Expert: returns a readonly reader, covering all committed as well as + un-committed changes to the index. This provides "near real-time" + searching, in that changes made during an IndexWriter session can be + quickly made available for searching without closing the writer nor + calling . + +

Note that this is functionally equivalent to calling Commit() and then opening a new reader. But the turnaround time of this method should be faster since it avoids the potentially costly Commit().

+ + You must close the returned by this method once you are done using it. + +

It's near real-time because there is no hard guarantee on how quickly you can get a new reader after making changes with IndexWriter. You'll have to experiment in your situation to determine if it's fast enough. As this is a new and experimental feature, please report back on your findings so we can learn, improve and iterate.

+ +

The resulting reader supports Reopen, but that call will simply forward back to this method (though this may change in the future).

+ +

The very first time this method is called, this + writer instance will make every effort to pool the + readers that it opens for doing merges, applying + deletes, etc. This means additional resources (RAM, + file descriptors, CPU time) will be consumed.

+ +

For lower latency on reopening a reader, you should use the writer's merged segment warmer to pre-warm a newly merged segment before it's committed to the index. This is important for minimizing index-to-search delay after a large merge.

If an addIndexes* call is running in another thread, then this reader will only search those segments from the foreign index that have been successfully copied over, so far.

NOTE: Once the writer is closed, any + outstanding readers may continue to be used. However, + if you attempt to reopen any of those readers, you'll + hit an .

+ +

NOTE: This API is experimental and might + change in incompatible ways in the next release.

+ +

+ IndexReader that covers entire index plus all + changes made so far by this IndexWriter instance + + + IOException +
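A short sketch of the near-real-time pattern described above, assuming the GetReader()/Reopen() API and an already-open writer:

    // writer is an open IndexWriter (see the construction sketch above).
    IndexReader reader = writer.GetReader();   // sees buffered, uncommitted changes
    // ... add, update or delete documents with the writer ...
    IndexReader newReader = reader.Reopen();   // simply forwards back to writer.GetReader()
    if (newReader != reader)
    {
        reader.Close();                        // the caller must close readers it obtained
        reader = newReader;
    }
    var searcher = new Lucene.Net.Search.IndexSearcher(reader);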
+ + Expert: like , except you can + specify which termInfosIndexDivisor should be used for + any newly opened readers. + + Subsambles which indexed + terms are loaded into RAM. This has the same effect as + except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1. Set this + to -1 to skip loading the terms index entirely. + + + + Obtain the number of deleted docs for a pooled reader. + If the reader isn't being pooled, the segmentInfo's + delCount is returned. + + + + Used internally to throw an + if this IndexWriter has been + closed. + + AlreadyClosedException if this IndexWriter is + + + Prints a message to the infoStream (if non-null), + prefixed with the identifying information for this + writer and the thread that's calling it. + + + + Expert: Set the Similarity implementation used by this IndexWriter. + + + + Constructs an IndexWriter for the index in d. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + d, replacing the index already there, if any. + + + the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + + + Constructs an IndexWriter for the index in + d, first creating it if it does not + already exist. + + + the index directory + + the analyzer to use + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + + + Expert: constructs an IndexWriter with a custom + , for the index in d, + first creating it if it does not already exist. Text + will be analyzed with a. + + + the index directory + + the analyzer to use + + see above + + whether or not to limit field lengths + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + + + Expert: constructs an IndexWriter with a custom + , for the index in d. + Text will be analyzed with a. If + create is true, then a new, empty index + will be created in d, replacing the index + already there, if any. + + + the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + see above + + , whether or not to limit field lengths. 
Value is in number of terms/tokens + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + + + Expert: constructs an IndexWriter with a custom + and , + for the index in d. + Text will be analyzed with a. If + create is true, then a new, empty index + will be created in d, replacing the index + already there, if any. + + + the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + see above + + whether or not to limit field lengths, value is in number of terms/tokens. See . + + the chain to be used to + process documents + + which commit to open + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + + + Expert: constructs an IndexWriter on specific commit + point, with a custom , for + the index in d. Text will be analyzed + with a. + +

This is only meaningful if you've used a + in that past that keeps more than + just the last commit. + +

This operation is similar to , + except that method can only rollback what's been done + with the current instance of IndexWriter since its last + commit, whereas this method can rollback to an + arbitrary commit point from the past, assuming the + has preserved past + commits. + +

+ the index directory + + the analyzer to use + + see above + + whether or not to limit field lengths, value is in number of terms/tokens. See . + + which commit to open + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + +
+ + Expert: set the merge policy used by this writer. + + + Expert: set the merge scheduler used by this writer. + + + The maximum number of terms that will be indexed for a single field in a + document. This limits the amount of memory required for indexing, so that + collections with very large files will not crash the indexing process by + running out of memory. This setting refers to the number of running terms, + not to the number of different terms.

Note: this silently truncates large documents, excluding from the index all terms that occur further in the document. If you know your source documents are large, be sure to set this value high enough to accommodate the expected size. If you set it to Integer.MAX_VALUE, then the only limit is your memory, but you should anticipate an OutOfMemoryError.

+ By default, no more than terms + will be indexed for a field. +

+
+ + Returns the maximum number of terms that will be + indexed for a single field in a document. + + + + + + Determines the minimal number of documents required + before the buffered in-memory documents are flushed as + a new Segment. Large values generally gives faster + indexing. + +

When this is set, the writer will flush every + maxBufferedDocs added documents. Pass in + to prevent triggering a flush due + to number of buffered documents. Note that if flushing + by RAM usage is also enabled, then the flush will be + triggered by whichever comes first.

+ +

Disabled by default (writer flushes by RAM usage).

+ +

+ IllegalArgumentException if maxBufferedDocs is + enabled but smaller than 2, or it disables maxBufferedDocs + when ramBufferSize is already disabled + + + +
+ + If we are flushing by doc count (not by RAM usage), and + using LogDocMergePolicy then push maxBufferedDocs down + as its minMergeDocs, to keep backwards compatibility. + + + + Returns the number of buffered added documents that will + trigger a flush if enabled. + + + + + + Determines the amount of RAM that may be used for + buffering added documents and deletions before they are + flushed to the Directory. Generally for faster + indexing performance it's best to flush by RAM usage + instead of document count and use as large a RAM buffer + as you can. + +

When this is set, the writer will flush whenever + buffered documents and deletions use this much RAM. + Pass in to prevent + triggering a flush due to RAM usage. Note that if + flushing by document count is also enabled, then the + flush will be triggered by whichever comes first.

+ +

NOTE: the accounting of RAM usage for pending deletions is only approximate. Specifically, if you delete by Query, Lucene currently has no way to measure the RAM usage of individual Queries, so the accounting will under-estimate and you should compensate by either calling Commit() periodically yourself, or by using the max buffered delete terms setting to flush by count instead of RAM usage (each buffered delete Query counts as one).

+ NOTE: because IndexWriter uses ints when managing its + internal storage, the absolute maximum value for this setting is somewhat + less than 2048 MB. The precise limit depends on various factors, such as + how large your documents are, how many fields have norms, etc., so it's + best to set this value comfortably under 2048. +

+ +

The default value is .

+ +

+ IllegalArgumentException if ramBufferSize is + enabled but non-positive, or it disables ramBufferSize + when maxBufferedDocs is already disabled + +
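A small sketch of the flush triggers discussed above, assuming the Lucene.Net 3.0 setter methods (later ports expose some of these as properties):

    // Prefer flushing by RAM usage for indexing throughput (default is 16 MB).
    writer.SetRAMBufferSizeMB(64.0);
    // Disable the document-count trigger so RAM usage alone decides when to flush.
    writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);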
+ + Returns the value set by if enabled. + + +

Determines the minimal number of delete terms required before the buffered + in-memory delete terms are applied and flushed. If there are documents + buffered in memory at the time, they are merged and a new segment is + created.

+

Disabled by default (writer flushes by RAM usage).

+ +

+ IllegalArgumentException if maxBufferedDeleteTerms + is enabled but smaller than 1 + + + +
+ + Returns the number of buffered deleted terms that will + trigger a flush if enabled. + + + + + + If non-null, information about merges, deletes and a + message when maxFieldLength is reached will be printed + to this. + + + + Commits all changes to an index and closes all + associated files. Note that this may be a costly + operation, so, try to re-use a single writer instead of + closing and opening a new one. See for + caveats about write caching done by some IO devices. + +

If an Exception is hit during close, eg due to disk + full or some other reason, then both the on-disk index + and the internal state of the IndexWriter instance will + be consistent. However, the close will not be complete + even though part of it (flushing buffered documents) + may have succeeded, so the write lock will still be + held.

+ +

If you can correct the underlying cause (eg free up + some disk space) then you can call close() again. + Failing that, if you want to force the write lock to be + released (dangerous, because you may then lose buffered + docs in the IndexWriter instance) then you can do + something like this:

+ + + try { + writer.close(); + } finally { + if (IndexWriter.isLocked(directory)) { + IndexWriter.unlock(directory); + } + } + + + after which, you must be certain not to use the writer + instance anymore.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Commits all changes to an index and closes all + associated files. Note that this may be a costly + operation, so, try to re-use a single writer instead of + closing and opening a new one. See for + caveats about write caching done by some IO devices. + +

If an Exception is hit during close, eg due to disk + full or some other reason, then both the on-disk index + and the internal state of the IndexWriter instance will + be consistent. However, the close will not be complete + even though part of it (flushing buffered documents) + may have succeeded, so the write lock will still be + held.

+ +

If you can correct the underlying cause (eg free up + some disk space) then you can call close() again. + Failing that, if you want to force the write lock to be + released (dangerous, because you may then lose buffered + docs in the IndexWriter instance) then you can do + something like this:

+ + + try { + writer.close(); + } finally { + if (IndexWriter.isLocked(directory)) { + IndexWriter.unlock(directory); + } + } + + + after which, you must be certain not to use the writer + instance anymore.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Closes the index with or without waiting for currently + running merges to finish. This is only meaningful when + using a MergeScheduler that runs merges in background + threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

NOTE: it is dangerous to always call + close(false), especially when IndexWriter is not open + for very long, because this can result in "merge + starvation" whereby long merges will never have a + chance to finish. This will cause too many segments in + your index over time.

+ +

+ if true, this call will block + until all merges complete; else, it will ask all + running merges to abort, wait until those merges have + finished (which should be at most a few seconds), and + then return. + +
+ + Closes the index with or without waiting for currently + running merges to finish. This is only meaningful when + using a MergeScheduler that runs merges in background + threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

NOTE: it is dangerous to always call + close(false), especially when IndexWriter is not open + for very long, because this can result in "merge + starvation" whereby long merges will never have a + chance to finish. This will cause too many segments in + your index over time.

+ +

+ if true, this call will block + until all merges complete; else, it will ask all + running merges to abort, wait until those merges have + finished (which should be at most a few seconds), and + then return. + +
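For example, assuming the overload described above:

    // Ask running background merges to abort instead of waiting for them to finish.
    // Use sparingly: always passing false risks the "merge starvation" noted above.
    writer.Close(false);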
Tells the docWriter to close its currently open shared doc stores (stored fields & vectors files). Return value specifies whether new doc store files are compound or not.

Returns total number of docs in this index, including docs not yet flushed (still in the RAM buffer), not counting deletions.

Returns total number of docs in this index, including docs not yet flushed (still in the RAM buffer), and including deletions. NOTE: buffered deletions are not counted. If you really need these to be counted you should call Commit() first.

The maximum number of terms that will be indexed for a single field in a document. This limits the amount of memory required for indexing, so that collections with very large files will not crash the indexing process by running out of memory.

Note that this effectively truncates large documents, excluding from the index terms that occur further in the document. If you know your source documents are large, be sure to set this value high enough to accommodate the expected size. If you set it to Integer.MAX_VALUE, then the only limit is your memory, but you should anticipate an OutOfMemoryError.

By default, no more than 10,000 terms will be indexed for a field.
+ + Adds a document to this index. If the document contains more than + terms for a given field, the remainder are + discarded. + +

Note that if an Exception is hit (for example disk full) + then the index will be consistent, but this document + may not have been added. Furthermore, it's possible + the index will have one segment in non-compound format + even when using compound files (when a merge has + partially succeeded).

+ +

This method periodically flushes pending documents + to the Directory (see above), and + also periodically triggers segment merges in the index + according to the in use.

+ +

Merges temporarily consume space in the + directory. The amount of space required is up to 1X the + size of all segments being merged, when no + readers/searchers are open against the index, and up to + 2X the size of all segments being merged when + readers/searchers are open against the index (see + for details). The sequence of + primitive merge operations performed is governed by the + merge policy. + +

Note that each term in the document can be no longer + than 16383 characters, otherwise an + IllegalArgumentException will be thrown.

+ +

Note that it's possible to create an invalid Unicode + string in java if a UTF16 surrogate pair is malformed. + In this case, the invalid characters are silently + replaced with the Unicode replacement character + U+FFFD.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Adds a document to this index, using the provided analyzer instead of the + value of . If the document contains more than + terms for a given field, the remainder are + discarded. + +

See for details on + index and IndexWriter state after an Exception, and + flushing/merging temporary free space requirements.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) containing term. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the documents to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) containing any of the + terms. All deletes are flushed at the same time. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ array of terms to identify the documents + to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) matching the provided query. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the query to identify the documents to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) matching any of the provided queries. + All deletes are flushed at the same time. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ array of queries to identify the documents + to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Updates a document by first deleting the document(s) + containing term and then adding the new + document. The delete and then add are atomic as seen + by a reader on the same index (flush may happen only after + the add). + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the document(s) to be + deleted + + the document to be added + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Updates a document by first deleting the document(s) + containing term and then adding the new + document. The delete and then add are atomic as seen + by a reader on the same index (flush may happen only after + the add). + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the document(s) to be + deleted + + the document to be added + + the analyzer to use when analyzing the document + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
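A concrete sketch of the delete and update calls above; the "id" field and its value are placeholder assumptions:

    // Delete every document whose "id" field equals "42".
    writer.DeleteDocuments(new Term("id", "42"));

    // Atomically replace the document(s) matching the term with a new document.
    var updated = new Document();
    updated.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
    updated.Add(new Field("body", "revised text", Field.Store.NO, Field.Index.ANALYZED));
    writer.UpdateDocument(new Term("id", "42"), updated);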
If non-null, information about merges will be printed to this.

Requests an "optimize" operation on an index, priming the index for the fastest available search. Traditionally this has meant merging all segments into a single segment as is done in the default merge policy, but individual merge policies may implement optimize in different ways.

It is recommended that this method be called upon completion of indexing. In + environments with frequent updates, optimize is best done during low volume times, if at all. + +

+

See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

+ +

Note that optimize requires 2X the index size free space in your Directory (3X if you're using compound file format). For example, if your index size is 10 MB then you need 20 MB free for optimize to complete (30 MB if you're using compound file format).

+ +

If some but not all readers re-open while an + optimize is underway, this will cause > 2X temporary + space to be consumed as those new readers will then + hold open the partially optimized segments at that + time. It is best not to re-open readers while optimize + is running.

+ +

The actual temporary usage could be much less than + these figures (it depends on many factors).

+ +

In general, once the optimize completes, the total size of the + index will be less than the size of the starting index. + It could be quite a bit smaller (if there were many + pending deletes) or just slightly smaller.

+ +

If an Exception is hit during optimize(), for example + due to disk full, the index will not be corrupt and no + documents will have been lost. However, it may have + been partially optimized (some segments were merged but + not all), and it's possible that one of the segments in + the index will be in non-compound format even when + using compound file format. This will occur when the + Exception is hit during conversion of the segment into + compound format.

+ +

This call will optimize those segments present in + the index when the call started. If other threads are + still adding documents and flushing segments, those + newly created segments will not be optimized unless you + call optimize again.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + +
+ + Optimize the index down to <= maxNumSegments. If + maxNumSegments==1 then this is the same as + . + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ maximum number of segments left + in the index after optimization finishes + +
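A brief sketch of the two optimize variants described above:

    // Full optimize: merge down to a single segment (needs roughly 2-3X free disk space).
    writer.Optimize();

    // Partial optimize: stop once the index contains at most 5 segments.
    writer.Optimize(5);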
+ + Just like , except you can specify + whether the call should block until the optimize + completes. This is only meaningful with a + that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Just like , except you can + specify whether the call should block until the + optimize completes. This is only meaningful with a + that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Returns true if any merges in pendingMerges or + runningMerges are optimization merges. + + + + Just like , except you can + specify whether the call should block until the + operation completes. This is only meaningful with a + that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
Expunges all deletes from the index. When an index has many document deletions (or updates to existing documents), it's best to either call optimize or expungeDeletes to remove all unused data in the index associated with the deleted documents. To see how many deletions you have pending in your index, check the deleted-doc count on an IndexReader. This saves disk space and memory usage while searching. expungeDeletes should be somewhat faster than optimize since it does not insist on reducing the index to a single segment (though, this depends on the merge policy in use). Note that this call does not first commit any buffered documents, so you must do so yourself if necessary.

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
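A minimal sketch of reclaiming space from deleted documents without a full optimize, per the description above; the field and value are placeholders:

    writer.DeleteDocuments(new Term("status", "obsolete"));
    writer.Commit();          // expungeDeletes does not commit buffered changes itself
    writer.ExpungeDeletes();  // merges away the segments' deleted documents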
+ + Expert: asks the mergePolicy whether any merges are + necessary now and if so, runs the requested merges and + then iterate (test again if merges are needed) until no + more merges are returned by the mergePolicy. + + Explicit calls to maybeMerge() are usually not + necessary. The most common case is when merge policy + parameters have changed. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Expert: the calls this method + to retrieve the next merge requested by the + MergePolicy + + + + Like getNextMerge() except only returns a merge if it's + external. + + + + Close the IndexWriter without committing + any changes that have occurred since the last commit + (or since it was opened, if commit hasn't been called). + This removes any temporary files that had been created, + after which the state of the index will be the same as + it was when commit() was last called or when this + writer was first opened. This also clears a previous + call to . + + IOException if there is a low-level IO error + + + Delete all documents in the index. + +

This method will drop all buffered documents and will + remove all segments from the index. This change will not be + visible until a has been called. This method + can be rolled back using .

+ +

NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).

+ +

NOTE: this method will forcefully abort all merges + in progress. If other threads are running + or any of the addIndexes methods, they + will receive s. +

+
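Two short sketches contrasting the operations above; each assumes an open writer and the Lucene.Net 3.0 method names:

    // Discard everything done since the last commit and close the writer.
    writer.Rollback();

    // Or: drop all documents but keep the writer open; not visible until Commit().
    writer.DeleteAll();
    writer.Commit();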
+ + Wait for any currently outstanding merges to finish. + +

It is guaranteed that any merges started prior to calling this method + will have completed once this method completes.

+

+
+ + Merges all segments from an array of indexes into this + index. + +

This may be used to parallelize batch indexing. A large document + collection can be broken into sub-collections. Each sub-collection can be + indexed in parallel, on a different thread, process or machine. The + complete index can then be created by merging sub-collection indexes + with this method. + +

NOTE: the index in each Directory must not be + changed (opened by a writer) while this method is + running. This method does not acquire a write lock in + each input Directory, so it is up to the caller to + enforce this. + +

NOTE: while this is running, any attempts to + add or delete documents (with another thread) will be + paused until this method completes. + +

This method is transactional in how Exceptions are + handled: it does not commit a new segments_N file until + all indexes are added. This means if an Exception + occurs (for example disk full), then either no indexes + will have been added or they all will have been.

+ +

Note that this requires temporary free space in the + Directory up to 2X the sum of all input indexes + (including the starting index). If readers/searchers + are open against the starting index, then temporary + free space required will be higher by the size of the + starting index (see for details). +

+ +

Once this completes, the final size of the index + will be less than the sum of all input index sizes + (including the starting index). It could be quite a + bit smaller (if there were many pending deletes) or + just slightly smaller.

+ +

+ This requires this index not be among those to be added. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Merges the provided indexes into this index. +

After this completes, the index is optimized.

+

The provided IndexReaders are not closed.

+ +

NOTE: while this is running, any attempts to + add or delete documents (with another thread) will be + paused until this method completes. + +

See for + details on transactional semantics, temporary free + space required in the Directory, and non-CFS segments + on an Exception.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + + A hook for extending classes to execute operations after pending added and + deleted documents have been flushed to the Directory but before the change + is committed (new segments_N file written). + + + + + A hook for extending classes to execute operations before pending added and + deleted documents are flushed to the Directory. + + + + Expert: prepare for commit. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ + +
+ +

Expert: prepare for commit, specifying + commitUserData Map (String -> String). This does the + first phase of 2-phase commit. This method does all steps + necessary to commit changes since this writer was + opened: flushes pending added and deleted docs, syncs + the index files, writes most of next segments_N file. + After calling this you must call either + to finish the commit, or + to revert the commit and undo all changes + done since the writer was opened.

+ + You can also just call directly + without prepareCommit first in which case that method + will internally call prepareCommit. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ Opaque Map (String->String) + that's recorded into the segments file in the index, + and retrievable by . + Note that when IndexWriter commits itself, during , the + commitUserData is unchanged (just carried over from + the prior commit). If this is null then the previous + commitUserData is kept. Also, the commitUserData will + only "stick" if there are actually changes in the + index to commit. + +
+ +

Commits all pending changes (added & deleted + documents, optimizations, segment merges, added + indexes, etc.) to the index, and syncs all referenced + index files, such that a reader will see the changes + and the index updates will survive an OS or machine + crash or power loss. Note that this does not wait for + any running background merges to finish. This may be a + costly operation, so you should test the cost in your + application and do it only when really necessary.

+ +

Note that this operation calls Directory.sync on + the index files. That call should not return until the + file contents & metadata are on stable storage. For + FSDirectory, this calls the OS's fsync. But, beware: + some hardware devices may in fact cache writes even + during fsync, and return before the bits are actually + on stable storage, to give the appearance of faster + performance. If you have such a device, and it does + not have a battery backup (for example) then on power + loss it may still lose data. Lucene cannot guarantee + consistency on such devices.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ + + + +
+ + Commits all changes to the index, specifying a + commitUserData Map (String -> String). This just + calls (if you didn't + already call it) and then . + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
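The two-phase commit protocol described here can be sketched as follows; CommitExternalResource() is hypothetical and only stands in for whatever other system the index commit must be coordinated with:

    // Phase 1: flush buffered docs, sync files, write most of the next segments_N file.
    writer.PrepareCommit();
    try
    {
        CommitExternalResource();   // hypothetical, e.g. a database transaction tied to this index
        writer.Commit();            // phase 2: make the prepared changes visible to readers
    }
    catch (Exception)
    {
        writer.Rollback();          // abandon the prepared commit and all changes since the writer opened
        throw;
    }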
+ + Flush all in-memory buffered updates (adds and deletes) + to the Directory. + + if true, we may merge segments (if + deletes or docs were flushed) if necessary + + if false we are allowed to keep + doc stores open to share with the next segment + + whether pending deletes should also + be flushed + + + + Expert: Return the total size of all index files currently cached in memory. + Useful for size management with flushRamDocs() + + + + Expert: Return the number of documents currently + buffered in RAM. + + + + Carefully merges deletes for the segments we just + merged. This is tricky because, although merging will + clear all deletes (compacts the documents), new + deletes may have been flushed to the segments since + the merge was started. This method "carries over" + such new deletes onto the newly merged segment, and + saves the resulting deletes file (incrementing the + delete generation for merge.info). If no deletes were + flushed, no new deletes file is saved. + + + + Merges the indicated segments, replacing them in the stack with a + single segment. + + + + Hook that's called when the specified merge is complete. + + + Checks whether this merge involves any segments + already participating in a merge. If not, this merge + is "registered", meaning we record that its segments + are now participating in a merge, and true is + returned. Else (the merge conflicts) false is + returned. + + + + Does initial setup for a merge, which is fast but holds + the synchronized lock on IndexWriter instance. + + + + Does finishing for a merge, which is fast but holds + the synchronized lock on IndexWriter instance. + + + + Does the actual (time-consuming) work of the merge, + but without holding synchronized lock on IndexWriter + instance + + + + Blocks until all files in syncing are sync'd + + + Walk through all files referenced by the current + segmentInfos and ask the Directory to sync each file, + if it wasn't already. If that succeeds, then we + prepare a new segments_N file but do not fully commit + it. + + + + Returns true iff the index in the named directory is + currently locked. + + the directory to check for a lock + + IOException if there is a low-level IO error + + + Forcibly unlocks the index in the named directory.

+ Caution: this should only be used by failure recovery code, + when it is known that no other process nor thread is in fact + currently accessing this index. +

+
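The static lock helpers above might be used in recovery code roughly as in this sketch; per the caution, it is only safe when no other process or thread is touching the index:

    // Clear a stale write lock left behind by a crashed process.
    Directory dir = FSDirectory.Open(new DirectoryInfo("/indexes/main"));
    if (IndexWriter.IsLocked(dir))
    {
        // Only do this when it is certain that no live writer holds the lock.
        IndexWriter.Unlock(dir);
    }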
+ + Casts current mergePolicy to LogMergePolicy, and throws + an exception if the mergePolicy is not a LogMergePolicy. + + + +

Gets or sets the current setting of whether newly flushed + segments will use the compound file format. Note that + this just returns the value previously set with + setUseCompoundFile(boolean), or the default value + (true). You cannot use this to query the status of + previously flushed segments.

+ +

Note that this method is a convenience method: it + just calls mergePolicy.getUseCompoundFile as long as + mergePolicy is an instance of . + Otherwise an IllegalArgumentException is thrown.

+ +

+
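Assuming the property-style accessor implied by the "Gets or sets" wording (older releases expose SetUseCompoundFile/GetUseCompoundFile methods instead), toggling the setting might look like:

    // Valid only while the writer's merge policy is a LogMergePolicy; otherwise this throws.
    writer.UseCompoundFile = true;   // newly flushed segments will be written in compound file format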
+ + Expert: Return the Similarity implementation used by this IndexWriter. + +

This defaults to the current value of . +

+
+ + Expert: Gets or sets the interval between indexed terms. Large values cause less + memory to be used by IndexReader, but slow random-access to terms. Small + values cause more memory to be used by an IndexReader, and speed + random-access to terms. + + This parameter determines the amount of computation required per query + term, regardless of the number of documents that contain that term. In + particular, it is the maximum number of other terms that must be + scanned before a term is located and its frequency and position information + may be processed. In a large index with user-entered query terms, query + processing time is likely to be dominated not by term lookup but rather + by the processing of frequency and positional data. In a small index + or when many uncommon query terms are generated (e.g., by wildcard + queries) term lookup may become a dominant cost. + + In particular, numUniqueTerms/interval terms are read into + memory by an IndexReader, and, on average, interval/2 terms + must be scanned for each random term access. + + + + + + + Expert: returns the current MergePolicy in use by this writer. + + + + + Expert: returns the current MergePolicy in use by this + writer. + + + + + +

Gets or sets the largest segment (measured by document + count) that may be merged with other segments. +

+ Small values (e.g., less than 10,000) are best for + interactive indexing, as this limits the length of + pauses while indexing to a few seconds. Larger values + are best for batched indexing and speedier + searches. +

+ The default value is . +

+ Note that this method is a convenience method: it + just calls mergePolicy.getMaxMergeDocs as long as + mergePolicy is an instance of . + Otherwise an IllegalArgumentException is thrown.

+ + The default merge policy () + also allows you to set this + limit by net size (in MB) of the segment, using + .

+

+ + +
+ + Gets or sets the termsIndexDivisor passed to any readers that + IndexWriter opens, for example when applying deletes + or creating a near-real-time reader in + . Default value is + . + + + Gets or sets the number of segments that are merged at + once and also controls the total number of segments + allowed to accumulate in the index. +

Determines how often segment indices are merged by addDocument(). With + smaller values, less RAM is used while indexing, and searches on + unoptimized indices are faster, but indexing speed is slower. With larger + values, more RAM is used during indexing, and while searches on unoptimized + indices are slower, indexing is faster. Thus larger values (> 10) are best + for batch index creation, and smaller values (< 10) for indices that are + interactively maintained. + +

Note that this method is a convenience method: it + just calls mergePolicy.setMergeFactor as long as + mergePolicy is an instance of . + Otherwise an IllegalArgumentException is thrown.

+ +

This must never be less than 2. The default value is 10. +

+
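As a hedged tuning sketch for the batch-versus-interactive trade-off described above (property names follow the "Gets or sets" wording used here; 2.9-era releases use SetMergeFactor/SetMaxMergeDocs instead):

    // Bulk load: trade search speed on the unoptimized index for indexing throughput.
    writer.MergeFactor = 30;               // values > 10 suit batch index creation
    writer.MaxMergeDocs = int.MaxValue;    // no per-segment document cap during the load

    // Interactive indexing would instead keep merge pauses short, e.g.:
    // writer.MergeFactor = 10;
    // writer.MaxMergeDocs = 10000;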
+ + Gets or sets the default info stream. + If non-null, this will be the default infoStream used + by a newly instantiated IndexWriter. + + + + + + Returns the current infoStream in use by this writer. + + + + + Returns true if verbosing is enabled (i.e., infoStream != null). + + + Gets or sets allowed timeout when acquiring the write lock. + + + Gets or sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in + milliseconds). + + + + Returns the Directory used by this index. + + + Returns the analyzer used by this index. + + + Gets or sets the merged segment warmer. See + . + + + + Holds shared SegmentReader instances. IndexWriter uses + SegmentReaders for 1) applying deletes, 2) doing + merges, 3) handing out a real-time reader. This pool + reuses instances of the SegmentReaders in all these + places if it is in "near real-time mode" (getReader() + has been called on this instance). + + + + Forcefully clear changes for the specifed segments, + and remove from the pool. This is called on succesful merge. + + + + Release the segment reader (i.e. decRef it and close if there + are no more references. + + + + IOException + + + Release the segment reader (i.e. decRef it and close if there + are no more references. + + + + + IOException + + + Remove all our references to readers, and commits + any pending changes. + + + + Commit all segment reader in the pool. + IOException + + + Returns a ref to a clone. NOTE: this clone is not + enrolled in the pool, so you should simply close() + it when you're done (ie, do not call release()). + + + + Obtain a SegmentReader from the readerPool. The reader + must be returned by calling + + + + + + + + IOException + + + Obtain a SegmentReader from the readerPool. The reader + must be returned by calling + + + + + + + + + + + + + IOException + + + Specifies maximum field length (in number of tokens/terms) in constructors. + overrides the value set by + the constructor. + + + + Private type-safe-enum-pattern constructor. + + + instance name + + maximum field length + + + + Public constructor to allow users to specify the maximum field size limit. + + + The maximum field length + + + + Sets the maximum field length to . + + + Sets the maximum field length to + + + + + + If has been called (ie, this writer + is in near real-time mode), then after a merge + completes, this class can be invoked to warm the + reader on the newly merged segment, before the merge + commits. This is not required for near real-time + search, but will reduce search latency on opening a + new near real-time reader after a merge completes. + +

NOTE: This API is experimental and might + change in incompatible ways in the next release.

+ +

NOTE: warm is called before any deletes have + been carried over to the merged segment. +

+
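The warmer only matters in near-real-time mode; for context, a minimal sketch of that mode (assuming the GetReader() method documented for this writer, and a Document built elsewhere):

    writer.AddDocument(doc);                       // doc built elsewhere
    IndexReader nrtReader = writer.GetReader();    // flushes and returns a reader over uncommitted changes
    try
    {
        var searcher = new IndexSearcher(nrtReader);
        // ... run queries that can already see the new document ...
    }
    finally
    {
        nrtReader.Close();                         // the writer still owns the underlying segments
    }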
+ + Add a new thread + + + Abort (called after hitting AbortException) + + + Flush a new segment + + + Close doc stores + + + Attempt to free RAM, returning true if any RAM was + freed + + + + This implementation that + keeps only the most recent commit and immediately removes + all prior commits after a new commit is done. This is + the default deletion policy. + + + + Deletes all commits except the most recent one. + + + Deletes all commits except the most recent one. + + + This is a that measures size of a + segment as the total byte size of the segment's files. + + + +

This class implements a that tries + to merge segments into levels of exponentially + increasing size, where each level has fewer segments than + the value of the merge factor. Whenever extra segments + (beyond the merge factor upper bound) are encountered, + all segments within the level are merged. You can get or + set the merge factor using and + respectively.

+ +

This class is abstract and requires a subclass to + define the method which specifies how a + segment's size is determined. + is one subclass that measures size by document count in + the segment. is another + subclass that measures size as the total byte size of the + file(s) for the segment.

+

+
+ +

Expert: a MergePolicy determines the sequence of + primitive merge operations to be used for overall merge + and optimize operations.

+ +

Whenever the segments in an index have been altered by + , either the addition of a newly + flushed segment, addition of many segments from + addIndexes* calls, or a previous merge that may now need + to cascade, invokes + to give the MergePolicy a chance to pick + merges that are now required. This method returns a + instance describing the set of + merges that should be done, or null if no merges are + necessary. When IndexWriter.optimize is called, it calls + and the MergePolicy should + then return the necessary merges.

+ +

Note that the policy can return more than one merge at + a time. In this case, if the writer is using + , the merges will be run + sequentially but if it is using + they will be run concurrently.

+ +

The default MergePolicy is + .

+ +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+ +

NOTE: This class typically requires access to + package-private APIs (e.g. SegmentInfos) to do its job; + if you implement your own MergePolicy, you'll need to put + it in package Lucene.Net.Index in order to use + these APIs. +

+
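To make the policy/scheduler split concrete, here is a sketch of swapping both on a writer. It assumes the 2.9/3.x-era names SetMergeScheduler/SetMergePolicy and the LogByteSizeMergePolicy constructor that takes the writer; accessor style varies between releases:

    // Run merges sequentially on the calling thread instead of the default
    // ConcurrentMergeScheduler background threads.
    writer.SetMergeScheduler(new SerialMergeScheduler());

    // Level segments by total byte size rather than by document count.
    var bySize = new LogByteSizeMergePolicy(writer);
    bySize.MaxMergeMB = 512.0;    // assumption: property-style accessor; older API uses SetMaxMergeMB
    writer.SetMergePolicy(bySize);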
+ + Determine what set of merge operations are now necessary on the index. + calls this whenever there is a change to the segments. + This call is always synchronized on the instance so + only one thread at a time will call this method. + + + the total set of segments in the index + + + + Determine what set of merge operations is necessary in order to optimize + the index. calls this when its + method is called. This call is always + synchronized on the instance so only one thread at a + time will call this method. + + + the total set of segments in the index + + requested maximum number of segments in the index (currently this + is always 1) + + contains the specific SegmentInfo instances that must be merged + away. This may be a subset of all SegmentInfos. + + + + Determine what set of merge operations is necessary in order to expunge all + deletes from the index. + + + the total set of segments in the index + + + + Release all resources for the policy. + + + Release all resources for the policy. + + + Returns true if a newly flushed (not from merge) + segment should use the compound file format. + + + + Returns true if the doc store files should use the + compound file format. + + + + OneMerge provides the information necessary to perform + an individual primitive merge operation, resulting in + a single new segment. The merge spec includes the + subset of segments to be merged as well as whether the + new segment should use the compound file format. + + + + Record that an exception occurred while executing + this merge + + + + Retrieve previous exception set by + . + + + + Mark this merge as aborted. If this is called + before the merge is committed then the merge will + not be committed. + + + + Returns true if this merge was aborted. + + + A MergeSpecification instance provides the information + necessary to perform multiple merges. It simply + contains a list of instances. + + + + The subset of segments to be included in the primitive merge. + + + Exception thrown if there are any problems while + executing a merge. + + + + Returns the of the index that hit + the exception. + + + + Defines the allowed range of log(size) for each + level. A level is computed by taking the max segment + log size, minus LEVEL_LOG_SPAN, and finding all + segments falling within that range. + + + + Default merge factor, which is how many segments are + merged at a time + + + + Default maximum segment size. A segment of this size + + + + + Default noCFSRatio. If a merge's size is >= 10% of + the index, then we disable compound file for it. + See + + + + Gets or sets whether compound file format should be used for + newly flushed and newly merged segments. + + + + Sets whether compound file format should be used for + newly flushed and newly merged doc store + segment files (term vectors and stored fields). + + + + Returns true if newly flushed and newly merge doc + store segment files (term vectors and stored fields) + + + + + + Returns true if this single info is optimized (has no + pending norms or deletes, is in the same dir as the + writer, and matches the current compound file setting + + + + Returns the merges necessary to optimize the index. + This merge policy defines "optimized" to mean only one + segment in the index, where that segment has no + deletions pending nor separate norms, and it is in + compound file format if the current useCompoundFile + setting is true. This method returns multiple merges + (mergeFactor at a time) so the + in use may make use of concurrency. 
+ + + + Finds merges necessary to expunge all deletes from the + index. We simply merge adjacent segments that have + deletes, up to mergeFactor at a time. + + + + Checks if any merges are now necessary and returns a + if so. A merge + is necessary when there are more than + segments at a given level. When + multiple levels have too many segments, this method + will return multiple merges, allowing the + to use concurrency. + + + + Gets or sets how often segment indices are merged by + addDocument(). With smaller values, less RAM is used + while indexing, and searches on unoptimized indices are + faster, but indexing speed is slower. With larger + values, more RAM is used during indexing, and while + searches on unoptimized indices are slower, indexing is + faster. Thus larger values (> 10) are best for batch + index creation, and smaller values (< 10) for indices + that are interactively maintained. + + + + Gets or sets whether the segment size should be calibrated by + the number of deletes when choosing segments for merge. + + + + + Gets or sets the largest segment (measured by document + count) that may be merged with other segments. +

Determines the largest segment (measured by + document count) that may be merged with other segments. + Small values (e.g., less than 10,000) are best for + interactive indexing, as this limits the length of + pauses while indexing to a few seconds. Larger values + are best for batched indexing and speedier + searches.

+ +

The default value is .

+ +

The default merge policy () + also allows you to set this + limit by net size (in MB) of the segment, using + .

+

+
+ + + + + + Default maximum segment size. A segment of this size + + + + +

Gets or sets the largest segment (measured by total + byte size of the segment's files, in MB) that may be + merged with other segments. Small values (e.g., less + than 50 MB) are best for interactive indexing, as this + limits the length of pauses while indexing to a few + seconds. Larger values are best for batched indexing + and speedier searches.

+ +

Note that is also + used to check whether a segment is too large for + merging (it's either or).

+

+
+ + Gets or sets the minimum size for the lowest level segments. + Any segments below this size are considered to be on + the same level (even if they vary drastically in size) + and will be merged whenever there are mergeFactor of + them. This effectively truncates the "long tail" of + small segments that would otherwise be created into a + single level. If you set this too large, it could + greatly increase the merging cost during indexing (if + you flush many small segments). + + + + This is a that measures size of a + segment as the number of documents (not taking deletions + into account). + + + + + + + + Gets or sets the minimum size for the lowest level segments. + Any segments below this size are considered to be on + the same level (even if they vary drastically in size) + and will be merged whenever there are mergeFactor of + them. This effectively truncates the "long tail" of + small segments that would otherwise be created into a + single level. If you set this too large, it could + greatly increase the merging cost during indexing (if + you flush many small segments). + + + + Remaps docIDs after a merge has completed, where the + merged segments had at least one deletion. This is used + to renumber the buffered deletes in IndexWriter when a + merge of segments with deletions commits. + + + + Allows you to iterate over the for multiple s as + a single . + + + + + Creates a new MultipleTermPositions instance. + + + + + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + + false + + + A PriorityQueue maintains a partial ordering of its elements such that the + least element can always be found in constant time. Put()'s and pop()'s + require log(size) time. + +

NOTE: This class pre-allocates a full array of + length maxSize+1, in . + +

+
+ + Determines the ordering of objects in this priority queue. Subclasses + must define this one method. + + + + Subclass constructors must call this. + + + + Adds an Object to a PriorityQueue in log(size) time. If one tries to add + more objects than maxSize from initialize an + is thrown. + + the new 'top' element in the queue. + + + + Adds an Object to a PriorityQueue in log(size) time. + It returns the object (if any) that was + dropped off the heap because it was full. This can be + the given parameter (in case it is smaller than the + full heap's minimum, and couldn't be added), or another + object that was previously the smallest value in the + heap and now has been replaced by a larger one, or null + if the queue wasn't yet full with maxSize elements. + + + + Returns the least element of the PriorityQueue in constant time. + + + + Removes and returns the least element of the + PriorityQueue in log(size) time. + + + + Should be called when the Object at top changes values. + Still log(n) worst case, but it's at least twice as fast to + + pq.top().change(); + pq.updateTop(); + + instead of + + o = pq.pop(); + o.change(); + pq.push(o); + + + the new 'top' element. + + + Returns the number of elements currently stored in the PriorityQueue. + + + Removes all entries from the PriorityQueue. + + + This method can be overridden by extending classes to return a sentinel + object which will be used by to fill the queue, so + that the code which uses that queue can always assume it's full and only + change the top without attempting to insert any new object.
+ + Those sentinel values should always compare worse than any non-sentinel + value (i.e., should always favor the + non-sentinel values).
+ + By default, this method returns false, which means the queue will not be + filled with sentinel values. Otherwise, the value returned will be used to + pre-populate the queue. Adds sentinel values to the queue.
+ + If this method is extended to return a non-null value, then the following + usage pattern is recommended: + + + // extends getSentinelObject() to return a non-null value. + PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits); + // save the 'top' element, which is guaranteed to not be null. + MyObject pqTop = pq.top(); + <...> + // now in order to add a new element, which is 'better' than top (after + // you've verified it is better), it is as simple as: + pqTop.change(). + pqTop = pq.updateTop(); + + + NOTE: if this method returns a non-null value, it will be called by + times, relying on a new object to + be returned and will not check if it's null again. Therefore you should + ensure any call to this method creates a new instance and behaves + consistently, e.g., it cannot return null if it previously returned + non-null. + +
+ the sentinel object to use to pre-populate the queue, or null if sentinel objects are not supported. +
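Following the contract above, a minimal concrete queue only supplies LessThan and calls Initialize. This sketch targets the generic Lucene.Net.Util.PriorityQueue<T> shown in the sentinel example; member accessibility may differ in older, non-generic releases:

    // Keeps the numerically smallest value at the top of the heap.
    public class IntQueue : PriorityQueue<int>
    {
        public IntQueue(int maxSize)
        {
            Initialize(maxSize);      // pre-allocates the maxSize+1 backing array
        }

        public override bool LessThan(int a, int b)
        {
            return a < b;             // defines the ordering; the least element is Top()
        }
    }

    // Add/Pop run in log(size) time, Top() in constant time.
    var pq = new IntQueue(10);
    pq.Add(42);
    pq.Add(7);
    int least = pq.Top();             // 7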
+ + An IndexReader which reads multiple indexes, appending + their content. + + + +

Construct a MultiReader aggregating the named set of (sub)readers. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the subreaders.

+

Note that all subreaders are closed if this Multireader is closed.

+

+ set of (sub)readers + + IOException +
+ +

Construct a MultiReader aggregating the named set of (sub)readers. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the subreaders.

+

+ indicates whether the subreaders should be closed + when this MultiReader is closed + + set of (sub)readers + + IOException +
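A short sketch of the aggregation described above, using the two constructor shapes documented here; dirA and dirB are Directory instances opened elsewhere, and the bool controls whether the subreaders are closed together with the MultiReader:

    IndexReader partA = IndexReader.Open(dirA, true);   // read-only subreaders
    IndexReader partB = IndexReader.Open(dirB, true);

    // Present both indexes as one logical index; closing 'combined' also closes the subreaders.
    var combined = new MultiReader(new[] { partA, partB }, true);
    var searcher = new IndexSearcher(combined);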
+ + Tries to reopen the subreaders. +
+ If one or more subreaders could be re-opened (i. e. subReader.reopen() + returned a new instance != subReader), then a new MultiReader instance + is returned, otherwise this instance is returned. +

+ A re-opened instance might share one or more subreaders with the old + instance. Index modification operations result in undefined behavior + when performed before the old instance is closed. + (see ). +

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. + +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Clones the subreaders. + (see ). +
+

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. +

+
+ + If clone is true then we clone each of the subreaders + + + New IndexReader, or same one (this) if + reopen/clone is not necessary + + CorruptIndexException + IOException + + + Checks recursively if all subreaders are up to date. + + + Not implemented. + UnsupportedOperationException + + + Writes norms. Each thread X field accumulates the norms + for the doc/fields it saw, then the flush method below + merges all of these together into a single _X.nrm file. + + + + Produce _X.nrm if any document had a field with norms + not disabled + + + + Taps into DocInverter, as an InvertedDocEndConsumer, + which is called at the end of inverting each field. We + just look at the length for the field (docState.length) + and record the norm. + + + + An IndexReader which reads multiple, parallel indexes. Each index added + must have the same number of documents, but typically each contains + different fields. Each document contains the union of the fields of all + documents with the same document number. When searching, matches for a + query term are from the first index added that has the field. + +

This is useful, e.g., with collections that have large fields which + change rarely and small fields that change more frequently. The smaller + fields may be re-indexed in a new index and both indexes may be searched + together. + +

Warning: It is up to you to make sure all indexes + are created and modified the same way. For example, if you add + documents to one index, you need to add the same documents in the + same order to the other indexes. Failure to do so will result in + undefined behavior. +

+
+ + Construct a ParallelReader. +

Note that all subreaders are closed if this ParallelReader is closed.

+

+
+ + Construct a ParallelReader. + indicates whether the subreaders should be closed + when this ParallelReader is closed + + + + Add an IndexReader. + IOException if there is a low-level IO error + + + Add an IndexReader whose stored fields will not be returned. This can + accellerate search when stored fields are only needed from a subset of + the IndexReaders. + + + IllegalArgumentException if not all indexes contain the same number + of documents + + IllegalArgumentException if not all indexes have the same value + of + + IOException if there is a low-level IO error + + + Tries to reopen the subreaders. +
+ If one or more subreaders could be re-opened (i. e. subReader.reopen() + returned a new instance != subReader), then a new ParallelReader instance + is returned, otherwise this instance is returned. +

+ A re-opened instance might share one or more subreaders with the old + instance. Index modification operations result in undefined behavior + when performed before the old instance is closed. + (see ). +

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. + +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
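A sketch of the parallel layout described above; dirStable and dirVolatile are hypothetical Directory instances holding two indexes that were built with identical document numbering:

    var parallel = new ParallelReader();                  // closes subreaders by default
    parallel.Add(IndexReader.Open(dirStable, true));      // large, rarely changing fields
    parallel.Add(IndexReader.Open(dirVolatile, true));    // small, frequently re-indexed fields

    // Each logical document is the union of the fields from both subreaders;
    // keeping doc numbering identical across them is the caller's responsibility.
    var searcher = new IndexSearcher(parallel);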
+ + Checks recursively if all subreaders are up to date. + + + Checks recursively if all subindexes are optimized + + + Not implemented. + UnsupportedOperationException + + + A Payload is metadata that can be stored together with each occurrence + of a term. This metadata is stored inline in the posting list of the + specific term. +

+ To store payloads in the index a has to be used that + produces payload data. +

+ Use and + to retrieve the payloads from the index.
+ +

+
+ + the byte array containing the payload data + + + the offset within the byte array + + + the length of the payload data + + + Creates an empty payload and does not allocate a byte array. + + + Creates a new payload with the the given array as data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + the data of this payload + + + + Creates a new payload with the the given array as data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + the data of this payload + + the offset in the data byte array + + the length of the data + + + + Sets this payloads data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + + Gets or sets a reference to the underlying byte array + that holds this payloads data. Data is not copied. + + + + Gets or sets a reference to the underlying byte array + that holds this payloads data. Data is not copied. + + + + Returns the byte at the given index. + + + Allocates a new byte array, copies the payload data into it and returns it. + + + Copies the payload data to a byte array. + + + the target byte array + + the offset in the target byte array + + + + Clones this payload by creating a copy of the underlying + byte array. + + + + Returns the offset in the underlying byte array + + + Returns the length of the payload data. + + + For each Field, store position by position information. It ignores frequency information +
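The Payload object itself is just a thin wrapper over a byte slice. A minimal sketch of creating one and copying its data back out (constructors and ToByteArray as documented below; the Length accessor name is assumed from the property-style wording):

    // Wrap an existing buffer without copying; the Payload keeps a reference to it.
    byte[] buffer = { 0x01, 0x02, 0x03, 0x04 };
    var payload = new Payload(buffer, 1, 2);   // covers bytes 0x02 and 0x03

    byte[] copy = payload.ToByteArray();       // allocates a fresh copy of just the slice
    int length = payload.Length;               // 2; accessor name assumed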

+ This is not thread-safe. +

+
+ + A Map of Integer and TVPositionInfo + + + Callback for the TermVectorReader. + + + + + + + + + + + Callback mechanism used by the TermVectorReader + The field being read + + The number of terms in the vector + + Whether offsets are available + + Whether positions are available + + + + Never ignores positions. This mapper doesn't make much sense unless there are positions + false + + + Get the mapping between fields and terms, sorted by the comparator + + + A map between field names and a Map. The sub-Map key is the position as the integer, the value is <see cref="Lucene.Net.Index.PositionBasedTermVectorMapper.TVPositionInfo" />. + + + Container for a term at a position + + + + The position of the term + + + + Note, there may be multiple terms at the same position + A List of Strings + + + + Parallel list (to ) of TermVectorOffsetInfo objects. + There may be multiple entries since there may be multiple terms at a position + A List of TermVectorOffsetInfo objects, if offsets are store. + + + + $Id + +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Clones the norm bytes. May be overridden by subclasses. New and experimental. + Byte array to clone + + New BitVector + + + + Clones the deleteDocs BitVector. May be overridden by subclasses. New and experimental. + BitVector to clone + + New BitVector + + + + + + + + Read norms into a pre-allocated array. + + + Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. + TermVectorsReader + + + + Return a term frequency vector for the specified document and field. The + vector returned contains term numbers and frequencies for all terms in + the specified field of this document, if the field had storeTermVector + flag set. If the flag was not set, the method returns null. + + IOException + + + Return an array of term frequency vectors for the specified document. + The array contains a vector for each vectorized field in the document. + Each vector vector contains term numbers and frequencies for all terms + in a given vectorized field. + If no such fields existed, the method returns null. + + IOException + + + Returns the directory this index resides in. + + + Lotsa tests did hacks like:
+ SegmentReader reader = (SegmentReader) IndexReader.open(dir);
+ They broke. This method serves as a hack to keep hacks working + We do it with R/W access for the tests (BW compatibility) +
+
+ + Return the name of the segment this reader is reading. + + + Return the SegmentInfo of the segment this reader is reading. + + + Sets the initial value + + + Java's builtin ThreadLocal has a serious flaw: + it can take an arbitrarily long amount of time to + dereference the things you had stored in it, even once the + ThreadLocal instance itself is no longer referenced. + This is because there is single, master map stored for + each thread, which all ThreadLocals share, and that + master map only periodically purges "stale" entries. + + While not technically a memory leak, because eventually + the memory will be reclaimed, it can take a long time + and you can easily hit OutOfMemoryError because from the + GC's standpoint the stale entries are not reclaimaible. + + This class works around that, by only enrolling + WeakReference values into the ThreadLocal, and + separately holding a hard reference to each stored + value. When you call , these hard + references are cleared and then GC is freely able to + reclaim space by objects stored in it. + + + + + Byte[] referencing is used because a new norm object needs + to be created for each clone, and the byte array is all + that is needed for sharing between cloned readers. The + current norm referencing is for sharing between readers + whereas the byte[] referencing is for copy on write which + is independent of reader references (i.e. incRef, decRef). + + + + Used by DocumentsWriter to implemented a StringReader + that can be reset to a new string; we use this when + tokenizing the string value from a Field. + + + + Information about a segment such as it's name, directory, and files related + to the segment. + + *

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + Copy everything from src SegmentInfo into our instance. + + + Construct a new SegmentInfo instance by reading a + previously saved SegmentInfo from input. + + + directory to load from + + format of the segments info file + + input handle to read segment info from + + + + Returns total size in bytes of all of files used by + this segment. + + + + Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX). + + + the field index to check + + + + Returns true if any fields in this segment have separate norms. + + + Increment the generation count for the norms file for + this field. + + + field whose norm file will be rewritten + + + + Get the file name for the norms file for this field. + + + field index + + + + Returns true if this segment is stored as a compound + file; else, false. + + + + Returns true if this segment is stored as a compound + file; else, false. + + + + Save this segment's info. + + + Used for debugging + + + We consider another SegmentInfo instance equal if it + has the same dir and same name. + + + + The SegmentMerger class combines two or more Segments, represented by an IndexReader (, + into a single Segment. After adding the appropriate readers, call the merge method to combine the + segments. +

+ If the compoundFile flag is set, then the segments will be merged into a compound file. + + +

+ + + + +
+ + Maximum number of contiguous documents to bulk-copy + when merging stored fields + + + + norms header placeholder + + + This ctor used only by test code. + + + The Directory to merge the other segments into + + The name of the new segment + + + + Add an IndexReader to the collection of readers that are to be merged + + + + + + The index of the reader to return + + The ith reader to be merged + + + + Merges the readers specified by the method into the directory passed to the constructor + The number of documents that were merged + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Merges the readers specified by the method + into the directory passed to the constructor. + + if false, we will not merge the + stored fields nor vectors files + + The number of documents that were merged + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + close all IndexReaders that have been added. + Should not be called before merge(). + + IOException + + + + The number of documents in all of the readers + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Merge the TermVectors from each of the segments into the new one. + IOException + + + Process postings from multiple segments all positioned on the + same term. Writes out merged entries into freqOutput and + the proxOutput streams. + + + array of segments + + number of cells in the array actually occupied + + number of documents across all segments where this term was found + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Records the fact that roughly units amount of work + have been done since this method was last called. + When adding time-consuming code into SegmentMerger, + you should test different values for units to ensure + that the time in between calls to merge.checkAborted + is up to ~ 1 second. + + + + Increments the enumeration to the next element. True if one exists. + + + Optimized scan, without allocating new terms. + Return number of invocations to next(). + + + + Returns the previous Term enumerated. Initially null. + + + Returns the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Sets the argument to the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Returns the docFreq from the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Closes the enumeration to further activity, freeing resources. + + + Returns the current Term in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Called by super.skipTo(). + + + Provides access to stored term vector of + a document field. The vector consists of the name of the field, an array of the terms tha occur in the field of the + and a parallel array of frequencies. Thus, getTermFrequencies()[5] corresponds with the + frequency of getTerms()[5], assuming there are at least 5 terms in the Document. + + + + An Array of term texts in ascending order. + + + + Array of term frequencies. Locations of the array correspond one to one + to the terms in the array obtained from getTerms + method. Each location in the array contains the number of times this + term occurs in the document or the document field. 
+ + + + Return an index in the term numbers array returned from + getTerms at which the term with the specified + term appears. If this term does not appear in the array, + return -1. + + + + Just like indexOf(int) but searches for a number of terms + at the same time. Returns an array that has the same size as the number + of terms searched for, each slot containing the result of searching for + that term number. + + + array containing terms to look for + + index in the array where the list of terms starts + + the number of terms in the list + + + + The name. + The name of the field this vector is associated with. + + + The number of terms in the term vector. + + + + The number of the field this vector is associated with + + + Extends TermFreqVector to provide additional information about + positions in which each of the terms is found. A TermPositionVector not necessarily + contains both positions and offsets, but at least one of these arrays exists. + + + + Returns an array of positions in which the term is found. + Terms are identified by the index at which its number appears in the + term String array obtained from the indexOf method. + May return null if positions have not been stored. + + + + Returns an array of TermVectorOffsetInfo in which the term is found. + May return null if offsets have not been stored. + + + + + + The position in the array to get the offsets from + + An array of TermVectorOffsetInfo objects or the empty list + + + + Returns an array of TermVectorOffsetInfo in which the term is found. + + + The position in the array to get the offsets from + + An array of TermVectorOffsetInfo objects or the empty list + + + + + + Returns an array of positions in which the term is found. + Terms are identified by the index at which its number appears in the + term String array obtained from the indexOf method. + + + + A that simply does each merge + sequentially, using the current thread. + + + + Just do the merges in sequence. We do this + "synchronized" so that even if the application is using + multiple threads, only one merge may run at a time. + + + + A that wraps around any other + and adds the ability to hold and + later release a single "snapshot" of an index. While + the snapshot is held, the will not + remove any files associated with it even if the index is + otherwise being actively, arbitrarily changed. Because + we wrap another arbitrary , this + gives you the freedom to continue using whatever + you would normally want to use with your + index. Note that you can re-use a single instance of + SnapshotDeletionPolicy across multiple writers as long + as they are against the same index Directory. Any + snapshot held when a writer is closed will "survive" + when the next writer is opened. + +

WARNING: This API is a new and experimental and + may suddenly change.

+

+
+ + Take a snapshot of the most recent commit to the + index. You must call release() to free this snapshot. + Note that while the snapshot is held, the files it + references will not be deleted, which will consume + additional disk space in your index. If you take a + snapshot at a particularly bad time (say just before + you call optimize()) then in the worst case this could + consume an extra 1X of your total index size, until + you release the snapshot. + + + + Release the currently held snapshot. + + + Store a sorted collection of s. Collects all term information + into a single, SortedSet. +
+ NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not + know what Fields they correlate with. +
+ This is not thread-safe +
+
+ + Stand-in name for the field in . + + + + A Comparator for sorting s + + + + + The term to map + + The frequency of the term + + Offset information, may be null + + Position information, may be null + + + + The TermVectorEntrySet. A SortedSet of objects. Sort is by the comparator passed into the constructor. +
+ This set will be empty until after the mapping process takes place. + +
+ The SortedSet of <see cref="TermVectorEntry" />. +
+ + This exception is thrown when an + tries to make changes to the index (via + , + or ) + but changes have already been committed to the index + since this reader was instantiated. When this happens + you must open a new reader on the current index to make + the changes. + + + + This is a DocFieldConsumer that writes stored fields. + + + Fills in any hole in the docIDs + + + A Term represents a word from text. This is the unit of search. It is + composed of two elements, the text of the word, as a string, and the name of + the field that the text occurred in, an interned string. + Note that terms may represent more than words from text fields, but also + things like dates, email addresses, urls, etc. + + + + Constructs a Term with the given field and text. +

Note that a null field or null text value results in undefined + behavior for most Lucene APIs that accept a Term parameter. +

+
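A brief sketch of Term construction and of the field-reuse optimization documented below (CreateTerm keeps the interned field and only swaps the text):

    // A Term is a (field, text) pair; field names are interned strings.
    var first = new Term("contents", "lucene");

    // Reuse the interned field when creating many terms for the same field.
    Term second = first.CreateTerm("search");   // same field, new text

    // Ordering is by field first, then by text.
    int cmp = first.CompareTo(second);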
+ + Constructs a Term with the given field and empty text. + This serves two purposes: 1) reuse of a Term with the same field. + 2) pattern for a query. + + + + + + + Optimized construction of new Terms by reusing same field as this Term + - avoids field.intern() overhead + + The text of the new term (field is implicitly same as this Term instance) + + A new Term + + + + Compares two terms, returning a negative integer if this + term belongs before the argument, zero if this term is equal to the + argument, and a positive integer if this term belongs after the argument. + The ordering of terms is first by field, then by text. + + + + Returns the field of this term, an interned string. The field indicates + the part of a document which this term came from. + + + + Returns the text of this term. In the case of words, this is simply the + text of the word. In the case of dates and other types, this is an + encoding of the object as a string. + + + + Call this if the IndexInput passed to + stores terms in the "modified UTF8" (pre LUCENE-510) + format. + + + + A TermInfo is the record of information stored for a term. + + + The number of documents which contain the term. + + + This stores a monotonically increasing set of <Term, TermInfo> pairs in a + Directory. Pairs are accessed either by Term or by ordinal position the + set. + + + + Returns the number of term/value pairs in the set. + + + Returns the offset of the greatest index entry which is less than or equal to term. + + + Returns the TermInfo for a Term in the set, or null. + + + Returns the TermInfo for a Term in the set, or null. + + + Returns the position of a Term in the set or -1. + + + Returns an enumeration of all the Terms and TermInfos in the set. + + + Returns an enumeration of terms starting at or after the named term. + + + Per-thread resources managed by ThreadLocal + + + This stores a monotonically increasing set of <Term, TermInfo> pairs in a + Directory. A TermInfos can be written once, in order. + + + + The file format version, a negative number. + + + Expert: The fraction of terms in the "dictionary" which should be stored + in RAM. Smaller values use more memory, but make searching slightly + faster, while larger values use less memory and make searching slightly + slower. Searching is typically not dominated by dictionary lookup, so + tweaking this is rarely useful. + + + + Expert: The fraction of entries stored in skip tables, + used to accellerate . Larger values result in + smaller indexes, greater acceleration, but fewer accelerable cases, while + smaller values result in bigger indexes, less acceleration and more + accelerable cases. More detailed experiments would be useful here. + + + + Expert: The maximum number of skip levels. Smaller values result in + slightly smaller indexes, but slower skipping in big posting lists. + + + + Adds a new <fieldNumber, termBytes>, TermInfo> pair to the set. + Term must be lexicographically greater than all previous Terms added. + TermInfo pointers must be positive and greater than all previous. + + + + Called to complete TermInfos creation. + + + This class implements , which + is passed each token produced by the analyzer on each + field. It stores these tokens in a hash table, and + allocates separate byte streams per token. Consumers of + this class, eg and + , write their own byte streams + under each term. + + + + Collapse the hash table & sort in-place. + + + Compares term text for two Posting instance and + returns -1 if p1 < p2; 1 if p1 > p2; else 0. 
+ + + + Test whether the text for current RawPostingList p equals + current tokenText. + + + + Called when postings hash is too small (> 50% + occupied) or too large (< 20% occupied). + + + + Convenience class for holding TermVector information. + + + Compares s first by frequency and then by + the term (case-sensitive) + + + + + + The TermVectorOffsetInfo class holds information pertaining to a Term in a 's + offset information. This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the + original content). + + + + Convenience declaration when creating a that stores only position information. + + + The accessor for the ending offset for the term + The offset + + + The accessor for the starting offset of the term. + + + The offset + + + Retrieve the length (in bytes) of the tvd and tvf + entries for the next numDocs starting with + startDocID. This is used for bulk copying when + merging segments, if the field numbers are + congruent. Once this returns, the tvf & tvd streams + are seeked to the startDocID. + + + + + The number of documents in the reader + + + + Retrieve the term vector for the given document and field + The document number to retrieve the vector for + + The field within the document to retrieve + + The TermFreqVector for the document and field or null if there is no termVector for this field. + + IOException if there is an error reading the term vector files + + + Return all term vectors stored for this document or null if the could not be read in. + + + The document number to retrieve the vector for + + All term frequency vectors + + IOException if there is an error reading the term vector files + + + + The field to read in + + The pointer within the tvf file where we should start reading + + The mapper used to map the TermVector + + IOException + + + Models the existing parallel array structure + + + Construct the vector + The based on the mappings. + + + + Fills in no-term-vectors for all docs we haven't seen + since the last doc that had term vectors. + + + + Called once per field per document if term vectors + are enabled, to write the vectors to + RAMOutputStream, which is then quickly flushed to + the real term vectors files in the Directory. + + + + Add a complete document specified by all its term vectors. If document has no + term vectors, add value for tvx. + + + + + IOException + + + Do a bulk copy of numDocs documents from reader to our + streams. This is used to expedite merging, if the + field numbers are congruent. + + + + Close all streams. + + + Lucene's package information, including version. * + + + Message Interface for a lazy loading. + For Native Language Support (NLS), system of software internationalization. + + + + Default implementation of Message interface. + For Native Language Support (NLS), system of software internationalization. + + + + MessageBundles classes extend this class, to implement a bundle. + + For Native Language Support (NLS), system of software internationalization. + + This interface is similar to the NLS class in eclipse.osgi.util.NLS class - + initializeMessages() method resets the values of all static strings, should + only be called by classes that extend from NLS (see TestMessages.java for + reference) - performs validation of all message in a bundle, at class load + time - performs per message validation at runtime - see NLSTest.java for + usage reference + + MessageBundle classes may subclass this type. 
+ + + + Initialize a given class with the message bundle Keys Should be called from + a class that extends NLS in a static block at class load time. + + + Property file with that contains the message bundle + + where constants will reside + + + + + + + + - Message Key + + + + + Performs the priviliged action. + + A value that may represent the result of the action. + + + Interface that exceptions should implement to support lazy loading of messages. + + For Native Language Support (NLS), system of software internationalization. + + This Interface should be implemented by all exceptions that require + translation + + + + + a instance of a class that implements the Message interface + + + This interface describes a character stream that maintains line and + column number positions of the characters. It also has the capability + to backup the stream to some extent. An implementation of this + interface is used in the TokenManager implementation generated by + JavaCCParser. + + All the methods except backup can be implemented in any fashion. backup + needs to be implemented correctly for the correct operation of the lexer. + Rest of the methods are all used to get information like line number, + column number and the String that constitutes a token and are not used + by the lexer. Hence their implementation won't affect the generated lexer's + operation. + + + + Returns the next character from the selected input. The method + of selecting the input is the responsibility of the class + implementing this interface. Can throw any java.io.IOException. + + + + Backs up the input stream by amount steps. Lexer calls this method if it + had already read some characters, but could not use them to match a + (longer) token. So, they will be used again as the prefix of the next + token and it is the implemetation's responsibility to do this right. + + + + Returns the next character that marks the beginning of the next token. + All characters must remain in the buffer between two successive calls + to this method to implement backup correctly. + + + + Returns an array of characters that make up the suffix of length 'len' for + the currently matched token. This is used to build up the matched string + for use in actions in the case of MORE. A simple and inefficient + implementation of this is as follows : + + { + String t = GetImage(); + return t.substring(t.length() - len, t.length()).toCharArray(); + } + + + + The lexer calls this function to indicate that it is done with the stream + and hence implementations can free any resources held by this class. + Again, the body of this function can be just empty and it will not + affect the lexer's operation. + + + + Returns the column position of the character last read. + + + + + + + Returns the line number of the character last read. + + + + + + + Returns the column number of the last character for current token (being + matched after the last call to BeginTOken). + + + + Returns the line number of the last character for current token (being + matched after the last call to BeginTOken). + + + + Returns the column number of the first character for current token (being + matched after the last call to BeginTOken). + + + + Returns the line number of the first character for current token (being + matched after the last call to BeginTOken). + + + + Returns a string made up of characters from the marked token beginning + to the current buffer position. Implementations have the choice of returning + anything that they want to. 
For example, for efficiency, one might decide + to just return null, which is a valid implementation. + + + + An efficient implementation of JavaCC's CharStream interface.

Note that + this does not do line-number counting, but instead keeps track of the + character position of the token in the input, as required by Lucene's + API. + +

+
+ + Constructs from a Reader. + + + A QueryParser which constructs queries to search multiple fields. + + + $Revision: 829231 $ + + + + This class is generated by JavaCC. The most important method is + . + + The syntax for query strings is as follows: + A Query is a series of clauses. + A clause may be prefixed by: + + a plus (+) or a minus (-) sign, indicating + that the clause is required or prohibited respectively; or + a term followed by a colon, indicating the field to be searched. + This enables one to construct queries which search multiple fields. + + + A clause may be either: + + a term, indicating all the documents that contain this term; or + a nested query, enclosed in parentheses. Note that this may be used + with a +/- prefix to require any of a set of + terms. + + + Thus, in BNF, the query grammar is: + + Query ::= ( Clause )* + Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) + + +

+ Examples of appropriately formatted queries can be found in the query syntax + documentation. +

+ +

+ In s, QueryParser tries to detect date values, e.g. + date:[6/1/2005 TO 6/4/2005] produces a range query that searches + for "date" fields between 2005-06-01 and 2005-06-04. Note that the format + of the accepted input depends on the . + By default a date is converted into a search term using the deprecated + for compatibility reasons. + To use the new to convert dates, a + has to be set. +

+

+ The date resolution that shall be used for RangeQueries can be set + using + or . The former + sets the default date resolution for all fields, whereas the latter can + be used to set field specific date resolutions. Field specific date + resolutions take, if set, precedence over the default date resolution. +

+

+ If you use neither nor in your + index, you can create your own + query parser that inherits QueryParser and overwrites + to + use a different method for date conversion. +

+ +

Note that QueryParser is not thread-safe.

+ +

NOTE: there is a new QueryParser in contrib, which matches + the same syntax as this class, but is more modular, + enabling substantial customization to how a query is created. + +

NOTE: You must specify the required compatibility when + creating QueryParser: + + As of 2.9, is true by default. +
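A minimal usage sketch of the parser described above (assumes the Lucene.Net 3.x API documented here; the "content" field and indexPath are placeholders, not part of the library):

using System.IO;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;

public static class QueryParserExample
{
    public static void Run(string indexPath)
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_30);
        var parser = new QueryParser(Version.LUCENE_30, "content", analyzer);

        // "+lucene -jakarta title:api" exercises the clause grammar shown above.
        Query query = parser.Parse("+lucene -jakarta title:api");

        using (var searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(indexPath)), true))
        {
            TopDocs hits = searcher.Search(query, 10);
            // hits.ScoreDocs holds the top matching document ids and their scores.
        }
    }
}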

+
+ + Token literal values and constants. + Generated by org.javacc.parser.OtherFilesGen#start() + + + + End of File. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + Lexical state. + + + Lexical state. + + + Lexical state. + + + Lexical state. + + + Literal token values. + + + Alternative form of QueryParser.Operator.AND + + + Alternative form of QueryParser.Operator.OR + + + The actual operator that parser uses to combine query terms + + + Parses a query string, returning a {@link Lucene.Net.Search.Query}. + the query string to be parsed. + + ParseException if the parsing fails + + + Sets the default date resolution used by RangeQueries for fields for which no + specific date resolutions has been set. Field specific resolutions can be set + with {@link #SetDateResolution(String, DateTools.Resolution)}. + + + the default date resolution to set + + + + Sets the date resolution used by RangeQueries for a specific field. + + + field for which the date resolution is to be set + + date resolution to set + + + + Returns the date resolution that is used by RangeQueries for the given field. + Returns null, if no default or field specific date resolution has been set + for the given field. + + + + throw in overridden method to disallow + + + + Base implementation delegates to {@link #GetFieldQuery(String,String)}. + This method may be overridden, for example, to return + a SpanNearQuery instead of a PhraseQuery. + + + throw in overridden method to disallow + + + + throw in overridden method to disallow + + + + Builds a new BooleanQuery instance + disable coord + + new BooleanQuery instance + + + + Builds a new BooleanClause instance + sub query + + how this clause should occur when matching documents + + new BooleanClause instance + + + + Builds a new TermQuery instance + term + + new TermQuery instance + + + + Builds a new PhraseQuery instance + new PhraseQuery instance + + + + Builds a new MultiPhraseQuery instance + new MultiPhraseQuery instance + + + + Builds a new PrefixQuery instance + Prefix term + + new PrefixQuery instance + + + + Builds a new FuzzyQuery instance + Term + + minimum similarity + + prefix length + + new FuzzyQuery Instance + + + + Builds a new TermRangeQuery instance + Field + + min + + max + + true if range is inclusive + + new TermRangeQuery instance + + + + Builds a new MatchAllDocsQuery instance + new MatchAllDocsQuery instance + + + + Builds a new WildcardQuery instance + wildcard term + + new WildcardQuery instance + + + + Factory method for generating query, given a set of clauses. + By default creates a boolean query composed of clauses passed in. + + Can be overridden by extending classes, to modify query being + returned. 
+ + + List that contains {@link BooleanClause} instances + to join. + + + Resulting {@link Query} object. + + throw in overridden method to disallow + + + + Factory method for generating query, given a set of clauses. + By default creates a boolean query composed of clauses passed in. + + Can be overridden by extending classes, to modify query being + returned. + + + List that contains {@link BooleanClause} instances + to join. + + true if coord scoring should be disabled. + + + Resulting {@link Query} object. + + throw in overridden method to disallow + + + + Factory method for generating a query. Called when parser + parses an input term token that contains one or more wildcard + characters (? and *), but is not a prefix term token (one + that has just a single * character at the end) +

+ Depending on settings, prefix term may be lower-cased + automatically. It will not go through the default Analyzer, + however, since normal Analyzers are unlikely to work properly + with wildcard templates. +

+ Can be overridden by extending classes, to provide custom handling for + wildcard queries, which may be necessary due to missing analyzer calls. + +

+ Name of the field query will use. + + Term token that contains one or more wild card + characters (? or *), but is not simple prefix term + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + +
+ + Factory method for generating a query (similar to + {@link #getWildcardQuery}). Called when parser parses an input term + token that uses prefix notation; that is, contains a single '*' wildcard + character as its last character. Since this is a special case + of generic wildcard term, and such a query can be optimized easily, + this usually results in a different query object. +

+ Depending on settings, a prefix term may be lower-cased + automatically. It will not go through the default Analyzer, + however, since normal Analyzers are unlikely to work properly + with wildcard templates. +

+ Can be overridden by extending classes, to provide custom handling for + wild card queries, which may be necessary due to missing analyzer calls. + +

+ Name of the field query will use. + + Term token to use for building term for the query + (without trailing '*' character!) + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + +
+ + Factory method for generating a query (similar to + {@link #getWildcardQuery}). Called when parser parses + an input term token that has the fuzzy suffix (~) appended. + + + Name of the field query will use. + + Term token to use for building term for the query + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + + + + Returns a String where the escape char has been + removed, or kept only once if there was a double escape. + + Supports escaped unicode characters, e. g. translates + \\u0041 to A. + + + + + Returns the numeric value of the hexadecimal character + + + Returns a String where those characters that QueryParser + expects to be escaped are escaped by a preceding \. + + + + Command line tool to test QueryParser, using {@link Lucene.Net.Analysis.SimpleAnalyzer}. + Usage:
+ java Lucene.Net.QueryParsers.QueryParser <input> +
+
+ + Constructor with user supplied CharStream. + + + Reinitialise. + + + Constructor with generated Token Manager. + + + Reinitialise. + + + Get the next Token. + + + Get the specific Token. + + + Generate ParseException. + + + Enable tracing. + + + Disable tracing. + + + Returns the analyzer. + + + Returns the field. + + + + Gets or sets the minimal similarity for fuzzy queries. + Default is 0.5f. + + + + Gets or sets the prefix length for fuzzy queries. + Returns the fuzzyPrefixLength. + + + Gets or sets the default slop for phrases. If zero, then exact phrase matches + are required. Default value is zero. + + + + Set to true to allow leading wildcard characters. +

+ When set, * or ? are allowed as + the first character of a PrefixQuery and WildcardQuery. + Note that this can produce very slow + queries on big indexes. +

+ Default: false. +

+
+ + Set to true to enable position increments in result query. +

+ When set, result phrase and multi-phrase queries will + be aware of position increments. + Useful when e.g. a StopFilter increases the position increment of + the token that follows an omitted token. +

+ Default: false. +

+
+ + Gets or sets the boolean operator of the QueryParser. + In default mode (OR_OPERATOR) terms without any modifiers + are considered optional: for example capital of Hungary is equal to + capital OR of OR Hungary.
+ In AND_OPERATOR mode terms are considered to be in conjunction: the + above mentioned query is parsed as capital AND of AND Hungary +
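A small sketch of the two modes described above (continues the earlier QueryParser sketch; DefaultOperator and AND_OPERATOR are assumed to be the property and constant names in this build):

var parser = new QueryParser(Version.LUCENE_30, "content", new StandardAnalyzer(Version.LUCENE_30));
Query orQuery = parser.Parse("capital of Hungary");   // OR mode (default): terms are optional
parser.DefaultOperator = QueryParser.AND_OPERATOR;    // switch to AND mode (assumed names)
Query andQuery = parser.Parse("capital of Hungary");  // parsed as capital AND of AND Hungary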
+
+ + Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + lower-cased or not. Default is true. + + + + By default QueryParser uses + when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + a) Runs faster b) Does not have the scarcity of terms unduly influence score + c) avoids any "TooManyBooleanClauses" exception. + However, if your application really needs to use the + old-fashioned BooleanQuery expansion rewriting and the above + points are not relevant then use this to change + the rewrite method. + + + + Gets or sets locale used by date range parsing. + + + Gets or sets the collator used to determine index term inclusion in ranges + for RangeQuerys. +

+ WARNING: Setting the rangeCollator to a non-null + collator using this method will cause every single index Term in the + Field referenced by lowerTerm and/or upperTerm to be examined. + Depending on the number of index Terms in this Field, the operation could + be very slow. + +

+ the collator to use when constructing RangeQuerys +
+ + Creates a MultiFieldQueryParser. Allows passing of a map with term to + Boost, and the boost to apply to each term. + +

+ It will, when parse(String query) is called, construct a query like this + (assuming the query consists of two terms and you specify the two fields + title and body): +

+ + + (title:term1 body:term1) (title:term2 body:term2) + + +

+ When setDefaultOperator(AND_OPERATOR) is set, the result will be: +

+ + + +(title:term1 body:term1) +(title:term2 body:term2) + + +

+ When you pass a boost (title=>5 body=>10) you can get +

+ + + +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0) + + +

+ In other words, all the query's terms must appear, but it doesn't matter + in what fields they appear. +
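A sketch of the boosted form described above (field names and boost values are illustrative; assumes the overload taking an IDictionary<string, float> of boosts and requires using System.Collections.Generic):

var boosts = new Dictionary<string, float> { { "title", 5f }, { "body", 10f } };
var fields = new[] { "title", "body" };
var parser = new MultiFieldQueryParser(Version.LUCENE_30, fields,
                                       new StandardAnalyzer(Version.LUCENE_30), boosts);
Query q = parser.Parse("albino elephant");
// Roughly: (title:albino^5.0 body:albino^10.0) (title:elephant^5.0 body:elephant^10.0)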

+

+
+ + Creates a MultiFieldQueryParser. + +

+ It will, when parse(String query) is called, construct a query like this + (assuming the query consists of two terms and you specify the two fields + title and body): +

+ + + (title:term1 body:term1) (title:term2 body:term2) + + +

+ When setDefaultOperator(AND_OPERATOR) is set, the result will be: +

+ + + +(title:term1 body:term1) +(title:term2 body:term2) + + +

+ In other words, all the query's terms must appear, but it doesn't matter + in what fields they appear. +

+

+
+ + Parses a query which searches on the fields specified. +

+ If x fields are specified, this effectively constructs: + + + (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx) + + +

+ Lucene version to match; this is passed through to + QueryParser. + + Queries strings to parse + + Fields to search on + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the queries array differs from the length of + the fields array + +
+ + Parses a query, searching on the fields specified. Use this if you need + to specify certain fields as required, and others as prohibited. +

+ Usage: + + String[] fields = {"filename", "contents", "description"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + BooleanClause.Occur.MUST, + BooleanClause.Occur.MUST_NOT}; + MultiFieldQueryParser.parse("query", fields, flags, analyzer); +

+ The code above would construct a query: + + + (filename:query) +(contents:query) -(description:query) + + +

+ Lucene version to match; this is passed through to + QueryParser. + + Query string to parse + + Fields to search on + + Flags describing the fields + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the fields array differs from the length of + the flags array + +
+ + Parses a query, searching on the fields specified. Use this if you need + to specify certain fields as required, and others as prohibited. +

+ Usage: + + String[] query = {"query1", "query2", "query3"}; + String[] fields = {"filename", "contents", "description"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + BooleanClause.Occur.MUST, + BooleanClause.Occur.MUST_NOT}; + MultiFieldQueryParser.parse(query, fields, flags, analyzer); + +

+ The code above would construct a query: + + + (filename:query1) +(contents:query2) -(description:query3) + + +

+ Lucene version to match; this is passed through to + QueryParser. + + Queries string to parse + + Fields to search on + + Flags describing the fields + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the queries, fields, and flags array differ + +
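The same flagged overload, sketched in C# (the Occur spelling is an assumption for this build; older sources write BooleanClause.Occur as in the snippets above):

string[] fields = { "filename", "contents", "description" };
Occur[] flags = { Occur.SHOULD, Occur.MUST, Occur.MUST_NOT };
Query q = MultiFieldQueryParser.Parse(Version.LUCENE_30, "query", fields, flags,
                                      new StandardAnalyzer(Version.LUCENE_30));
// Roughly: (filename:query) +(contents:query) -(description:query)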
+ + This exception is thrown when parse errors are encountered. + You can explicitly create objects of this exception type by + calling the method generateParseException in the generated + parser. + + You can modify this class to customize your error reporting + mechanisms so long as you retain the public fields. + + + + This constructor is used by the method "generateParseException" + in the generated parser. Calling this constructor generates + a new object of this type with the fields "currentToken", + "expectedTokenSequences", and "tokenImage" set. The boolean + flag "specialConstructor" is also set to true to indicate that + this constructor was used to create this object. + This constructor calls its super class with the empty string + to force the "toString" method of parent class "Throwable" to + print the error message in the form: + ParseException: <result of getMessage> + + + + The following constructors are for use by you for whatever + purpose you can think of. Constructing the exception in this + manner makes the exception behave in the normal way - i.e., as + documented in the class "Throwable". The fields "errorToken", + "expectedTokenSequences", and "tokenImage" do not contain + relevant information. The JavaCC generated code does not use + these constructors. + + + + Constructor with message. + + + Constructor with message. + + + This variable determines which constructor was used to create + this object and thereby affects the semantics of the + "getMessage" method (see below). + + + + This is the last token that has been consumed successfully. If + this object has been created due to a parse error, the token + followng this token will (therefore) be the first error token. + + + + Each entry in this array is an array of integers. Each array + of integers represents a sequence of tokens (by their ordinal + values) that is expected at this point of the parse. + + + + This is a reference to the "tokenImage" array of the generated + parser within which the parse error occurred. This array is + defined in the generated ...Constants interface. + + + + The end of line string for this machine. + + + Used to convert raw characters to their escaped version + when these raw version cannot be used as part of an ASCII + string literal. + + + + This method has the standard behavior when this object has been + created using the standard constructors. Otherwise, it uses + "currentToken" and "expectedTokenSequences" to generate a parse + error message and returns it. If this object has been created + due to a parse error, and you do not catch it (it gets thrown + from the parser), then this method is called during the printing + of the final stack trace, and hence the correct error message + gets displayed. + + + + Token Manager. + + + Debug output. + + + Set debug output. + + + Token literal values. + + + Lexer state names. + + + Lex State array. + + + Constructor. + + + Constructor. + + + Reinitialise parser. + + + Reinitialise parser. + + + Switch to specified lex state. + + + Get the next Token. + + + Describes the input token stream. + + + An integer that describes the kind of this token. This numbering + system is determined by JavaCCParser, and a table of these numbers is + stored in the file ...Constants.java. + + + + The line number of the first character of this Token. + + + The column number of the first character of this Token. + + + The line number of the last character of this Token. + + + The column number of the last character of this Token. 
+ + + The string image of the token. + + + A reference to the next regular (non-special) token from the input + stream. If this is the last token from the input stream, or if the + token manager has not read tokens beyond this one, this field is + set to null. This is true only if this token is also a regular + token. Otherwise, see below for a description of the contents of + this field. + + + + This field is used to access special tokens that occur prior to this + token, but after the immediately preceding regular (non-special) token. + If there are no such special tokens, this field is set to null. + When there are more than one such special token, this field refers + to the last of these special tokens, which in turn refers to the next + previous special token through its specialToken field, and so on + until the first special token (whose specialToken field is null). + The next fields of special tokens refer to other special tokens that + immediately follow it (without an intervening regular token). If there + is no such token, this field is null. + + + + No-argument constructor + + + Constructs a new token for the specified Image. + + + Constructs a new token for the specified Image and Kind. + + + Returns the image. + + + Returns a new Token object, by default. However, if you want, you + can create and return subclass objects based on the value of ofKind. + Simply add the cases to the switch for all those special cases. + For example, if you have a subclass of Token called IDToken that + you want to create if ofKind is ID, simply add something like : + + case MyParserConstants.ID : return new IDToken(ofKind, image); + + to the following switch statement. Then you can cast matchedToken + variable to the appropriate type and use sit in your lexical actions. + + + + An optional attribute value of the Token. + Tokens which are not used as syntactic sugar will often contain + meaningful values that will be used later on by the compiler or + interpreter. This attribute value is often different from the image. + Any subclass of Token that actually wants to return a non-null value can + override this method as appropriate. + + + + Token Manager Error. + + + Lexical error occurred. + + + An attempt was made to create a second instance of a static token manager. + + + Tried to change to an invalid lexical state. + + + Detected (and bailed out of) an infinite loop in the token manager. + + + Indicates the reason why the exception is thrown. It will have + one of the above 4 values. + + + + Replaces unprintable characters by their escaped (or unicode escaped) + equivalents in the given string + + + + Returns a detailed message for the Error when it is thrown by the + token manager to indicate a lexical error. + Parameters : + EOFSeen : indicates if EOF caused the lexical error + curLexState : lexical state in which this error occurred + errorLine : line number when the error occurred + errorColumn : column number when the error occurred + errorAfter : prefix that was seen before this error occurred + curchar : the offending character + Note: You can customize the lexical error message by modifying this method. + + + + No arg constructor. + + + Constructor with message and reason. + + + Full Constructor. + + + You can also modify the body of this method to customize your error messages. + For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + of end-users concern, so you can return something like : + + "Internal Error : Please file a bug report .... 
" + + from this method for such cases in the release version of your parser. + + + + A clause in a BooleanQuery. + + + Constructs a BooleanClause. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + The query whose matching documents are combined by the boolean query. + + + A Query that matches documents matching boolean combinations of other + queries, e.g. s, s or other + BooleanQuerys. + + + + The abstract base class for queries. +

Instantiable subclasses are: + + + + + + + + + + + + + +

A parser for queries is contained in: + + QueryParser + +

+
+ + Prints a query to a string, with field assumed to be the + default field and omitted. +

The representation used is one that is supposed to be readable + by QueryParser. However, + there are the following limitations: + + If the query was created by the parser, the printed + representation may not be exactly what was parsed. For example, + characters that need to be escaped will be represented without + the required backslash. + Some of the more complicated queries (e.g. span queries) + don't have a representation that can be parsed by QueryParser. + +

+
+ + Prints a query to a string. + + + Expert: Constructs an appropriate Weight implementation for this query. + +

+ Only implemented by primitive queries, which re-write to themselves. +

+
+ + Expert: Constructs and initializes a Weight for a top-level query. + + + Expert: called to re-write queries into primitive queries. For example, + a PrefixQuery will be rewritten into a BooleanQuery that consists + of TermQuerys. + + + + Expert: called when re-writing queries under MultiSearcher. + + Create a single query suitable for use by all subsearchers (in 1-1 + correspondence with queries). This is an optimization of the OR of + all queries. We handle the common optimization cases of equal + queries and overlapping clauses of boolean OR queries (as generated + by MultiTermQuery.rewrite()). + Be careful overriding this method as queries[0] determines which + method will be called and is not necessarily of the same type as + the other queries. + + + + Expert: adds all terms occuring in this query to the terms set. Only + works if this query is in its rewritten form. + + + UnsupportedOperationException if this query is not yet rewritten + + + Expert: merges the clauses of a set of BooleanQuery's into a single + BooleanQuery. + +

A utility for use by implementations. +

+
+ + Expert: Returns the Similarity implementation to be used for this query. + Subclasses may override this method to specify their own Similarity + implementation, perhaps one that delegates through that of the Searcher. + By default the Searcher's Similarity implementation is returned. + + + + Returns a clone of this query. + + + Gets or sets the boost for this query clause to b. Documents + matching this clause will (in addition to the normal weightings) have + their score multiplied by b. The boost is 1.0 by default. + + + + Constructs an empty boolean query. + + + Constructs an empty boolean query. + + may be disabled in scoring, as + appropriate. For example, this score factor does not make sense for most + automatically generated queries, like and + . + + + disables in scoring. + + + + Returns true iff is disabled in + scoring for this query instance. + + + + + + Adds a clause to a boolean query. + + + TooManyClauses if the new number of clauses exceeds the maximum clause number + + + + + Adds a clause to a boolean query. + TooManyClauses if the new number of clauses exceeds the maximum clause number + + + + + Returns the set of clauses in this query. + + + + Returns an iterator on the clauses in this query. + + + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Gets or sets the maximum number of clauses permitted, 1024 by default. + Attempts to add more than the permitted number of clauses cause + to be thrown. + + + + + Specifies a minimum number of the optional BooleanClauses + which must be satisfied. + + By default no optional clauses are necessary for a match + (unless there are no required clauses). If this method is used, + then the specified number of clauses is required. + + + Use of this method is totally independent of specifying that + any specific clauses are required (or prohibited). This number will + only be compared against the number of matching optional clauses. + + + + + Returns the list of clauses in this query. + + + Expert: Delegating scoring implementation. Useful in + implementations, to override only certain + methods of a Searcher's Similiarty implementation.. + + + + Expert: Scoring API. +
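A brief sketch of building such a query in C# (Term lives in Lucene.Net.Index; the Occur enum spelling and the MinimumNumberShouldMatch property name are assumptions for this build):

var bq = new BooleanQuery();
bq.Add(new TermQuery(new Term("body", "lucene")), Occur.MUST);       // required
bq.Add(new TermQuery(new Term("body", "search")), Occur.SHOULD);     // optional
bq.Add(new TermQuery(new Term("body", "apache")), Occur.SHOULD);     // optional
bq.Add(new TermQuery(new Term("body", "jakarta")), Occur.MUST_NOT);  // prohibited
bq.MinimumNumberShouldMatch = 1;  // at least one of the SHOULD clauses must match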

Subclasses implement search scoring. + +

The score of query q for document d correlates to the + cosine-distance or dot-product between document and query vectors in a + + Vector Space Model (VSM) of Information Retrieval. + A document whose vector is closer to the query vector in that model is scored higher. + + The score is computed as follows: + +

+ + +
+ + + + + + + + + + + +
+ score(q,d)   =   coord(q,d) · queryNorm(q) · Σ (t in q) [ tf(t in d) · idf(t)² · t.Boost · norm(t,d) ]
+
+ +

where + + + + tf(t in d) + correlates to the term's frequency, + defined as the number of times term t appears in the currently scored document d. + Documents that have more occurrences of a given term receive a higher score. + The default computation for tf(t in d) in + DefaultSimilarity is: + +
 
+ + + + + +
+ tf(t in d)   =   frequency½
+
 
+
+ + + + idf(t) stands for Inverse Document Frequency. This value + correlates to the inverse of docFreq + (the number of documents in which the term t appears). + This means rarer terms give higher contribution to the total score. + The default computation for idf(t) in + DefaultSimilarity is: + +
 
+ + + + + + + +
+ idf(t)   =   1 + log( numDocs / (docFreq + 1) )
+
 
+
+ + + + coord(q,d) + is a score factor based on how many of the query terms are found in the specified document. + Typically, a document that contains more of the query's terms will receive a higher score + than another document with fewer query terms. + This is a search time factor computed in + coord(q,d) + by the Similarity in effect at search time. +
 
+
+ + + + queryNorm(q) + + is a normalizing factor used to make scores between queries comparable. + This factor does not affect document ranking (since all ranked documents are multiplied by the same factor), + but rather just attempts to make scores from different queries (or even different indexes) comparable. + This is a search time factor computed by the Similarity in effect at search time. + + The default computation in + DefaultSimilarity + is: +
 
+ + + + + +
+ queryNorm(q)   =   queryNorm(sumOfSquaredWeights)   =   1 / sumOfSquaredWeights½
+
+
 
+ + The sum of squared weights (of the query terms) is + computed by the query object. + For example, a boolean query + computes this value as: + +
 
+ + + + + + + + + + + +
+ GetSumOfSquaredWeights   =   q.Boost² · Σ (t in q) [ idf(t) · t.Boost ]²
+
 
+ +
+ + + + t.Boost + is a search time boost of term t in the query q as + specified in the query text + (see query syntax), + or as set by application calls to + . + Notice that there is really no direct API for accessing a boost of one term in a multi term query, + but rather multi terms are represented in a query as multi + TermQuery objects, + and so the boost of a term in the query is accessible by calling the sub-query + . +
 
+
+ + + + norm(t,d) encapsulates a few (indexing time) boost and length factors: + + + Document boost - set by calling + doc.Boost + before adding the document to the index. + + Field boost - set by calling + field.Boost + before adding the field to a document. + + LengthNorm(field) - computed + when the document is added to the index in accordance with the number of tokens + of this field in the document, so that shorter fields contribute more to the score. + LengthNorm is computed by the Similarity class in effect at indexing. + + + +

+ When a document is added to the index, all the above factors are multiplied. + If the document has multiple fields with the same name, all their boosts are multiplied together: + +
 
+ + + + + + + + + + + +
+ norm(t,d)   =   doc.Boost · LengthNorm(field) · Π (field f in d named as t) field.Boost
+
 
+ However the resulting norm value is encoded as a single byte before being stored. At search time, the norm byte value is read from the index directory and decoded back to a float norm value. This encoding/decoding, while reducing index size, comes with the price of precision loss - it is not guaranteed that decode(encode(x)) = x. For instance, decode(encode(0.89)) = 0.75. Also notice that search time is too late to modify this norm part of scoring, e.g. by using a different Similarity for search. +
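To tie the factors above to code, here is a rough sketch of a DefaultSimilarity subclass that changes two of them, and how it might be attached to a searcher (method signatures and the Similarity property follow the Lucene.Net 3.x conventions used in these docs and should be treated as assumptions; needs using System):

public class FlatLengthSimilarity : DefaultSimilarity
{
    // Ignore field length entirely instead of the default 1/sqrt(numTerms).
    public override float LengthNorm(string fieldName, int numTerms)
    {
        return 1.0f;
    }

    // Dampen repeated terms compared to the default sqrt(freq).
    public override float Tf(float freq)
    {
        return freq > 0 ? 1.0f + (float)Math.Log(freq) : 0.0f;
    }
}

// Usage sketch: searcher.Similarity = new FlatLengthSimilarity();
// Because norms are written at indexing time, a LengthNorm change only takes
// full effect after re-indexing, as noted above.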
 
+ + + +

+ + + + + + +
+ + The Similarity implementation used by default. + + + Cache of decoded bytes. + + + Decodes a normalization factor stored in an index. + + + + + Returns a table for decoding normalization bytes. + + + + + Compute the normalization value for a field, given the accumulated + state of term processing for this field (see ). + +

Implementations should calculate a float value based on the field + state and then return that value. + +

For backward compatibility this method by default calls + passing + as the second argument, and + then multiplies this value by .

+ +

WARNING: This API is new and experimental and may + suddenly change.

+ +

+ field name + + current processing state for this field + + the calculated float norm + +
+ + Computes the normalization value for a field given the total number of + terms contained in a field. These values, together with field boosts, are + stored in an index and multipled into scores for hits on each field by the + search code. + +

Matches in longer fields are less precise, so implementations of this + method usually return smaller values when numTokens is large, + and larger values when numTokens is small. + +

Note that the return values are computed under + + and then stored using + . + Thus they have limited precision, and documents + must be re-indexed if this method is altered. + +

+ the name of the field + + the total number of tokens contained in fields named + fieldName of doc. + + a normalization factor for hits on this field of this document + + + +
+ + Computes the normalization value for a query given the sum of the squared + weights of each of the query terms. This value is then multipled into the + weight of each query term. + +

This does not affect ranking, but rather just attempts to make scores + from different queries comparable. + +

+ the sum of the squares of query term weights + + a normalization factor for query weights + +
+ + Encodes a normalization factor for storage in an index. + +

The encoding uses a three-bit mantissa, a five-bit exponent, and + the zero-exponent point at 15, thus + representing values from around 7x10^9 to 2x10^-9 with about one + significant decimal digit of accuracy. Zero is also represented. + Negative numbers are rounded up to zero. Values too large to represent + are rounded down to the largest representable value. Positive values too + small to represent are rounded up to the smallest positive representable + value. + +

+ + +
+ + Computes a score factor based on a term or phrase's frequency in a + document. This value is multiplied by the + factor for each term in the query and these products are then summed to + form the initial score for a document. + +

Terms and phrases repeated in a document indicate the topic of the + document, so implementations of this method usually return larger values + when freq is large, and smaller values when freq + is small. + +

The default implementation calls . + +

+ the frequency of a term within a document + + a score factor based on a term's within-document frequency + +
+ + Computes the amount of a sloppy phrase match, based on an edit distance. + This value is summed for each sloppy phrase match in a document to form + the frequency that is passed to . + +

A phrase match with a small edit distance to a document passage more + closely matches the document, so implementations of this method usually + return larger values when the edit distance is small and smaller values + when it is large. + +

+ + the edit distance of this sloppy phrase match + the frequency increment for this match +
+ + Computes a score factor based on a term or phrase's frequency in a + document. This value is multiplied by the + factor for each term in the query and these products are then summed to + form the initial score for a document. + +

Terms and phrases repeated in a document indicate the topic of the + document, so implementations of this method usually return larger values + when freq is large, and smaller values when freq + is small. + +

+ the frequency of a term within a document + + a score factor based on a term's within-document frequency + +
+ + Computes a score factor for a simple term and returns an explanation + for that score factor. + +

+ The default implementation uses: + + + idf(searcher.docFreq(term), searcher.MaxDoc); + + + Note that is used instead of + because it is + proportional to , i.e., when one is + inaccurate, so is the other, and in the same direction. + +

+ the term in question + + the document collection being searched + + an IDFExplain object that includes both an idf score factor + and an explanation for the term. + + IOException +
+ + Computes a score factor for a phrase. + +

+ The default implementation sums the idf factor for + each term in the phrase. + +

+ the terms in the phrase + + the document collection being searched + + an IDFExplain object that includes both an idf + score factor for the phrase and an explanation + for each term. + + IOException +
+ + Computes a score factor based on a term's document frequency (the number + of documents which contain the term). This value is multiplied by the + factor for each term in the query and these products are + then summed to form the initial score for a document. + +

Terms that occur in fewer documents are better indicators of topic, so + implementations of this method usually return larger values for rare terms, + and smaller values for common terms. + +

+ the number of documents which contain the term + + the total number of documents in the collection + + a score factor based on the term's document frequency + +
+ + Computes a score factor based on the fraction of all query terms that a + document contains. This value is multiplied into scores. + +

The presence of a large portion of the query terms indicates a better + match with the query, so implementations of this method usually return + larger values when the ratio between these parameters is large and smaller + values when the ratio between them is small. + +

+ the number of query terms matched in the document + + the total number of terms in the query + + a score factor based on term overlap with the query + +
+ + Calculate a scoring factor based on the data in the payload. Overriding implementations + are responsible for interpreting what is in the payload. Lucene makes no assumptions about + what is in the byte array. +

+ The default implementation returns 1. + +

+ The docId currently being scored. If this value is , then it should be assumed that the PayloadQuery implementation does not provide document information + + The fieldName of the term this payload belongs to + + The start position of the payload + + The end position of the payload + + The payload byte array to be scored + + The offset into the payload array + + The length in the array + + An implementation dependent float to be used as a scoring factor + + +
+ + Gets or sets the default Similarity implementation + used by indexing and search code. +

This is initially an instance of . +

+ + + + +
+ + Small Util class used to pass both an idf factor as well as an + explanation for that factor. + + This class will likely be held on a , so be aware + before storing any large or un-serializable fields. + + + + + Expert: Describes the score computation for document and query. + + + The sub-nodes of this explanation node. + + + Adds a sub-node to this explanation node. + + + Render an explanation as text. + + + Render an explanation as HTML. + + + Indicates whether or not this Explanation models a good match. + +

+ By default, an Explanation represents a "match" if the value is positive. +

+

+ + +
+ + The value assigned to this explanation node. + + + A description of this explanation node. + + + A short one line summary which should contain all high level + information about this Explanation, without the "Details" + + + + Small Util class used to pass both an idf factor as well as an + explanation for that factor. + + This class will likely be held on a , so be aware + before storing any large or un-serializable fields. + + + + + This should be calculated lazily if possible. + + + the explanation for the idf factor. + + + + the idf factor + + + Construct a that delegates all methods to another. + the Similarity implementation to delegate to + + + Thrown when an attempt is made to add more than + clauses. This typically happens if + a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery + is expanded to many terms during search. + + + + Expert: the Weight for BooleanQuery, used to + normalize, score and explain these queries. + +

NOTE: this API and implementation is subject to + change suddenly in the next release.

+

+
+ + Expert: Calculate query weights and build query scorers. +

+ The purpose of is to ensure searching does not + modify a , so that a instance can be reused.
+ dependent state of the query should reside in the + .
+ dependent state should reside in the . +

+ A Weight is used in the following way: + + A Weight is constructed by a top-level query, given a + Searcher (). + The method is called on the + Weight to compute the query normalization factor + of the query clauses contained in the + query. + The query normalization factor is passed to . At + this point the weighting is complete. + A Scorer is constructed by . + + +

+ 2.9 + +
+ + An explanation of the score computation for the named document. + + + sub-reader containing the give doc + + + + an Explanation for the score + + IOException + + + Assigns the query normalization factor to this. + + + Returns a which scores documents in/out-of order according + to scoreDocsInOrder. +

+ NOTE: even if scoreDocsInOrder is false, it is + recommended to check whether the returned Scorer indeed scores + documents out of order (i.e., call ), as + some Scorer implementations will always return documents + in-order.
+ NOTE: null can be returned if no documents will be scored by this + query. + +

+ + the for which to return the . + + specifies whether in-order scoring of documents is required. Note + that if set to false (i.e., out-of-order scoring is required), + this method can return whatever scoring mode it supports, as every + in-order scorer is also an out-of-order one. However, an + out-of-order scorer may not support + and/or , therefore it is recommended to + request an in-order scorer if use of these methods is required. + + + if true, will be called; if false, + and/or will + be called. + + a which scores documents in/out-of order. + + IOException +
+ + The sum of squared weights of contained query clauses. + + + Returns true iff this implementation scores docs only out of order. This + method is used in conjunction with 's + AcceptsDocsOutOfOrder and + to + create a matching instance for a given , or + vice versa. +

+ NOTE: the default implementation returns false, i.e. + the Scorer scores documents in-order. +

+
+ + The query that this concerns. + + + The weight for this query. + + + The Similarity implementation. + + + Expert: Common scoring functionality for different types of queries. + +

+ A Scorer iterates over documents matching a + query in increasing order of doc Id. +

+

+ Document scores are computed using a given Similarity + implementation. +

+ +

NOTE: The values Float.Nan, + Float.NEGATIVE_INFINITY and Float.POSITIVE_INFINITY are + not valid scores. Certain collectors (eg + ) will not properly collect hits + with these scores. +

+
+ + This abstract class defines methods to iterate over a set of non-decreasing + doc ids. Note that this class assumes it iterates on doc Ids, and therefore + is set to Int32.MaxValue in order to be used as + a sentinel object. Implementations of this class are expected to consider + as an invalid value. + + + + When returned by , and + it means there are no more docs in the iterator. + + + + Returns the following: + + -1 or if or + were not called yet. + if the iterator has exhausted. + Otherwise it should return the doc ID it is currently on. + +

+

+
+ + Advances to the next document in the set and returns the doc it is + currently on, or if there are no more docs in the + set.
+ + NOTE: after the iterator has exhausted you should not call this + method, as it may result in unpredicted behavior. + +
+
+ + Advances to the first beyond the current whose document number is greater + than or equal to target. Returns the current document number or + if there are no more docs in the set. +

+ Behaves as if written: + + + int advance(int target) { + int doc; + while ((doc = nextDoc()) < target) { + } + return doc; + } + + + Some implementations are considerably more efficient than that. +

+ NOTE: certain implemenations may return a different value (each + time) if called several times in a row with the same target. +

+ NOTE: this method may be called with for + efficiency by some Scorers. If your implementation cannot efficiently + determine that it should exhaust, it is recommended that you check for that + value in each call to this method. +

+ NOTE: after the iterator has exhausted you should not call this + method, as it may result in unpredicted behavior. +

+ +

+ 2.9 +
+ + Constructs a Scorer. + The Similarity implementation used by this scorer. + + + + Scores and collects all matching documents. + The collector to which all matching documents are passed. + + + + Expert: Collects matching documents in a range. Hook for optimization. + Note, is added to ensure that + was called before this method. + + + The collector to which all matching documents are passed. + + Do not score documents past this. + + + The first document ID (ensures is called before + this method. + + true if more matching documents may remain. + + + + Returns the score of the current document matching the query. + Initially invalid, until or + is called the first time, or when called from within + . + + + + Returns the Similarity implementation used by this scorer. + + +

Expert: Collectors are primarily meant to be used to + gather raw results from a search, and implement sorting + or custom result filtering, collation, etc.

+ +

Lucene's core collectors are derived from Collector. + Likely your application can use one of these classes, or + subclass , instead of + implementing Collector directly: + + + + is an abstract base class + that assumes you will retrieve the top N docs, + according to some criteria, after collection is + done. + + is a concrete subclass + and sorts according to score + + docID. This is used internally by the + search methods that do not take an + explicit . It is likely the most frequently + used collector. + + subclasses + and sorts according to a specified + object (sort by field). This is used + internally by the search methods + that take an explicit . + + , which wraps any other + Collector and aborts the search if it's taken too much + time. + + wraps any other + Collector and prevents collection of hits whose score + is <= 0.0 + + + +

Collector decouples the score from the collected doc: + the score computation is skipped entirely if it's not + needed. Collectors that do need the score should + implement the method, to hold onto the + passed instance, and call + within the collect method to compute the + current hit's score. If your collector may request the + score for a single hit multiple times, you should use + .

+ +

NOTE: The doc that is passed to the collect + method is relative to the current reader. If your + collector needs to resolve this to the docID space of the + Multi*Reader, you must re-base it by recording the + docBase from the most recent setNextReader call. Here's + a simple example showing how to collect docIDs into a + BitSet:

+ + + Searcher searcher = new IndexSearcher(indexReader); + final BitSet bits = new BitSet(indexReader.MaxDoc); + searcher.search(query, new Collector() { + private int docBase; + + // ignore scorer + public void setScorer(Scorer scorer) { + } + + // accept docs out of order (for a BitSet it doesn't matter) + public boolean acceptsDocsOutOfOrder() { + return true; + } + + public void collect(int doc) { + bits.set(doc + docBase); + } + + public void setNextReader(IndexReader reader, int docBase) { + this.docBase = docBase; + } + }); + + +
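For comparison, the same idea written against the Lucene.Net Collector class (a sketch; AcceptsDocsOutOfOrder is assumed to be a read-only property in this build, and BitArray stands in for the Java BitSet):

using System.Collections;
using Lucene.Net.Index;
using Lucene.Net.Search;

public class BitSetCollector : Collector
{
    private readonly BitArray bits;
    private int docBase;

    public BitSetCollector(int maxDoc)
    {
        bits = new BitArray(maxDoc);
    }

    public override void SetScorer(Scorer scorer) { /* scores not needed */ }

    public override bool AcceptsDocsOutOfOrder
    {
        get { return true; } // order does not matter when only setting bits
    }

    public override void Collect(int doc)
    {
        bits.Set(doc + docBase, true); // re-base to the top-level docID space
    }

    public override void SetNextReader(IndexReader reader, int docBase)
    {
        this.docBase = docBase;
    }
}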

Not all collectors will need to rebase the docID. For + example, a collector that simply counts the total number + of hits would skip it.

+ +

NOTE: Prior to 2.9, Lucene silently filtered + out hits with score <= 0. As of 2.9, the core Collectors + no longer do that. It's very unusual to have such hits + (a negative query boost, or function query returning + negative custom scores, could cause it to happen). If + you need that behavior, use + .

+ +

NOTE: This API is experimental and might change + in incompatible ways in the next release.

+ +

+ 2.9 + +
+ + Called before successive calls to . Implementations + that need the score of the current document (passed-in to + ), should save the passed-in Scorer and call + scorer.score() when needed. + + + + Called once for every document matching a query, with the unbased document + number. + +

+ Note: This is called in an inner search loop. For good search performance, + implementations of this method should not call or + on every hit. + Doing so can slow searches by an order of magnitude or more. +

+
+ + Called before collecting from each IndexReader. All doc ids in + will correspond to reader. + + Add docBase to the current IndexReaders internal document id to re-base ids + in . + + + next IndexReader + + + + + + + Return true if this collector does not + require the matching docIDs to be delivered in int sort + order (smallest to largest) to . +

Most Lucene Query implementations will visit + matching docIDs in order. However, some queries + (currently limited to certain cases of ) + can achieve faster searching if the + Collector allows them to deliver the + docIDs out of order. +

Many collectors don't mind getting docIDs out of + order, so it's important to return true + here. +

+ +
+ + A simple hash table of document scores within a range. + + + An alternative to BooleanScorer that also allows a minimum number + of optional scorers that should match. +
Implements skipTo(), and has no limitations on the numbers of added scorers. +
Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. +
+
+ + The scorer to which all scoring will be delegated, + except for computing and using the coordination factor. + + + + The number of optionalScorers that need to match (if there are any) + + + Creates a with the given similarity and lists of required, + prohibited and optional scorers. In no required scorers are added, at least + one of the optional scorers will have to match during the search. + + + The similarity to be used. + + The minimum number of optional added scorers that should match + during the search. In case no required scorers are added, at least + one of the optional scorers will have to match during the search. + + the list of required scorers. + + the list of prohibited scorers. + + the list of optional scorers. + + + + Returns the scorer to be used for match counting and score summing. + Uses requiredScorers, optionalScorers and prohibitedScorers. + + + + Returns the scorer to be used for match counting and score summing. + Uses the given required scorer and the prohibitedScorers. + + A required scorer already built. + + + + Scores and collects all matching documents. + The collector to which all matching documents are passed through. + + + + A Scorer for OR like queries, counterpart of ConjunctionScorer. + This Scorer implements and uses skipTo() on the given Scorers. + + + + The number of subscorers. + + + The subscorers. + + + The minimum number of scorers that should match. + + + The scorerDocQueue contains all subscorers ordered by their current doc(), + with the minimum at the top. +
The scorerDocQueue is initialized the first time next() or skipTo() is called. +
An exhausted scorer is immediately removed from the scorerDocQueue. +
If less than the minimumNrMatchers scorers + remain in the scorerDocQueue next() and skipTo() return false. +

+ After each to call to next() or skipTo() + currentSumScore is the total score of the current matching doc, + nrMatchers is the number of matching scorers, + and all scorers are after the matching doc, or are exhausted. +

+
+ + The document number of the current match. + + + The number of subscorers that provide the current match. + + + Construct a DisjunctionScorer. + A collection of at least two subscorers. + + The positive minimum number of subscorers that should + match to match this query. +
When minimumNrMatchers is bigger than + the number of subScorers, + no matches will be produced. +
When minimumNrMatchers equals the number of subScorers, + it is more efficient to use ConjunctionScorer. +
+ + Construct a DisjunctionScorer, using one as the minimum number + of matching subscorers. + + + + Called the first time next() or skipTo() is called to + initialize scorerDocQueue. + + + + Scores and collects all matching documents. + The collector to which all matching documents are passed through. + + + Expert: Collects matching documents in a range. Hook for optimization. + Note that must be called once before this method is called + for the first time. + + The collector to which all matching documents are passed through. + + Do not score documents past this. + + + true if more matching documents may remain. + + + + Advance all subscorers after the current document determined by the + top of the scorerDocQueue. + Repeat until at least the minimum number of subscorers match on the same + document and all subscorers are after that document or are exhausted. +
On entry the scorerDocQueue has at least minimumNrMatchers + available. At least the scorer with the minimum document number will be advanced. +
+ true iff there is a match. +
In case there is a match, currentDoc, currentSumScore, + and nrMatchers describe the match. + + TODO: Investigate whether it is possible to use skipTo() when + the minimum number of matchers is bigger than one, ie. try and use the + character of ConjunctionScorer for the minimum number of matchers. + Also delay calling score() on the sub scorers until the minimum number of + matchers is reached. +
For this, a Scorer array with minimumNrMatchers elements might + hold Scorers at currentDoc that are temporarily popped from scorerQueue. +
+
+ + Returns the score of the current document matching the query. + Initially invalid, until is called the first time. + + + + Returns the number of subscorers matching the current document. + Initially invalid, until is called the first time. + + + + Advances to the first match beyond the current whose document number is + greater than or equal to a given target.
+ The implementation uses the skipTo() method on the subscorers. + +
+ The target document number. + + the document whose number is greater than or equal to the given + target, or -1 if none exist. + +
+ + Scorer for conjunctions, sets of queries, all of which are required. + + + Count a scorer as a single match. + + + Wraps another SpanFilter's result and caches it. The purpose is to allow + filters to simply filter, and then wrap with this class to add caching. + + + + Abstract base class providing a mechanism to restrict searches to a subset + of an index and also maintains and returns position information. + This is useful if you want to compare the positions from a SpanQuery with the positions of items in + a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, + and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could + then compare position information for post processing. + + + + Abstract base class for restricting which documents may be returned during searching. + + + + Creates a enumerating the documents that should be + permitted in search results. NOTE: null can be + returned if no documents are accepted by this Filter. +

+ Note: This method will be called once per segment in + the index during searching. The returned + must refer to document IDs for that segment, not for + the top-level reader. +

+ a DocIdSet that provides the documents which should be permitted or + prohibited in search results. NOTE: null can be returned if + no documents will be accepted by this Filter. + + + A instance opened on the index currently + searched on. Note, it is likely that the provided reader does not + represent the whole underlying index i.e. if the index has more than + one segment the given reader only represents a single segment. + + + +
+ + Returns a SpanFilterResult with true for documents which should be permitted in + search results, and false for those that should not and Spans for where the true docs match. + + The to load position and DocIdSet information from + + A + + java.io.IOException if there was an issue accessing the necessary information + + + + + A transient Filter cache (internal because of test) + + + + New deletions always result in a cache miss, by default + (. + Filter to cache results of + + + + + New deletions always result in a cache miss, specify the + Filter to cache results of + See + + + Wraps another filter's result and caches it. The purpose is to allow + filters to simply filter, and then wrap with this class to add caching. + + + + + New deletes are ignored by default, which gives higher + cache hit rate on reopened readers. Most of the time + this is safe, because the filter will be AND'd with a + Query that fully enforces deletions. If instead you + need this filter to always enforce deletions, pass + either or + . + + Filter to cache results of + + + + + Expert: by default, the cached filter will be shared + across reopened segments that only had changes to their + deletions. + + Filter to cache results of + See + + + + Provide the DocIdSet to be cached, using the DocIdSet provided + by the wrapped Filter. + This implementation returns the given DocIdSet. + + + + + Expert: Specifies how new deletions against a reopened + reader should be handled. + + The default is IGNORE, which means the cache entry + will be re-used for a given segment, even when that + segment has been reopened due to changes in deletions. + This is a big performance gain, especially with + near-real-timer readers, since you don't hit a cache + miss on every reopened reader for prior segments. + + However, in some cases this can cause invalid query + results, allowing deleted documents to be returned. + This only happens if the main query does not rule out + deleted documents on its own, such as a toplevel + ConstantScoreQuery. To fix this, use RECACHE to + re-create the cached filter (at a higher per-reopen + cost, but at faster subsequent search performance), or + use DYNAMIC to dynamically intersect deleted docs (fast + reopen time but some hit to search performance). + + + + Abstract decorator class for a DocIdSet implementation + that provides on-demand filtering/validation + mechanism on a given DocIdSet. + +
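A short usage sketch of the caching wrapper described above (QueryWrapperFilter and the searcher and query variables are illustrative, reused from the earlier sketches):

Filter publishedOnly = new QueryWrapperFilter(new TermQuery(new Term("status", "published")));
Filter cached = new CachingWrapperFilter(publishedOnly);  // new deletions ignored by default
TopDocs hits = searcher.Search(query, cached, 10);
// Reusing the same 'cached' instance across searches reuses the per-segment DocIdSet.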

+ + Technically, this same functionality could be achieved + with ChainedFilter (under contrib/misc), however the + benefit of this class is it never materializes the full + bitset for the filter. Instead, the + method is invoked on-demand, per docID visited during + searching. If you know few docIDs will be visited, and + the logic behind is relatively costly, + this may be a better way to filter than ChainedFilter. + +

+ + +
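+ A hedged illustration of the on-demand validation described above (a minimal sketch, not the library's own example): subclass FilteredDocIdSet and override Match. The even-docID predicate is purely hypothetical, and the required access modifier for Match can differ between Lucene.Net versions.
+ 
+     using Lucene.Net.Search;
+ 
+     // Wraps an existing DocIdSet and validates each docID on demand,
+     // instead of materializing a full bitset for the combined filter.
+     public class EvenDocIdSet : FilteredDocIdSet
+     {
+         public EvenDocIdSet(DocIdSet innerSet) : base(innerSet) { }
+ 
+         // Called per docID visited during searching; keep this cheap.
+         // NOTE: some versions declare Match as protected rather than public.
+         public override bool Match(int docid)
+         {
+             return (docid & 1) == 0; // hypothetical rule: accept even docIDs only
+         }
+     }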
+ + A DocIdSet contains a set of doc ids. Implementing classes must + only implement to provide access to the set. + + + + An empty instance for easy use, e.g. in Filters that hit no documents. + + + Provides a to access the set. + This implementation can return null or + EMPTY_DOCIDSET.Iterator() if there + are no docs that match. + + + + This method is a hint for , if this DocIdSet + should be cached without copying it into a BitSet. The default is to return + false. If you have an own DocIdSet implementation + that does its iteration very effective and fast without doing disk I/O, + override this method and return true. + + + + Constructor. + Underlying DocIdSet + + + + Validation method to determine whether a docid should be in the result set. + docid to be tested + + true if input docid should be in the result set, false otherwise. + + + + Implementation of the contract to build a DocIdSetIterator. + + + + + + + This DocIdSet implementation is cacheable if the inner set is cacheable. + + + Abstract decorator class of a DocIdSetIterator + implementation that provides on-demand filter/validation + mechanism on an underlying DocIdSetIterator. See + . + + + + Constructor. + Underlying DocIdSetIterator. + + + + Validation method to determine whether a docid should be in the result set. + docid to be tested + + true if input docid should be in the result set, false otherwise. + + + + + + Expert: Describes the score computation for document and query, and + can distinguish a match independent of a positive value. + + + + The match status of this explanation node. + May be null if match status is unknown + + + + Indicates whether or not this Explanation models a good match. + +

+ If the match status is explicitly set (i.e.: not null) this method + uses it; otherwise it defers to the superclass. +

+

+
+ + A query that wraps a filter and simply returns a constant score equal to the + query boost for every document in the filter. + + + + Prints a user-readable version of this query. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Returns the encapsulated filter + + + Expert: Default scoring implementation. + + + Implemented as + state.getBoost()*lengthNorm(numTerms), where + numTerms is if + is false, else it's + - + . + +

WARNING: This API is new and experimental, and may suddenly + change.

+

+
+ + Implemented as 1/sqrt(numTerms). + + + Implemented as 1/sqrt(sumOfSquaredWeights). + + + Implemented as sqrt(freq). + + + Implemented as 1 / (distance + 1). + + + Implemented as log(numDocs/(docFreq+1)) + 1. + + + Implemented as overlap / maxOverlap. + + + + + + + Determines whether overlap tokens (Tokens with + 0 position increment) are ignored when computing + norm. By default this is false, meaning overlap + tokens are counted just like non-overlap tokens. + +

WARNING: This API is new and experimental, and may suddenly + change.

+ +

+ + +
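+ To make the formulas above concrete, here is a minimal sketch (assuming the Lucene.Net port of this API) of a Similarity that keeps DefaultSimilarity's behaviour but disables length normalization, so the 1/sqrt(numTerms) factor becomes a constant 1. Whether the searcher exposes a Similarity property or a SetSimilarity() method depends on the version.
+ 
+     using Lucene.Net.Search;
+ 
+     // Length normalization disabled: long and short documents are treated alike.
+     public class NoLengthNormSimilarity : DefaultSimilarity
+     {
+         public override float LengthNorm(string fieldName, int numTerms)
+         {
+             return 1.0f; // instead of 1/sqrt(numTerms)
+         }
+     }
+ 
+     // Usage (property vs. SetSimilarity() varies by version):
+     // searcher.Similarity = new NoLengthNormSimilarity();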
+ + A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum + score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. + This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be + combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost, + not the sum of the field scores (as BooleanQuery would give). + If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching + another gets a higher score than "albino" matching both fields. + To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in + each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery. + The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that + include this term in only the best of those multiple fields, without confusing this with the better case of two different terms + in the multiple fields. + + + + Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. + the score of each non-maximum disjunct for a document is multiplied by this weight + and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that + 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique + word in the lower scored field (i.e., one that is not in any higher scored field. + + + + Creates a new DisjunctionMaxQuery + a Collection<Query> of all the disjuncts to add + + the weight to give to each matching non-maximum disjunct + + + + Add a subquery to this disjunction + the disjunct added + + + + Add a collection of disjuncts to this disjunction + via Iterable + + + + An Iterator<Query> over the disjuncts + + + Optimize our representation and our subqueries representations + the IndexReader we query + + an optimized copy of us (which may not be a copy if there is nothing to optimize) + + + + Create a shallow copy of us -- used in rewriting if necessary + a copy of us (but reuse, don't copy, our subqueries) + + + + Prettyprint us. + the field to which we are applied + + a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" + + + + Return true iff we represent the same query as o + another object + + true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us + + + + Compute a hash code for hashing us + the hash code + + + + Expert: the Weight for DisjunctionMaxQuery, used to + normalize, score and explain these queries. + +

NOTE: this API and implementation is subject to + change suddenly in the next release.

+

+
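+ A hedged sketch of the "albino elephant" pattern described above for DisjunctionMaxQuery: one DisjunctionMaxQuery per term across the fields, combined in a BooleanQuery. The field names are hypothetical, and the Occur enum lives on BooleanClause in older Lucene.Net versions but is a top-level type in newer ones.
+ 
+     using Lucene.Net.Index;
+     using Lucene.Net.Search;
+ 
+     public static class AlbinoElephantExample
+     {
+         public static Query Build()
+         {
+             var albino = new DisjunctionMaxQuery(0.1f);
+             albino.Add(new TermQuery(new Term("title", "albino")));
+             albino.Add(new TermQuery(new Term("body", "albino")));
+ 
+             var elephant = new DisjunctionMaxQuery(0.1f);
+             elephant.Add(new TermQuery(new Term("title", "elephant")));
+             elephant.Add(new TermQuery(new Term("body", "elephant")));
+ 
+             // Each term contributes the score of its best field, not the sum of fields.
+             var query = new BooleanQuery();
+             query.Add(albino, BooleanClause.Occur.SHOULD);
+             query.Add(elephant, BooleanClause.Occur.SHOULD);
+             return query;
+         }
+     }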
+ + The Similarity implementation. + + The Weights for our subqueries, in 1-1 correspondence with disjuncts + + The Scorer for DisjunctionMaxQuery's. The union of all documents generated by the subquery scorers + is generated in document number order. The score for each document is the maximum of the scores computed + by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores + for the other subqueries that generate the document. + + Creates a new instance of DisjunctionMaxScorer + + Multiplier applied to non-maximum-scoring subqueries for a + document as they are summed into the result. + + -- not used since our definition involves neither coord nor terms + directly + + The sub scorers this Scorer should iterate on + + The actual number of scorers to iterate on. Note that the array's + length may be larger than the actual number of scorers. + + Determine the current document score. Initially invalid, until is called the first time. + the score of the current generated document + + Expert: Scoring functionality for phrase queries.
A document is considered matching if it contains the phrase-query terms + at "valid" positions. What "valid positions" are + depends on the type of the phrase query: for an exact phrase query terms are required + to appear in adjacent locations, while for a sloppy phrase query some distance between + the terms is allowed. The abstract method of extending classes + is invoked for each document containing all the phrase query terms, in order to + compute the frequency of the phrase query in that document. A non-zero frequency + means a match.
+
+ + + Phrase frequency in current doc as computed by PhraseFreq() + + + For a document containing all the phrase query terms, compute the + frequency of the phrase in that document. + A non-zero frequency means a match.
Note that containing all phrase terms does not guarantee a match - they have to be found in matching locations.
+ frequency of the phrase in current doc, 0 if not found. + +
+ + Expert: Maintains caches of term values. + +

Created: May 19, 2004 11:13:14 AM + +

+ lucene 1.4 + + $Id: FieldCache.java 807841 2009-08-25 22:27:31Z markrmiller $ + + + +
+ + Expert: Stores term text values and document ordering data. + + + All the term values, in natural order. + + + For each document, an index into the lookup array. + + + Creates one of these objects + + + EXPERT: A unique Identifier/Description for each item in the FieldCache. + Can be useful for logging/debugging. +

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
+ + + + + + Computes (and stores) the estimated size of the cache Value + + + + + The most recently estimated size of the value, null unless + estimateSize has been called. + + + + Indicator for StringIndex values in the cache. + + + Expert: The cache used internally by sorting and range query classes. + + + The default parser for byte values, which are encoded by + + + The default parser for short values, which are encoded by + + + The default parser for int values, which are encoded by + + + The default parser for float values, which are encoded by + + + The default parser for long values, which are encoded by + + + The default parser for double values, which are encoded by + + + A parser instance for int values encoded by , e.g. when indexed + via /. + + + + A parser instance for float values encoded with , e.g. when indexed + via /. + + + + A parser instance for long values encoded by , e.g. when indexed + via /. + + + + A parser instance for double values encoded with , e.g. when indexed + via /. + + + + Interface to parse bytes from document fields. + + + + + Marker interface as super-interface to all parsers. It + is used to specify a custom parser to . + + + + Return a single Byte representation of this field's value. + + + Interface to parse shorts from document fields. + + + + + Return a short representation of this field's value. + + + Interface to parse ints from document fields. + + + + + Return an integer representation of this field's value. + + + Interface to parse floats from document fields. + + + + + Return an float representation of this field's value. + + + Interface to parse long from document fields. + + + Use , this will be removed in Lucene 3.0 + + + + Return an long representation of this field's value. + + + Interface to parse doubles from document fields. + + + Use , this will be removed in Lucene 3.0 + + + + Return an long representation of this field's value. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as a single byte and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the single byte values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as bytes and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + Used to get field values. + + Which field contains the bytes. + + Computes byte for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as shorts and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the shorts. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as shorts and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + Used to get field values. + + Which field contains the shorts. + + Computes short for string values. + + The values in the given field for each document. + + IOException If any error occurs. 
+ + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as integers and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the integers. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as integers and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + Used to get field values. + + Which field contains the integers. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if + none is found, reads the terms in field as floats and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the floats. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if + none is found, reads the terms in field as floats and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + Used to get field values. + + Which field contains the floats. + + Computes float for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as longs and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + + Used to get field values. + + Which field contains the longs. + + The values in the given field for each document. + + java.io.IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as longs and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + + Used to get field values. + + Which field contains the longs. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as integers and returns an array + of size reader.MaxDoc of the value each document + has in the given field. + + + Used to get field values. + + Which field contains the doubles. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as doubles and returns an array of + size reader.MaxDoc of the value each document has in the + given field. + + + Used to get field values. + + Which field contains the doubles. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none + is found, reads the term values in field and returns an array + of size reader.MaxDoc containing the value each document + has in the given field. + + Used to get field values. + + Which field contains the strings. + + The values in the given field for each document. + + IOException If any error occurs. 
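+ A hedged usage sketch for the accessors above. In the Lucene.Net port the default cache is usually reached through FieldCache_Fields.DEFAULT (plain FieldCache.DEFAULT in Java); the "year" field is hypothetical and must contain a single numeric token per document.
+ 
+     using Lucene.Net.Index;
+     using Lucene.Net.Search;
+ 
+     public static class FieldCacheExample
+     {
+         public static int[] LoadYears(IndexReader reader)
+         {
+             // One entry per document (reader.MaxDoc); values are cached per reader,
+             // so repeated calls with the same reader reuse the same array.
+             return FieldCache_Fields.DEFAULT.GetInts(reader, "year");
+         }
+     }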
+ + + Checks the internal cache for an appropriate entry, and if none + is found reads the term values in field and returns + an array of them in natural order, along with an array telling + which element in the term array each document uses. + + Used to get field values. + + Which field contains the strings. + + Array of terms and index into the array for each document. + + IOException If any error occurs. + + + EXPERT: Generates an array of CacheEntry objects representing all items + currently in the FieldCache. +

+ NOTE: These CacheEntry objects maintain a strong reference to the + Cached Values. Maintaining references to a CacheEntry after the IndexReader + associated with it has been garbage collected will prevent the Value itself + from being garbage collected when the Cache drops the WeakReference.

+

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
+ +

+ EXPERT: Instructs the FieldCache to forcibly expunge all entries + from the underlying caches. This is intended only to be used for + test methods as a way to ensure a known base state of the Cache + (without needing to rely on GC to free WeakReferences). + It should not be relied on for "Cache maintenance" in general + application code.

+

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
+ + + Expert: drops all cache entries associated with this + reader. NOTE: this reader must precisely match the + reader that the cache entry is keyed on. If you pass a + top-level reader, it usually will have no effect as + Lucene now caches at the segment reader level. + + + + Gets or sets the InfoStream for this FieldCache. + If non-null, FieldCacheImpl will warn whenever + entries are created that are not sane according to + . + + + + + Expert: The default cache implementation, storing all values in memory. + A WeakDictionary is used for storage. + +

Created: May 19, 2004 4:40:36 PM + +

+ lucene 1.4 + +
+ + Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops + processing terms and returns the current FieldCache + array. + + + + Expert: Internal cache. + + + Expert: Every composite-key in the internal cache is of this type. + + + Creates one of these objects for a custom comparator/parser. + + + Two of these are equal iff they reference the same field and type. + + + Composes a hashcode based on the field and type. + + + A range filter built on top of a cached single term field (in ). + +

builds a single cache for the field the first time it is used. + Each subsequent on the same field then reuses this cache, + even if the range itself changes. + +

This means that is much faster (sometimes more than 100x as fast) + than building a if using a . However, if the range never changes it + is slower (around 2x as slow) than building a CachingWrapperFilter on top of a single . + For numeric data types, this filter may be significantly faster than . + Furthermore, it does not need the numeric values encoded by . But + it has the problem that it only works with exactly one value per document (see below). +

As with all based functionality, is only valid for + fields which contain exactly one term for each document (except for + where 0 terms are also allowed). Due to a restriction of , for numeric ranges + terms that do not have a numeric value are assumed to be 0. +

Thus it works on dates, prices and other single value fields but will not work on + regular text fields. It is preferable to use a NOT_ANALYZED field to ensure that + there is only a single term. + +

This class does not have a constructor; use one of the static factory methods available, + which create a correct instance for different data types supported by .

+
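+ A hedged sketch of the factory-method usage described above; the "price" field and the bounds are hypothetical, and the field must hold exactly one numeric term per document.
+ 
+     using Lucene.Net.Search;
+ 
+     public static class PriceRangeExample
+     {
+         public static Filter Build()
+         {
+             // Inclusive range 10..100; pass null for an open-ended bound.
+             return FieldCacheRangeFilter.NewIntRange("price", 10, 100, true, true);
+         }
+     }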
+ + Creates a string range filter using . This works with all + fields containing zero or one term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range filter using . This works with all + byte fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range filter using . This works with all + byte fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + short fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + short fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + int fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + int fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + long fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + long fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + float fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + float fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + double fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using . This works with all + double fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + This method is implemented for each data type + + + + Returns the field name for this filter + + + + + Returns true if the lower endpoint is inclusive + + + + + Returns true if the upper endpoint is inclusive + + + + + Returns the lower value of the range filter + + + + + Returns the upper value of this range filter + + + + this method checks, if a doc is a hit, should throw AIOBE, when position invalid + + + this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs + + + A that only accepts documents whose single + term value in the specified field is contained in the + provided set of allowed terms. + +

+ + This is the same functionality as TermsFilter (from + contrib/queries), except this filter requires that the + field contains only a single term for all documents. + Because of drastically different implementations, they + also have different performance characteristics, as + described below. + +

+ + The first invocation of this filter on a given field will + be slower, since a must be + created. Subsequent invocations using the same field + will re-use this cache. However, as with all + functionality based on , persistent RAM + is consumed to hold the cache, and is not freed until the + is closed. In contrast, TermsFilter + has no persistent RAM consumption. + + +

+ + With each search, this filter translates the specified + set of Terms into a private keyed by + term number per unique (normally one + reader per segment). Then, during matching, the term + number for each docID is retrieved from the cache and + then checked for inclusion using the . + Since all testing is done using RAM resident data + structures, performance should be very fast, most likely + fast enough to not require further caching of the + DocIdSet for each possible combination of terms. + However, because docIDs are simply scanned linearly, an + index with a great many small documents may find this + linear scan too costly. + +

+ + In contrast, TermsFilter builds up an , + keyed by docID, every time it's created, by enumerating + through all matching docs using to seek + and scan through each term's docID list. While there is + no linear scan of all docIDs, besides the allocation of + the underlying array in the , this + approach requires a number of "disk seeks" in proportion + to the number of terms, which can be exceptionally costly + when there are cache misses in the OS's IO cache. + +

+ + Generally, this filter will be slower on the first + invocation for a given field, but subsequent invocations, + even if you change the allowed set of Terms, should be + faster than TermsFilter, especially as the number of + Terms being matched increases. If you are matching only + a very small number of terms, and those terms in turn + match a very small number of documents, TermsFilter may + perform faster. + +

+ + Which filter is best is very application dependent. +

+
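+ A hedged sketch of the term-set filter described above; the "category" field and its values are hypothetical, and the field must contain a single term per document.
+ 
+     using Lucene.Net.Search;
+ 
+     public static class CategoryFilterExample
+     {
+         public static Filter Build()
+         {
+             // Accepts documents whose single "category" term is one of the allowed values.
+             return new FieldCacheTermsFilter("category", "sports", "politics");
+         }
+     }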
+ + This DocIdSet implementation is cacheable. + + + Expert: a FieldComparator compares hits so as to determine their + sort order when collecting the top results with + . The concrete public FieldComparator + classes here correspond to the SortField types. + +

This API is designed to achieve high performance + sorting, by exposing a tight interaction with + as it visits hits. Whenever a hit is + competitive, it's enrolled into a virtual slot, which is + an int ranging from 0 to numHits-1. The + is made aware of segment transitions + during searching in case any internal state it's tracking + needs to be recomputed during these transitions.

+ +

A comparator must define these functions:

+ + + + Compare a hit at 'slot a' + with hit 'slot b'. + + This method is called by + to notify the + FieldComparator of the current weakest ("bottom") + slot. Note that this slot may not hold the weakest + value according to your comparator, in cases where + your comparator is not the primary one (ie, is only + used to break ties from the comparators before it). + + Compare a new hit (docID) + against the "weakest" (bottom) entry in the queue. + + Installs a new hit into the + priority queue. The + calls this method when a new hit is competitive. + + Invoked + when the search is switching to the next segment. + You may need to update internal state of the + comparator, for example retrieving new values from + the . + + Return the sort value stored in + the specified slot. This is only called at the end + of the search, in order to populate + when returning the top results. + + + NOTE: This API is experimental and might change in + incompatible ways in the next release. +

+
+ + Compare hit at slot1 with hit at slot2. + + first slot to compare + + second slot to compare + + any N < 0 if slot2's value is sorted after + slot1, any N > 0 if slot2's value is sorted before + slot1 and 0 if they are equal + + + Set the bottom slot, i.e. the "weakest" (sorted last) + entry in the queue. When is + called, you should compare against this slot. This + will always be called before . + + + the currently weakest (sorted last) slot in the queue + + + Compare the bottom of the queue with doc. This will + only be invoked after setBottom has been called. This + should return the same result as + as if bottom were slot1 and the new + document were slot 2. +

For a search that hits many results, this method + will be the hotspot (invoked by far the most + frequently).

+ +

+ that was hit + + any N < 0 if the doc's value is sorted after + the bottom entry (not competitive), any N > 0 if the + doc's value is sorted before the bottom entry and 0 if + they are equal. + +
+ + This method is called when a new hit is competitive. + You should copy any state associated with this document + that will be required for future comparisons, into the + specified slot. + + + which slot to copy the hit to + + docID relative to current reader + + + + Set a new Reader. All doc correspond to the current Reader. + + + current reader + + docBase of this reader + + IOException + IOException + + + Sets the Scorer to use in case a document's score is + needed. + + + Scorer instance that you should use to + obtain the current hit's score, if necessary. + + + + Return the actual value in the slot. + + + the value + + value in this slot upgraded to Comparable + + + + Parses field's values as byte (using + and sorts by ascending value + + + + Sorts by ascending docID + + + Parses field's values as double (using + and sorts by ascending value + + + + Parses field's values as float (using + and sorts by ascending value + + + + Parses field's values as int (using + and sorts by ascending value + + + + Parses field's values as long (using + and sorts by ascending value + + + + Sorts by descending relevance. NOTE: if you are + sorting only by descending relevance and then + secondarily by ascending docID, peformance is faster + using directly (which + uses when no is + specified). + + + + Parses field's values as short (using ) + and sorts by ascending value + + + + Sorts by a field's value using the Collator for a + given Locale. + + + + Sorts by field's natural String sort order, using + ordinals. This is functionally equivalent to + , but it first resolves the string + to their relative ordinal positions (using the index + returned by ), and + does most comparisons using the ordinals. For medium + to large results, this comparator will be much faster + than . For very small + result sets it may be slower. + + + + Sorts by field's natural String sort order. All + comparisons are done using String.compareTo, which is + slow for medium to large result sets but possibly + very fast for very small results sets. + + + + Provides a for custom field sorting. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + + + Creates a comparator for the field in the given index. + + + Name of the field to create comparator for. + + FieldComparator. + + IOException + If an error occurs reading the index. + + + + Expert: A ScoreDoc which also contains information about + how to sort the referenced document. In addition to the + document number and score, this object contains an array + of values for the document from the field(s) used to sort. + For example, if the sort criteria was to sort by fields + "a", "b" then "c", the fields object array + will have three elements, corresponding respectively to + the term values for the document in fields "a", "b" and "c". + The class of each element in the array will be either + Integer, Float or String depending on the type of values + in the terms of each field. + +

Created: Feb 11, 2004 1:23:38 PM + +

+ + +
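+ A hedged sketch of retrieving the per-hit sort values described above; the "price" field is hypothetical, and member casing (ScoreDocs, fields) can differ between Lucene.Net versions.
+ 
+     using Lucene.Net.Search;
+ 
+     public static class SortedSearchExample
+     {
+         public static void Run(IndexSearcher searcher, Query query)
+         {
+             var sort = new Sort(new SortField("price", SortField.INT));
+             TopDocs top = searcher.Search(query, null, 10, sort);
+             foreach (ScoreDoc sd in top.ScoreDocs)
+             {
+                 var fieldDoc = (FieldDoc)sd;        // hits carry their sort values
+                 object price = fieldDoc.fields[0];  // value used for the "price" sort field
+                 System.Console.WriteLine(price);
+             }
+         }
+     }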
+ + Expert: Returned by low-level search implementations. + + + + + Expert: Constructs a ScoreDoc. + + + Expert: The score of this document for the query. + + + Expert: A hit document's number. + + + + + Expert: The values which are used to sort the referenced document. + The order of these will match the original sort criteria given by a + Sort object. Each Object will be either an Integer, Float or String, + depending on the type of values in the terms of the original field. + + + + + + + + Expert: Creates one of these objects with empty sort information. + + + Expert: Creates one of these objects with the given sort information. + + + Expert: Collects sorted results from Searchable's and collates them. + The elements put into this queue must be of type FieldDoc. + +

Created: Feb 11, 2004 2:04:21 PM + +

+ lucene 1.4 + +
+ + Creates a hit queue sorted by the given list of fields. + The number of hits to retain. Must be greater than zero. + + + Allows redefinition of sort fields if they are null. + This is to handle the case using ParallelMultiSearcher where the + original list contains AUTO and we don't know the actual sort + type until the values come back. The fields can only be set once. + This method is thread safe. + + + + + Returns the fields being used to sort. + + + Returns an array of collators, possibly null. The collators + correspond to any SortFields which were given a specific locale. + + Array of sort fields. + Array, possibly null. + + + Returns whether a is less relevant than b. + ScoreDoc + ScoreDoc + true if document a should be sorted after document b. + + + Expert: A hit queue for sorting by hits by terms in more than one field. + Uses FieldCache.DEFAULT for maintaining + internal term lookup tables. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + + + + + Creates a hit queue sorted by the given list of fields. + +

NOTE: The instances returned by this method + pre-allocate a full array of length numHits. + +

+ SortField array we are sorting by in priority order (highest + priority first); cannot be null or empty + + The number of hits to retain. Must be greater than zero. + + IOException +
+ + Stores the sort criteria being used. + + + Given a queue Entry, creates a corresponding FieldDoc + that contains the values used to sort the given document. + These values are not the raw values out of the index, but the internal + representation of them. This is so the given search hit can be collated by + a MultiSearcher with other search hits. + + + The Entry used to create a FieldDoc + + The newly created FieldDoc + + + + + + Returns the SortFields being used by this hit queue. + + + An implementation of which is optimized in case + there is just one comparator. + + + + Returns whether a is less relevant than b. + ScoreDoc + ScoreDoc + true if document a should be sorted after document b. + + + An implementation of which is optimized in case + there is more than one comparator. + + + + A query that applies a filter to the results of another query. + +

Note: the bits are retrieved from the filter each time this + query is used in a search - use a CachingWrapperFilter to avoid + regenerating the bits every time. + +

Created: Apr 20, 2004 8:58:29 AM + +

+ 1.4 + +
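+ A hedged sketch combining FilteredQuery with CachingWrapperFilter, following the note above; the field names and terms are hypothetical.
+ 
+     using Lucene.Net.Index;
+     using Lucene.Net.Search;
+ 
+     public static class FilteredQueryExample
+     {
+         public static Query Build()
+         {
+             Query userQuery = new TermQuery(new Term("body", "lucene"));
+             Filter published = new QueryWrapperFilter(new TermQuery(new Term("status", "published")));
+             // Cache the filter so its DocIdSet is not regenerated on every search.
+             return new FilteredQuery(userQuery, new CachingWrapperFilter(published));
+         }
+     }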
+ + Constructs a new query which applies a filter to the results of the original query. + Filter.getDocIdSet() will be called every time this query is used in a search. + + Query to be filtered, cannot be null. + + Filter to apply to query results, cannot be null. + + + + Returns a Weight that applies the filter to the enclosed query's Weight. + This is accomplished by overriding the Scorer returned by the Weight. + + + + Rewrites the wrapped query. + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Abstract class for enumerating a subset of all terms. +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + the current term + + + the delegate enum - to set this member use + + + Equality compare on the term + + + Equality measure on the term + + + Indicates the end of the enumeration has been reached + + + use this method to set the actual TermEnum (e.g. in ctor), + it will be automatically positioned on the first matching term. + + + + Returns the docFreq of the current Term in the enumeration. + Returns -1 if no Term matches or all terms have been enumerated. + + + + Increments the enumeration to the next element. True if one exists. + + + Returns the current Term in the enumeration. + Returns null if no Term matches or all terms have been enumerated. + + + + Filter caching singleton. It can be used + to save filters locally for reuse. + This class makes it possble to cache Filters even when using RMI, as it + keeps the cache on the seaercher side of the RMI connection. + + Also could be used as a persistent storage for any filter as long as the + filter provides a proper hashCode(), as that is used as the key in the cache. + + The cache is periodically cleaned up from a separate thread to ensure the + cache doesn't exceed the maximum size. + + + + The default maximum number of Filters in the cache + + + The default frequency of cache clenup + + + The cache itself + + + Maximum allowed cache size + + + Cache cleaning frequency + + + Cache cleaner that runs in a separate thread + + + Sets up the FilterManager singleton. + + + Sets the max size that cache should reach before it is cleaned up + maximum allowed cache size + + + Sets the cache cleaning frequency in milliseconds. + cleaning frequency in millioseconds + + + Returns the cached version of the filter. Allows the caller to pass up + a small filter but this will keep a persistent version around and allow + the caching filter to do its job. + + + The input filter + + The cached version of the filter + + + + Holds the filter and the last time the filter was used, to make LRU-based + cache cleaning possible. + TODO: Clean this up when we switch to Java 1.5 + + + + Keeps the cache from getting too big. + If we were using Java 1.5, we could use LinkedHashMap and we would not need this thread + to clean out the cache. + + The SortedSet sortedFilterItems is used only to sort the items from the cache, + so when it's time to clean up we have the TreeSet sort the FilterItems by + timestamp. + + Removes 1.5 * the numbers of items to make the cache smaller. + For example: + If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 round up to 8. + This way we clean the cache a bit more, and avoid having the cache cleaner having to do it frequently. + + + + Expert: obtains single byte field values from the + FieldCache + using getBytes() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements + on the field. +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+ + + +

Expert: A base class for ValueSource implementations that retrieve values for + a single field from the FieldCache. +

+ Fields used herein must be indexed (doesn't matter if these fields are stored or not).

+ It is assumed that each such indexed field is untokenized, or at least has a single token in a document. + For documents with multiple tokens of the same field, behavior is undefined (It is likely that current + code would use the value of one of these tokens, but this is not guaranteed). +

+ Documents with no tokens in this field are assigned the zero value.

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+

+
+ + Expert: source of values for basic function queries. +

+ In its simplest form, values - one per doc - are used as the score of that doc.

Values are instantiated as + DocValues for a particular reader. +

ValueSource implementations differ in RAM requirements: it would always be a factor + of the number of documents, but for each document the number of bytes can be 1, 2, 4, or 8. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + + +

+
+ + Return the DocValues used by the function query. + the IndexReader used to read these values. + If any caching is involved, that caching would also be IndexReader based. + + IOException for any error. + + + description of field, used in explain() + + + Needed for possible caching of query results - used by . + + + + + Needed for possible caching of query results - used by . + + + + + Create a cached field source for the input field. + + + Return cached DocValues for input field and reader. + FieldCache so that values of a field are loaded once per reader (RAM allowing) + + Field for which values are required. + + + + + + Check if equals to another , already knowing that cache and field are equal. + + + + + Return a hash code of a , without the hash-codes of the field + and the cache (those are taken care of elsewhere). + + + + + + Create a cached byte field source with default string-to-byte parser. + + + Create a cached byte field source with a specific string-to-byte parser. + + + Expert: represents field values as different types. + Normally created via a + ValueSuorce + for a particular field and reader. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + + +

+
+ + Return doc value as a float. +

Mandatory: every DocValues implementation must implement at least this method. +

+ document whose float value is requested. + +
+ + Return doc value as an int. +

+ Optional: DocValues implementations can (but don't have to) override this method.

+ document whose int value is requested. + +
+ + Return doc value as a long. +

+ Optional: DocValues implementations can (but don't have to) override this method.

+ document whose long value is requested. + +
+ + Return doc value as a double. +

+ Optional: DocValues implementations can (but don't have to) override this method.

+ document whose double value is requested. + +
+ + Return doc value as a string. +

+ Optional: DocValues implementations can (but don't have to) override this method.

+ document whose string value is requested. + +
+ + Return a string representation of a doc value, as required for Explanations. + + Explain the scoring value for the input doc. + + Returns the minimum of all values or Float.NaN if this + DocValues instance does not contain any value.

+ This operation is optional +

+ +

+ the minimum of all values or Float.NaN if this + DocValues instance does not contain any value. + +
+ + Returns the maximum of all values or Float.NaN if this + DocValues instance does not contain any value. +

+ This operation is optional +

+ +

+ the maximum of all values or Float.NaN if this + DocValues instance does not contain any value. + +
+ + Returns the average of all values or Float.NaN if this + DocValues instance does not contain any value.

+ This operation is optional +

+ +

+ the average of all values or Float.NaN if this + DocValues instance does not contain any value + +
+ + Expert: for test purposes only, return the inner array of values, or null if not applicable. +

+ Allows tests to verify that loaded values are: + + indeed cached/reused. + stored in the expected size/type (byte/short/int/float). + + Note: implementations of DocValues must override this method for + these test elements to be tested; otherwise the test would not fail, just + print a warning.

+
+ + + An instance of this subclass should be returned by + , if you want + to modify the custom score calculation of a . + Since Lucene 2.9, queries operate on each segment of an Index separately, + so overriding the similar (now deprecated) methods in + is no longer suitable, as the supplied doc ID is per-segment + and without knowledge of the IndexReader you cannot access the + document or . + + @lucene.experimental + @since 2.9.2 + + + + + Creates a new instance of the provider class for the given IndexReader. + + + + + * Compute a custom score by the subQuery score and a number of + ValueSourceQuery scores. +

+ Subclasses can override this method to modify the custom score. +

+ If your custom scoring is different than the default herein you + should override at least one of the two customScore() methods. + If the number of ValueSourceQueries is always < 2 it is + sufficient to override the other + CustomScore() + method, which is simpler. +

+ The default computation herein is a multiplication of given scores: +

+                ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
+            
+
+ id of scored doc + score of that doc by the subQuery + scores of that doc by the ValueSourceQuery + custom score +
+ + + Compute a custom score by the subQuery score and the ValueSourceQuery score. +

+ Subclasses can override this method to modify the custom score. +

+ If your custom scoring is different than the default herein you + should override at least one of the two customScore() methods. + If the number of ValueSourceQueries is always < 2 it is + sufficient to override this customScore() method, which is simpler. +

+ The default computation herein is a multiplication of the two scores: +

+                ModifiedScore = subQueryScore * valSrcScore
+            
+
+ id of scored doc + score of that doc by the subQuery + score of that doc by the ValueSourceQuery + custom score +
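+ A hedged sketch of customizing this computation via the per-segment provider described above: the provider multiplies the sub-query score by the square root of the value-source score (the formula is purely illustrative), and a CustomScoreQuery subclass returns it from GetCustomScoreProvider. Exact member visibility may vary between Lucene.Net versions.
+ 
+     using Lucene.Net.Index;
+     using Lucene.Net.Search;
+     using Lucene.Net.Search.Function;
+ 
+     public class SqrtValueProvider : CustomScoreProvider
+     {
+         public SqrtValueProvider(IndexReader reader) : base(reader) { }
+ 
+         public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
+         {
+             // Damp the influence of the value source instead of multiplying it in directly.
+             return subQueryScore * (float)System.Math.Sqrt(valSrcScore);
+         }
+     }
+ 
+     public class SqrtValueQuery : CustomScoreQuery
+     {
+         public SqrtValueQuery(Query subQuery, ValueSourceQuery valSrc) : base(subQuery, valSrc) { }
+ 
+         protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
+         {
+             return new SqrtValueProvider(reader);
+         }
+     }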
+ + + Explain the custom score. + Whenever overriding , + this method should also be overridden to provide the correct explanation + for the part of the custom scoring. + + doc being explained + explanation for the sub-query part + explanation for the value source part + an explanation for the custom score + + + + Explain the custom score. + Whenever overriding , + this method should also be overridden to provide the correct explanation + for the part of the custom scoring. + + + doc being explained + explanation for the sub-query part + explanation for the value source part + an explanation for the custom score + + + Query that sets document score as a programmatic function of several (sub) scores: + + the score of its subQuery (any query) + (optional) the score of its ValueSourceQuery (or queries). + For most simple/convenient use cases this query is likely to be a + FieldScoreQuery + + Subclasses can modify the computation by overriding . + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
+ + Create a CustomScoreQuery over input subQuery. + the sub query whose scored is being customed. Must not be null. + + + + Create a CustomScoreQuery over input subQuery and a . + the sub query whose score is being customed. Must not be null. + + a value source query whose scores are used in the custom score + computation. For most simple/convineient use case this would be a + FieldScoreQuery. + This parameter is optional - it can be null or even an empty array. + + + + Create a CustomScoreQuery over input subQuery and a . + the sub query whose score is being customized. Must not be null. + + value source queries whose scores are used in the custom score + computation. For most simple/convenient use case these would be + FieldScoreQueries. + This parameter is optional - it can be null or even an empty array. + + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + + Returns a that calculates the custom scores + for the given . The default implementation returns a default + implementation as specified in the docs of . + + + + + Compute a custom score by the subQuery score and a number of + ValueSourceQuery scores. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override and return a subclass + of for the given . + see CustomScoreProvider#customScore(int,float,float[]) + + + + Compute a custom score by the subQuery score and the ValueSourceQuery score. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override and return a subclass + of for the given . + + + + + Explain the custom score. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override and return a subclass + of for the given . + + + + Explain the custom score. + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override and return a subclass + of for the given . + + + + Checks if this is strict custom scoring. + In strict custom scoring, the ValueSource part does not participate in weight normalization. + This may be useful when one wants full control over how scores are modified, and does + not care about normalizing by the ValueSource part. + One particular case where this is useful if for testing this query. +

+ Note: only has effect when the ValueSource part is not null. +

+
+ + Set the strict mode of this query. + The strict mode to set. + + + + + + A short name of this query, used in . + + + A scorer that applies a (callback) function on scores of the subQuery. + + + A query that scores each document as the value of the numeric input field. +

+ The query matches all documents, and scores each document according to the numeric + value of that field. +

+ It is assumed, and expected, that: + + The field used here is indexed, and has exactly + one token in every scored document. + Best if this field is un_tokenized. + That token is parsable to the selected type. + +

+ Combining this query in a FunctionQuery allows much freedom in affecting document scores. + Note, that with this freedom comes responsibility: it is more than likely that the + default Lucene scoring is superior in quality to scoring modified as explained here. + However, in some cases, and certainly for research experiments, this capability may turn useful. +

+ When constructing this query, select the appropriate type. That type should match the data stored in the + field. So in fact the "right" type should be selected before indexing. Type selection + affects the RAM usage: + + consumes 1 * maxDocs bytes. + consumes 2 * maxDocs bytes. + consumes 4 * maxDocs bytes. + consumes 8 * maxDocs bytes. +

+ Caching: + Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader. + To take advantage of this, it is extremely important to reuse index-readers or index-searchers, + otherwise, for instance if for each query a new index reader is opened, large penalties would be + paid for loading the field values into memory over and over again! + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
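+ A hedged sketch of scoring by a numeric field as described above; "popularity" is a hypothetical un-tokenized field holding one float token per document.
+ 
+     using Lucene.Net.Search;
+     using Lucene.Net.Search.Function;
+ 
+     public static class FieldScoreExample
+     {
+         public static Query Build(Query textQuery)
+         {
+             var popularity = new FieldScoreQuery("popularity", FieldScoreQuery.Type.FLOAT);
+             // Combine free-text relevance with the field value (default combination: multiplication).
+             return new CustomScoreQuery(textQuery, popularity);
+         }
+     }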
+ + Expert: A Query that sets the scores of document to the + values obtained from a ValueSource. +

+ This query provides a score for each and every undeleted document in the index. +

+ The value source can be based on a (cached) value of an indexed field, but it + can also be based on an external source, e.g. values read from an external database. +

+ Score is set as: Score(doc,query) = query.getBoost()^2 * valueSource(doc). +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
+ + Create a value source query + provides the values that define the function to be used for scoring + + Returns true if o is equal to this. + + Returns a hash code value for this object. + + A scorer that (simply) matches all documents, and scores each document with + the value of the value source in effect. As an example, if the value source + is a (cached) field source, then the value of that field in that document will + be used. (assuming field is indexed for this doc, with a single token.) +

+ The type param tells how to parse the field string values into a numeric score value. +

+ the numeric field to be used. + + the type of the field: either + , , , or . + +
+ + Type of score field, indicating how field values are interpreted/parsed. +

+ The type selected at search time should match the data stored in the field. + Different types have different RAM requirements: + + consumes 1 * maxDocs bytes. + consumes 2 * maxDocs bytes. + consumes 4 * maxDocs bytes. + consumes 8 * maxDocs bytes. +

+
+ + field values are interpreted as numeric byte values. + + + field values are interpreted as numeric short values. + + + field values are interpreted as numeric int values. + + + field values are interpreted as numeric float values. + + + Expert: obtains float field values from the + FieldCache + using getFloats() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements + on the field. +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+ + + +

Create a cached float field source with default string-to-float parser. +
+ + Create a cached float field source with a specific string-to-float parser. + + + Expert: obtains int field values from the + FieldCache + using getInts() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+ + + +

Create a cached int field source with default string-to-int parser. +
+ + Create a cached int field source with a specific string-to-int parser. + + + Expert: obtains the ordinal of the field value from the default Lucene + Fieldcache using getStringIndex(). +

+ The native lucene index order is used to assign an ordinal value for each field value. +

+ Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. +

+ Example: +
If there were only three field values: "apple","banana","pear" +
then ord("apple")=1, ord("banana")=2, ord("pear")=3 +

+ WARNING: + ord() depends on the position in an index and can thus change + when other documents are inserted or deleted, + or if a MultiSearcher is used. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+

+
+ + Constructor for a certain field. + field whose values order is used. + + + + Expert: obtains the ordinal of the field value from the default Lucene + FieldCache using getStringIndex() + and reverses the order. +

+ The native lucene index order is used to assign an ordinal value for each field value. +

+ Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. +
+ Example of reverse ordinal (rord): +
If there were only three field values: "apple","banana","pear" +
then rord("apple")=3, rord("banana")=2, rord("pear")=1 +

+ WARNING: + rord() depends on the position in an index and can thus change + when other documents are inserted or deleted, + or if a MultiSearcher is used. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+

+
+ + Constructor for a certain field. + field whose values reverse order is used. + + + + Expert: obtains short field values from the + FieldCache + using getShorts() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API.

+ + + +

Create a cached short field source with default string-to-short parser. +
+ + Create a cached short field source with a specific string-to-short parser. + + + Implements the fuzzy search query. The similarity measurement + is based on the Levenshtein (edit distance) algorithm. + + Warning: this query is not very scalable with its default prefix + length of 0 - in this case, *every* term will be enumerated and + cause an edit score calculation. + + + + + An abstract that matches documents + containing a subset of terms provided by a + enumeration. + +

This query cannot be used directly; you must subclass + it and define to provide a + that iterates through the terms to be + matched. + +

NOTE: if is either + or + , you may encounter a + exception during + searching, which happens when the number of terms to be + searched exceeds + . Setting + to + prevents this. + +

The recommended rewrite method is + : it doesn't spend CPU + computing unhelpful scores, and it tries to pick the most + performant rewrite method given the query. + + Note that produces + MultiTermQueries using + by default. +

+
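+ A hedged sketch of the rewrite-method advice above, using the constant-score filter rewrite described next so that a broad multi-term expansion cannot trigger the TooManyClauses exception. The field and pattern are invented; the RewriteMethod property and CONSTANT_SCORE_FILTER_REWRITE field follow the Lucene.Net port and may appear as a SetRewriteMethod(...) call in older builds.
+ using Lucene.Net.Index;
+ using Lucene.Net.Search;
+
+ // A wildcard query is a MultiTermQuery; the filter rewrite visits the terms once,
+ // marks matching docs in a bit set and assigns a constant score, so it can never
+ // throw BooleanQuery.TooManyClauses no matter how many terms "lu*ene" expands to.
+ var wildcard = new WildcardQuery(new Term("title", "lu*ene"));
+ wildcard.RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;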
+ + A rewrite method that first creates a private Filter, + by visiting each term in sequence and marking all docs + for that term. Matching documents are assigned a + constant score equal to the query's boost. + +

This method is faster than the BooleanQuery + rewrite methods when the number of matched terms or + matched documents is non-trivial. Also, it will never + hit an errant + exception. + +

+ + +
+ + A rewrite method that first translates each term into + clause in a + BooleanQuery, and keeps the scores as computed by the + query. Note that typically such scores are + meaningless to the user, and require non-trivial CPU + to compute, so it's almost always better to use + instead. + +

NOTE: This rewrite method will hit + if the number of terms + exceeds . + +

+ + +
+ + Like except + scores are not computed. Instead, each matching + document receives a constant score equal to the + query's boost. + +

NOTE: This rewrite method will hit + if the number of terms + exceeds . + +

+ + +
+ + Expert: Return the number of unique terms visited during execution of the query. + If there are many of them, you may consider using another query type + or optimizing your total term count in the index. +

This method is not thread safe, be sure to only call it when no query is running! + If you re-use the same query instance for another + search, be sure to first reset the term counter + with . +

On optimized indexes / no MultiReaders, you get the correct number of + unique terms for the whole index. Use this number to compare different queries. + For non-optimized indexes this number can also be achieved in + non-constant-score mode. In constant-score mode you get the total number of + terms sought for all segments / sub-readers. +

+ + +
+ + Sets the rewrite method to be used when executing the + query. You can use one of the four core methods, or + implement your own subclass of . + + + + A rewrite method that tries to pick the best + constant-score rewrite method based on term and + document counts from the query. If both the number of + terms and documents is small enough, then + is used. + Otherwise, is + used. + + + + Abstract class that defines how the query is rewritten. + + + If the number of terms in this query is equal to or + larger than this setting then + is used. + + + + If the number of documents to be visited in the + postings exceeds this specified percentage of the + MaxDoc for the index, then + is used. + + 0.0 to 100.0 + + + Create a new FuzzyQuery that will match terms with a similarity + of at least minimumSimilarity to term. + If a prefixLength > 0 is specified, a common prefix + of that length is also required. + + + the term to search for + + a value between 0 and 1 to set the required similarity + between the query term and the matching terms. For example, for a + minimumSimilarity of 0.5 a term of the same length + as the query term is considered similar to the query term if the edit distance + between both terms is less than length(term)*0.5 + + length of common (non-fuzzy) prefix + + IllegalArgumentException if minimumSimilarity is >= 1 or < 0 + or if prefixLength < 0 + + + + Calls FuzzyQuery(term, minimumSimilarity, 0). + + + Calls FuzzyQuery(term, 0.5f, 0). + + + Returns the pattern term. + + + Returns the minimum similarity that is required for this query to match. + float value between 0.0 and 1.0 + + + Returns the non-fuzzy prefix length. This is the number of characters at the start + of a term that must be identical (not fuzzy) to the query term if the query + is to match that term. + + + + Subclass of FilteredTermEnum for enumerating all terms that are similiar + to the specified filter term. + +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ + + + + IOException + + +
+ + Creates a FuzzyTermEnum with an empty prefix. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ + + + + + + IOException + + +
+ + Constructor for enumeration of all terms from specified reader which share a prefix of + length prefixLength with term and which have a fuzzy similarity > + minSimilarity. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ Delivers terms. + + Pattern term. + + Minimum required similarity for terms from the reader. Default value is 0.5f. + + Length of required common prefix. Default value is 0. + + IOException +
+ + The termCompare method in FuzzyTermEnum uses Levenshtein distance to + calculate the distance between the given term and the comparing term. + + + +

Similarity returns a number that is 1.0f or less (including negative numbers) + based on how similar the Term is compared to a target term. It returns + exactly 0.0f when + + editDistance > maximumEditDistance + Otherwise it returns: + + 1 - (editDistance / length) + where length is the length of the shortest term (text or target) including a + prefix that are identical and editDistance is the Levenshtein distance for + the two words.

+ +

Embedded within this algorithm is a fail-fast Levenshtein distance + algorithm. The fail-fast algorithm differs from the standard Levenshtein + distance algorithm in that it is aborted if it is discovered that the + minimum distance between the words is greater than some threshold. +

To calculate the maximum distance threshold we use the following formula: + + (1 - minimumSimilarity) * length + where length is the shortest term including any prefix that is not part of the + similarity comparison. This formula was derived by solving for what maximum value + of distance returns false for the following statements: + + similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen))); + return (similarity > minimumSimilarity); + where distance is the Levenshtein distance for the two words. +

+

Levenshtein distance (also known as edit distance) is a measure of similarity + between two strings where the distance is measured as the number of character + deletions, insertions or substitutions required to transform one string to + the other string. +

+ the target word or phrase + + the similarity, 0.0 or less indicates that it matches less than the required + threshold and 1.0 indicates that the text and target are identical + +
+ + The max Distance is the maximum Levenshtein distance for the text + compared to some other value that results in score that is + better than the minimum similarity. + + the length of the "other value" + + the maximum levenshtein distance that we care about + + + + Creates a new instance with size elements. If + prePopulate is set to true, the queue will pre-populate itself + with sentinel objects and set its to size. In + that case, you should not rely on to get the number of + actual elements that were added to the queue, but keep track yourself.
+ NOTE: in case prePopulate is true, you should pop + elements from the queue using the following code example: + + + PriorityQueue pq = new HitQueue(10, true); // pre-populate. + ScoreDoc top = pq.top(); + + // Add/Update one element. + top.score = 1.0f; + top.doc = 0; + top = (ScoreDoc) pq.updateTop(); + int totalHits = 1; + + // Now pop only the elements that were *truly* inserted. + // First, pop all the sentinel elements (there are pq.size() - totalHits). + for (int i = pq.size() - totalHits; i > 0; i--) pq.pop(); + + // Now pop the truly added elements. + ScoreDoc[] results = new ScoreDoc[totalHits]; + for (int i = totalHits - 1; i >= 0; i--) { + results[i] = (ScoreDoc) pq.pop(); + } + + +

NOTE: This class pre-allocates a full array of + length size. + +

+ the requested size of this queue. + + specifies whether to pre-populate the queue with sentinel values. + + + +
+ + Implements search over a single IndexReader. + +

Applications usually need only call the inherited + or methods. For performance reasons it is + recommended to open only one IndexSearcher and use it for all of your searches. + +

NOTE: + instances are completely + thread safe, meaning multiple threads can call any of its + methods, concurrently. If your application requires + external synchronization, you should not + synchronize on the IndexSearcher instance; + use your own (non-Lucene) objects instead.

+

+
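+ A minimal end-to-end sketch of the recommended usage (one long-lived, read-only IndexSearcher reused for all searches). The index path, field and term are assumptions, and member casing follows the Lucene.Net port.
+ using System.IO;
+ using Lucene.Net.Index;
+ using Lucene.Net.Search;
+ using Lucene.Net.Store;
+
+ var dir = FSDirectory.Open(new DirectoryInfo("index"));
+ var reader = IndexReader.Open(dir, true);                // readOnly = true gives the best concurrency
+ var searcher = new IndexSearcher(reader);                // reuse this one instance for all searches
+ TopDocs hits = searcher.Search(new TermQuery(new Term("title", "lucene")), 10);
+ foreach (ScoreDoc sd in hits.ScoreDocs)
+ {
+     var stored = searcher.Doc(sd.Doc);                   // fetch the stored fields of each hit
+ }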
+ + An abstract base class for search implementations. Implements the main search + methods. + +

+ Note that you can only access hits from a Searcher as long as it is not yet + closed, otherwise an IOException will be thrown. +

+
+ + The interface for search implementations. + +

+ Searchable is the abstract network protocol for searching. Implementations + provide search over a single index, over multiple indices, and over indices + on remote servers. + +

+ Queries, filters and sort criteria are designed to be compact so that they + may be efficiently passed to a remote index, with only the top-scoring hits + being returned, rather than every matching hit. + + NOTE: this interface is kept public for convenience. Since it is not + expected to be implemented directly, it may be changed unexpectedly between + releases. +

+
+ + Lower-level search API. + +

+ is called for every document.
+ Collector-based access to remote indexes is discouraged. + +

+ Applications should only use this if they need all of the matching + documents. The high-level search API () is + usually more efficient, as it skips non-high-scoring hits. + +

+ to match documents + + if non-null, used to permit documents to be collected. + + to receive hits + + BooleanQuery.TooManyClauses +
+ + Frees resources associated with this Searcher. + Be careful not to call this method while you are still using objects + that reference this searchable + + + + Expert: Returns the number of documents containing term. + Called by search code to compute term weights. + + + + + + Expert: For each term in the terms array, calculates the number of + documents containing term. Returns an array with these + document frequencies. Used to minimize number of remote calls. + + + + + Expert: Low-level search implementation. Finds the top n + hits for query, applying filter if non-null. + +

Applications should usually call or + instead. +

+ BooleanQuery.TooManyClauses +
+ + Expert: Returns the stored fields of document i. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Get the at the nth position. The + may be used to determine what s to load and how they should be loaded. + + NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the lazy is + loaded an exception may be thrown. If you want the value of a lazy to be available after closing you must + explicitly load it or fetch the Document again with a new loader. + + + + Get the document at the nth position + + The to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. + + The stored fields of the at the nth position + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + + + + + + + + + + + + + Expert: called to re-write queries into primitive queries. + BooleanQuery.TooManyClauses + + + Expert: low-level implementation method + Returns an Explanation that describes how doc scored against + weight. + +

This is intended to be used in developing Similarity implementations, + and, for good performance, should not be displayed with every hit. + Computing an explanation is as expensive as executing the query over the + entire index. +

Applications should call . +

+ BooleanQuery.TooManyClauses +
+ + Expert: Low-level search implementation with arbitrary sorting. Finds + the top n hits for query, applying + filter if non-null, and sorting the hits by the criteria in + sort. + +

Applications should usually call + instead. + +

+ BooleanQuery.TooManyClauses +
+ + Expert: Returns one greater than the largest possible document number. + Called by search code to compute term weights. + + + + + + Search implementation with arbitrary sorting. Finds + the top n hits for query, applying + filter if non-null, and sorting the hits by the criteria in + sort. + +

NOTE: this does not compute scores by default; use + to enable scoring. + +

+ BooleanQuery.TooManyClauses +
+ + Lower-level search API. + +

is called for every matching document. + +

Applications should only use this if they need all of the matching + documents. The high-level search API ( + ) is usually more efficient, as it skips non-high-scoring hits. +

Note: The score passed to this method is a raw score. + In other words, the score will not necessarily be a float whose value is + between 0 and 1. +

+ BooleanQuery.TooManyClauses +
+ + Lower-level search API. + +

is called for every matching + document. +
Collector-based access to remote indexes is discouraged. + +

Applications should only use this if they need all of the + matching documents. The high-level search API () + is usually more efficient, as it skips + non-high-scoring hits. + +

+ to match documents + + if non-null, used to permit documents to be collected. + + to receive hits + + BooleanQuery.TooManyClauses +
+ + Finds the top n + hits for query, applying filter if non-null. + + + BooleanQuery.TooManyClauses + + + Finds the top n + hits for query. + + + BooleanQuery.TooManyClauses + + + Returns an Explanation that describes how doc scored against + query. + +

This is intended to be used in developing Similarity implementations, + and, for good performance, should not be displayed with every hit. + Computing an explanation is as expensive as executing the query over the + entire index. +

+
+ + The Similarity implementation used by this searcher. + + + creates a weight for query + new weight + + + + Expert: Gets or Sets the Similarity implementation used by this Searcher. + + + + + + + Creates a searcher searching the index in the named + directory, with readOnly=true + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Creates a searcher searching the index in the named + directory. You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the underlying IndexReader. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + directory where IndexReader will be opened + + if true, the underlying IndexReader + will be opened readOnly + + + + Creates a searcher searching the provided index + + Note that the underlying IndexReader is not closed, if + IndexSearcher was constructed with IndexSearcher(IndexReader r). + If the IndexReader was supplied implicitly by specifying a directory, then + the IndexReader gets closed. + + + + + + Expert: directly specify the reader, subReaders and their + DocID starts +

+ NOTE: This API is experimental and + might change in incompatible ways in the next + release

+

+
+ + Just like , but you choose + whether or not the fields in the returned instances + should be set by specifying fillFields. +

+ NOTE: this does not compute scores by default. If you need scores, create + a instance by calling + and then pass that to + . +

+

+
+ + By default, no scores are computed when sorting by field (using + ). You can change that, per + IndexSearcher instance, by calling this method. Note that this will incur + a CPU cost. + + + If true, then scores are returned for every matching document + in . + + + If true, then the max score for all matching docs is computed. + + + + Return the this searches. + + + A query that matches all documents. + + + + + Field used for normalization factor (document boost). Null if nothing. + + + + MultiPhraseQuery is a generalized version of PhraseQuery, with an added + method . + To use this class, to search for the phrase "Microsoft app*" first use + add(Term) on the term "Microsoft", then find all terms that have "app" as + prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[] + terms) to add them to the query. + + + 1.0 + + + + Add a single term at the next position in the phrase. + + + + + Add multiple terms at the next position in the phrase. Any of the terms + may match. + + + + + + + Allows to specify the relative position of terms within the phrase. + + + + + + + + + + + Returns a List<Term[]> of the terms in the multiphrase. + Do not modify the List or its contents. + + + + Returns the relative positions of terms in this phrase. + + + Prints a user-readable version of this query. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Gets or sets the phrase slop for this query. + + + + + Implements search over a set of Searchables. + +

Applications usually need only call the inherited + or methods. +

+
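+ A short, hedged sketch of combining two already-open sub-searchers with the MultiSearcher described above; the wrapper method and its arguments are purely illustrative.
+ using Lucene.Net.Search;
+
+ public static class MultiIndexSketch
+ {
+     public static TopDocs SearchBoth(IndexSearcher a, IndexSearcher b, Query query)
+     {
+         var multi = new MultiSearcher(new Searchable[] { a, b });
+         return multi.Search(query, 10);   // doc ids in the merged result are remapped across sub-indexes
+     }
+ }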
+ + Creates a searcher which searches searchers. + + + Return the array of s this searches. + + + Returns index of the searcher for document n in the array + used to construct this searcher. + + + + Returns the document number of document n within its + sub-index. + + + + + + + Create weight in multiple index scenario. + + Distributed query processing is done in the following steps: + 1. rewrite query + 2. extract necessary terms + 3. collect dfs for these terms from the Searchables + 4. create query weight using aggregate dfs. + 5. distribute that weight to Searchables + 6. merge results + + Steps 1-4 are done here, 5+6 in the search() methods + + + rewritten queries + + + + Document Frequency cache acting as a Dummy-Searcher. This class is no + full-fledged Searcher, but only supports the methods necessary to + initialize Weights. + + + + A wrapper for , that exposes its + functionality as a . +

+ MultiTermQueryWrapperFilter is not designed to + be used by itself. Normally you subclass it to provide a Filter + counterpart for a subclass. +

+ For example, and extend + MultiTermQueryWrapperFilter. + This class also provides the functionality behind + ; + this is why it is not abstract. +

+
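+ As a concrete (hedged) illustration of the wrapper described above: PrefixFilter is one of its subclasses and can restrict any query to documents whose terms share a prefix. The field name and prefix below are invented.
+ using Lucene.Net.Index;
+ using Lucene.Net.Search;
+
+ public static class PrefixFilterSketch
+ {
+     public static TopDocs OnlyUnderDocs(IndexSearcher searcher)
+     {
+         Filter underDocs = new PrefixFilter(new Term("path", "/docs/"));
+         return searcher.Search(new MatchAllDocsQuery(), underDocs, 10);
+     }
+ }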
+ + Wrap a as a Filter. + + + Expert: Resets the counting of unique terms. + Do this before executing the filter. + + + + + + Expert: Return the number of unique terms visited during execution of the filter. + If there are many of them, you may consider using another filter type + or optimize your total term count in index. +

This method is not thread safe, be sure to only call it when no filter is running! + If you re-use the same filter instance for another + search, be sure to first reset the term counter + with . +

+ + +
+ + A that only accepts numeric values within + a specified range. To use this, you must first index the + numeric values using (expert: + ). + +

You create a new NumericRangeFilter with the static + factory methods, eg: + + + Filter f = NumericRangeFilter.newFloatRange("weight", + new Float(0.3f), new Float(0.10f), + true, true); + + + accepts all documents whose float valued "weight" field + ranges from 0.3 to 0.10, inclusive. + See for details on how Lucene + indexes and searches numeric valued fields. + +

NOTE: This API is experimental and + might change in incompatible ways in the next + release. + +

+ 2.9 + + +
+ + Returns the field name for this filter + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the lower value of this range filter + + + Returns the upper value of this range filter + + + Factory that creates a NumericRangeFilter, that filters a long + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a long + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a int + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a int + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a double + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a double + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a float + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a float + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + +

A that matches numeric values within a + specified range. To use this, you must first index the + numeric values using (expert: + ). If your terms are instead textual, + you should use . + is the filter equivalent of this + query.

+ +

You create a new NumericRangeQuery with the static + factory methods, eg: + + + Query q = NumericRangeQuery.newFloatRange("weight", + new Float(0.3f), new Float(0.10f), + true, true); + + + matches all documents whose float valued "weight" field + ranges from 0.3 to 0.10, inclusive. + +

The performance of NumericRangeQuery is much better + than the corresponding because the + number of terms that must be searched is usually far + fewer, thanks to trie indexing, described below.

+ +

You can optionally specify a precisionStep + when creating this query. This is necessary if you've + changed this configuration from its default (4) during + indexing. Lower values consume more disk space but speed + up searching. Suitable values are between 1 and + 8. A good starting point to test is 4, + which is the default value for all Numeric* + classes. See below for + details. + +

This query defaults to + for + 32 bit (int/float) ranges with precisionStep <8 and 64 + bit (long/double) ranges with precisionStep <6. + Otherwise it uses + as the + number of terms is likely to be high. With precision + steps of <4, this query can be run with one of the + BooleanQuery rewrite methods without changing + BooleanQuery's default max clause count. + +

NOTE: This API is experimental and + might change in incompatible ways in the next release. + +
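+ A hedged sketch of the index-time/search-time pairing described above. The "weight" field, the stored value and the open IndexWriter are assumptions; method casing follows the Lucene.Net port, and the default precisionStep (4) is used on both sides.
+ using Lucene.Net.Documents;
+ using Lucene.Net.Index;
+ using Lucene.Net.Search;
+
+ public static class NumericRangeSketch
+ {
+     public static Query IndexAndBuildRange(IndexWriter writer)
+     {
+         // Index time: trie-encode the value with the default precisionStep (4).
+         var doc = new Document();
+         doc.Add(new NumericField("weight", Field.Store.YES, true).SetFloatValue(0.07f));
+         writer.AddDocument(doc);
+
+         // Search time: match 0.03 <= weight <= 0.10, both bounds inclusive.
+         return NumericRangeQuery.NewFloatRange("weight", 0.03f, 0.10f, true, true);
+     }
+ }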

How it works

+ +

See the publication about panFMP, + where this algorithm was described (referred to as TrieRangeQuery): + +

Schindler, U, Diepenbroek, M, 2008. + Generic XML-based Framework for Metadata Portals. + Computers & Geosciences 34 (12), 1947-1955. + doi:10.1016/j.cageo.2008.02.023
+ +

A quote from this paper: Because Apache Lucene is a full-text + search engine and not a conventional database, it cannot handle numerical ranges + (e.g., field value is inside user defined bounds, even dates are numerical values). + We have developed an extension to Apache Lucene that stores + the numerical values in a special string-encoded format with variable precision + (all numerical values like doubles, longs, floats, and ints are converted to + lexicographic sortable string representations and stored with different precisions + (for a more detailed description of how the values are stored, + see ). A range is then divided recursively into multiple intervals for searching: + The center of the range is searched only with the lowest possible precision in the trie, + while the boundaries are matched more exactly. This reduces the number of terms dramatically.

+ +

For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that + uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the + lowest precision. Overall, a range could consist of a theoretical maximum of + 7*255*2 + 255 = 3825 distinct terms (when there is a term for every distinct value of an + 8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used + because it would always be possible to reduce the full 256 values to one term with degraded precision). + In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records + and a uniform value distribution).

+ +

Precision Step

+

You can choose any precisionStep when encoding values. + Lower step values mean more precision and so more terms in the index (and the index gets larger). + On the other hand, the maximum number of terms to match is reduced, which optimizes query speed. + The formula to calculate the maximum term count is: + + n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 ) + +

(this formula is only correct, when bitsPerValue/precisionStep is an integer; + in other cases, the value must be rounded up and the last summand must contain the modulo of the division as + precision step). + For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465, and for a precision + step of 2, n = 31*3*2 + 3 = 189. But the faster search speed is reduced by more seeking + in the term enum of the index. Because of this, the ideal precisionStep value can only + be found out by testing. Important: You can index with a lower precision step value and test search speed + using a multiple of the original step value.

+ +

Good values for precisionStep are depending on usage and data type: + + The default for all data types is 4, which is used, when no precisionStep is given. + Ideal value in most cases for 64 bit data types (long, double) is 6 or 8. + Ideal value in most cases for 32 bit data types (int, float) is 4. + Steps >64 for long/double and >32 for int/float produces one token + per value in the index and querying is as slow as a conventional . But it can be used + to produce fields, that are solely used for sorting (in this case simply use as + precisionStep). Using NumericFields for sorting + is ideal, because building the field cache is much faster than with text-only numbers. + Sorting is also possible with range query optimized fields using one of the above precisionSteps. + + +

Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed + that in boolean rewrite mode (with raised clause count) + took about 30-40 secs to complete, in constant score filter rewrite mode took 5 secs + and executing this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit + precision step). This query type was developed for a geographic portal, where the performance for + e.g. bounding boxes or exact date/time stamps is important.

+ +

+ 2.9 + + +
+ + Returns the field name for this query + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the lower value of this range query + + + Returns the upper value of this range query + + + Subclass of FilteredTermEnum for enumerating all terms that match the + sub-ranges for trie range queries. +

+ WARNING: This term enumeration is not guaranteed to be always ordered by + . + The ordering depends on how and + generates the sub-ranges. For + ordering is not relevant. +

+
+ + this is a dummy, it is not used by this class. + + + this is a dummy, it is not used by this class. + + + Compares if current upper bound is reached, + this also updates the term count for statistics. + In contrast to , a return value + of false ends iterating the current enum + and forwards to the next sub-range. + + + + Increments the enumeration to the next element. True if one exists. + + + Closes the enumeration to further activity, freeing resources. + + + Expert: Callback for . + You need to overwrite only one of the methods. +

NOTE: This is a very low-level interface, + the method signatures may change in later versions. +

+
+ + This is a helper class to generate prefix-encoded representations for numerical values + and supplies converters to represent float/double values as sortable integers/longs. + +

To quickly execute range queries in Apache Lucene, a range is divided recursively + into multiple intervals for searching: The center of the range is searched only with + the lowest possible precision in the trie, while the boundaries are matched + more exactly. This reduces the number of terms dramatically. + +

This class generates terms to achieve this: First the numerical integer values need to + be converted to strings. For that, integer values (32 bit or 64 bit) are made unsigned + and the bits are converted to ASCII chars in groups of 7 bits. The resulting string is + sortable like the original integer value. Each value is also prefixed + (in the first char) by the shift value (number of bits removed) used + during encoding. + +

To also index floating point numbers, this class supplies two methods to convert them + to integer values by changing their bit layout: , + . You will have no precision loss by + converting floating point numbers to integers and back (only that the integer form + is not usable). Other data types like dates can easily be converted to longs or ints (e.g. + date to long: ). + +

For easy usage, the trie algorithm is implemented for indexing inside + that can index int, long, + float, and double. For querying, + and implement the query part + for the same data types. + +

This class can also be used, to generate lexicographically sortable (according + ) representations of numeric data types for other + usages (e.g. sorting). + +

NOTE: This API is experimental and + might change in incompatible ways in the next release. + +

+ 2.9 + +
+ + The default precision step used by , , + , and as default + + + + Expert: The maximum term length (used for char[] buffer size) + for encoding long values. + + + + + + Expert: The maximum term length (used for char[] buffer size) + for encoding int values. + + + + + + Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is + stored as SHIFT_START_LONG+shift in the first character + + + + Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is + stored as SHIFT_START_INT+shift in the first character + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by . + + the numeric value + + how many bits to strip from the right + + that will contain the encoded chars, must be at least of + length + + number of chars written to buffer + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by . + + the numeric value + + how many bits to strip from the right + + + + This is a convenience method, that returns prefix coded bits of a long without + reducing the precision. It can be used to store the full precision value as a + stored field in index. +

To decode, use . +

+
+ + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by . + + the numeric value + + how many bits to strip from the right + + that will contain the encoded chars, must be at least of + length + + number of chars written to buffer + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by . + + the numeric value + + how many bits to strip from the right + + + + This is a convenience method, that returns prefix coded bits of an int without + reducing the precision. It can be used to store the full precision value as a + stored field in index. +

To decode, use . +

+
+ + Returns a long from prefixCoded characters. + Rightmost bits will be zero for lower precision codes. + This method can be used to decode e.g. a stored field. + + NumberFormatException if the supplied string is + not correctly prefix encoded. + + + + + + Returns an int from prefixCoded characters. + Rightmost bits will be zero for lower precision codes. + This method can be used to decode e.g. a stored field. + + NumberFormatException if the supplied string is + not correctly prefix encoded. + + + + + + Converts a double value to a sortable signed long. + The value is converted by getting their IEEE 754 floating-point "double format" + bit layout and then some bits are swapped, to be able to compare the result as long. + By this the precision is not reduced, but the value can easily used as a long. + + + + + + Convenience method: this just returns: + longToPrefixCoded(doubleToSortableLong(val)) + + + + Converts a sortable long back to a double. + + + + + Convenience method: this just returns: + sortableLongToDouble(prefixCodedToLong(val)) + + + + Converts a float value to a sortable signed int. + The value is converted by getting their IEEE 754 floating-point "float format" + bit layout and then some bits are swapped, to be able to compare the result as int. + By this the precision is not reduced, but the value can easily used as an int. + + + + + + Convenience method: this just returns: + intToPrefixCoded(floatToSortableInt(val)) + + + + Converts a sortable int back to a float. + + + + + Convenience method: this just returns: + sortableIntToFloat(prefixCodedToInt(val)) + + + + Expert: Splits a long range recursively. + You may implement a builder that adds clauses to a + for each call to its + + method. +

This method is used by . +

+
+ + Expert: Splits an int range recursively. + You may implement a builder that adds clauses to a + for each call to its + + method. +

This method is used by . +

+
+ + This helper does the splitting for both 32 and 64 bit. + + + Helper that delegates to correct range builder + + + Expert: Callback for . + You need to overwrite only one of the methods. +

NOTE: This is a very low-level interface, + the method signatures may change in later versions. +

+
+ + Overwrite this method, if you like to receive the already prefix encoded range bounds. + You can directly build classical (inclusive) range queries from them. + + + + Overwrite this method, if you like to receive the raw long range bounds. + You can use this for e.g. debugging purposes (print out range bounds). + + + + Expert: Callback for . + You need to overwrite only one of the methods. +

NOTE: This is a very low-level interface, + the method signatures may change in later versions. +

+
+ + Overwrite this method, if you like to receive the already prefix encoded range bounds. + You can directly build classical range (inclusive) queries from them. + + + + Overwrite this method, if you like to receive the raw int range bounds. + You can use this for e.g. debugging purposes (print out range bounds). + + + +
Factory that creates a NumericRangeQuery, that queries a long + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a long + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a int + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a int + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a double + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a double + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a float + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a float + range using the default precisionStep (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Implements parallel search over a set of Searchables. + +

Applications usually need only call the inherited + or methods. +

+
+ + Creates a which searches searchables. + + + + Executes each 's docFreq() in its own thread and + waits for each search to complete and merge the results back together. + + + + A search implementation which executes each + in its own thread and waits for each search to complete + and merge the results back together. + + + + A search implementation allowing sorting which spans a new thread for each + Searchable, waits for each search to complete and merges + the results back together. + + + + Lower-level search API. + +

is called for every matching document. + +

Applications should only use this if they need all of the + matching documents. The high-level search API () + is usually more efficient, as it skips + non-high-scoring hits. +

This method cannot be parallelized, because + supports no concurrent access. +

+ to match documents + + if non-null, a bitset used to eliminate some documents + + to receive hits + + TODO: parallelize this one too + +
+ + Calculate the final score as the average score of all payloads seen. +

+ Is thread safe and completely reusable. + + +

+
+ + An abstract class that defines a way for Payload*Query instances + to transform the cumulative effects of payload scores for a document. + + + for more information + +

+ This class and its derivations are experimental and subject to change + + + + + +

Calculate the score up to this point for this doc and field + The current doc + + The field + + The start position of the matching Span + + The end position of the matching Span + + The number of payloads seen so far + + The current score so far + + The score for the current payload + + The new current Score + + + + +
+ + Calculate the final score for all the payloads seen so far for this doc/field + The current doc + + The current field + + The total number of payloads seen on this document + + The raw score for those payloads + + The final score for the payloads + + + + Returns the maximum payload score seen, else 1 if there are no payloads on the doc. +

+ Is thread safe and completely reusable. + + +

+
+ + Calculates the minimum payload seen + + + + + + This class is very similar to + except that it factors + in the value of the payloads located at each of the positions where the + occurs. +

+ In order to take advantage of this, you must override + + which returns 1 by default. +

+ Payload scores are aggregated using a pluggable . + +

+ + +
+ + Matches spans which are near one another. One can specify slop, the + maximum number of intervening unmatched positions, as well as whether + matches are required to be in-order. + + + + Base class for span-based queries. + + + Expert: Returns the matches for this query in an index. Used internally + to search for spans. + + + + Returns the name of the field matched by this query. + + + Construct a SpanNearQuery. Matches spans matching a span from each + clause, with up to slop total unmatched positions between + them. * When inOrder is true, the spans from each clause + must be * ordered as in clauses. + + + + Return the clauses whose spans are matched. + + + Returns true iff o is equal to this. + + + Return the maximum number of intervening unmatched positions permitted. + + + Return true if matches are required to be in-order. + + + Expert-only. Public for use by other weight implementations + + + Public for extension only. + + + + This method is no longer an official member of + but it is needed by SpanWeight to build an explanation. + + + + By default, uses the to score the payloads, but + can be overridden to do other things. + + + The payloads + + The start position of the span being scored + + The end position of the span being scored + + + + + + + Experimental class to get set of payloads for most standard Lucene queries. + Operates like Highlighter - IndexReader should only contain doc of interest, + best to use MemoryIndex. + +

+ + WARNING: The status of the Payloads feature is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+
+ + that contains doc with payloads to extract + + + + Query should be rewritten for wild/fuzzy support. + + + + + payloads Collection + + IOException + + + This class is very similar to + except that it factors + in the value of the payload located at each of the positions where the + occurs. +

+ In order to take advantage of this, you must override + + which returns 1 by default. +

+ Payload scores are aggregated using a pluggable . + +

+
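+ A hedged sketch for the payload-scoring queries described above, using one of the shipped aggregation functions; the field and term are invented, and Similarity.ScorePayload (which returns 1 by default) must also be overridden for payload bytes to affect the score.
+ using Lucene.Net.Index;
+ using Lucene.Net.Search.Payloads;
+
+ // Each match contributes the maximum payload value seen at its positions.
+ var q = new PayloadTermQuery(new Term("body", "lucene"), new MaxPayloadFunction());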
+ + Matches spans containing a term. + + + Construct a SpanTermQuery matching the named term's spans. + + + Return the term whose spans are matched. + + + + * + + IOException + + + Returns the SpanScorer score only. +

+ Should not be overridden without good cause! + +

+ the score for just the Span part w/o the payload + + IOException + + + + +
+ + The score for the payload + + + The score, as calculated by + + + + + Position of a term in a document that takes into account the term offset within the phrase. + + + Go to next location of this term current document, and set + position as location - offset, so that a + matching exact phrase is easily identified when all PhrasePositions + have exactly the same position. + + + + A Query that matches documents containing a particular sequence of terms. + A PhraseQuery is built by QueryParser for input like "new york". + +

This query may be combined with other terms or queries with a . +

+
+ + Constructs an empty phrase query. + + + Adds a term to the end of the query phrase. + The relative position of the term is the one immediately after the last term added. + + + + Adds a term to the end of the query phrase. + The relative position of the term within the phrase is specified explicitly. + This allows e.g. phrases with more than one term at the same position + or phrases with gaps (e.g. in connection with stopwords). + + + + + + + + + Returns the set of terms in this phrase. + + + Returns the relative positions of terms in this phrase. + + + + + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Sets the number of other words permitted between words in query phrase. + If zero, then this is an exact phrase search. For larger values this works + like a WITHIN or NEAR operator. +

The slop is in fact an edit-distance, where the units correspond to + moves of terms in the query phrase out of position. For example, to switch + the order of two words requires two moves (the first move places the words + atop one another), so to permit re-orderings of phrases, the slop must be + at least two. +

More exact matches are scored higher than sloppier matches, thus search + results are sorted by exactness. +

The slop is zero by default, requiring exact matches. +

+
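+ A hedged sketch of the slop behaviour just described: with a slop of 2 the phrase below also matches documents where the two words are transposed (two moves) or separated by up to two positions. The field name is illustrative; older builds of the port expose SetSlop(int) instead of a Slop property.
+ using Lucene.Net.Index;
+ using Lucene.Net.Search;
+
+ var phrase = new PhraseQuery();
+ phrase.Add(new Term("body", "new"));
+ phrase.Add(new Term("body", "york"));
+ phrase.Slop = 2;   // 0 (the default) would demand an exact phrase match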
+ + A implementation which wraps another + and makes sure only documents with + scores > 0 are collected. + + + + A Filter that restricts search results to values that have a matching prefix in a given + field. + + + + Prints a user-readable version of this query. + + + A Query that matches documents containing terms with a specified prefix. A PrefixQuery + is built by QueryParser for input like app*. + +

This query uses the + + rewrite method. +

+
+ + Constructs a query for terms starting with prefix. + + + Prints a user-readable version of this query. + + + Returns the prefix of this query. + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified prefix filter term. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. + +

+
+ + + + + + + + + The original list of terms from the query, can contain duplicates + + + + Constrains search results to only match those which also match a provided + query. + +

This could be used, for example, with a on a suitably + formatted date field to implement date filtering. One could re-use a single + QueryFilter that matches, e.g., only documents modified within the last + week. The QueryFilter and TermRangeQuery would only need to be reconstructed + once per day. + +

+ $Id:$ + +
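+ A hedged sketch of the date-filtering idea above: a TermRangeQuery over a lexicographically sortable date field, wrapped once as a reusable filter. The field name and yyyyMMdd format are assumptions.
+ using Lucene.Net.Search;
+
+ // Reusable filter: documents whose "modified" term falls within one particular week;
+ // rebuild it once per day as suggested above.
+ Filter lastWeek = new QueryWrapperFilter(
+     new TermRangeQuery("modified", "20240101", "20240108", true, false));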
+ + Constructs a filter which only matches documents matching + query. + + + + A Scorer for queries with a required subscorer + and an excluding (prohibited) sub DocIdSetIterator. +
+ This Scorer implements , + and it uses the skipTo() on the given scorers. +
+
+ + Construct a ReqExclScorer. + The scorer that must match, except where + + indicates exclusion. + + + + Advance to non excluded doc. +
On entry: + + reqScorer != null, + exclScorer != null, + reqScorer was advanced once via next() or skipTo() + and reqScorer.doc() may still be excluded. + + Advances reqScorer a non excluded required doc, if any. +
+ true iff there is a non excluded required doc. + +
+ + Returns the score of the current document matching the query. + Initially invalid, until is called the first time. + + The score of the required scorer. + + + + A Scorer for queries with a required part and an optional part. + Delays skipTo() on the optional part until a score() is needed. +
+ This Scorer implements . +
+
+ + The scorers passed from the constructor. + These are set to null as soon as their next() or skipTo() returns false. + + + + Construct a ReqOptScorer. + The required scorer. This must match. + + The optional scorer. This is used for scoring only. + + + + Returns the score of the current document matching the query. + Initially invalid, until is called the first time. + + The score of the required scorer, eventually increased by the score + of the optional scorer when it also matches the current document. + + + + A which wraps another scorer and caches the score of the + current document. Successive calls to will return the same + result and will not invoke the wrapped Scorer's score() method, unless the + current document has changed.
+ This class might be useful due to the changes done to the + interface, in which the score is not computed for a document by default, only + if the collector requests it. Some collectors may need to use the score in + several places, however all they have in hand is a object, and + might end up computing the score of a document more than once. +
+
+ + Creates a new instance by wrapping the given scorer. + + + + Subclass of FilteredTermEnum for enumerating a single term. +

+ This can be used by s that need only visit one term, + but want to preserve MultiTermQuery semantics such as + . +

+
+ + + Creates a new SingleTermEnum. +

+ After calling the constructor the enumeration is already pointing to the term, + if it exists. +

+
+ + Score a candidate doc for all slop-valid position-combinations (matches) + encountered while traversing/hopping the PhrasePositions. +
The score contribution of a match depends on the distance: +
- highest score for distance=0 (exact match). +
- score gets lower as distance gets higher. +
Example: for query "a b"~2, a document "x a b a y" can be scored twice: + once for "a b" (distance=0), and once for "b a" (distance=2). +
Possibly not all valid combinations are encountered, because for efficiency + we always propagate the least PhrasePosition. This allows the implementation to be based on + PriorityQueue and to move forward faster. + As a result, for example, document "a b c b a" + would score differently for queries "a b c"~4 and "c b a"~4, although + they really are equivalent. + Similarly, for doc "a b c b a f g", query "c b"~2 + would get the same score as "g f"~2, although "c b"~2 could be matched twice. + We may want to fix this in the future (currently not, for performance reasons). +
+
+ + Init PhrasePositions in place. + There is a one time initialization for this scorer: +
- Put in repeats[] each pp that has another pp with same position in the doc. +
- Also mark each such pp by pp.repeats = true. +
Later, termPositionsDiffer(pp) can consult repeats[], making that check efficient. + In particular, this allows queries with no repetitions to be scored with no overhead due to this computation. +
- Example 1 - query with no repetitions: "ho my"~2 +
- Example 2 - query with repetitions: "ho my my"~2 +
- Example 3 - query with repetitions: "my ho my"~2 +
Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection. +
+ end (max position), or -1 if any term ran out (i.e. done) + + IOException +
+ + We disallow two pp's to have the same TermPosition, thereby verifying multiple occurrences + in the query of the same word would go elsewhere in the matched doc. + + null if differ (i.e. valid) otherwise return the higher offset PhrasePositions + out of the first two PPs found to not differ. + + + + Encapsulates sort criteria for returned hits. + +

The fields used to determine sort order must be carefully chosen. + Documents must contain a single term in such a field, + and the value of the term should indicate the document's relative position in + a given sort order. The field must be indexed, but should not be tokenized, + and does not need to be stored (unless you happen to want it back with the + rest of your document data). In other words: + +

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.NOT_ANALYZED));

+ + +

Valid Types of Values

+ +

There are four possible kinds of term values which may be put into + sorting fields: Integers, Longs, Floats, or Strings. Unless + SortField objects are specified, the type of value + in the field is determined by parsing the first term in the field. + +

Integer term values should contain only digits and an optional + preceding negative sign. Values must be base 10 and in the range + Integer.MIN_VALUE and Integer.MAX_VALUE inclusive. + Documents which should appear first in the sort + should have low value integers, later documents high values + (i.e. the documents should be numbered 1..n where + 1 is the first and n the last). + +

Long term values should contain only digits and an optional + preceding negative sign. Values must be base 10 and in the range + Long.MIN_VALUE and Long.MAX_VALUE inclusive. + Documents which should appear first in the sort + should have low value integers, later documents high values. + +

Float term values should conform to values accepted by + (except that NaN + and Infinity are not supported). + Documents which should appear first in the sort + should have low values, later documents high values. + +

String term values can contain any valid String, but should + not be tokenized. The values are sorted according to their + natural order. Note that using this type + of term value has higher memory requirements than the other + two types. + +

Object Reuse

+ +

One of these objects can be + used multiple times and the sort order changed between usages. + +

This class is thread safe. + +

Memory Usage

+ +

Sorting uses caches of term values maintained by the + internal HitQueue(s). The cache is static and contains an integer + or float array of length IndexReader.MaxDoc for each field + name for which a sort is performed. In other words, the size of the + cache in bytes is: +

4 * IndexReader.MaxDoc * (# of different fields actually used to sort) + +

For String fields, the cache is larger: in addition to the + above array, the value of every term in the field is kept in memory. + If there are many unique terms in the field, this could + be quite large. + +

Note that the size of the cache is not affected by how many + fields are in the index and might be used to sort - only by + the ones actually used to sort a result set. + +

Created: Feb 12, 2004 10:53:57 AM + +

+
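As a rough illustration of the class described above, a search can be sorted on the "byNumber" field from the earlier example roughly as follows; the index path and query are placeholders, and this is only a sketch, not part of the generated documentation:

    var directory = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"));
    var searcher = new IndexSearcher(directory, true); // read-only searcher
    var sort = new Sort(new SortField("byNumber", SortField.INT, true)); // reverse numeric order
    TopDocs hits = searcher.Search(new TermQuery(new Term("contents", "lucene")), null, 20, sort);

Only the top 20 hits are kept, and the field cache described under "Memory Usage" is populated for "byNumber" on first use.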
+ + Represents sorting by computed relevance. Using this sort criteria returns + the same results as calling + Searcher#search()without a sort criteria, + only with slightly more overhead. + + + + Represents sorting by index order. + + + Sorts by computed relevance. This is the same sort criteria as calling + without a sort criteria, + only with slightly more overhead. + + + + Sorts by the criteria in the given SortField. + + + Sorts in succession by the criteria in each SortField. + + + Sets the sort to the given criteria. + + + Sets the sort to the given criteria in succession. + + + Representation of the sort criteria. + Array of SortField objects used in this sort criteria + + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Stores information about how to sort documents by terms in an individual + field. Fields must be indexed in order to sort by them. + +

Created: Feb 11, 2004 1:25:29 PM +

+ +
+ + Sort by document score (relevancy). Sort values are Float and higher + values are at the front. + + + + Sort by document number (index order). Sort values are Integer and lower + values are at the front. + + + + Sort using term values as Strings. Sort values are String and lower + values are at the front. + + + + Sort using term values as encoded Integers. Sort values are Integer and + lower values are at the front. + + + + Sort using term values as encoded Floats. Sort values are Float and + lower values are at the front. + + + + Sort using term values as encoded Longs. Sort values are Long and + lower values are at the front. + + + + Sort using term values as encoded Doubles. Sort values are Double and + lower values are at the front. + + + + Sort using term values as encoded Shorts. Sort values are Short and + lower values are at the front. + + + + Sort using a custom Comparator. Sort values are any Comparable and + sorting is done according to natural order. + + + + Sort using term values as encoded Bytes. Sort values are Byte and + lower values are at the front. + + + + Sort using term values as Strings, but comparing by + value (using String.compareTo) for all comparisons. + This is typically slower than , which + uses ordinals to do the sorting. + + + + Represents sorting by document score (relevancy). + + + Represents sorting by document number (index order). + + + Creates a sort by terms in the given field with the type of term + values explicitly given. + + Name of field to sort by. Can be null if + type is SCORE or DOC. + + Type of values in the terms. + + + + Creates a sort, possibly in reverse, by terms in the given field with the + type of term values explicitly given. + + Name of field to sort by. Can be null if + type is SCORE or DOC. + + Type of values in the terms. + + True if natural order should be reversed. + + + + Creates a sort by terms in the given field, parsed + to numeric values using a custom . + + Name of field to sort by. Must not be null. + + Instance of a , + which must subclass one of the existing numeric + parsers from . Sort type is inferred + by testing which numeric parser the parser subclasses. + + IllegalArgumentException if the parser fails to + subclass an existing numeric parser, or field is null + + + + Creates a sort, possibly in reverse, by terms in the given field, parsed + to numeric values using a custom . + + Name of field to sort by. Must not be null. + + Instance of a , + which must subclass one of the existing numeric + parsers from . Sort type is inferred + by testing which numeric parser the parser subclasses. + + True if natural order should be reversed. + + IllegalArgumentException if the parser fails to + subclass an existing numeric parser, or field is null + + + + Creates a sort by terms in the given field sorted + according to the given locale. + + Name of field to sort by, cannot be null. + + Locale of values in the field. + + + + Creates a sort, possibly in reverse, by terms in the given field sorted + according to the given locale. + + Name of field to sort by, cannot be null. + + Locale of values in the field. + + + + Creates a sort with a custom comparison function. + Name of field to sort by; cannot be null. + + Returns a comparator for sorting hits. + + + + Creates a sort, possibly in reverse, with a custom comparison function. + Name of field to sort by; cannot be null. + + Returns a comparator for sorting hits. + + True if natural order should be reversed. + + + + Returns true if o is equal to this. 
If a + or + was provided, it must properly + implement equals (unless a singleton is always used). + + + + Returns true if o is equal to this. If a + (deprecated) or + was provided, it must properly + implement hashCode (unless a singleton is always + used). + + + + Returns the to use for + sorting. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + number of top hits the queue will store + + position of this SortField within + . The comparator is primary if sortPos==0, + secondary if sortPos==1, etc. Some comparators can + optimize themselves when they are the primary sort. + + to use when sorting + + + + Returns the name of the field. Could return null + if the sort is by SCORE or DOC. + + Name of field, possibly <c>null</c>. + + + Returns the type of contents in the field. + One of the constants SCORE, DOC, STRING, INT or FLOAT. + + + Returns the Locale by which term values are interpreted. + May return null if no Locale was specified. + + Locale, or <c>null</c>. + + + Returns the instance of a parser that fits to the given sort type. + May return null if no parser was specified. Sorting is using the default parser then. + + An instance of a <see cref="FieldCache" /> parser, or <c>null</c>. + + + Returns whether the sort should be reversed. + True if natural order should be reversed. + + + + Returns the used for + custom sorting + + + + The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery + +

+ NOTE: This API is still experimental and subject to change. +

+
+ + + The DocIdSet for the Filter + + A List of objects + + + + The first entry in the array corresponds to the first "on" bit. + Entries are increasing by document order + + A List of PositionInfo objects + + + Returns the docIdSet + + + + A List of <see cref="Lucene.Net.Search.SpanFilterResult.StartEnd" /> objects + + + + The end position of this match + + + The Start position + The start position of this match + + + Constrains search results to only match those which also match a provided + query. Also provides position information about where each document matches + at the cost of extra space compared with the QueryWrapperFilter. + There is an added cost to this above what is stored in a . Namely, + the position information for each matching document is stored. +

+ This filter does not cache. See the for a wrapper that + caches. + + +

+ $Id:$ + +
+ + Constructs a filter which only matches documents matching + query. + + The to use as the basis for the Filter. + + + +

Wrapper to allow objects to participate in composite + single-field SpanQueries by 'lying' about their search field. That is, + the masked SpanQuery will function as normal, + but simply hands back the value supplied + in this class's constructor.

+ +

This can be used to support Queries like or + across different fields, which is not ordinarily + permitted.

+ +

This can be useful for denormalized relational data: for example, when + indexing a document with conceptually many 'children':

+ +

+            teacherid: 1
+            studentfirstname: james
+            studentsurname: jones
+            
+            teacherid: 2
+            studentfirstname: james
+            studentsurname: smith
+            studentfirstname: sally
+            studentsurname: jones
+            
+ +

a SpanNearQuery with a slop of 0 can be applied across two + objects as follows: + + SpanQuery q1 = new SpanTermQuery(new Term("studentfirstname", "james")); + SpanQuery q2 = new SpanTermQuery(new Term("studentsurname", "jones")); + SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname"); + Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false); + + to search for 'studentfirstname:james studentsurname:jones' and find + teacherid 1 without matching teacherid 2 (which has a 'james' in position 0 + and 'jones' in position 1).

+ +

Note: as returns the masked field, scoring will be + done using the norms of the field name supplied. This may lead to unexpected + scoring behaviour.

+

+
+ + A Spans that is formed from the ordered subspans of a SpanNearQuery + where the subspans do not overlap and have a maximum slop between them. +

+ The formed spans contain only minimum-slop matches.
+ The matching slop is computed from the distance(s) between + the non-overlapping matching Spans.
+ Successive matches are always formed from the successive Spans + of the SpanNearQuery. +

+ The formed spans may contain overlaps when the slop is at least 1. + For example, when querying using + t1 t2 t3 + with slop at least 1, the fragment: + t1 t2 t1 t3 t2 t3 + matches twice: + t1 t2 .. t3 + t1 .. t2 t3 + + + Expert: + Only public for subclassing. Most implementations should not need this class +

+
+ + Expert: an enumeration of span matches. Used to implement span searching. + Each span represents a range of term positions within a document. Matches + are enumerated in order, by increasing document number, within that by + increasing start position and finally by increasing end position. + + + + Move to the next match, returning true iff any such exists. + + + Skips to the first match beyond the current, whose document number is + greater than or equal to target.

Returns true iff there is such + a match.

Behaves as if written: + boolean skipTo(int target) { + do { + if (!next()) + return false; + } while (target > doc()); + return true; + } + + Most implementations are considerably more efficient than that. +

+
+ + Returns the document number of the current match. Initially invalid. + + + Returns the start position of the current match. Initially invalid. + + + Returns the end position of the current match. Initially invalid. + + + Returns the payload data for the current span. + This is invalid until is called for + the first time. + This method must not be called more than once after each call + of . However, most payloads are loaded lazily, + so if the payload data for the current position is not needed, + this method may not be called at all for performance reasons. An ordered + SpanQuery does not lazy load, so if you have payloads in your index and + you do not want ordered SpanNearQuerys to collect payloads, you can + disable collection with a constructor option.
+ + Note that the return type is a collection, thus the ordering should not be relied upon. +
+

+ WARNING: The status of the Payloads feature is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case.

+ +

+ a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false + java.io.IOException +
+ + Checks if a payload can be loaded at this position. +

+ Payloads can only be loaded once per call to + . + +

+ true if there is a payload available at this position that can be loaded +
+ + The spans in the same order as the SpanNearQuery + + + Indicates that all subSpans have same doc() + + + Advances the subSpans to just after an ordered match with a minimum slop + that is smaller than the slop allowed by the SpanNearQuery. + + true iff there is such a match. + + + + Advance the subSpans to the same document + + + Check whether two Spans in the same document are ordered. + + + + + true iff spans1 starts before spans2 + or the spans start at the same position, + and spans1 ends before spans2. + + + + Like , but use the spans + starts and ends as parameters. + + + + Order the subSpans within the same document by advancing all later spans + after the previous one. + + + + The subSpans are ordered in the same doc, so there is a possible match. + Compute the slop while making the match as short as possible by advancing + all subSpans except the last one in reverse order. + + + + Similar to , but for the unordered case. + + Expert: + Only public for subclassing. Most implementations should not need this class + + + + WARNING: The List is not necessarily in order of the the positions + Collection of &lt;c&gt;byte[]&lt;/c&gt; payloads + IOException + + + Wraps a Spans, and can be used to form a linked list. + + + Matches spans near the beginning of a field. + + + Construct a SpanFirstQuery matching spans in match whose end + position is less than or equal to end. + + + + Return the SpanQuery whose matches are filtered. + + + Return the maximum end position permitted in a match. + + + Removes matches which overlap with another SpanQuery. + + + Construct a SpanNotQuery matching spans from include which + have no overlap with spans from exclude. + + + + Returns true iff o is equal to this. + + + Return the SpanQuery whose matches are filtered. + + + Return the SpanQuery whose matches must not overlap those returned. + + + Matches the union of its clauses. + + + Construct a SpanOrQuery merging the provided clauses. + + + Return the clauses whose spans are matched. + + + Expert: + Public for extension only + + + + A Query that matches documents containing a term. + This may be combined with other terms with a . + + + + Constructs a query for the term t. + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Returns the term of this query. + + + A Filter that restricts search results to a range of values in a given + field. + +

This filter matches the documents looking for terms that fall into the + supplied range according to . It is not intended + for numerical ranges, use instead. + +

If you construct a large number of range filters with different ranges but on the + same field, may have significantly better performance. +

+ 2.9 + +
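A hedged example of constructing such a filter; the field name and bounds are hypothetical, and because terms are compared lexicographically, numeric-looking values would need zero-padding:

    Filter filter = new TermRangeFilter("date", "20040101", "20041231", true, true);
    TopDocs hits = searcher.Search(query, filter, 50);

For genuinely numeric data, the numeric-range class referred to above is the better choice.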
+ + The field this range applies to + + The lower bound on this range + + The upper bound on this range + + Does this range include the lower bound? + + Does this range include the upper bound? + + IllegalArgumentException if both terms are null or if + lowerTerm is null and includeLower is true (similar for upperTerm + and includeUpper) + + + + WARNING: Using this constructor and supplying a non-null + value in the collator parameter will cause every single + index Term in the Field referenced by lowerTerm and/or upperTerm to be + examined. Depending on the number of index Terms in this Field, the + operation could be very slow. + + + + The lower bound on this range + + The upper bound on this range + + Does this range include the lower bound? + + Does this range include the upper bound? + + The collator to use when determining range inclusion; set + to null to use Unicode code point ordering instead of collation. + + IllegalArgumentException if both terms are null or if + lowerTerm is null and includeLower is true (similar for upperTerm + and includeUpper) + + + + Constructs a filter for field fieldName matching + less than or equal to upperTerm. + + + + Constructs a filter for field fieldName matching + greater than or equal to lowerTerm. + + + + Returns the field name for this filter + + + Returns the lower value of this range filter + + + Returns the upper value of this range filter + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the collator used to determine range inclusion, if any. + + + A Query that matches documents within an exclusive range of terms. + +

This query matches the documents looking for terms that fall into the + supplied range according to . It is not intended + for numerical ranges, use instead. + +

This query uses the + + rewrite method. +

+ 2.9 + +
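For example, to select terms lexicographically between two bounds (the field name and bounds are illustrative, and a searcher is assumed to exist):

    Query range = new TermRangeQuery("author", "abbott", "dickens", true, true);
    TopDocs hits = searcher.Search(range, 10);

Being term-based, this query carries the same lexicographic-ordering caveat as the filter above.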
+ + Constructs a query selecting all terms greater/equal than lowerTerm + but less/equal than upperTerm. + +

+ If an endpoint is null, it is said + to be "open". Either or both endpoints may be open. Open endpoints may not + be exclusive (you can't select all but the first or last term without + explicitly specifying the term to exclude.) + +

+ The field that holds both lower and upper terms. + + The term text at the lower end of the range + + The term text at the upper end of the range + + If true, the lowerTerm is + included in the range. + + If true, the upperTerm is + included in the range. + +
+ + Constructs a query selecting all terms greater/equal than + lowerTerm but less/equal than upperTerm. +

+ If an endpoint is null, it is said + to be "open". Either or both endpoints may be open. Open endpoints may not + be exclusive (you can't select all but the first or last term without + explicitly specifying the term to exclude.) +

+ If collator is not null, it will be used to decide whether + index terms are within the given range, rather than using the Unicode code + point order in which index terms are stored. +

+ WARNING: Using this constructor and supplying a non-null + value in the collator parameter will cause every single + index Term in the Field referenced by lowerTerm and/or upperTerm to be + examined. Depending on the number of index Terms in this Field, the + operation could be very slow. + +

+ + The Term text at the lower end of the range + + The Term text at the upper end of the range + + If true, the lowerTerm is + included in the range. + + If true, the upperTerm is + included in the range. + + The collator to use to collate index Terms, to determine + their membership in the range bounded by lowerTerm and + upperTerm. + +
+ + Prints a user-readable version of this query. + + + Returns the field name for this query + + + Returns the lower value of this range query + + + Returns the upper value of this range query + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the collator used to determine range inclusion, if any. + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified range parameters. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+ 2.9 + +
+ + Enumerates all terms greater/equal than lowerTerm + but less/equal than upperTerm. + + If an endpoint is null, it is said to be "open". Either or both + endpoints may be open. Open endpoints may not be exclusive + (you can't select all but the first or last term without + explicitly specifying the term to exclude.) + + + + + An interned field that holds both lower and upper terms. + + The term text at the lower end of the range + + The term text at the upper end of the range + + If true, the lowerTerm is included in the range. + + If true, the upperTerm is included in the range. + + The collator to use to collate index Terms, to determine their + membership in the range bounded by lowerTerm and + upperTerm. + + + IOException + + + Expert: A Scorer for documents matching a Term. + + + Construct a TermScorer. + + + The weight of the Term in the query. + + An iterator over the documents matching the Term. + + The Similarity implementation to be used for score + computations. + + The field norms of the document fields for the Term. + + + + Advances to the next document matching the query.
+ The iterator over the matching documents is buffered using + . + +
+ the document matching the query or -1 if there are no more documents. + +
+ + Advances to the first match beyond the current whose document number is + greater than or equal to a given target.
+ The implementation uses . + +
+ The target document number. + + the matching document or -1 if none exist. + +
+ + Returns a string representation of this TermScorer. + + + The is used to timeout search requests that + take longer than the maximum allowed search time limit. After this time is + exceeded, the search thread is stopped by throwing a + . + + + + Default timer resolution. + + + + + Default for . + + + + + Create a TimeLimitedCollector wrapper over another with a specified timeout. + the wrapped + + max time allowed for collecting hits after which is thrown + + + + Calls on the decorated + unless the allowed time has passed, in which case it throws an exception. + + + TimeExceededException + if the time allowed has exceeded. + + + + + Gets or sets the timer resolution. + The default timer resolution is 20 milliseconds. + This means that a search required to take no longer than + 800 milliseconds may be stopped after 780 to 820 milliseconds. +
Note that: + + Finer (smaller) resolution is more accurate but less efficient. + Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds. + Setting resolution smaller than the current resolution might take effect only after the current + resolution elapses. (For example, if the current resolution of 20 milliseconds is changed to 5 milliseconds, + it can take up to 20 milliseconds for the change to take effect.) + +
+
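A minimal sketch of wrapping a collector with the time limit described above; the 1000 ms budget and the inner collector are placeholders, and the class and exception names follow the Lucene.Net 3.x port (TimeLimitingCollector):

    var inner = TopScoreDocCollector.Create(10, true);
    var limited = new TimeLimitingCollector(inner, 1000); // time allowed, in milliseconds
    try
    {
        searcher.Search(query, limited);
    }
    catch (TimeLimitingCollector.TimeExceededException)
    {
        // Hits collected before the timeout are still available from 'inner'.
    }
    TopDocs hits = inner.TopDocs();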
+ + Checks if this time limited collector is greedy in collecting the last hit. + A non greedy collector, upon a timeout, would throw a + without allowing the wrapped collector to collect current doc. A greedy one would + first allow the wrapped hit collector to collect current doc and only then + throw a . + + + + TimerThread provides a pseudo-clock service to all searching + threads, so that they can count elapsed time with less overhead + than repeatedly calling System.currentTimeMillis. A single + thread should be created to be used for all searches. + + + + Get the timer value in milliseconds. + + + Thrown when elapsed search time exceeds allowed search time. + + + Returns allowed time (milliseconds). + + + Returns elapsed time (milliseconds). + + + Returns last doc(absolute doc id) that was collected when the search time exceeded. + + + Represents hits returned by + and + + + + Constructs a TopDocs with a default maxScore=Float.NaN. + + + + + + The total number of hits for the query. + + + The top hits for the query. + + + + Gets or sets the maximum score value encountered, needed for normalizing. + Note that in case scores are not tracked, this returns . + + + + A base class for all collectors that return a output. This + collector allows easy extension by providing a single constructor which + accepts a as well as protected members for that + priority queue and a counter of the number of total hits.
+ Extending classes can override and + in order to provide their own implementation. +
+
+ + The priority queue which holds the top documents. Note that different + implementations of PriorityQueue give different meaning to 'top documents'. + HitQueue for example aggregates the top scoring documents, while other PQ + implementations may hold documents sorted by other criteria. + + The total number of documents that the collector encountered. + + Populates the results array with the ScoreDoc instances. This can be + overridden in case a different ScoreDoc type should be returned. + + Returns a instance containing the given results. If + results is null it means there are no results to return, + either because there were 0 calls to collect() or because the arguments to + topDocs were invalid. + + Returns the top docs that were collected by this collector. + + Returns the documents in the range [start .. pq.size()) that were collected + by this collector. Note that if start >= pq.size(), an empty TopDocs is + returned.
+ This method is convenient to call if the application always asks for the + last results, starting from the last 'page'.
+ NOTE: you cannot call this method more than once for each search + execution. If you need to call it more than once, passing each time a + different start, you should call and work + with the returned object, which will contain all the + results this search execution collected. +
+
+ + Returns the documents in the range [start .. start+howMany) that were + collected by this collector. Note that if start >= pq.size(), an empty + TopDocs is returned, and if pq.size() - start < howMany, then only the + available documents in [start .. pq.size()) are returned.
+ This method is useful when the search application supports pagination of search results; it also + optimizes the memory used by allocating only as much as requested by howMany.
+ NOTE: you cannot call this method more than once for each search + execution. If you need to call it more than once, passing each time a + different range, you should call and work with the + returned object, which will contain all the results this + search execution collected. +
+
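To illustrate the paging variant just described, a second results page could be fetched from a single collector roughly like this; the page size of 10 is arbitrary and the collector must have gathered enough hits up front:

    var collector = TopScoreDocCollector.Create(20, true); // collect enough hits for two pages
    searcher.Search(query, collector);
    TopDocs secondPage = collector.TopDocs(10, 10); // documents [10 .. 20)

As noted above, TopDocs(start, howMany) may only be called once per search execution.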
+ + The total number of documents that matched this query. + + + A that sorts by using + s. +

+ See the method + for instantiating a TopFieldCollector. + +

NOTE: This API is experimental and might change in + incompatible ways in the next release.

+

+
+ + Creates a new from the given + arguments. + +

NOTE: The instances returned by this method + pre-allocate a full array of length + numHits. + +

+ the sort criteria (SortFields). + + the number of results to collect. + + specifies whether the actual field values should be returned on + the results (FieldDoc). + + specifies whether document scores should be tracked and set on the + results. Note that if set to false, then the results' scores will + be set to Float.NaN. Setting this to true affects performance, as + it incurs the score computation on each competitive result. + Therefore if document scores are not required by the application, + it is recommended to set it to false. + + specifies whether the query's maxScore should be tracked and set + on the resulting . Note that if set to false, + returns Float.NaN. Setting this to + true affects performance as it incurs the score computation on + each result. Also, setting this true automatically sets + trackDocScores to true as well. + + specifies whether documents are scored in doc Id order or not by + the given in . + + a instance which will sort the results by + the sort criteria. + + IOException +
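Putting the parameters above together, a field-sorted collector might be created as follows; the sort and flag values are just one plausible combination, shown here as a sketch:

    Sort sort = new Sort(new SortField("byNumber", SortField.INT));
    TopFieldCollector collector = TopFieldCollector.Create(
        sort, 10, /*fillFields*/ true, /*trackDocScores*/ false,
        /*trackMaxScore*/ false, /*docsScoredInOrder*/ true);
    searcher.Search(query, collector);
    TopDocs hits = collector.TopDocs();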
+ + + Represents hits returned by . + + + + The fields which were used to sort results by. + + + Creates one of these objects. + Total number of hits for the query. + + The top hits for the query. + + The sort criteria used to find the top hits. + + The maximum score encountered. + + + + A implementation that collects the top-scoring hits, + returning them as a . This is used by to + implement -based search. Hits are sorted by score descending + and then (when the scores are tied) docID ascending. When you create an + instance of this collector you should know in advance whether documents are + going to be collected in doc Id order or not. + +

NOTE: The values and + are not valid scores. This + collector will not properly collect hits with such + scores. +

+
+ + Creates a new given the number of hits to + collect and whether documents are scored in order by the input + to . + +

NOTE: The instances returned by this method + pre-allocate a full array of length + numHits, and fill the array with sentinel + objects. +

+
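A brief sketch of the usual pattern; the number of hits and the in-order flag depend on the query and searcher in use:

    TopScoreDocCollector collector = TopScoreDocCollector.Create(10, true);
    searcher.Search(query, collector);
    foreach (ScoreDoc sd in collector.TopDocs().ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        // use the retrieved document ...
    }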
+ + Implements the wildcard search query. Supported wildcards are *, which + matches any character sequence (including the empty one), and ?, + which matches any single character. Note this query can be slow, as it + needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, + a Wildcard term should not start with one of the wildcards * or + ?. + +

This query uses the + + rewrite method. + +

+ + +
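For instance, to match all terms fitting a pattern (field name and pattern are illustrative; note the pattern deliberately does not start with a wildcard, per the warning above):

    Query wq = new WildcardQuery(new Term("body", "te?t*"));
    TopDocs hits = searcher.Search(wq, 10);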
+ + Prints a user-readable version of this query. + + + Returns the pattern term. + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified wildcard filter term. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + ***************************************** + String equality with support for wildcards + ****************************************** + + + + Creates a new WildcardTermEnum. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. +

+
+ + Determines if a word matches a wildcard pattern. + Work released by Granta Design Ltd after originally being done on + company time. + + + + This exception is thrown when there is an attempt to + access something that has already been closed. + + + + Base implementation class for buffered . + + + Abstract base class for output to a file in a Directory. A random-access + output stream. Used for all Lucene index output operations. + + + + + + + + Writes a single byte. + + + + + Writes an array of bytes. + the bytes to write + + the number of bytes to write + + + + + + Writes an array of bytes. + the bytes to write + + the offset in the byte array + + the number of bytes to write + + + + + + Writes an int as four bytes. + + + + + Writes an int in a variable-length format. Writes between one and + five bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Writes a long as eight bytes. + + + + + Writes an long in a variable-length format. Writes between one and five + bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Writes a string. + + + + + Writes a sub sequence of characters from s as the old + format (modified UTF-8 encoded bytes). + + the source of the characters + + the first character in the sequence + + the number of characters in the sequence + + -- please pre-convert to utf8 bytes + instead or use + + + + Writes a sub sequence of characters from char[] as + the old format (modified UTF-8 encoded bytes). + + the source of the characters + + the first character in the sequence + + the number of characters in the sequence + + -- please pre-convert to utf8 bytes instead or use + + + + Copy numBytes bytes from input to ourself. + + + Forces any buffered output to be written. + + + Closes this stream to further operations. + + + Closes this stream to further operations. + + + Sets current position in this file, where the next write will occur. + + + + + Set the file length. By default, this method does + nothing (it's optional for a Directory to implement + it). But, certain Directory implementations (for + + can use this to inform the + underlying IO system to pre-allocate the file to the + specified size. If the length is longer than the + current file length, the bytes added to the file are + undefined. Otherwise the file is truncated. + + file length + + + + Returns the current position in this file, where the next write will + occur. + + + + + + The number of bytes in the file. + + + Writes a single byte. + + + + + Writes an array of bytes. + the bytes to write + + the number of bytes to write + + + + + + Forces any buffered output to be written. + + + Expert: implements buffer write. Writes bytes at the current position in + the output. + + the bytes to write + + the number of bytes to write + + + + Expert: implements buffer write. Writes bytes at the current position in + the output. + + the bytes to write + + the offset in the byte array + + the number of bytes to write + + + + Closes this stream to further operations. + + + Sets current position in this file, where the next write will occur. + + + + + Returns the current position in this file, where the next write will + occur. + + + + + + The number of bytes in the file. + + + Writes bytes through to a primary IndexOutput, computing + checksum as it goes. Note that you cannot use seek(). + + + + Writes bytes through to a primary IndexOutput, computing + checksum. Note that you cannot use seek(). 
+ + + + Starts but does not complete the commit of this file (= + writing of the final checksum at the end). After this + is called must call and the + to complete the commit. + + + + See + + + Expert: A Directory instance that switches files between + two other Directory instances. +

Files with the specified extensions are placed in the + primary directory; others are placed in the secondary + directory. The provided Set must not change once passed + to this class, and must allow multiple threads to call + contains at once.

+ +

NOTE: this API is new and experimental and is + subject to sudden change in the next release. +

+
+ + Utility method to return a file's extension. + + + Return the primary directory + + + Return the secondary directory + + + + Base class for Directory implementations that store index + files in the file system. There are currently three core + subclasses: + + + + is a straightforward + implementation using java.io.RandomAccessFile. + However, it has poor concurrent performance + (multiple threads will bottleneck) as it + synchronizes when multiple threads read from the + same file. + + uses java.nio's + FileChannel's positional io when reading to avoid + synchronization when reading from the same file. + Unfortunately, due to a Windows-only Sun + JRE bug this is a poor choice for Windows, but + on all other platforms this is the preferred + choice. Applications using or + Future#cancel(boolean) (on Java 1.5) should use + instead. See java doc + for details. + + + + uses memory-mapped IO when + reading. This is a good choice if you have plenty + of virtual memory relative to your index size, eg + if you are running on a 64 bit JRE, or you are + running on a 32 bit JRE but your index sizes are + small enough to fit into the virtual memory space. + Java has currently the limitation of not being able to + unmap files from user code. The files are unmapped, when GC + releases the byte buffers. Due to + + this bug in Sun's JRE, MMapDirectory's + is unable to close the underlying OS file handle. Only when + GC finally collects the underlying objects, which could be + quite some time later, will the file handle be closed. + This will consume additional transient disk usage: on Windows, + attempts to delete or overwrite the files will result in an + exception; on other platforms, which typically have a "delete on + last close" semantics, while such operations will succeed, the bytes + are still consuming space on disk. For many applications this + limitation is not a problem (e.g. if you have plenty of disk space, + and you don't rely on overwriting files on Windows) but it's still + an important limitation to be aware of. This class supplies a + (possibly dangerous) workaround mentioned in the bug report, + which may fail on non-Sun JVMs. + + Applications using or + Future#cancel(boolean) (on Java 1.5) should use + instead. See + java doc for details. + + + Unfortunately, because of system peculiarities, there is + no single overall best implementation. Therefore, we've + added the method, to allow Lucene to choose + the best FSDirectory implementation given your + environment, and the known limitations of each + implementation. For users who have no reason to prefer a + specific implementation, it's best to simply use + . For all others, you should instantiate the + desired implementation directly. + +

The locking implementation is by default + , but can be changed by + passing in a custom instance. +

+
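Given the guidance above, a typical way to obtain a directory is the static Open method, letting the library pick the implementation; the path below is a placeholder:

    var dirInfo = new System.IO.DirectoryInfo(@"C:\indexes\products");
    Directory directory = FSDirectory.Open(dirInfo);

If the precise implementation matters, instantiate SimpleFSDirectory or MMapDirectory directly, as the notes below explain.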
+ + Initializes the directory to create a new file with the given name. + This method should be used in . + + + + The underlying filesystem directory + + + Create a new FSDirectory for the named location (ctor for subclasses). + the path of the directory + + the lock factory to use, or null for the default + (); + + IOException + + + Creates an FSDirectory instance, trying to pick the + best implementation given the current environment. + The directory returned uses the . + +

Currently this returns as + NIOFSDirectory is currently not supported. + +

NOTE: this method may suddenly change which + implementation is returned from release to release, in + the event that higher performance defaults become + possible; if the precise implementation is important to + your application, please instantiate it directly, + instead. On 64 bit systems, it may also be good to + return , but this is disabled + because of officially missing unmap support in Java. + For optimal performance you should consider using + this implementation on 64 bit JVMs.

See above +

+
+ + Creates an FSDirectory instance, trying to pick the + best implementation given the current environment. + The directory returned uses the . + +

Currently this returns as + NIOFSDirectory is currently not supported. + +

NOTE: this method may suddenly change which + implementation is returned from release to release, in + the event that higher performance defaults become + possible; if the precise implementation is important to + your application, please instantiate it directly, + instead. On 64 bit systems, it may also be good to + return , but this is disabled + because of officially missing unmap support in Java. + For optimal performance you should consider using + this implementation on 64 bit JVMs.

See above +

+
+ + Just like , but allows you to + also specify a custom . + + + + Lists all files (not subdirectories) in the + directory. This method never returns null (throws + instead). + + + NoSuchDirectoryException if the directory + does not exist, or does exist but is not a + directory. + + IOException if list() returns null + + + Lists all files (not subdirectories) in the + directory. + + + + + + Returns true iff a file with the given name exists. + + + Returns the time the named file was last modified. + + + Returns the time the named file was last modified. + + + Set the modified time of an existing file to now. + + + Returns the length in bytes of a file in the directory. + + + Removes an existing file in the directory. + + + So we can do some byte-to-hexchar conversion below + + + For debug output. + + + Default read chunk size. This is a conditional + default: on 32bit JVMs, it defaults to 100 MB. On + 64bit JVMs, it's Integer.MAX_VALUE. + + + + + + The maximum number of bytes to read at once from the + underlying file during . + + + + + + Base class for file system based locking implementation. + + +

Base class for Locking implementation. uses + instances of this class to implement locking.

+ +

Note that there are some useful tools to verify that + your LockFactory is working correctly: + , , + .

+ +

+ + + + + + +
+ + Return a new Lock instance identified by lockName. + name of the lock to be created. + + + + Attempt to clear (forcefully unlock and remove) the + specified lock. Only call this at a time when you are + certain this lock is no longer in use. + + name of the lock to be cleared. + + + + Gets or sets the prefix in use for all locks created in this + LockFactory. This is normally called once, when a + Directory gets this LockFactory instance. However, you + can also call this (after this instance is assigned to + a Directory) to override the prefix in use. This + is helpful if you're running Lucene on machines that + have different mount points for the same shared + directory. + + + + Directory for the lock files. + + + Gets the lock directory. + Subclasses can use this to set the lock directory. + This method can be only called + once to initialize the lock directory. It is used by + to set the lock directory to itsself. + Subclasses can also use this method to set the directory + in the constructor. + + + + + An interprocess mutex lock. +

Typical use might look like: + new Lock.With(directory.makeLock("my.lock")) { + public Object doBody() { + ... code to execute while locked ... + } + }.run(); + +

+ +
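In the .NET port the same pattern is usually written with an explicit try/finally rather than the anonymous class shown above; a rough equivalent, with an arbitrary lock name:

    Lock indexLock = directory.MakeLock("my.lock");
    if (indexLock.Obtain()) // immediate attempt; see Obtain(long) below for waiting
    {
        try
        {
            // code to execute while locked
        }
        finally
        {
            indexLock.Release();
        }
    }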
+ + Pass this value to to try + forever to obtain the lock. + + + + How long waits, in milliseconds, + in between attempts to acquire the lock. + + + + Attempts to obtain exclusive access and immediately return + upon success or failure. + + true iff exclusive access is obtained + + + + If a lock obtain called, this failureReason may be set + with the "root cause" Exception as to why the lock was + not obtained. + + + + Attempts to obtain an exclusive lock within amount of + time given. Polls once per + (currently 1000) milliseconds until lockWaitTimeout is + passed. + + length of time to wait in + milliseconds or + to retry forever + + true if lock was obtained + + LockObtainFailedException if lock wait times out + IllegalArgumentException if lockWaitTimeout is + out of bounds + + IOException if obtain() throws IOException + + + Releases exclusive access. + + + Returns true if the resource is currently locked. Note that one must + still call before using the resource. + + + + Utility class for executing code with exclusive access. + + + Constructs an executor that will grab the named lock. + + + Code to execute with exclusive access. + + + Calls while lock is obtained. Blocks if lock + cannot be obtained immediately. Retries to obtain lock once per second + until it is obtained, or until it has tried ten times. Lock is released when + exits. + + LockObtainFailedException if lock could not + be obtained + + IOException if throws IOException + + + This exception is thrown when the write.lock + could not be acquired. This + happens when a writer tries to open an index + that another writer already has open. + + + + + + This exception is thrown when the write.lock + could not be released. + + + + + + Simple standalone tool that forever acquires & releases a + lock using a specific LockFactory. Run without any args + to see usage. + + + + + + + + + Simple standalone server that must be running when you + use . This server simply + verifies at most one process holds the lock at a time. + Run without any args to see usage. + + + + + + + + + File-based implementation that uses + mmap for reading, and + for writing. + +

NOTE: memory mapping uses up a portion of the + virtual memory address space in your process equal to the + size of the file being mapped. Before using this class, + be sure you have plenty of virtual address space, e.g. by + using a 64 bit JRE, or a 32 bit JRE with indexes that are + guaranteed to fit within the address space. + On 32 bit platforms also consult + if you have problems with mmap failing because of fragmented + address space. If you get an OutOfMemoryException, it is recommended + to reduce the chunk size until it works. +

Due to + this bug in Sun's JRE, MMapDirectory's + is unable to close the underlying OS file handle. Only when GC + finally collects the underlying objects, which could be quite + some time later, will the file handle be closed. + +

This will consume additional transient disk usage: on Windows, + attempts to delete or overwrite the files will result in an + exception; on other platforms, which typically have a "delete on + last close" semantics, while such operations will succeed, the bytes + are still consuming space on disk. For many applications this + limitation is not a problem (e.g. if you have plenty of disk space, + and you don't rely on overwriting files on Windows) but it's still + an important limitation to be aware of. + +

This class supplies the workaround mentioned in the bug report + (disabled by default, see ), which may fail on + non-Sun JVMs. It forcefully unmaps the buffer on close by using + an undocumented internal cleanup functionality. + is true, if the workaround + can be enabled (with no guarantees). +

+
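A minimal sketch of opening such a directory directly; the path is a placeholder and the constructor overloads follow the member descriptions below:

    var dir = new MMapDirectory(new System.IO.DirectoryInfo(@"C:\indexes\big-index"));
    var searcher = new IndexSearcher(dir, true);

The virtual-address-space caveats above still apply; on 32 bit processes a smaller chunk size may be needed.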
+ + Create a new MMapDirectory for the named location. + + + the path of the directory + + the lock factory to use, or null for the default. + + IOException + + + Create a new MMapDirectory for the named location and the default lock factory. + + + the path of the directory + + IOException + + + true, if this platform supports unmapping mmaped files. + + + Try to unmap the buffer, this method silently fails if no support + for that in the JVM. On Windows, this leads to the fact, + that mmapped files cannot be modified or deleted. + + + + Creates an IndexInput for the file with the given name. + + + Creates an IndexOutput for the file with the given name. + + + Enables or disables the workaround for unmapping the buffers + from address space after closing , that is + mentioned in the bug report. This hack may fail on non-Sun JVMs. + It forcefully unmaps the buffer on close by using + an undocumented internal cleanup functionality. +

NOTE: Enabling this is completely unsupported + by Java and may lead to JVM crashes if IndexInput + is closed while another thread is still accessing it (SIGSEGV).

+ IllegalArgumentException if + is false and the workaround cannot be enabled. + +
+ + Gets or sets the maximum chunk size (default is for + 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping. + Especially on 32 bit platforms, the address space can be very fragmented, + so large index files cannot be mapped. + Using a lower chunk size makes the directory implementation a little + bit slower (as the correct chunk must be resolved on each seek) + but the chance is higher that mmap does not fail. On 64 bit + Java platforms, this parameter should always be , + as the address space is big enough. + +

Implements using native OS file + locks. Note that because this LockFactory relies on + java.nio.* APIs for locking, any problems with those APIs + will cause locking to fail. Specifically, on certain NFS + environments the java.nio.* locks will fail (the lock can + incorrectly be double acquired) whereas + worked perfectly in those same + environments. For NFS based access to an index, it's + recommended that you try + first and work around the one limitation that a lock file + could be left when the JVM exits abnormally.

+ +

The primary benefit of is + that lock files will be properly removed (by the OS) if + the JVM has an abnormal exit.

+ +

Note that, unlike , the existence of + leftover lock files in the filesystem on exiting the JVM + is fine because the OS will free the locks held against + these files even though the files still remain.

+ +

If you suspect that this or any other LockFactory is + not working properly in your environment, you can easily + test it by using , + and .

+ +

+ + +
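As a sketch of the recommendation above, a native lock factory can be supplied when the directory is opened; with the parameterless constructor the lock directory is set automatically, and the index path is a placeholder:

    var path = new System.IO.DirectoryInfo(@"C:\indexes\products");
    var directory = FSDirectory.Open(path, new NativeFSLockFactory());

Passing an explicit lock directory is mainly useful when several directories must share one lock location.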
+ + Create a NativeFSLockFactory instance, with null (unset) + lock directory. When you pass this factory to a + subclass, the lock directory is automatically set to the + directory itsself. Be sure to create one instance for each directory + your create! + + + + Create a NativeFSLockFactory instance, storing lock + files into the specified lockDirName: + + + where lock files are created. + + + + Create a NativeFSLockFactory instance, storing lock + files into the specified lockDir: + + + where lock files are created. + + + + + Not implemented. Waiting for volunteers. + + + + + Not implemented. Waiting for volunteers. + + + + Use this to disable locking entirely. + Only one instance of this lock is created. You should call + to get the instance. + + + + + + + This exception is thrown when you try to list a + non-existent directory. + + + + A memory-resident implementation. Locking + implementation is by default the + but can be changed with . + + + + Constructs an empty . + + + Creates a new RAMDirectory instance from a different + Directory implementation. This can be used to load + a disk-based index into memory. +

+ This should be used only with indices that can fit into memory. +

+ Note that the resulting RAMDirectory instance is fully + independent from the original Directory (it is a + complete copy). Any subsequent changes to the + original Directory will not be visible in the + RAMDirectory instance. + +

+ a Directory value + + if an error occurs + +
+ + Returns true iff the named file exists in this directory. + + + Returns the time the named file was last modified. + IOException if the file does not exist + + + Set the modified time of an existing file to now. + IOException if the file does not exist + + + Returns the length in bytes of a file in the directory. + IOException if the file does not exist + + + Return total size in bytes of all files in this + directory. This is currently quantized to + RAMOutputStream.BUFFER_SIZE. + + + + Removes an existing file in the directory. + IOException if the file does not exist + + + Creates a new, empty file in the directory with the given name. Returns a stream writing this file. + + + Returns a stream reading an existing file. + + + Closes the store to future operations, releasing associated memory. + + + A memory-resident implementation. + + + + + A memory-resident implementation. + For lucene internal use. + + + + Construct an empty output buffer. + + + Copy the current contents of this buffer to the named output. + + + Resets this to an empty buffer. + + + Returns byte usage of all buffers. + + + A straightforward implementation of + using java.io.RandomAccessFile. However, this class has + poor concurrent performance (multiple threads will + bottleneck) as it synchronizes when multiple threads + read from the same file. It's usually better to use + or instead. + + + + Create a new SimpleFSDirectory for the named location. + + + the path of the directory + + the lock factory to use, or null for the default. + + IOException + + + Create a new SimpleFSDirectory for the named location and the default lock factory. + + + the path of the directory + + IOException + + + Creates an IndexOutput for the file with the given name. + + + Creates an IndexInput for the file with the given name. + + + IndexInput methods + + + Method used for testing. Returns true if the underlying + file descriptor is valid. + + + + output methods: + + + Random-access methods + + +

Implements using + .

+ +

NOTE: the javadocs + for File.createNewFile contain a vague + yet spooky warning about not using the API for file + locking. This warning was added due to this + bug, and in fact the only known problem with using + this API for locking is that the Lucene write lock may + not be released when the JVM exits abnormally.

+

When this happens, a + is hit when trying to create a writer, in which case you + need to explicitly clear the lock file first. You can + either manually remove the file, or use the + + API. But, first be certain that no writer is in fact + writing to the index otherwise you can easily corrupt + your index.

+ +

If you suspect that this or any other LockFactory is + not working properly in your environment, you can easily + test it by using , + and .

+ +

+ + +
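Related to the leftover-lock caveat above, a stale write lock can be cleared programmatically before opening a writer; a cautious sketch, to be used only when no other writer can be running against the index:

    if (IndexWriter.IsLocked(directory))
    {
        IndexWriter.Unlock(directory);
    }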
+ + Create a SimpleFSLockFactory instance, with null (unset) + lock directory. When you pass this factory to a + subclass, the lock directory is automatically set to the + directory itsself. Be sure to create one instance for each directory + your create! + + + + Instantiate using the provided directory (as a File instance). + where lock files should be created. + + + + Instantiate using the provided directory name (String). + where lock files should be created. + + + + Implements for a single in-process instance, + meaning all locking will take place through this one instance. + Only use this when you are certain all + IndexReaders and IndexWriters for a given index are running + against a single shared in-process Directory instance. This is + currently the default locking for RAMDirectory. + + + + + + + A that wraps another + and verifies that each lock obtain/release + is "correct" (never results in two processes holding the + lock at the same time). It does this by contacting an + external server () to assert that + at most one process holds the lock at a time. To use + this, you should also run on the + host & port matching what you pass to the constructor. + + + + + + + + + should be a unique id across all clients + + the LockFactory that we are testing + + host or IP where + is running + + the port is + listening on + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A simple wrapper to allow for the use of the GeneralKeyedCollection. The + wrapper is required as there can be several keys for an object depending + on how many interfaces it implements. + + + + + This class provides supporting methods of java.util.BitSet + that are not present in System.Collections.BitArray. + + + + + Returns the next set bit at or after index, or -1 if no such bit exists. + + + the index of bit array at which to start checking + the next set bit or -1 + + + + Returns the next un-set bit at or after index, or -1 if no such bit exists. + + + the index of bit array at which to start checking + the next set bit or -1 + + + + Returns the number of bits set to true in this BitSet. + + The BitArray object. + The number of bits set to true in this BitSet. + + + + Mimics Java's Character class. + + + + + + + + + + + + + + + + + + + + + For Debuging purposes. + + + + + Support class used to handle Hashtable addition, which does a check + first to make sure the added item is unique in the hash. + + + + + Converts the specified collection to its string representation. + + The collection to convert to string. + A string representation of the specified collection. + + + + Compares two string arrays for equality. + + First string array list to compare + Second string array list to compare + true if the strings are equal in both arrays, false otherwise + + + + Sorts an IList collections + + The System.Collections.IList instance that will be sorted + The Comparator criteria, null to use natural comparator. + + + + Fills the array with an specific value from an specific index to an specific index. + + The array to be filled. + The first index to be filled. + The last index to be filled. + The value to fill the array with. + + + + Fills the array with an specific value. + + The array to be filled. + The value to fill the array with. + + + + Compares the entire members of one array whith the other one. + + The array to be compared. + The array to be compared with. + Returns true if the two specified arrays of Objects are equal + to one another. 
The two arrays are considered equal if both arrays + contain the same number of elements, and all corresponding pairs of + elements in the two arrays are equal. Two objects e1 and e2 are + considered equal if (e1==null ? e2==null : e1.equals(e2)). In other + words, the two arrays are equal if they contain the same elements in + the same order. Also, two array references are considered equal if + both are null. + + + + Summary description for TestSupportClass. + + + + + Compares two Term arrays for equality. + + First Term array to compare + Second Term array to compare + true if the Terms are equal in both arrays, false otherwise + + + + Contains conversion support elements such as classes, interfaces and static methods. + + + + + + + + + Represents a strongly typed list of objects that can be accessed by index. + Provides methods to search, sort, and manipulate lists. Also provides functionality + to compare lists against each other through an implementations of + . + The type of elements in the list. + + + Initializes a new instance of the + class that is empty and has the + default initial capacity. + + + Initializes a new instance of the + class that contains elements copied from the specified collection and has + sufficient capacity to accommodate the number of elements copied. + The collection whose elements are copied to the new list. + + + Initializes a new instance of the + class that is empty and has the specified initial capacity. + The number of elements that the new list can initially store. + + + Adds a range of objects represented by the + implementation. + The + implementation to add to this list. + + + Compares the counts of two + implementations. + This uses a trick in LINQ, sniffing types for implementations + of interfaces that might supply shortcuts when trying to make comparisons. + In this case, that is the and + interfaces, either of which can provide a count + which can be used in determining the equality of sequences (if they don't have + the same count, then they can't be equal). + The from the left hand side of the + comparison to check the count of. + The from the right hand side of the + comparison to check the count of. + Null if the result is indeterminate. This occurs when either + or doesn't implement or . + Otherwise, it will get the count from each and return true if they are equal, false otherwise. + + + Compares the contents of a + implementation to another one to determine equality. + Thinking of the implementation as + a string with any number of characters, the algorithm checks + each item in each list. If any item of the list is not equal (or + one list contains all the elements of another list), then that list + element is compared to the other list element to see which + list is greater. + The implementation + that is considered the left hand side. + The implementation + that is considered the right hand side. + True if the items are equal, false otherwise. + + + Compares this sequence to another + implementation, returning true if they are equal, false otherwise. + The other implementation + to compare against. + True if the sequence in + is the same as this one. + + + Compares this object for equality against other. + The other object to compare this object against. + True if this object and are equal, false + otherwise. + + + Gets the hash code for the list. + The hash code value. + + + Gets the hash code for the list. + The + implementation which will have all the contents hashed. + The hash code value. + + + Clones the . 
+ This is a shallow clone. + A new shallow clone of this + . + + + + Represents the methods to support some operations over files. + + + + + Returns an array of abstract pathnames representing the files and directories of the specified path. + + The abstract pathname to list it childs. + An array of abstract pathnames childs of the path specified or null if the path is not a directory + + + + Returns a list of files in a give directory. + + The full path name to the directory. + + An array containing the files. + + + + Flushes the specified file stream. Ensures that all buffered + data is actually written to the file system. + + The file stream. + + + A collection of which can be + looked up by instances of . + The type of the items contains in this + collection. + The type of the keys that can be used to look + up the items. + + + Creates a new instance of the + class. + The which will convert + instances of to + when the override of is called. + + + The which will convert + instances of to + when the override of is called. + + + Converts an item that is added to the collection to + a key. + The instance of + to convert into an instance of . + The instance of which is the + key for this item. + + + Determines if a key for an item exists in this + collection. + The instance of + to see if it exists in this collection. + True if the key exists in the collection, false otherwise. + + + + A C# emulation of the Java Hashmap + + A is a close equivalent to the Java + Hashmap. One difference java implementation of the class is that + the Hashmap supports both null keys and values, where the C# Dictionary + only supports null values not keys. Also, V Get(TKey) + method in Java returns null if the key doesn't exist, instead of throwing + an exception. This implementation doesn't throw an exception when a key + doesn't exist, it will return null. This class is slower than using a + , because of extra checks that have to be + done on each access, to check for null. + + + NOTE: This class works best with nullable types. default(T) is returned + when a key doesn't exist in the collection (this being similar to how Java returns + null). Therefore, if the expected behavior of the java code is to execute code + based on if the key exists, when the key is an integer type, it will return 0 instead of null. + + + Consider also implementing IDictionary, IEnumerable, and ICollection + like does, so HashMap can be + used in substituted in place for the same interfaces it implements. + + + The type of keys in the dictionary + The type of values in the dictionary + + + + Wraps a dictionary and adds the value + represented by the null key + + + + + Wraps a dictionary's collection, adding in a + null key. + + + + + A simple class for number conversions. + + + + + Min radix value. + + + + + Max radix value. + + + + + Converts a number to System.String. + + + + + + + Converts a number to System.String. + + + + + + + Converts a number to System.String in the specified radix. + + A number to be converted. + A radix. + A System.String representation of the number in the specified redix. + + + + Parses a number in the specified radix. + + An input System.String. + A radix. + The parsed number in the specified radix. 
+ + + + Performs an unsigned bitwise right shift with the specified number + + Number to operate on + Ammount of bits to shift + The resulting number from the shift operation + + + + Performs an unsigned bitwise right shift with the specified number + + Number to operate on + Ammount of bits to shift + The resulting number from the shift operation + + + + Returns the index of the first bit that is set to true that occurs + on or after the specified starting index. If no such bit exists + then -1 is returned. + + The BitArray object. + The index to start checking from (inclusive). + The index of the next set bit. + + + + Converts a System.String number to long. + + + + + + + Provides platform infos. + + + + + Whether we run under a Unix platform. + + + + + Whether we run under a supported Windows platform. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Copies an array of chars obtained from a String into a specified array of chars + + The String to get the chars from + Position of the String to start getting the chars + Position of the String to end getting the chars + Array to return the chars + Position of the destination array of chars to start storing the chars + An array of chars + + + + Abstract base class that provides a synchronization interface + for derived lock types + + + + + A ThreadLock class that actually does no locking + Used in ParallelMultiSearcher/MultiSearcher + + + + + Wrapper class for the Monitor Enter/Exit methods + using the interface + + + + + A weak reference wrapper for the hashtable keys. Whenever a key\value pair + is added to the hashtable, the key is wrapped using a WeakKey. WeakKey saves the + value of the original object hashcode for fast comparison. + + + + Methods for manipulating arrays. + + + Parses the string argument as if it was an int value and returns the + result. Throws NumberFormatException if the string does not represent an + int quantity. + + + a string representation of an int quantity. + + int the value represented by the argument + + NumberFormatException if the argument could not be parsed as an int quantity. + + + Parses a char array into an int. + the character array + + The offset into the array + + The length + + the int + + NumberFormatException if it can't parse + + + Parses the string argument as if it was an int value and returns the + result. Throws NumberFormatException if the string does not represent an + int quantity. The second argument specifies the radix to use when parsing + the value. + + + a string representation of an int quantity. + + + + the base to use for conversion. + + int the value represented by the argument + + NumberFormatException if the argument could not be parsed as an int quantity. + + + Returns hash of chars in range start (inclusive) to + end (inclusive) + + + + Returns hash of chars in range start (inclusive) to + end (inclusive) + + + + An average, best guess, MemoryModel that should work okay on most systems. + + + + + Returns primitive memory sizes for estimating RAM usage. + + + + + a primitive Class - bool, byte, char, short, long, float, + short, double, int + + the size in bytes of given primitive Class + + + + size of array beyond contents + + + Class size overhead + + + size of reference + + + A variety of high efficiencly bit twiddling routines. + + + $Id$ + + + + Returns the number of bits set in the long + + + Returns the number of set bits in an array of longs. 
+ + + Returns the popcount or cardinality of the two sets after an intersection. + Neither array is modified. + + + + Returns the popcount or cardinality of the union of two sets. + Neither array is modified. + + + + Returns the popcount or cardinality of A & ~B + Neither array is modified. + + + + table of number of trailing zeros in a byte + + + Returns number of trailing zeros in a 64 bit long value. + + + Returns number of trailing zeros in a 32 bit int value. + + + returns 0 based index of first set bit + (only works for x!=0) +
This is an alternate implementation of ntz() +
+
+ + returns 0 based index of first set bit +
This is an alternate implementation of ntz() +
+
+ + returns true if v is a power of two or zero + + + returns true if v is a power of two or zero + + + returns the next highest power of two, or the current value if it's already a power of two or zero + + + returns the next highest power of two, or the current value if it's already a power of two or zero + + + Optimized implementation of a vector of bits. This is more-or-less like + java.util.BitSet, but also includes the following: + + a count() method, which efficiently computes the number of one bits; + optimized read from and write to disk; + inlinable get() method; + store and load, as bit set or d-gaps, depending on sparseness; + + + + + Constructs a vector capable of holding n bits. + + + Sets the value of bit to one. + + + Sets the value of bit to true, and + returns true if bit was already set + + + + Sets the value of bit to zero. + + + Returns true if bit is one and + false if it is zero. + + + + Returns the number of bits in this vector. This is also one greater than + the number of the largest valid bit number. + + + + Returns the total number of one bits in this vector. This is efficiently + computed and cached, so that, if the vector is not changed, no + recomputation is done for repeated calls. + + + + + For testing + + + + Writes this vector to the file name in Directory + d, in a format that can be read by the constructor + . + + + + Write as a bit set + + + Write as a d-gaps list + + + Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. + + + Constructs a bit vector from the file name in Directory + d, as written by the method. + + + + Read as a bit set + + + read as a d-gaps list + + + Retrieve a subset of this BitVector. + + + starting index, inclusive + + ending index, exclusive + + subset + + + + Some useful constants. + + + The value of System.getProperty("java.version"). * + + + True iff this is Java version 1.1. + + + True iff this is Java version 1.2. + + + True iff this is Java version 1.3. + + + The value of System.getProperty("os.name"). * + + + True iff running on Linux. + + + True iff running on Windows. + + + True iff running on SunOS. + + + Simple DocIdSet and DocIdSetIterator backed by a BitSet + + + This DocIdSet implementation is cacheable. + + + Returns the underlying BitSet. + + + Provides methods for sanity checking that entries in the FieldCache + are not wasteful or inconsistent. +

+

+ Lucene 2.9 introduced numerous enhancements into how the FieldCache is used by the low levels of Lucene searching (for sorting and ValueSourceQueries) to improve both the speed of sorting and the reopening of IndexReaders. But these changes have shifted the usage of FieldCache from "top level" IndexReaders (frequently a MultiReader or DirectoryReader) down to the leaf-level SegmentReaders. As a result, existing applications that directly access the FieldCache may find RAM usage increase significantly when upgrading to 2.9 or later. This class provides an API for these applications (or their unit tests) to check at run time whether the FieldCache contains "insane" usages of the FieldCache.

+

+ EXPERIMENTAL API: This API is considered extremely advanced and + experimental. It may be removed or altered w/o warning in future releases + of Lucene. +
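For illustration, a minimal sketch of how the checker above might be invoked from a unit test. It assumes the Lucene.Net 3.0.3 port keeps the Java-style entry points (a static CheckSanity method and the FieldCache_Fields.DEFAULT singleton); treat the exact names as assumptions, not verbatim sample code from the package.

    using System;
    using Lucene.Net.Search;
    using Lucene.Net.Util;

    class FieldCacheSanityDemo
    {
        static void Main()
        {
            // Inspect the process-wide FieldCache, e.g. after running some
            // sorted searches against your index in a test.
            var problems = FieldCacheSanityChecker.CheckSanity(FieldCache_Fields.DEFAULT);

            foreach (var insanity in problems)
            {
                // Each entry describes its InsanityType, a message and the
                // offending CacheEntry objects, one per line.
                Console.WriteLine(insanity);
            }
        }
    }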

+

+ + + + + + +
+ + If set, will be used to estimate size for all CacheEntry objects + dealt with. + + + + Quick and dirty convenience method + + + + + Quick and dirty convenience method that instantiates an instance with + "good defaults" and uses it to test the CacheEntrys + + + + + + Tests a CacheEntry[] for indication of "insane" cache usage. +

+ NOTE: FieldCache CreationPlaceholder objects are ignored. (:TODO: is this a bad idea? are we masking a real problem?)

+

+
+ Internal helper method used by check that iterates over valMismatchKeys and generates a Collection of Insanity instances accordingly. The MapOfSets are used to populate the Insanity objects.
+ Internal helper method used by check that iterates over the keys of readerFieldToValIds and generates a Collection of Insanity instances whenever two (or more) ReaderField instances are found that have an ancestry relationship.
+ Checks if the seed is an IndexReader, and if so will walk the hierarchy of subReaders building up a list of the objects returned by obj.getFieldCacheKey()
+ Simple pair object for using "readerKey + fieldName" as a Map key
+ Simple container for a collection of related CacheEntry objects that in conjunction with each other represent some "insane" usage of the FieldCache.
+ CacheEntry objects which suggest a problem
+ Multi-line representation of this Insanity object, starting with the Type and Msg, followed by each CacheEntry.toString() on its own line prefaced by a tab character
+ Type of insane behavior this object represents
+ Description of the insane behavior
+ An enumeration of the different types of "insane" behavior that may be detected in a FieldCache.
+ Indicates an overlap in cache usage on a given field in sub/super readers.

+ Indicates entries have the same reader+fieldname but + different cached values. This can happen if different datatypes, + or parsers are used -- and while it's not necessarily a bug + it's typically an indication of a possible problem. +

+

+ NOTE: Only the reader, fieldname, and cached value are actually tested -- if two cache entries have different parsers or datatypes but the cached values are the same Object (== not just equal()) this method does not consider that a red flag. This allows for subtle variations in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)

+

+
+ + Indicates an expected bit of "insanity". This may be useful for + clients that wish to preserve/log information about insane usage + but indicate that it was expected. + + + + + A class that mimics Java's IdentityHashMap in that it determines + object equality solely on ReferenceEquals rather than (possibly overloaded) + object.Equals(). + + NOTE: Java's documentation on IdentityHashMap says that it also uses + ReferenceEquals on it's Values as well. This class does not follow this behavior + + The type of the keys in the dictionary + The type of the values in the dictionary + + + Provides support for converting byte sequences to Strings and back again. + The resulting Strings preserve the original byte sequences' sort order. + + The Strings are constructed using a Base 8000h encoding of the original + binary data - each char of an encoded String represents a 15-bit chunk + from the byte sequence. Base 8000h was chosen because it allows for all + lower 15 bits of char to be used without restriction; the surrogate range + [U+D8000-U+DFFF] does not represent valid chars, and would require + complicated handling to avoid them and allow use of char's high bit. + + Although unset bits are used as padding in the final char, the original + byte sequence could contain trailing bytes with no set bits (null bytes): + padding is indistinguishable from valid information. To overcome this + problem, a char is appended, indicating the number of encoded bytes in the + final content char. + + This class's operations are defined over CharBuffers and ByteBuffers, to + allow for wrapped arrays to be reused, reducing memory allocation costs for + repeated operations. Note that this class calls array() and arrayOffset() + on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be + used. This class interprets the arrayOffset() and limit() values returned by + its input buffers as beginning and end+1 positions on the wrapped array, + resprectively; similarly, on the output buffer, arrayOffset() is the first + position written to, and limit() is set to one past the final output array + position. + + + + Returns the number of chars required to encode the given byte sequence. + + + The byte sequence to be encoded. Must be backed by an array. + + The number of chars required to encode the given byte sequence + + IllegalArgumentException If the given ByteBuffer is not backed by an array + + + Returns the number of bytes required to decode the given char sequence. + + + The char sequence to be encoded. Must be backed by an array. + + The number of bytes required to decode the given char sequence + + IllegalArgumentException If the given CharBuffer is not backed by an array + + + Encodes the input byte sequence into the output char sequence. Before + calling this method, ensure that the output CharBuffer has sufficient + capacity by calling . + + + The byte sequence to encode + + Where the char sequence encoding result will go. The limit + is set to one past the position of the final char. + + IllegalArgumentException If either the input or the output buffer + is not backed by an array + + + + Decodes the input char sequence into the output byte sequence. Before + calling this method, ensure that the output ByteBuffer has sufficient + capacity by calling . + + + The char sequence to decode + + Where the byte sequence decoding result will go. The limit + is set to one past the position of the final char. 
+ + IllegalArgumentException If either the input or the output buffer + is not backed by an array + + + + Decodes the given char sequence, which must have been encoded by + or + . + + + The char sequence to decode + + A byte sequence containing the decoding result. The limit + is set to one past the position of the final char. + + IllegalArgumentException If the input buffer is not backed by an + array + + + + Encodes the input byte sequence. + + + The byte sequence to encode + + A char sequence containing the encoding result. The limit is set + to one past the position of the final char. + + IllegalArgumentException If the input buffer is not backed by an + array + + + + Helper class for keeping Listss of Objects associated with keys. WARNING: THIS CLASS IS NOT THREAD SAFE + + + the backing store for this object + + + + Adds val to the Set associated with key in the Map. If key is not + already in the map, a new Set will first be created. + + the size of the Set associated with key once val is added to it. + + + + Adds multiple vals to the Set associated with key in the Map. + If key is not + already in the map, a new Set will first be created. + + the size of the Set associated with key once val is added to it. + + + + direct access to the map backing this object. + + + An "open" BitSet implementation that allows direct access to the array of words + storing the bits. +

+ Unlike java.util.BitSet, the fact that bits are packed into an array of longs is part of the interface. This allows efficient implementation of other algorithms by someone other than the author. It also allows one to efficiently implement alternate serialization or interchange formats.

+ OpenBitSet is faster than java.util.BitSet in most operations + and *much* faster at calculating cardinality of sets and results of set operations. + It can also handle sets of larger cardinality (up to 64 * 2**32-1) +

+ The goals of OpenBitSet are the fastest implementation possible, and + maximum code reuse. Extra safety and encapsulation + may always be built on top, but if that's built in, the cost can never be removed (and + hence people re-implement their own version in order to get better performance). + If you want a "safe", totally encapsulated (and slower and limited) BitSet + class, use java.util.BitSet. +

+

Performance Results

+ Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+ BitSet size = 1,000,000
+ Results are java.util.BitSet time divided by OpenBitSet time.
+
+             cardinality   intersect_count   union   nextSetBit   get    iterator
+ 50% full    3.36          3.96              1.44    1.46         1.99   1.58
+ 1% full     3.31          3.90                      1.04                0.99
+
+ Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+ BitSet size = 1,000,000
+ Results are java.util.BitSet time divided by OpenBitSet time.
+
+             cardinality   intersect_count   union   nextSetBit   get    iterator
+ 50% full    2.50          3.50              1.00    1.03         1.12   1.25
+ 1% full     2.51          3.49                      1.00                1.02
+
+ $Id$
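For illustration, a small usage sketch of the class described above, assuming the Lucene.Net 3.0.3 port exposes the same member names as the Java original (Set, Get, Cardinality and the static IntersectionCount helper); adjust to the actual API if the port differs.

    using System;
    using Lucene.Net.Util;

    class OpenBitSetDemo
    {
        static void Main()
        {
            // Capacity is given in bits; Set() grows the backing long[] as needed,
            // while the FastSet/FastGet variants documented below skip that check.
            var bits = new OpenBitSet(1000000);
            bits.Set(3);
            bits.Set(64);

            Console.WriteLine(bits.Get(3));          // True
            Console.WriteLine(bits.Cardinality());   // 2 (the cached popcount)

            var other = new OpenBitSet(1000000);
            other.Set(64);

            // Popcount of the intersection; neither set is modified.
            long common = OpenBitSet.IntersectionCount(bits, other);
            Console.WriteLine(common);               // 1
        }
    }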
+ + Constructs an OpenBitSet large enough to hold numBits. + + + + + + + Constructs an OpenBitSet from an existing long[]. +
+ The first 64 bits are in long[0], + with bit index 0 at the least significant bit, and bit index 63 at the most significant. + Given a bit index, + the word containing it is long[index/64], and it is at bit number index%64 within that word. +

+ numWords is the number of elements in the array that contain set bits (non-zero longs). numWords should be <= bits.length, and any existing words in the array at position >= numWords should be zero.

+
+ + Returns the current capacity in bits (1 greater than the index of the last bit) + + + Returns the current capacity of this set. Included for + compatibility. This is *not* equal to + + + + Returns true if there are no set bits + + + Returns true or false for the specified bit index. + + + Returns true or false for the specified bit index. + The index should be less than the OpenBitSet size + + + + Returns true or false for the specified bit index + + + Returns true or false for the specified bit index. + The index should be less than the OpenBitSet size. + + + + returns 1 if the bit is set, 0 if not. + The index should be less than the OpenBitSet size + + + + sets a bit, expanding the set size if necessary + + + Sets the bit at the specified index. + The index should be less than the OpenBitSet size. + + + + Sets the bit at the specified index. + The index should be less than the OpenBitSet size. + + + + Sets a range of bits, expanding the set size if necessary + + + lower index + + one-past the last bit to set + + + + clears a bit. + The index should be less than the OpenBitSet size. + + + + clears a bit. + The index should be less than the OpenBitSet size. + + + + clears a bit, allowing access beyond the current set size without changing the size. + + + Clears a range of bits. Clearing past the end does not change the size of the set. + + + lower index + + one-past the last bit to clear + + + + Clears a range of bits. Clearing past the end does not change the size of the set. + + + lower index + + one-past the last bit to clear + + + + Sets a bit and returns the previous value. + The index should be less than the OpenBitSet size. + + + + Sets a bit and returns the previous value. + The index should be less than the OpenBitSet size. + + + + flips a bit. + The index should be less than the OpenBitSet size. + + + + flips a bit. + The index should be less than the OpenBitSet size. + + + + flips a bit, expanding the set size if necessary + + + flips a bit and returns the resulting bit value. + The index should be less than the OpenBitSet size. + + + + flips a bit and returns the resulting bit value. + The index should be less than the OpenBitSet size. + + + + Flips a range of bits, expanding the set size if necessary + + + lower index + + one-past the last bit to flip + + + + the number of set bits + + + + Returns the popcount or cardinality of the intersection of the two sets. + Neither set is modified. + + + + Returns the popcount or cardinality of the union of the two sets. + Neither set is modified. + + + + Returns the popcount or cardinality of "a and not b" + or "intersection(a, not(b))". + Neither set is modified. + + + + Returns the popcount or cardinality of the exclusive-or of the two sets. + Neither set is modified. + + + + Returns the index of the first set bit starting at the index specified. + -1 is returned if there are no more set bits. + + + + Returns the index of the first set bit starting at the index specified. + -1 is returned if there are no more set bits. + + + + this = this AND other + + + this = this OR other + + + Remove all elements set in other. this = this AND_NOT other + + + this = this XOR other + + + returns true if the sets have any elements in common + + + Expand the long[] with the size given as a number of words (64 bit longs). + getNumWords() is unchanged by this call. + + + + Ensure that the long[] is big enough to hold numBits, expanding it if necessary. + getNumWords() is unchanged by this call. 
+ + + + Lowers numWords, the number of words in use, + by checking for trailing zero words. + + + + returns the number of 64 bit words it would take to hold numBits + + + returns true if both sets have the same bits set + + + This DocIdSet implementation is cacheable. + + + Expert: Gets or sets the long[] storing the bits + + + Expert: gets or sets the number of longs in the array that are in use + + + Construct an OpenBitSetDISI with its bits set + from the doc ids of the given DocIdSetIterator. + Also give a maximum size one larger than the largest doc id for which a + bit may ever be set on this OpenBitSetDISI. + + + + Construct an OpenBitSetDISI with no bits set, and a given maximum size + one larger than the largest doc id for which a bit may ever be set + on this OpenBitSetDISI. + + + + Perform an inplace OR with the doc ids from a given DocIdSetIterator, + setting the bit for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace AND with the doc ids from a given DocIdSetIterator, + leaving only the bits set for which the doc ids are in common. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace NOT with the doc ids from a given DocIdSetIterator, + clearing all the bits for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace XOR with the doc ids from a given DocIdSetIterator, + flipping all the bits for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + An iterator to iterate over set bits in an OpenBitSet. + This is faster than nextSetBit() for iterating over the complete set of bits, + especially when the density of the bits set is high. + + + $Id$ + + + + ** the python code that generated bitlist + def bits2int(val): + arr=0 + for shift in range(8,0,-1): + if val & 0x80: + arr = (arr << 4) | shift + val = val << 1 + return arr + def int_table(): + tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ] + return ','.join(tbl) + **** + + + + Base class for cache implementations. + + + Returns a thread-safe cache backed by the specified cache. + In order to guarantee thread-safety, all access to the backed cache must + be accomplished through the returned cache. + + + + Called by . This method + returns a instance that wraps + this instance by default and can be overridden to return + e. g. subclasses of or this + in case this cache is already synchronized. + + + + Puts a (key, value)-pair into the cache. + + + Returns the value for the given key. + + + Returns whether the given key is in this cache. + + + Closes the cache. + + + Simple Cache wrapper that synchronizes all + calls that access the cache. + + + + Simple cache implementation that uses a HashMap to store (key, value) pairs. + This cache is not synchronized, use + if needed. + + + + Returns a Set containing all keys in this cache. + + + + The maximum number of items to cache. + + + + + The list to efficiently maintain the LRU state. + + + + + The dictionary to hash into any location in the list. + + + + + The node instance to use/re-use when adding an item to the cache. + + + + + Container to hold the key and value to aid in removal from + the dictionary when an item is removed from cache. + + + + Estimates the size of a given Object using a given MemoryModel for primitive + size information. 
+ + Resource Usage: + + Internally uses a Map to temporally hold a reference to every + object seen. + + If checkIntered, all Strings checked will be interned, but those + that were not already interned will be released for GC when the + estimate is complete. + + + + Constructs this object with an AverageGuessMemoryModel and + checkInterned = true. + + + + check if Strings are interned and don't add to size + if they are. Defaults to true but if you know the objects you are checking + won't likely contain many interned Strings, it will be faster to turn off + intern checking. + + + + MemoryModel to use for primitive object sizes. + + + + MemoryModel to use for primitive object sizes. + + check if Strings are interned and don't add to size + if they are. Defaults to true but if you know the objects you are checking + won't likely contain many interned Strings, it will be faster to turn off + intern checking. + + + + Return good default units based on byte size. + + + + Common util methods for dealing with s. + + + + Gathers sub-readers from reader into a List. + + + + + Returns sub IndexReader that contains the given document id. + + + id of document + + parent reader + + sub reader of parent which contains the specified doc id + + + + Returns sub-reader subIndex from reader. + + + parent reader + + index of desired sub reader + + the subreader at subINdex + + + + Returns index of the searcher/reader for document n in the + array used to construct this searcher/reader. + + + + A ScorerDocQueue maintains a partial ordering of its Scorers such that the + least Scorer can always be found in constant time. Put()'s and pop()'s + require log(size) time. The ordering is by Scorer.doc(). + + + + Create a ScorerDocQueue with a maximum size. + + + Adds a Scorer to a ScorerDocQueue in log(size) time. + If one tries to add more Scorers than maxSize + a RuntimeException (ArrayIndexOutOfBound) is thrown. + + + + Adds a Scorer to the ScorerDocQueue in log(size) time if either + the ScorerDocQueue is not full, or not lessThan(scorer, top()). + + + + true if scorer is added, false otherwise. + + + + Returns the least Scorer of the ScorerDocQueue in constant time. + Should not be used when the queue is empty. + + + + Returns document number of the least Scorer of the ScorerDocQueue + in constant time. + Should not be used when the queue is empty. + + + + Removes and returns the least scorer of the ScorerDocQueue in log(size) + time. + Should not be used when the queue is empty. + + + + Removes the least scorer of the ScorerDocQueue in log(size) time. + Should not be used when the queue is empty. + + + + Should be called when the scorer at top changes doc() value. + Still log(n) worst case, but it's at least twice as fast to + { pq.top().change(); pq.adjustTop(); } + instead of + { o = pq.pop(); o.change(); pq.push(o); } + + + + + Returns the number of scorers currently stored in the ScorerDocQueue. + + + Removes all entries from the ScorerDocQueue. + + + Simple lockless and memory barrier free String intern cache that is guaranteed + to return the same String instance as String.intern() does. + + + + Subclasses of StringInterner are required to + return the same single String object for all equal strings. + Depending on the implementation, this may not be + the same object returned as String.intern(). + + This StringInterner base class simply delegates to String.intern(). + + + + Returns a single object instance for each equal string. + + + Returns a single object instance for each equal string. 
+ + + Size of the hash table, should be a power of two. + + Maximum length of each bucket, after which the oldest item inserted is dropped. + + + + Floating point numbers smaller than 32 bits. + + + $Id$ + + + + Converts a 32 bit float to an 8 bit float. +
Values less than zero are all mapped to zero. +
Values are truncated (rounded down) to the nearest 8 bit value. +
Values between zero and the smallest representable value + are rounded up. + +
+ the 32 bit float to be converted to an 8 bit float (byte) + + the number of mantissa bits to use in the byte, with the remainder to be used in the exponent + + the zero-point in the range of exponent values + + the 8 bit float representation + +
+ Converts an 8 bit float to a 32 bit float.
+
+ floatToByte(b, mantissaBits=3, zeroExponent=15)
+   smallest non-zero value = 5.820766E-10
+   largest value = 7.5161928E9
+   epsilon = 0.125
+
+ byteToFloat(b, mantissaBits=3, zeroExponent=15)
+
+ floatToByte(b, mantissaBits=5, zeroExponent=2)
+   smallest non-zero value = 0.033203125
+   largest value = 1984.0
+   epsilon = 0.03125
+
+ byteToFloat(b, mantissaBits=5, zeroExponent=2)
+
+ Stores and iterates over sorted integers in compressed form in RAM.
+ The code for compressing the differences between ascending integers was + borrowed from and + .

+ NOTE: this class assumes the stored integers are doc Ids (hence why it extends ). Therefore it assumes can be used as a sentinel. If you intend to use this value, then make sure it's not used during search flow.

+
+ + When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, + a SortedVIntList representing the index numbers of the set bits + will be smaller than that BitSet. + + + + Create a SortedVIntList from all elements of an array of integers. + + + A sorted array of non negative integers. + + + + Create a SortedVIntList from an array of integers. + An array of sorted non negative integers. + + The number of integers to be used from the array. + + + + Create a SortedVIntList from a BitSet. + A bit set representing a set of integers. + + + + Create a SortedVIntList from an OpenBitSet. + A bit set representing a set of integers. + + + + Create a SortedVIntList. + An iterator providing document numbers as a set of integers. + This DocIdSetIterator is iterated completely when this constructor + is called and it must provide the integers in non + decreasing order. + + + + An iterator over the sorted integers. + + + + The total number of sorted integers. + + + The size of the byte array storing the compressed sorted integers. + + + This DocIdSet implementation is cacheable. + + + Borrowed from Cglib. Allows custom swap so that two arrays can be sorted + at the same time. + + + + Methods for manipulating strings. + + + Expert: + The StringInterner implementation used by Lucene. + This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used. + + + + Return the same string object for all equal strings + + + Compares two byte[] arrays, element by element, and returns the + number of elements common to both arrays. + + + The first byte[] to compare + + + The second byte[] to compare + + + The number of common elements. + + + + Compares two strings, character by character, and returns the + first position where the two strings differ from one another. + + + The first string to compare + + The second string to compare + + The first position where the two strings differ. + + + + Helper methods to ease implementing . + + + for printing boost only if not 1.0 + + + Class to encode java's UTF16 char[] into UTF8 byte[] + without always allocating a new byte[] as + String.getBytes("UTF-8") does. + +

WARNING: This API is new and experimental and may suddenly change.

+

+
+ + Encode characters from a char[] source, starting at + offset and stopping when the character 0xffff is seen. + Returns the number of bytes written to bytesOut. + + + + Encode characters from a char[] source, starting at + offset for length chars. Returns the number of bytes + written to bytesOut. + + + + Encode characters from this String, starting at offset + for length characters. Returns the number of bytes + written to bytesOut. + + + + Convert UTF8 bytes into UTF16 characters. If offset + is non-zero, conversion starts at that starting point + in utf8, re-using the results from the previous call + up until offset. + + + + Use by certain classes to match version compatibility + across releases of Lucene. +

+ WARNING: When changing the version parameter + that you supply to components in Lucene, do not simply + change the version at search-time, but instead also adjust + your indexing code to match, and re-index. +

+
+ + Match settings and bugs in Lucene's 2.0 release. + + + Match settings and bugs in Lucene's 2.1 release. + + + Match settings and bugs in Lucene's 2.2 release. + + + Match settings and bugs in Lucene's 2.3 release. + + + Match settings and bugs in Lucene's 2.4 release. + + + Match settings and bugs in Lucene's 2.9 release. + + + + Match settings and bugs in Lucene's 3.0 release. + + Use this to get the latest and greatest settings, bug fixes, + etc, for Lucene. + + + + + +

WARNING: if you use this setting, and then + upgrade to a newer release of Lucene, sizable changes + may happen. If precise back compatibility is important + then you should instead explicitly specify an actual + version. + If you use this constant then you may need to + re-index all of your documents when upgrading + Lucene, as the way text is indexed may have changed. + Additionally, you may need to re-test your entire + application to ensure it behaves as expected, as + some defaults may have changed and may break functionality + in your application. +

+
+
+
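To make the version-matching advice above concrete, a hedged sketch follows. It assumes the standard Lucene.Net 3.0.3 types (Lucene.Net.Util.Version, StandardAnalyzer, IndexWriter, FSDirectory) and is not part of the shipped documentation; the point is to pass the same Version constant at index and search time rather than relying on LUCENE_CURRENT.

    using System.IO;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Index;
    using Lucene.Net.Store;
    using Version = Lucene.Net.Util.Version;

    class VersionDemo
    {
        static void Main()
        {
            // Pin an explicit Version so a later library upgrade cannot
            // silently change how already-indexed text was analyzed.
            var analyzer = new StandardAnalyzer(Version.LUCENE_30);

            using (var dir = FSDirectory.Open(new DirectoryInfo("index")))
            using (var writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // add documents here, and build queries with the same analyzer
            }
        }
    }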
diff --git a/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.dll b/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.dll new file mode 100644 index 0000000..cbf0220 Binary files /dev/null and b/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.dll differ diff --git a/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.pdb b/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.pdb new file mode 100644 index 0000000..6083156 Binary files /dev/null and b/packages/Lucene.Net.3.0.3/lib/NET40/Lucene.Net.pdb differ diff --git a/packages/Lucene.Net.Linq.3.1.45/Lucene.Net.Linq.3.1.45.nupkg b/packages/Lucene.Net.Linq.3.1.45/Lucene.Net.Linq.3.1.45.nupkg new file mode 100644 index 0000000..b084aeb Binary files /dev/null and b/packages/Lucene.Net.Linq.3.1.45/Lucene.Net.Linq.3.1.45.nupkg differ diff --git a/packages/Lucene.Net.Linq.3.1.45/Lucene.Net.Linq.3.1.45.nuspec b/packages/Lucene.Net.Linq.3.1.45/Lucene.Net.Linq.3.1.45.nuspec new file mode 100644 index 0000000..65427d0 --- /dev/null +++ b/packages/Lucene.Net.Linq.3.1.45/Lucene.Net.Linq.3.1.45.nuspec @@ -0,0 +1,31 @@ + + + + Lucene.Net.Linq + 3.1.45 + Lucene.Net.Linq + Chris Eldredge + Chris Eldredge + https://github.com/themotleyfool/Lucene.Net.Linq/blob/master/LICENSE.txt + https://github.com/themotleyfool/Lucene.Net.Linq + https://svn.apache.org/repos/asf/incubator/lucene.net/trunk/branding/logo/lucene-net-icon-128x128.png + false + Execute LINQ queries on Lucene.Net complete with object to Document mapping. + Provides LINQ IQueryable interface and object/document mapping over a Lucene.Net index. + * Automatically creates Analyzer using metadata attributes like [Field(Analyzer=typeof(StandardAnalyzer)]. + * Store different object types in single index. + * Prevent documents with null values on key properties from being stored. + * Enable sorting on types implementing IComparable<T> but not non-generic IComparable. + * Allow client to provide custom IDocumentMapper<T> implementations. + Copyright 2012 The Motley Fool, LLC + lucene.net lucene linq odata search nosql + + + + + + + + + + \ No newline at end of file diff --git a/packages/Lucene.Net.Linq.3.1.45/lib/net40-Client/Lucene.Net.Linq.dll b/packages/Lucene.Net.Linq.3.1.45/lib/net40-Client/Lucene.Net.Linq.dll new file mode 100644 index 0000000..969d04e Binary files /dev/null and b/packages/Lucene.Net.Linq.3.1.45/lib/net40-Client/Lucene.Net.Linq.dll differ diff --git a/packages/Lucene.Net.Linq.3.1.45/lib/net40-Client/Lucene.Net.Linq.xml b/packages/Lucene.Net.Linq.3.1.45/lib/net40-Client/Lucene.Net.Linq.xml new file mode 100644 index 0000000..5326627 --- /dev/null +++ b/packages/Lucene.Net.Linq.3.1.45/lib/net40-Client/Lucene.Net.Linq.xml @@ -0,0 +1,817 @@ + + + + Lucene.Net.Linq + + + + + Abstraction of IndexWriter to faciliate unit testing. + + + + + + + + + + + + + + + + + + + + + + + + + + + Wraps an IndexWriter with an implementation of . + + + + The IndexWriter instance to delegate method calls to. + + + + Decorates to convert the token stream + to lowercase, allowing queries with different case-spelling to match. + + + + + Similar to but + prevents collisions of different analyzers being + added for the same field. + + + + Constructs with default analyzer. + + + Any fields not specifically + defined to use a different analyzer will use the one provided here. + + + + Defines an analyzer to use for the specified field. 
+ + + field name requiring a non-default analyzer + + non-default analyzer to use for field + + + + Return the positionIncrementGap from the analyzer assigned to fieldName + + + + Copy field analyzers from another instance into this instance. + + + + + + Provides extensions to built in Lucene.Net Analyzer classes + + + + + Defines an analyzer to use for the specified field in a strongly typed manner + + Type of the stored Lucene document + + field name requiring a non-default analyzer as a member expression + non-default analyzer to use for field + + + + Provides context for a search being + prepared or executed to + + + + + The phase that the query execution is + currently in. When the value is + , + the properties + and will + be null because they have not yet been + constructed. + + + + + Provides access to all hits returned + by the search. + + + + + Returns the current index in the + array of hits. + + + + + Convenience method for returning + the current ScoreDoc, which could + also be retrieved by doing e.g. + Hits.ScoreDocs[CurrentHit]. + + + + + Provides a reference to the searcher + to allow custom implementations to + enable additional features as needed. + + + + + Provides access to the query that will + be executed, allowing custom implementations + of to + customize it. + + + + + Provides access to the filter that will + be applied, allowing custom implementations + of to + customize it. + + When + has one or more entries, the filter will + be initialized to match documents that + have the corresponding fields or match + specific criteria defined by + + + + + Represents a unique key for a document + + + + + Converts the key to a Lucene.Net + that will match a unique document in the index. + + + + + Flag indicating if the key is empty, meaning + that no key fields are defined for the document. + + + + + Maps Lucene.Net s onto instances + of . + + + + + Holds mapping information that allows + properties on types to be mapped to Lucene + Fields and vice versa. + + + + + In cases of complex types or numeric fields, + converts a value into a query expression. + For string fields, simply returns a string + representation of the value. + + + + + Creates a query based on the supplied pattern. + The pattern should be analyzed and parsed + (typically by using a ) + to analyze the pattern and create + , + or as needed. + + + + + Creates a range query with the provided criteria. + + + + + Creates an appropriate SortField instance for the + underlying Lucene field. + + + + + + Name of Lucene field. By default, this + will be the same as . + + + + + Property name. + + + + + Retrieve or other metadata + from and + and apply to . + + + + + Convert a Property or other data on an instance + of into a + on the . + + + + + Retrieve a value from + for the purposes of constructing an + or comparing instances of + to detect dirty objects. + + + + + Gets the Analyzer to be used for indexing this field + or parsing queries on this field. + + + + + Converts objects of type to + and back. Also creates + s to track, update + or delete documents by key. + + + + + Provides mapping information for the properties + of a given type and corresponding field metadata. + + + + + Returns detailed mapping info for a given property name. + + + + + Create a query that matches the pattern on any field. + Used in conjunction with + + + + + Returns the set of fields defined for the given document. + + + + + Returns the set of property names used to compose + a for the document. 
+ + + + + Hydrates the properties on the target type using fields + in the Lucene.Net Document. + + + + + Transfers property values on the source object + to fields on the Lucene.Net Document. + + + + + Create a composite key representing a unique + identity for the document. + + + + + Compare two instances of + to determine if they are considered equal. This + method is used to detect modified objects in a + to determine which + objects are dirty and need to be updated during + commit. + + + + + Called before a search is executed to allow + customizations to be applied on the , + and . + + + + + Gets an analyzer to be used for preparing queries + and writing documents. + + + + + Creates an that will execute + + before executing + and other default processors. + + + + + Provides IQueryable access to a Lucene.Net index as well as an API + for adding, deleting and replacing documents within atomic transactions. + + + + + Constructs a new read-only instance without supplying an IndexWriter. + + + + + Constructs a new read-only instance with a client provided + and without supplying an IndexWriter. + + + + + Constructs a new instance. + + + + + Constructs a new instance with a client provided . + + + + + Constructs a new instance with a client provided . + If the supplied IndexWriter will be written to outside of this instance of LuceneDataProvider, + the will be used to coordinate writes. + + + + + Constructs a new instance. + If the supplied IndexWriter will be written to outside of this instance of LuceneDataProvider, + the will be used to coordinate writes. + + + + + + + + + + + + + + + + + + + + Returns an IQueryable implementation where the type being mapped + from is constructed by a factory delegate. + + The type of object that Document will be mapped onto. + Factory method to instantiate new instances of T. + Mapper that will convert documents to objects and vice versa. + + + + + + + + + + + + + + + + + + + Opens a session for staging changes and then committing them atomically. + + Factory delegate that creates new instances of + Mapper that will convert documents to objects and vice versa. + The type of object that will be mapped to . + + + + + + + + + + + + + + + + + + + Registers a callback to be invoked when a new IndexSearcher is being initialized. + This method allows an IndexSearcher to be "warmed up" by executing one or more + queries before the instance becomes visible on other threads. + + While callbacks are being executed, other threads will continue to use the previous + instance of IndexSearcher if this is not the first instance being initialized. + + If this is the first instance, other threads will block until all callbacks complete. + + + + + Retrieves the instance of IndexWriter that will be used by all + sessions created by this instance. + + + + + Contains custom extensions to LINQ for integrating with Lucene.Net. + + + + + Expression to be used in a LINQ where clauses to search + for documents where any field matches a given pattern. + + + + + Applies a boost to a property in a where clause. + + + + + Applies a custom boost function to customize query scoring. When multiple boost functions + are added by calling this method more than once, the return values from each function are + multiplied to yield a final result. Warning: this method will cause each document that + matches the query to be converted to an instance of in order + for the score to be computed, significantly degrading performance. 
+ + + + + Applies a custom boost function to customize query scoring. When multiple boost functions + are added by calling this method more than once, the return values from each function are + multiplied to yield a final result. + + + + + Applies the provided Query. Enables queries to be constructed from outside of + LINQ to be executed as part of a LINQ query. + + + + + + Expression to be used in a LINQ orderby clause to sort results by score. + Note: since score is a decimal based weight, ordering by score normally + results in additional orderby clauses having no effect. + + + + + Instructs the query parser that a given query pattern + in a LINQ where clause should not have special characters + (such as *) escaped. + + Disabling escaping allows prefix, wildcard, phrase and range queries + to be parsed from the instead of + treating it as a verbatim search term. + + + + The following two samples will produce the same Query: + + var query = "Foo*"; + + var results = from doc in documents + where doc.Title == query.AllowSpecialCharacters() + select doc; + + + var query = "Foo"; + + var results = from doc in documents + where doc.Title.StartsWith(query) + select doc; + + + + + + + + Base attribute for customizing how properties are stored and indexed. + + + + + Specifies the name of the backing field that the property value will be mapped to. + When not specified, defaults to the name of the property being decorated by this attribute. + + + + + Set to true to store value in index for later retrieval, or + false if the field should only be indexed. + + + + + Provides a custom TypeConverter implementation that can convert the property type + to and from strings so they can be stored and indexed by Lucene.Net. + + + + + Specifies that the property value, combined with any other properties that also + specify Key = true, represents a unique primary key to the document. + + Key fields are used to replace or delete documents. + + + + + Customizes how a property is converted to a field as well as + storage and indexing options. + + + + + Default constructor + + + + How the field should be indexed for searching and sorting. + + + Backing field used to store data in Lucene index. + + + Backing field used to store data in Lucene index. + How the field should be indexed for searching and sorting. + + + + How the field should be indexed for searching and sorting. + + + + + Overrides default format pattern to use when converting ValueType + to string. If both Format and + Converter are specified, Converter + will take precedence and Format will be ignored. + + + + + When true, causes to + be set to false to prevent wildcard queries like Foo* from being + converted to lowercase. + + + + + When set, supplies a custom analyzer for this field. The analyzer type + must have either a parameterless public constructor, or a public constructor + that accepts a argument. + + When an external Analyzer is provided on + methods it will override this setting. + + + + + Maps a , or any type that can be converted + to , , , or + to a that will be + indexed as a trie structure to enable more efficient range filtering + and sorting. + + + + + + Default constructor + + + + Backing field used to store data in Lucene index. + + + + + + + Specifies that a public property should be ignored by the Lucene.Net.Linq + mapping engine when converting objects to Documents and vice-versa. + + + + + When set on a property, the property will be set with the score (relevance) + of the document based on the queries and boost settings. 
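Putting the mapping attributes above together, a hedged sketch of how a mapped type and the provider are typically used. The attribute names come from the documentation above; AsQueryable, OpenSession and Add follow the package readme and are assumptions about the exact API surface, not verbatim sample code from the package.

    using System;
    using System.Linq;
    using Lucene.Net.Linq;
    using Lucene.Net.Linq.Mapping;
    using Lucene.Net.Store;
    using Version = Lucene.Net.Util.Version;

    public class Article
    {
        [Field(Key = true)]   // combined key used when replacing or deleting documents
        public string Id { get; set; }

        [Field]               // analyzed and indexed with the provider's default analyzer
        public string Body { get; set; }

        [QueryScore]          // populated with the relevance score after a query runs
        public float Score { get; set; }
    }

    public static class ArticleIndexDemo
    {
        public static void Run()
        {
            using (var provider = new LuceneDataProvider(new RAMDirectory(), Version.LUCENE_30))
            {
                using (var session = provider.OpenSession<Article>())
                {
                    session.Add(new Article { Id = "1", Body = "hello lucene" });
                }   // staged changes are committed atomically when the session is disposed

                var hits = from a in provider.AsQueryable<Article>()
                           where a.Body == "hello"
                           select a;

                Console.WriteLine(hits.Count());
            }
        }
    }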
+ + + + + When set on a class, defines a fixed-value key that will always + be used when querying for objects of this type or deleting and + replacing documents with matching keys. + + This attribute enables multiple object types to be stored in + the same index by ensuring that unrelated documents of other + types will not be returned when querying. + + + + [DocumentKey(FieldName="Type", Value="Customer")] + public class Customer + { + } + + + + + + The field name that will be queried. + + + + + The constant value that will be queried. + + + + + + + + + + + + + + + + + + + + + + + Maps public properties on to + Lucene s using optional metadata + attributes such as , + , + , + + and . + + + + + Constructs an instance that will create an + using metadata on public properties on the type . + + Version compatibility for analyzers and indexers. + + + + Constructs an instance with an externall supplied analyzer + and the compatibility version of the index. + + Version compatibility for analyzers and indexers. + + + + + + + + + + + + + + Replaces boolean binary expressions like [LuceneQueryPredicateExpression](+field:query) == false to [LuceneQueryPredicateExpression](-field:query) + + + + + Replaces method calls like Matches with query expressions. + + + + + Replaces expressions like (bool)(Constant(bool?)) with Constant(bool?). + + + + + Replaces subqueries like {[doc].Tags => Contains("c")} with BinaryExpressions like ([doc].Tags == "c"). + + + + + Converts pointless BinaryExpressions like "True AndAlso Expression" + or "False OrElse Expression" to take only the right side. Applies + recursively to collapse deeply nested pointless expressions. + + + + + Transforms various expressions in a QueryModel instance to make it easier to convert into a Lucene Query. + + + + + Replaces supported method calls like string.Compare([LuceneQueryFieldExpression], "abc") > 0 to LuceneQueryPredicateExpression + + + + + Replaces method calls like string.Concat([LuceneQueryFieldExpression], [LuceneQueryFieldExpression]) to LuceneCompositeOrderingExpression + + + + + Replaces supported method calls like [LuceneQueryFieldExpression].StartsWith("foo") with a LuceneQueryPredicateExpression like [LuceneQueryPredicateExpression](+Field:foo*) + + + + + Removes method calls like string.ToLower() that have no effect on a query due to + case sensitivity in Lucene being configured elsewhere by the Analyzer. + + + + + Locates expressions like IFF(x != null, x, null) and converts them to x. + When combined with a null-safe + ToLower operation like IFF(x != null, x.ToLower(), null) is simplified to x. + + + + + Replaces MemberExpression instances like [QuerySourceReferenceExpression].PropertyName with + + + + + Converts pretty IndexMode.AnalyzedNoNorms to ugly Field.Index.ANALYZED_NO_NORMS. + + + + + Converts supported value types such as DateTime to an underlying ValueType that is supported by + . + + + + diff --git a/packages/Lucene.Net.Linq.3.1.45/readme.txt b/packages/Lucene.Net.Linq.3.1.45/readme.txt new file mode 100644 index 0000000..bc98c37 --- /dev/null +++ b/packages/Lucene.Net.Linq.3.1.45/readme.txt @@ -0,0 +1,26 @@ +Thank you for trying Lucene.Net.Linq! + +Getting Started +=============== + +See https://github.com/themotleyfool/Lucene.Net.Linq for examples and documentation. + +Upgrading to Version 3.1 +======================== + +Version 3.1 brings new options to make Lucene.Net.Linq simpler to use +for projects that aren't already using Lucene.Net. 
Where before the +client had to construct its own IndexWriter and Analyzer instances +and pass them into LuceneDataProvider, new constructors are available +that allow you to simply provide a Directory and Version, and it will +use metadata on your objects to build an appropriate Analyzer. + +LuceneDataProvider now implements IDisposable. If you allow it +to create the IndexWriter for you, you must call Dispose to ensure +that the writer is properly closed. + +3.1 also adds new overloads to methods on LuceneDataProvider that +allow you to provide your own implementation of IDocumentMapper +enabling clients to provide custom solutions for mapping fields +to objects, control how keys are generated for documents and more. + diff --git a/packages/Remotion.Linq.1.13.183.0/Remotion.Linq.1.13.183.0.nupkg b/packages/Remotion.Linq.1.13.183.0/Remotion.Linq.1.13.183.0.nupkg new file mode 100644 index 0000000..7990379 Binary files /dev/null and b/packages/Remotion.Linq.1.13.183.0/Remotion.Linq.1.13.183.0.nupkg differ diff --git a/packages/Remotion.Linq.1.13.183.0/Remotion.Linq.1.13.183.0.nuspec b/packages/Remotion.Linq.1.13.183.0/Remotion.Linq.1.13.183.0.nuspec new file mode 100644 index 0000000..7a84dd2 --- /dev/null +++ b/packages/Remotion.Linq.1.13.183.0/Remotion.Linq.1.13.183.0.nuspec @@ -0,0 +1,19 @@ + + + + Remotion.Linq + 1.13.183.0 + re-linq - re-motion Linq Library + rubicon IT GmbH + rubicon IT GmbH + http://relinq.codeplex.com/license + http://relinq.codeplex.com/ + false + With re-linq, it's now easier than ever to create full-featured LINQ providers. + Copyright (c) rubicon IT GmbH, www.rubicon.eu + re-motion linq expression trees + + + + + \ No newline at end of file diff --git a/packages/Remotion.Linq.1.13.183.0/lib/net35/Remotion.Linq.dll b/packages/Remotion.Linq.1.13.183.0/lib/net35/Remotion.Linq.dll new file mode 100644 index 0000000..7205f85 Binary files /dev/null and b/packages/Remotion.Linq.1.13.183.0/lib/net35/Remotion.Linq.dll differ diff --git a/packages/Remotion.Linq.1.13.183.0/lib/net35/Remotion.Linq.xml b/packages/Remotion.Linq.1.13.183.0/lib/net35/Remotion.Linq.xml new file mode 100644 index 0000000..d783481 --- /dev/null +++ b/packages/Remotion.Linq.1.13.183.0/lib/net35/Remotion.Linq.xml @@ -0,0 +1,4236 @@ + + + + Remotion.Linq + + + + + Represents a data source in a query that adds new data items in addition to those provided by the . + + + In C#, the second "from" clause in the following sample corresponds to an : + + var query = from s in Students + from f in s.Friends + select f; + + + + + + Base class for from clauses ( and ). From clauses define query sources that + provide data items to the query which are filtered, ordered, projected, or otherwise processed by the following clauses. + + + + + Represents a clause within the . Implemented by , , + , and . + + + + + Transforms all the expressions in this clause and its child objects via the given delegate. + + The transformation object. This delegate is called for each within this + clause, and those expressions will be replaced with what the delegate returns. + + + + Represents a clause or result operator that generates items which are streamed to the following clauses or operators. + + + + + Gets the name of the items generated by this . + + + Item names are inferred when a query expression is parsed, and they usually correspond to the variable names present in that expression. + However, note that names are not necessarily unique within a . 
Use names only for readability and debugging, not for + uniquely identifying objects. To match an with its references, use the + property rather than the . + + + + + Gets the type of the items generated by this . + + + + + Initializes a new instance of the class. + + A name describing the items generated by the from clause. + The type of the items generated by the from clause. + The generating data items for this from clause. + + + + Transforms all the expressions in this clause and its child objects via the given delegate. + + The transformation object. This delegate is called for each within this + clause, and those expressions will be replaced with what the delegate returns. + + + + Gets or sets a name describing the items generated by this from clause. + + + Item names are inferred when a query expression is parsed, and they usually correspond to the variable names present in that expression. + However, note that names are not necessarily unique within a . Use names only for readability and debugging, not for + uniquely identifying objects. To match an with its references, use the + property rather than the . + + + + + Gets or sets the type of the items generated by this from clause. + + + Changing the of a can make all objects that + point to that invalid, so the property setter should be used with care. + + + + + The expression generating the data items for this from clause. + + + + + Represents a clause in a 's collection. Body clauses take the items generated by + the , filtering (), ordering (), augmenting + (), or otherwise processing them before they are passed to the . + + + + + Accepts the specified visitor by calling one of its Visit... methods. + + The visitor to accept. + The query model in whose context this clause is visited. + The index of this clause in the 's collection. + + + + Clones this clause, registering its clone with the if it is a query source clause. + + The clones of all query source clauses are registered with this . + A clone of this clause. + + + + Initializes a new instance of the class. + + A name describing the items generated by the from clause. + The type of the items generated by the from clause. + The generating the items of this from clause. + + + + Accepts the specified visitor by calling its method. + + The visitor to accept. + The query model in whose context this clause is visited. + The index of this clause in the 's collection. + + + + Clones this clause, registering its clone with the . + + The clones of all query source clauses are registered with this . + A clone of this clause. + + + + Aggregates all objects needed in the process of cloning a and its clauses. + + + + + Gets the clause mapping used during the cloning process. This is used to adjust the instances + of clauses to point to clauses in the cloned . + + + + + Acts as a base class for custom extension expressions, providing advanced visitor support. Also allows extension expressions to be reduced to + a tree of standard expressions with equivalent semantics. + + + Custom extension expressions can specify their own or use a default one. re-linq reserves + values from 100000 to 150000 for its own expressions. Custom LINQ providers can use 150001 and above. + + + + + Defines a standard value that is used by all subclasses unless they specify + their own value. + + + + + Initializes a new instance of the class with a default value. + + The type of the value represented by the . + + + + Initializes a new instance of the class with a custom value. 
+ + The type of the value represented by the . + The value to use as this expression's value. + LINQ providers should use values starting from 150001 and above. + + + + Reduces this instance to a tree of standard expressions. If this instance cannot be reduced, the same + is returned. + + If is , a reduced version of this ; otherwise, + this . + + + This method can be called in order to produce a new that has the same semantics as this + but consists of expressions of standard node types. The reduction need not be complete, nodes can be + returned that themselves must be reduced. + + + Subclasses overriding the property to return must also override this method and cannot + call the base implementation. + + + + + + Calls the method and checks certain invariants before returning the result. This method can only be called when + returns . + + A reduced version of this . + This is not reducible - or - the method + violated one of the invariants (see Remarks). + + This method checks the following invariants: + + must not return . + must not return the original . + + The new expression returned by must be assignment-compatible with the type of the original + . + + + + + + + Accepts the specified visitor, by default dispatching to . + Inheritors of the class can override this method in order to dispatch to a specific Visit method. + + The visitor whose Visit method should be invoked. + The returned by the visitor. + + Overriders can test the for a specific interface. If the visitor supports the interface, the extension expression + can dispatch to the respective strongly-typed Visit method declared in the interface. If it does not, the extension expression should call + the base implementation of , which will dispatch to . + + + + + Must be overridden by subclasses by calling on all + children of this extension node. + + The visitor to visit the child nodes with. + This , or an expression that should replace it in the surrounding tree. + + If the visitor replaces any of the child nodes, a new instance should + be returned holding the new child nodes. If the node has no children or the visitor does not replace any child node, the method should + return this . + + + + + Gets a value indicating whether this instance can be reduced to a tree of standard expressions. + + + if this instance can be reduced; otherwise, . + + + + If this method returns , the method can be called in order to produce a new + that has the same semantics as this but consists of + expressions of standard node types. + + + Subclasses overriding the property to return must also override the + method and cannot call its base implementation. + + + + + + This interface should be implemented by visitors that handle the instances. + + + + + This interface should be implemented by visitors that handle VB-specific expressions. + + + + + Represents a VB-specific comparison expression. + + + + To explicitly support this expression type, implement . + To treat this expression as if it were an ordinary , call its method and visit the result. + + + Subclasses of that do not implement will, by default, + automatically reduce this expression type to in the + method. + + + Subclasses of that do not implement will, by default, + ignore this expression and visit its child expressions via the and + methods. + + + + + + Performs a reverse operation, i.e. creates a from a given resolved expression, + substituting all objects by getting the referenced objects from the lambda's input parameter. 
+ + + Given the following input: + + ItemExpression: new AnonymousType ( a = [s1], b = [s2] ) + ResolvedExpression: [s1].ID + [s2].ID + + The visitor generates the following : input => input.a.ID + input.b.ID + The lambda's input parameter has the same type as the ItemExpression. + + + + + Provides a base class that can be used for visiting and optionally transforming each node of an tree in a + strongly typed fashion. + This is the base class of many transformation classes. + + + + + Determines whether the given is one of the expressions defined by for which + has a Visit method. handles those by calling the respective Visit method. + + The expression to check. Must not be . + + if is one of the expressions defined by and + has a Visit method for it; otherwise, . + + + + + Determines whether the given is one of the base expressions defined by re-linq. + handles those by calling the respective Visit method. + + The expression to check. + + if is a re-linq base expression (, + ) for which has dedicated Visit methods; + otherwise, . + + + + + Determines whether the given is an . handles such + expressions by calling . + + The expression to check. + + if is an ; otherwise, . + + + + + Determines whether the given is an unknown expression not derived from . + cannot handle such expressions at all and will call for them. + + The expression to check. + + if is an unknown expression not derived from ; + otherwise, . + + + + + Adjusts the arguments for a so that they match the given members. + + The arguments to adjust. + The members defining the required argument types. + + A sequence of expressions that are equivalent to , but converted to the associated member's + result type if needed. + + + + + Performs a reverse operation, i.e. creates a from a given resolved expression, + substituting all objects by getting the referenced objects from the lambda's input parameter. + + The item expression representing the items passed to the generated via its input + parameter. + The resolved expression for which to generate a reverse resolved . + A from the given resolved expression, substituting all + objects by getting the referenced objects from the lambda's input parameter. The generated has exactly one + parameter which is of the type defined by . + + + + Performs a reverse operation on a , i.e. creates a new + with an additional parameter from a given resolved , + substituting all objects by getting the referenced objects from the new input parameter. + + The item expression representing the items passed to the generated via its new + input parameter. + The resolved for which to generate a reverse resolved . + The position at which to insert the new parameter. + A similar to the given resolved expression, substituting all + objects by getting the referenced objects from an additional input parameter. The new input parameter is of the type defined by + . + + + + Represents a that is executed on a sequence, choosing a single item for its result. + + + + + Represents a that is executed on a sequence, returning a scalar value or single item as its result. + + + + + Represents an operation that is executed on the result set of the query, aggregating, filtering, or restricting the number of result items + before the query result is returned. + + + + + Executes this result operator in memory, on a given input. Executing result operators in memory should only be + performed if the target query system does not support the operator. + + The input for the result operator. 
This must match the type of expected by the operator. + The result of the operator. + + + + + Gets information about the data streamed out of this . This contains the result type a query would have if + it ended with this , and it optionally includes an describing + the streamed sequence's items. + + Information about the data produced by the preceding , or the + of the query if no previous exists. + Gets information about the data streamed out of this . + + + + Clones this item, registering its clone with the if it is a query source clause. + + The clones of all query source clauses are registered with this . + A clone of this item. + + + + Accepts the specified visitor by calling its method. + + The visitor to accept. + The query model in whose context this clause is visited. + The index of this item in the 's collection. + + + + Transforms all the expressions in this item via the given delegate. Subclasses must apply the + to any expressions they hold. If a subclass does not hold any expressions, it shouldn't do anything + in the implementation of this method. + + The transformation object. This delegate is called for each within this + item, and those expressions will be replaced with what the delegate returns. + + + + Invokes a given generic method on an input via Reflection. Use this to implement + by defining a strongly typed, generic variant + of ; then invoke that strongly typed + variant via . + + The type of expected as an input to . + The type of expected as the output of . + The input object to invoke the method on.. + A delegate holding exactly one public generic method with exactly one generic argument. This method is + called via Reflection on the given argument. + The result of invoking the method in on . + + The uses this method as follows: + + public IStreamedData ExecuteInMemory (IStreamedData input) + { + ArgumentUtility.CheckNotNull ("input", input); + return InvokeGenericExecuteMethod<StreamedSequence, StreamedValue> (input, ExecuteInMemory<object>); + } + + public StreamedValue ExecuteInMemory<T> (StreamedSequence input) + { + var sequence = input.GetTypedSequence<T> (); + var result = sequence.Sequence.Count (); + return new StreamedValue (result); + } + + + + + + Invokes the given via reflection on the given . + + The input to invoke the method with. + The method to be invoked. + The result of the invocation + + + + Gets the constant value of the given expression, assuming it is a . If it is + not, an is thrown. + + The expected value type. If the value is not of this type, an is thrown. + A string describing the value; this will be included in the exception message if an exception is thrown. + The expression whose value to get. + + The constant value of the given . + + + + + Represents a check whether any items are returned by a query. + This is a result operator, operating on the whole result set of a query. + + + "Any" query methods taking a predicate are represented as into a combination of a and an + . + + + In C#, the "Any" call in the following example corresponds to an . + + var result = (from s in Students + select s).Any(); + + + + + + + + + + + + + + + + + + + + + Represents a check whether all items returned by a query satisfy a predicate. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "All" call in the following example corresponds to an . + + var result = (from s in Students + select s).All(); + + + + + + Initializes a new instance of the class. + + The predicate to evaluate. 
This is a resolved version of the body of the that would be + passed to . + + + + + + + + + + + + + + + + + + + Gets or sets the predicate to evaluate on all items in the sequence. + This is a resolved version of the body of the that would be + passed to . + + The predicate. + + + + Represents aggregating the items returned by a query into a single value. The first item is used as the seeding value for the aggregating + function. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Aggregate" call in the following example corresponds to an . + + var result = (from s in Students + select s.Name).Aggregate((allNames, name) => allNames + " " + name); + + + + + + Initializes a new instance of the class. + + The aggregating function. This is a taking a parameter that represents the value accumulated so + far and returns a new accumulated value. This is a resolved expression, i.e. items streaming in from prior clauses and result operators + are represented as expressions containing nodes. + + + + + + + + + + + + + + + + Gets or sets the aggregating function. This is a taking a parameter that represents the value accumulated so + far and returns a new accumulated value. This is a resolved expression, i.e. items streaming in from prior clauses and result operators + are represented as expressions containing nodes. + + The aggregating function. + + + + Represents aggregating the items returned by a query into a single value with an initial seeding value. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Aggregate" call in the following example corresponds to an . + + var result = (from s in Students + select s).Aggregate(0, (totalAge, s) => totalAge + s.Age); + + + + + + Initializes a new instance of the class. + + The seed expression. + The aggregating function. This is a taking a parameter that represents the value accumulated so + far and returns a new accumulated value. This is a resolved expression, i.e. items streaming in from prior clauses and result operators + are represented as expressions containing nodes. + The result selector, can be . + + + + Gets the constant value of the property, assuming it is a . If it is + not, an is thrown. + + The expected seed type. If the item is not of this type, an is thrown. + The constant value of the property. + + + + + + + Executes the aggregating operation in memory. + + The type of the source items. + The type of the aggregated items. + The type of the result items. + The input sequence. + A object holding the aggregated value. + + + + + + + + + + + + + Gets or sets the aggregating function. This is a taking a parameter that represents the value accumulated so + far and returns a new accumulated value. This is a resolved expression, i.e. items streaming in from prior clauses and result operators + are represented as expressions containing nodes. + + The aggregating function. + + + + Gets or sets the seed of the accumulation. This is an denoting the starting value of the aggregation. + + The seed of the accumulation. + + + + Gets or sets the result selector. This is a applied after the aggregation to select the final value. + Can be . + + The result selector. + + + + Represents a that is executed on a sequence, returning a new sequence with the same + item type as its result. + + + + + Represents a that is executed on a sequence, returning a new sequence as its result. + + + + + Describes the data streamed out of a or . 
+ + + + + Takes the given and instantiates it, substituting its generic parameter with the value + or item type of the data described by this object. The method must have exactly one generic parameter. + + The generic method definition to instantiate. + A closed generic instantiation of with this object's value or item type substituted for + the generic parameter. + + + + Executes the specified with the given , calling either + or , depending on the type of data streamed + from this interface. + + The query model to be executed. + The executor to use. + An object holding the results of the query execution. + + + + Returns a new of the same type as this instance, but with a new . + + The type to use for the property. The type must be compatible with the data described by this + , otherwise an exception is thrown. + The type may be a generic type definition if the supports generic types; in this case, + the type definition is automatically closed with generic parameters to match the data described by this . + A new of the same type as this instance, but with a new . + The is not compatible with the data described by this + . + + + + Gets the type of the data described by this instance. For a sequence, this is a type implementing + , where T is instantiated with a concrete type. For a single value, this is the value type. + + + + + Describes a scalar value streamed out of a or . A scalar value corresponds to a + value calculated from the result set, as produced by or , for instance. + + + + + Describes a single or scalar value streamed out of a or . + + + + + + + + Returns a new instance of the same type with a different . + + The new data type. + The cannot be used for the clone. + A new instance of the same type with the given . + + + + + + + Takes the given and instantiates it, substituting its generic parameter with the value + type of the value held by this object. The method must have exactly one generic parameter. + + The generic method definition to instantiate. + + A closed generic instantiation of with this object's value type substituted for + the generic parameter. + + + + + Gets the type of the data described by this instance. This is the type of the streamed value, or + if the value is . + + + + + Holds the data needed to represent the output or input of a part of a query in memory. This is mainly used for + . The data consists of a sequence of items. + + + + + Holds the data needed to represent the output or input of a part of a query in memory. This is mainly used for + . The data held by implementations of this interface can be either a value or a sequence. + + + + + Gets an object describing the data held by this instance. + + An object describing the data held by this instance. + + + + Gets the value held by this instance. + + The value. + + + + Initializes a new instance of the class, setting the and + properties. + + The sequence. + An instance of describing the sequence. + + + + Gets the current sequence held by this object as well as an describing the + sequence's items, throwing an exception if the object does not hold a sequence of items of type . + + The expected item type of the sequence. + + The sequence and an describing its items. + + Thrown when the item type is not the expected type . + + + + Gets the current sequence for the operation. If the object is used as input, this + holds the input sequence for the operation. If the object is used as output, this holds the result of the operation. + + The current sequence. 
+ + + + Describes sequence data streamed out of a or . Sequence data can be held by an object + implementing , and its items are described via a . + + + + + Returns a new with an adjusted . + + The type to use for the property. The type must be convertible from the previous type, otherwise + an exception is thrown. The type may be a generic type definition; in this case, + the type definition is automatically closed with the type of the . + + A new with a new . + + The is not compatible with the items described by this + . + + + + Takes the given and instantiates it, substituting its generic parameter with the + item type of the sequence described by this object. The method must have exactly one generic parameter. + + The generic method definition to instantiate. + + A closed generic instantiation of with this object's item type substituted for + the generic parameter. + + + + + Gets the type of the items returned by the sequence described by this object, as defined by . Note that because + is covariant starting from .NET 4.0, this may be a more abstract type than what's returned by + 's property. + + + + + Gets an expression that describes the structure of the items held by the sequence described by this object. + + The expression for the sequence's items. + + + + Gets the type of the data described by this instance. This is a type implementing + , where T is instantiated with a concrete type. + + + + + Describes a single value streamed out of a or . A single value corresponds to one + item from the result set, as produced by or , for instance. + + + + + Holds the data needed to represent the output or input of a part of a query in memory. This is mainly used for + . The data is a single, non-sequence value and can only be consumed by result operators + working with single values. + + + + + Initializes a new instance of the class, setting the and properties. + + The value. + A describing the value. + + + + Gets the value held by , throwing an exception if the value is not of type . + + The expected type of the value. + , cast to . + Thrown when if not of the expected type. + + + + Gets an object describing the data held by this instance. + + + An object describing the data held by this instance. + + + + + Gets the current value for the operation. If the object is used as input, this + holds the input value for the operation. If the object is used as output, this holds the result of the operation. + + The current value. + + + + Constructs a that is able to extract a specific simple expression from a complex + or . + + + + For example, consider the task of determining the value of a specific query source [s] from an input value corresponding to a complex + expression. This will return a able to perform this task. + + + + If the complex expression is [s], it will simply return input => input. + If the complex expression is new { a = [s], b = "..." }, it will return input => input.a. + If the complex expression is new { a = new { b = [s], c = "..." }, d = "..." }, it will return input => input.a.b. + + + + + + + Constructs a that is able to extract a specific simple from a + complex . + + The expression an accessor to which should be created. + The full expression containing the . + The input parameter to be used by the resulting lambda. Its type must match the type of . + The compares the via reference equality, + which means that exactly the same expression reference must be contained by for the visitor to return the + expected result. 
In addition, the visitor can only provide accessors for expressions nested in or + . + A acting as an accessor for the when an input matching + is given. + + + + + Represents the join part of a query, adding new data items and joining them with data items from previous clauses. In contrast to + , the does not provide access to the individual items of the joined query source. + Instead, it provides access to all joined items for each item coming from the previous clauses, thus grouping them together. The semantics + of this join is so that for all input items, a joined sequence is returned. That sequence can be empty if no joined items are available. + + + In C#, the "into" clause in the following sample corresponds to a . The "join" part before that is encapsulated + as a held in . The adds a new query source to the query + ("addresses"), but the item type of that query source is , not "Address". Therefore, it can be + used in the of an to extract the single items. + + var query = from s in Students + join a in Addresses on s.AdressID equals a.ID into addresses + from a in addresses + select new { s, a }; + + + + + + Transforms all the expressions in this clause and its child objects via the given delegate. + + The transformation object. This delegate is called for each within this + clause, and those expressions will be replaced with what the delegate returns. + + + + Accepts the specified visitor by calling its method. + + The visitor to accept. + The query model in whose context this clause is visited. + The index of this clause in the 's collection. + + + + Clones this clause, registering its clone with the . + + The clones of all query source clauses are registered with this . + A clone of this clause. + + + + Gets or sets a name describing the items generated by this . + + + Item names are inferred when a query expression is parsed, and they usually correspond to the variable names present in that expression. + However, note that names are not necessarily unique within a . Use names only for readability and debugging, not for + uniquely identifying objects. To match an with its references, use the + property rather than the . + + + + + Gets or sets the type of the items generated by this . This must implement . + + + Changing the of a can make all objects that + point to that invalid, so the property setter should be used with care. + + + + + Gets or sets the inner join clause of this . The represents the actual join operation + performed by this clause; its results are then grouped by this clause before streaming them to subsequent clauses. + objects outside the must not point to + because the items generated by it are only available in grouped form from outside this clause. + + + + + Maps instances to instances. This is used by + in order to be able to correctly update references to old clauses to point to the new clauses. Via + and , it can also be used manually. + + + + + Visits an tree, replacing all instances with references to cloned clauses, + as defined by a . In addition, all instances in + SubQueryExpressions are cloned, and their references also replaces. All referenced clauses must be mapped + to cloned clauses in the given , otherwise an expression is thrown. This is used by + to adjust references to the old with references to the new . + + + + + Takes an expression and replaces all instances, as defined by a given . + This is used whenever references to query sources should be replaced by a transformation. 
+ + + + + Takes an expression and replaces all instances, as defined by a given + . + + The expression to be scanned for references. + The clause mapping to be used for replacing instances. + If , the visitor will throw an exception when + not mapped in the is encountered. If , + the visitor will ignore such expressions. + An expression with its instances replaced as defined by the + . + + + + Adjusts the given expression for cloning, that is replaces and + instances. All referenced clauses must be mapped to clones in the given , otherwise an exception is thrown. + + The expression to be adjusted. + The clause mapping to be used for replacing instances. + An expression with all and instances replaced + as required by a operation. + + + + Represents a calculation of an average value from the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Average" call in the following example corresponds to an . + + var query = (from s in Students + select s.ID).Average(); + + + + + + + + + Represents a cast of the items returned by a query to a different type. + This is a result operator, operating on the whole result set of a query. + + + In C#, "Cast" call in the following example corresponds to a . + + var query = (from s in Students + select s.ID).Cast<int>(); + + + + + + + + + Represents a check whether the results returned by a query contain a specific item. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Contains" call in the following example corresponds to a . + + var query = (from s in Students + select s).Contains (student); + + + + + + Initializes a new instance of the class. + + The item for which to be searched. + + + + Gets the constant value of the property, assuming it is a . If it is + not, an is thrown. + + The expected item type. If the item is not of this type, an is thrown. + The constant value of the property. + + + + Gets or sets an expression yielding the item for which to be searched. This must be compatible with (ie., assignable to) the source sequence + items. + + The item expression. + + + + Represents a guard clause yielding a singleton sequence with a default value if no items are returned by a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Defaultifempty" call in the following example corresponds to a . + + var query = (from s in Students + select s).DefaultIfEmpty ("student"); + + + + + + Gets the constant value of the property, assuming it is a . If it is + not, an expression is thrown. If it is , is returned. + + The constant value of the property. + + + + Gets or sets the optional default value. + + The optional default value. + + + + Represents the removal of a given set of items from the result set of a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Except" call in the following example corresponds to a . + + var query = (from s in Students + select s).Except(students2); + + + + + + Gets the value of , assuming holds a . If it doesn't, + an exception is thrown. + + The constant value of . + + + + Gets or sets the second source of this result operator, that is, an enumerable containing the items removed from the input sequence. + + + + + Represents taking the mathematical intersection of a given set of items and the items returned by a query. + This is a result operator, operating on the whole result set of a query. 
+ + + In C#, the "Intersect" call in the following example corresponds to a . + + var query = (from s in Students + select s).Intersect(students2); + + + + + + Gets the value of , assuming holds a . If it doesn't, + an Intersection is thrown. + + The constant value of . + + + + Gets or sets the second source of this result operator, that is, an enumerable containing the items intersected with the input sequence. + + + + + Represents counting the number of items returned by a query as a 64-bit number. + This is a result operator, operating on the whole result set of a query. + + + "LongCount" query methods taking a predicate are represented as a combination of a and a + . + + + In C#, the "LongCount" call in the following example corresponds to a . + + var query = (from s in Students + select s).LongCount(); + + + + + + + + + Represents filtering the items returned by a query to only return those items that are of a specific type. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "OfType" call in the following example corresponds to a . + + var query = (from s in Students + select s.ID).OfType<int>(); + + + + + + + + + Represents reversing the sequence of items returned by of a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Reverse" call in the following example corresponds to a . + + var query = (from s in Students + select s).Reverse(); + + + + + + + + + Represents skipping a number of the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Skip" call in the following example corresponds to a . + + var query = (from s in Students + select s).Skip (3); + + + + + + Gets the constant value of the property, assuming it is a . If it is + not, an expression is thrown. + + The constant value of the property. + + + + Represents calculating the sum of the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Sum" call in the following example corresponds to a . + + var query = (from s in Students + select s.ID).Sum(); + + + + + + + + + Represents taking only the greatest one of the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + The semantics of "greatest" are defined by the query provider. "Max" query methods taking a selector are represented as a combination + of a and a . + + + In C#, the "Max" call in the following example corresponds to a . + + var query = (from s in Students + select s.ID).Max(); + + + + + + Initializes a new instance of the . + + + + + + + + Represents taking only the smallest one of the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + The semantics of "smallest" are defined by the query provider. "Min" query methods taking a selector are represented as a combination + of a and a . + + + In C#, the "Min" call in the following example corresponds to a . + + var query = (from s in Students + select s.ID).Min(); + + + + + + Initializes a new instance of the . + + + + + + + + Represents taking only the last one of the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + "Last" query methods taking a predicate are represented as a combination of a and a . + + + In C#, the "Last" call in the following example corresponds to a . 
+ + var query = (from s in Students + select s).Last(); + + + + + + Initializes a new instance of the . + + The flag defines if a default expression should be regarded. + + + + + + + Represents taking only a specific number of items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Take" call in the following example corresponds to a . + + var query = (from s in Students + select s).Take(3); + + + + + + Initializes a new instance of the . + + The number of elements which should be returned. + + + + Gets the constant value of the property, assuming it is a . If it is + not, an expression is thrown. + + The constant value of the property. + + + + Represents taking only the first of the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + "First" query methods taking a predicate are represented as a combination of a and a . + + + In C#, the "First" call in the following example corresponds to a . + + var query = (from s in Students + select s).First(); + + + + + + Initializes a new instance of the . + + The flag defines if a default expression should be regarded. + + + + + + + Represents taking the single item returned by a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Single" call in the following example corresponds to a . + + var query = (from s in Students + select s).Single(); + + + + + + Initializes a new instance of the . + + The flag defines if a default expression should be regarded. + + + + + + + Represents the removal of duplicate values from the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Distinct" call in the following example corresponds to a . + + var query = (from s in Students + select s).Distinct(); + + + + + + + + + Represents counting the number of items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + "Count" query methods taking a predicate are represented as a combination of a and a . + /// + In C#, the "Count" call in the following example corresponds to a . + + var query = (from s in Students + select s).Count(); + + + + + + + + + Represents forming the mathematical union of a given set of items and the items returned by a query. + This is a result operator, operating on the whole result set of a query. + + + In C#, the "Union" call in the following example corresponds to a . + + var query = (from s in Students + select s).Union(students2); + + + + + + Gets the value of , assuming holds a . If it doesn't, + an Exception is thrown. + + The constant value of . + + + + Gets or sets the second source of this result operator, that is, an enumerable containing the items united with the input sequence. + + + + + Provides a way to enumerate an while items are inserted, removed, or cleared in a consistent fashion. + + The element type of the . + + This class subscribes to the events exposed by and reacts on changes to the collection. + If an item is inserted or removed before the current element, the enumerator will continue after the current element without + regarding the new or removed item. If the current item is removed, the enumerator will continue with the item that previously followe the + current item. If an item is inserted or removed after the current element, the enumerator will simply continue, including the newly inserted + item and not including the removed item. 
+ + + + + Provides an implementation of that allows storing multiple values per key. The multiple values + are represented as an of value. Access to a key without values returns an empty . + + The type of the keys of the values to be stored. + The type of the values to be stored. + + + + Extends with events that indicate when the collection was changed. + + The type of items held by this . + + + + Returns an instance of that represents this collection and can be enumerated even while the collection changes; + the enumerator will adapt to the changes (see ). + + + + + Returns an instance of that represents this collection and can be enumerated even while the collection changes; + the enumerator will adapt to the changes (see ). The enumerable will yield + instances of type , which hold both the index and the value of the current item. If this collection changes + while enumerating, will reflect those changes. + + + + + Occurs after the items of this have been cleared. + + + + + Occurs after an item has been removed from this . It does not occur when an item is replaced, in this + case the event is raised. + + + + + Occurs after an item has been added to this . It does not occur when an item is replaced, in this + case the event is raised. + + + + + Occurs after an item has been set at a specific index of this . + + + + + Represents an item enumerated by . This provides access + to the as well as the of the enumerated item. + + + + + Gets the index of the current enumerated item. Can only be called while enumerating, afterwards, it will throw an + . If an item is inserted into or removed from the collection before the current item, this + index will change. + + + + + Gets the value of the current enumerated item. Can only be called while enumerating, afterwards, it will throw an + . + + The value. + + + + Provides event data for 's events. + + The type of the items managed by the . + + + + Represents a default implementation of that is automatically used by + unless a custom is specified. The executes queries by parsing them into + an instance of type , which is then passed to an implementation of to obtain the + result set. + + + + + Provides a default implementation of that executes queries (subclasses of ) by + first parsing them into a and then passing that to a given implementation of . + Usually, should be used unless must be manually implemented. + + + + + Initializes a new instance of using a custom . Use this + constructor to customize how queries are parsed. + + The used to parse queries. Specify an instance of + for default behavior. + The used to execute queries against a specific query backend. + + + + Constructs an object that can evaluate the query represented by a specified expression tree. This + method delegates to . + + An expression tree that represents a LINQ query. + + An that can evaluate the query represented by the specified expression tree. + + + + + Constructs an object that can evaluate the query represented by a specified expression tree. This method is + called by the standard query operators defined by the class. + + An expression tree that represents a LINQ query. + + An that can evaluate the query represented by the specified expression tree. + + + + + Executes the query defined by the specified expression by parsing it with a + and then running it through the . + This method is invoked through the interface methods, for example by + and + , and it's also used by + when the is enumerated. 
+ + + Override this method to replace the query execution mechanism by a custom implementation. + + + + + Executes the query defined by the specified expression by parsing it with a + and then running it through the . + The result is cast to . + + The type of the query result. + The query expression to be executed. + The result of the query cast to . + + This method is called by the standard query operators that return a single value, such as + or + . + In addition, it is called by to execute queries that return sequences. + + + + + Executes the query defined by the specified expression by parsing it with a + and then running it through the . + + The query expression to be executed. + The result of the query. + + This method is similar to the method, but without the cast to a defined return type. + + + + + The method generates a . + + The query as expression chain. + a + + + + Gets the used by this to parse LINQ queries. + + The query parser. + + + + Gets or sets the implementation of used to execute queries created via . + + The executor used to execute queries. + + + + Initializes a new instance of using a custom . + + + A type implementing . This type is used to construct the chain of query operators. Must be a generic type + definition. + + The used to parse queries. Specify an instance of + for default behavior. See also . + The used to execute queries against a specific query backend. + + + + Creates a new (of type with as its generic argument) that + represents the query defined by and is able to enumerate its results. + + The type of the data items returned by the query. + An expression representing the query for which a should be created. + An that represents the query defined by . + + + + Gets the type of queryable created by this provider. This is the generic type definition of an implementation of + (usually a subclass of ) with exactly one type argument. + + + + + Visits a , removing all instances from its + collection and returning objects for them. + + + Note that this visitor does not remove fetch requests from sub-queries. + + + + + Provides a default implementation of which automatically visits child items. That is, the default + implementation of automatically calls Accept on all clauses in the + and the default implementation of automatically calls on the + instances in its collection, and so on. + + + This visitor is hardened against modifications performed on the visited while the model is currently being visited. + That is, if a the collection changes while a body clause (or a child item of a body clause) is currently + being processed, the visitor will handle that gracefully. The same applies to and + . + + + + + Defines an interface for visiting the clauses of a . + + + + When implement this interface, implement , then call Accept on every clause that should + be visited. Child clauses, joins, orderings, and result operators are not visited automatically; they always need to be explicitly visited + via , , , + , and so on. + + + provides a robust default implementation of this interface that can be used as a base for other visitors. + + + + + + Holds a , a for which the fetch request was created, and the position + where the occurred in the list of the . From + this information, it builds a new that represents the as a query. + + + Use to retrieve the instances for a . + + + + + Initializes a new instance of the class. + + The fetch request. + The query model for which the was originally defined. + The result operator position where the was originally located. 
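As a companion to the execution pipeline described above (the query provider parsing the expression tree into a QueryModel and handing it to an executor), a skeletal IQueryExecutor is sketched below. The three-method shape follows the Remotion.Linq 1.13 interface; the in-memory bodies are placeholders, not how a real backend would translate the QueryModel.

using System;
using System.Collections.Generic;
using System.Linq;
using Remotion.Linq;

public class InMemoryQueryExecutor : IQueryExecutor
{
    private readonly IEnumerable<object> _source;

    public InMemoryQueryExecutor(IEnumerable<object> source)
    {
        _source = source;
    }

    // Invoked for queries that return sequences (the common case).
    public IEnumerable<T> ExecuteCollection<T>(QueryModel queryModel)
    {
        // A real provider would translate queryModel (MainFromClause,
        // BodyClauses, SelectClause, ResultOperators) into its target
        // query language; this placeholder only filters by item type.
        return _source.OfType<T>();
    }

    // Invoked for First/Single/Last-style result operators.
    public T ExecuteSingle<T>(QueryModel queryModel, bool returnDefaultWhenEmpty)
    {
        var items = ExecuteCollection<T>(queryModel);
        return returnDefaultWhenEmpty ? items.SingleOrDefault() : items.Single();
    }

    // Invoked for scalar result operators such as Count or Any.
    public T ExecuteScalar<T>(QueryModel queryModel)
    {
        // Deliberately unimplemented: evaluating result operators is
        // backend-specific and outside the scope of this sketch.
        throw new NotImplementedException();
    }
}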
+ The will include all result operators prior to this position into the fetch , + but it will not include any result operators occurring after (or at) that position. + + + + Creates the fetch query model for the , caching the result. + + + A new which represents the same query as but selecting + the objects described by instead of the objects selected by the + . From the original , only those result operators are included that occur + prior to . + + + + + Creates objects for the of the + . Inner fetch requests start from the fetch query model of the outer fetch request, and they have + a of 0. + + An array of objects for the of the + . + + + + Provides common functionality for and . + + + + + Provides common functionality used by all expression nodes representing fetch operations. + + + + + Acts as a base class for s standing for s that operate on the result of the query + rather than representing actual clauses, such as or . + + + + + Base class for implementations that represent instantiations of . + + + + + Interface for classes representing structural parts of an tree. + + + + + Resolves the specified by replacing any occurrence of + by the result of the projection of this . The result is an that goes all the + way to an . + + The parameter representing the input data streaming into an . This is replaced + by the projection data coming out of this . + The expression to be resolved. Any occurrence of in this expression + is replaced. + Context information used during the current parsing process. This structure maps + s to the clauses created from them. Implementers that also implement + (such as or ) must add + their clauses to the mapping in if they want to be able to implement correctly. + An equivalent of with each occurrence of replaced by + the projection data streaming out of this . + + This node does not support this operation because it does not stream any data to subsequent nodes. + + + + + Applies this to the specified query model. Nodes can add or replace clauses, add or replace expressions, + add or replace objects, or even create a completely new , depending on their semantics. + + The query model this node should be applied to. + Context information used during the current parsing process. This structure maps + s to the clauses created from them. Implementers that + also implement (such as + or ) must add their clauses to the mapping in + in order to be able to implement correctly. + The modified or a new that reflects the changes made by this node. + + For objects, which mark the end of an chain, this method must not be called. + Instead, use to generate a and instantiate a new + with that clause. + + + + + Gets the source that streams data into this node. + + The source , or if this node is the end of the chain. + + + + Gets the identifier associated with this . tries to find the identifier + that was originally associated with this node in the query written by the user by analyzing the parameter names of the next expression in the + method call chain. + + The associated identifier. + + + + Gets the from a given that has to wrap a . + If the method is a generic method, its open generic method definition is returned. + This method can be used for registration of the node type with an . + + + The method call. + + + + + Wraps the into a subquery after a node that indicates the end of the query ( + or ). Override this method + when implementing a that does not need a subquery to be created if it occurs after the query end. 
+ + + + When an ordinary node follows a result operator or group node, it cannot simply append its clauses to the + because semantically, the result operator (or grouping) must be executed _before_ the clause. Therefore, in such scenarios, we wrap + the current query model into a that we put into the of a new + . + + + This method also changes the of this node because logically, all operations must be handled + by the new holding the . For example, consider the following call chain: + + MainSource (...) + .Select (x => x) + .Distinct () + .Select (x => x) + + + Naively, the last Select node would resolve (via Distinct and Select) to the created by the initial MainSource. + After this method is executed, however, that is part of the sub query, and a new + has been created to hold it. Therefore, we replace the chain as follows: + + MainSource (MainSource (...).Select (x => x).Distinct ()) + .Select (x => x) + + + Now, the last Select node resolves to the new . + + + + + + Sets the result type override of the given . + + The query model to set the of. + + By default, the result type override is set to in the method. This ensures that the query + model represents the type of the query correctly. Specific node parsers can override this method to set the + to another value, or to clear it (set it to ). Do not leave the + unchanged when overriding this method, as a source node might have set it to a value that doesn't + fit this node. + + + + + Provides common functionality for and . + + + + + Parses query operators that instruct the LINQ provider to fetch a collection-valued relationship starting from another fetch operation. The node + creates instances and attaches them to the preceding fetch operation (unless the previous fetch operation already + has an equivalent fetch request). + + + This class is not automatically configured for any query operator methods. LINQ provider implementations must explicitly provide and register + these methods in order for to be used. See also . + + + + + Parses query operators that instruct the LINQ provider to fetch an object-valued relationship starting from another fetch operation. The node + creates instances and attaches them to the preceding fetch operation (unless the previous fetch operation already + has an equivalent fetch request). + + + This class is not automatically configured for any query operator methods. LINQ provider implementations must explicitly provide and register + these methods in order for to be used. See also . + + + + + Parses query operators that instruct the LINQ provider to fetch a collection-valued relationship starting from the values selected by the query. + The node creates instances and adds them to the as + (unless the already has an equivalent fetch request). + + + This class is not automatically configured for any query operator methods. LINQ provider implementations must explicitly provide and register + these methods in order for to be used. See also . + + + + + Parses query operators that instruct the LINQ provider to fetch an object-valued relationship starting from the values selected by the query. + The node creates instances and adds them to the as + (unless the already has an equivalent fetch request). + + + This class is not automatically configured for any query operator methods. LINQ provider implementations must explicitly provide and register + these methods in order for to be used. See also . + + + + + Replaces nodes according to a given mapping specification. 
Expressions are also replaced within subqueries; the + is changed by the replacement operations, it is not copied. The replacement node is not recursively searched for + occurrences of nodes to be replaced. + + + + + Wraps an exception whose partial evaluation caused an exception. + + + + When encounters an exception while evaluating an independent expression subtree, it + will wrap the subtree within a . The wrapper contains both the + instance and the that caused the exception. + + + To explicitly support this expression type, implement . + To ignore this wrapper and only handle the inner , call the method and visit the result. + + + Subclasses of that do not implement will, + by default, automatically reduce this expression type to the in the + method. + + + Subclasses of that do not implement will, + by default, ignore this expression and visit its child expressions via the and + methods. + + + + + + Transforms a given . If the can handle the , + it should return a new, transformed instance. Otherwise, it should return the input + instance. + + The expression to be transformed. + The result of the transformation, or if no transformation was applied. + + + + Manages registration and lookup of objects, and converts them to + weakly typed instances. Use this class together with + in order to apply the registered transformers to an tree. + + + + + defines an API for classes returning instances for specific + objects. Usually, the will be used when an implementation of this + interface is needed. + + + + + Gets the transformers for the given . + + The to be transformed. + + A sequence containing objects that should be applied to the . Must not + be . + + + + + Creates an with the default transformations provided by this library already registered. + New transformers can be registered by calling . + + A default . + + Currently, the default registry contains: + + + + + + + + + + + + + + Registers the specified for the transformer's + . If + returns , the is registered as a generic transformer which will be applied to all + nodes. + + The type of expressions handled by the . This should be a type implemented by all + expressions identified by . For generic transformers, + must be . + The transformer to register. + + + The order in which transformers are registered is the same order on which they will later be applied by + . When more than one transformer is registered for a certain , + each of them will get a chance to transform a given , until the first one returns a new . + At that point, the transformation will start again with the new (and, if the expression's type has changed, potentially + different transformers). + + + When generic transformers are registered, they act as if they had been registered for all values (including + custom ones). They will be applied in the order registered, but only after all respective specific transformers have run (without modifying + the expression, which would restart the transformation process with the new expression as explained above). + + + When an is registered for an incompatible , this is not detected until + the transformer is actually applied to an of that . + + + + + + Dynamically discovers attributes implementing the interface on methods and get accessors + invoked by or instances and applies the respective + . + + + + + is implemented by classes that transform instances. The + manages registration of instances, and the + applies the transformations. + + The type of expressions handled by this implementation. 
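To make the transformer registration described above concrete, a small strongly typed transformer sketch follows. The Remotion.Linq.Parsing.ExpressionTreeVisitors.Transformation namespace and the CreateDefault/Register calls in the trailing comment are assumptions about the 1.13 API surface documented here, not verified signatures.

using System.Linq.Expressions;
using Remotion.Linq.Parsing.ExpressionTreeVisitors.Transformation;

// Collapses "expr == true" to "expr" so later pipeline steps see one less shape.
public class RedundantBooleanEqualityTransformer : IExpressionTransformer<BinaryExpression>
{
    public ExpressionType[] SupportedExpressionTypes
    {
        get { return new[] { ExpressionType.Equal }; }
    }

    public Expression Transform(BinaryExpression expression)
    {
        var constant = expression.Right as ConstantExpression;
        if (constant != null && constant.Type == typeof(bool) && true.Equals(constant.Value))
            return expression.Left;

        // Returning the input unchanged signals "no transformation applied".
        return expression;
    }
}

// Registration, following the registry description above (names assumed):
//   var registry = ExpressionTransformerRegistry.CreateDefault();
//   registry.Register(new RedundantBooleanEqualityTransformer());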
+ + + is a convenience interface that provides strong typing, whereas + only operates on instances. + + + can be used together with the class by using the + class as the transformation provider. converts + strongly typed instances to weakly typed delegate instances. + + + + + + Transforms a given . If the implementation can handle the , + it should return a new, transformed instance. Otherwise, it should return the input + instance. + + The expression to be transformed. + The result of the transformation, or if no transformation was applied. + + + + Gets the expression types supported by this . + + The supported expression types. Return to support all expression types. (This is only sensible when + is .) + + + + + Defines an interface for attributes providing an for a given . + + + + detects attributes implementing this interface while expressions are parsed + and uses the returned by to modify the expressions. + + + Only one attribute instance implementing must be applied to a single method or property + get accessor. + + + + + + Chooses a given for a specific method (or property get accessor). + + + The must have a default constructor. To choose a transformer that does not have a default constructor, + create your own custom attribute class implementing + . + + + + + Detects nodes for the .NET tuple types and adds metadata to those nodes. + This allows LINQ providers to match member access and constructor arguments more easily. + + + + + Provides a base class for transformers detecting nodes for tuple types and adding metadata + to those nodes. This allows LINQ providers to match member access and constructor arguments more easily. + + + + + Detects nodes for and adds metadata to those nodes. + This allows LINQ providers to match member access and constructor arguments more easily. + + + + + Detects expressions invoking a and replaces them with the body of that + (with the parameter references replaced with the invocation arguments). + Providers use this transformation to be able to handle queries with instances. + + + When the is applied to a delegate instance (rather than a + ), the ignores it. + + + + + Detects nodes for and adds metadata to those nodes. + This allows LINQ providers to match member access and constructor arguments more easily. + + + + + Replaces calls to and with casts and null checks. This allows LINQ providers + to treat nullables like reference types. + + + + + Detects expressions calling the CompareString method used by Visual Basic .NET, and replaces them with + instances. Providers use this transformation to be able to handle VB string comparisons + more easily. See for details. + + + + + Detects expressions calling the Information.IsNothing (...) method used by Visual Basic .NET, and replaces them with + instances comparing with . Providers use this transformation to be able to + handle queries using IsNothing (...) more easily. + + + + + Analyzes an expression tree by visiting each of its nodes, finding those subtrees that can be evaluated without modifying the meaning of + the tree. + + + An expression node/subtree is evaluatable if: + + it is not a or any non-standard expression, + it is not a that involves an , and + it does not have any of those non-evaluatable expressions as its children. + + nodes are not evaluatable because they usually identify the flow of + some information from one query node to the next. + nodes that involve parameters or object instances are not evaluatable because they + should usually be translated into the target query syntax. 
+ Non-standard expressions are not evaluatable because they cannot be compiled and evaluated by LINQ. + + + + + Implements by storing a list of inner instances. + The and methods delegate to these inner instances. This is an + implementation of the Composite Pattern. + + + + + Provides a common interface for classes mapping a to the respective + type. Implementations are used by when a is encountered to + instantiate the right for the given method. + + + + + Determines whether a node type for the given can be returned by this + . + + + + + Gets the type of that matches the given , returning + if none can be found. + + + + + Implements by storing a list of inner instances. + The method calls each inner instance in the order defined by the property. This is an + implementation of the Composite Pattern. + + + + + is implemented by classes that represent steps in the process of parsing the structure + of an tree. applies a series of these steps to the + tree before analyzing the query operators and creating a . + + + + There are predefined implementations of that should only be left out when parsing an + tree when there are very good reasons to do so. + + + can be implemented to provide custom, complex transformations on an + tree. For performance reasons, avoid adding too many steps each of which visits the whole tree. For + simple transformations, consider using and - which can + batch several transformations into a single expression tree visiting run - rather than implementing a dedicated + . + + + + + + Implements the interface by doing nothing in the method. This is an + implementation of the Null Object Pattern. + + + + + Maps the objects used in objects to the respective + types based on the method names and a filter (as defined by ). + This is used by when a is encountered to instantiate the right + for the given method. + + + + + Creates a and automatically registers all types implementing + from a given type sequence that offer a public static SupportedMethodNames field. + + A with all types with a SupportedMethodNames + field registered. + + + + Registers the given for the query operator methods defined by the given + objects. + + A sequence of objects defining the methods to register the node type for. + The type of the to register. + + + + Determines whether the specified method was registered with this . + + + + + Gets the type of registered with this instance that + matches the given , returning if none can be found. + + + + + Returns the count of the registered method names. + + + + + Applies a given set of transformations to an tree. The transformations are provided by an instance of + (eg., ). + + + The uses the to apply the transformations. + It performs a single visiting run over the tree. + + + + + Initializes a new instance of the class. + + A class providing the transformations to apply to the tree, eg., an instance of + . + + + + Analyzes an tree for sub-trees that are evaluatable in-memory, and evaluates those sub-trees. + + + The uses the for partial evaluation. + It performs two visiting runs over the tree. + + + + + Represents a for the + and methods. + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for the different overloads of . + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. 
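Editorial note, not part of the shipped XML documentation: the transformer interface and registry members described above (the strongly typed transformer, the registry's CreateDefault/Register pair) are typically wired up roughly as in the following sketch. The class names RemoveRedundantTrueComparisonTransformer and TransformerSetup are invented for illustration, and namespaces should be checked against the Remotion.Linq version added by this change.

    using System.Linq.Expressions;
    using Remotion.Linq.Parsing.ExpressionTreeVisitors.Transformation; // namespace as of re-linq 1.x; may differ in other versions

    // Rewrites "x == true" to just "x". Anything else is returned unchanged,
    // which signals to the registry that no transformation was applied.
    public class RemoveRedundantTrueComparisonTransformer : IExpressionTransformer<BinaryExpression>
    {
      public ExpressionType[] SupportedExpressionTypes
      {
        get { return new[] { ExpressionType.Equal }; }
      }

      public Expression Transform (BinaryExpression expression)
      {
        var right = expression.Right as ConstantExpression;
        if (right != null && true.Equals (right.Value))
          return expression.Left;
        return expression;
      }
    }

    public static class TransformerSetup
    {
      public static ExpressionTransformerRegistry CreateRegistry ()
      {
        // Start from the default registry (default transformations already registered)
        // and add the custom transformer on top, as described in the documentation above.
        var registry = ExpressionTransformerRegistry.CreateDefault ();
        registry.Register (new RemoveRedundantTrueComparisonTransformer ());
        return registry;
      }
    }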
+ + + + + Encapsulates contextual information used while generating clauses from instances. + + + + + Represents a for and + . + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for the , + , + , and + methods. + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for the + and + methods. + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for the + , , + , and + methods. + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for the different + overloads that do take a result selector. The overloads without a result selector are represented by + . + It is generated by when an tree is parsed. + + + The GroupBy overloads with result selector are parsed as if they were a following a + : + + x.GroupBy (k => key, e => element, (k, g) => result) + + is therefore equivalent to: + + c.GroupBy (k => key, e => element).Select (grouping => resultSub) + + where resultSub is the same as result with k and g substituted with grouping.Key and grouping, respectively. + + + + + Represents a for + . + It is generated by when an tree is parsed. + + + + + Represents a for and + and + and + + It is generated by when an tree is parsed. + When this node is used, it usually follows (or replaces) a of an chain that + represents a query. + + + + + Represents a for + . + It is generated by when an tree is parsed. + When this node is used, it usually follows (or replaces) a of an chain that + represents a query. + + + + + Thrown whan an parser cannot be instantiated for a query. + + + + + Resolves an expression using , removing transparent identifiers and detecting subqueries + in the process. This is used by methods such as , which are + used when a clause is created from an . + + + + + Represents a for the different + overloads that do not take a result selector. The overloads with a result selector are represented by + . + It is generated by when an tree is parsed. + + + + + Represents a for + + or + It is generated by when an tree is parsed. + + + + + Represents a for + . + It is generated by when an tree is parsed. + When this node is used, it usually follows (or replaces) a of an chain that + represents a query. + + + + + Represents a for + + or . + It is generated by when an tree is parsed. + + + + + Represents a for , + , + and for the property of arrays. + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for + and . + It is generated by when an tree is parsed. + + + + + Provides common functionality used by implementors of . + + + + + Replaces the given parameter with a back-reference to the corresponding to . + + The referenced node. + The parameter to replace with a . + The expression in which to replace the parameter. + The clause generation context. + , with replaced with a + pointing to the clause corresponding to . + + + + Gets the corresponding to the given , throwing an + if no such clause has been registered in the given . + + The node for which the should be returned. 
+ The clause generation context. + The corresponding to . + + + + Caches a resolved expression in the classes. + + + + + Represents a for . + It is generated by when an tree is parsed. + When this node is used, it usually follows (or replaces) a of an chain that + represents a query. + + + + + Represents a for + It is generated by when an tree is parsed. + When this node is used, it usually follows (or replaces) a of an chain that + represents a query. + + + + + Represents a for + . + It is generated by when an tree is parsed. + When this node is used, it usually follows (or replaces) a of an chain that + represents a query. + + + + + Represents a for + . + It is generated by when an tree is parsed. + + + + + Contains metadata about a that is parsed into a . + + + + + Gets the associated identifier, i.e. the name the user gave the data streaming out of this expression. For example, the + corresponding to a from c in C clause should get the identifier "c". + If there is no user-defined identifier (or the identifier is impossible to infer from the expression tree), a generated identifier + is given instead. + + + + + Gets the source expression node, i.e. the node streaming data into the parsed node. + + The source. + + + + Gets the being parsed. + + + + + is implemented by classes taking an tree and parsing it into a . + + + The default implementation of this interface is . LINQ providers can, however, implement + themselves, eg. in order to decorate or replace the functionality of . + + + + + Gets the of the given . + + The expression tree to parse. + A that represents the query defined in . + + + + Defines a name and a filter predicate used when determining the matching expression node type by . + + + + + Implements an that throws an exception for every expression type that is not explicitly supported. + Inherit from this class to ensure that an exception is thrown when an expression is passed + + + + + Called when an unhandled item is visited. This method provides the item the visitor cannot handle (), + the that is not implemented in the visitor, and a delegate that can be used to invoke the + of the class. The default behavior of this method is to call the + method, but it can be overridden to do something else. + + The type of the item that could not be handled. Either an type, a + type, or . + The result type expected for the visited . + The unhandled item. + The visit method that is not implemented. + The behavior exposed by for this item type. + An object to replace in the expression tree. Alternatively, the method can throw any exception. + + + + can be used to build tuples incorporating a sequence of s. + For example, given three expressions, exp1, exp2, and exp3, it will build nested s that are equivalent to the + following: new KeyValuePair(exp1, new KeyValuePair(exp2, exp3)). + Given an whose type matches that of a tuple built by , the builder can also return + an enumeration of accessor expressions that can be used to access the tuple elements in the same order as they were put into the nested tuple + expression. In above example, this would yield tupleExpression.Key, tupleExpression.Value.Key, and tupleExpression.Value.Value. + This class can be handy whenever a set of needs to be put into a single + (eg., a select projection), especially if each sub-expression needs to be explicitly accessed at a later point of time (eg., to retrieve the + items from a statement surrounding a sub-statement yielding the tuple in its select projection). 
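Editorial note, not part of the shipped XML documentation: the parsing entry points documented in this section (the query parser interface, its GetParsedQuery method, and the QueryModel it produces) are normally used as sketched below. Student is the sample entity used throughout these docs (First/Last properties); QueryParser.CreateDefault stands in for the default-parser factory referred to above, and the exact member name should be verified against the bundled Remotion.Linq assembly.

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using Remotion.Linq;                   // QueryModel
    using Remotion.Linq.Parsing.Structure; // IQueryParser, QueryParser

    public class Student
    {
      public string First { get; set; }
      public string Last { get; set; }
    }

    public static class ParsingSample
    {
      public static void Run ()
      {
        var students = new List<Student> ().AsQueryable ();
        var query = from s in students
                    where s.First == "Hugo"
                    select s.Last;

        IQueryParser parser = QueryParser.CreateDefault ();
        QueryModel queryModel = parser.GetParsedQuery (query.Expression);

        Console.WriteLine (queryModel);                          // readable form of the parsed query
        Console.WriteLine (queryModel.MainFromClause.ItemName);  // "s"
        Console.WriteLine (queryModel.BodyClauses.Count);        // 1 - the where clause
      }
    }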
+ + + + + Collects clauses and creates a from them. This provides a simple way to first add all the clauses and then + create the rather than the two-step approach (first and , + then the s) required by 's constructor. + + + + + Transforms an expression tree into a human-readable string, taking all the custom expression nodes into account. + It does so by replacing all instances of custom expression nodes by parameters that have the desired string as their names. This is done + to circumvent a limitation in the class, where overriding in custom expressions + will not work. + + + + + Takes a and transforms it by replacing its instances ( and + ) that contain subqueries with equivalent flattened clauses. Subqueries that contain a + (such as or ) cannot be + flattened. + + + As an example, take the following query: + + from c in Customers + from o in (from oi in OrderInfos where oi.Customer == c orderby oi.OrderDate select oi.Order) + orderby o.Product.Name + select new { c, o } + + This will be transformed into: + + from c in Customers + from oi in OrderInfos + where oi.Customer == c + orderby oi.OrderDate + orderby oi.Order.Product.Name + select new { c, oi.Order } + + As another example, take the following query: + + from c in (from o in Orders select o.Customer) + where c.Name.StartsWith ("Miller") + select c + + (This query is never produced by the , the only way to construct it is via manually building a + .) + This will be transforemd into: + + from o in Orders + where o.Customer.Name.StartsWith ("Miller") + select o + + + + + + Applies delegates obtained from an to an expression tree. + The transformations occur in post-order (transforming child nodes before parent nodes). When a transformation changes + the current , its child nodes and itself will be revisited (and may be transformed again). + + + + + Generates unique identifiers based on a set of known identifiers. + An identifier is generated by appending a number to a given prefix. The identifier is considered unique when no known identifier + exists which equals the prefix/number combination. + + + + + Adds the given to the set of known identifiers. + + The identifier to add. + + + + Gets a unique identifier starting with the given . The identifier is generating by appending a number to the + prefix so that the resulting string does not match a known identifier. + + The prefix to use for the identifier. + A unique identifier starting with . + + + + Represents a property holding one object that should be eager-fetched when a query is executed. + + + + + Base class for classes representing a property that should be eager-fetched when a query is executed. + + + + + Gets a the fetch query model, i.e. a new that incorporates a given as a + and selects the fetched items from it. + + A that yields the source items for which items are to be fetched. + A that selects the fetched items from as a subquery. + + This method does not clone the , remove result operatores, etc. Use + (via ) for the full algorithm. + + + + + Modifies the given query model for fetching, adding new instances and changing the + as needed. + This method is called by in the process of creating the new fetch query model. + + The fetch query model to modify. + + + + Gets or adds an inner eager-fetch request for this . + + The to be added. + + or, if another for the same relation member already existed, + the existing . + + + + + Gets the of the relation member whose contained object(s) should be fetched. + + The relation member. 
+ + + + Gets the inner fetch requests that were issued for this . + + The fetch requests added via . + + + + Modifies the given query model for fetching, changing the to the fetch source expression. + For example, a fetch request such as FetchOne (x => x.Customer) will be transformed into a selecting + y.Customer (where y is what the query model originally selected). + This method is called by in the process of creating the new fetch query model. + + + + + + + + + + + Represents a relation collection property that should be eager-fetched by means of a lambda expression. + + + + + Modifies the given query model for fetching, adding an and changing the to + retrieve the result of the . + For example, a fetch request such as FetchMany (x => x.Orders) will be transformed into a selecting + y.Orders (where y is what the query model originally selected) and a selecting the result of the + . + This method is called by in the process of creating the new fetch query model. + + + + + + + + + + + Holds a number of instances keyed by the instances representing the relation members + to be eager-fetched. + + + + + Gets or adds an eager-fetch request to this . + + The to be added. + + or, if another for the same relation member already existed, + the existing . + + + + + Specifies the direction used to sort the result items in a query using an . + + + + + Sorts the items in an ascending way, from smallest to largest. + + + + + Sorts the items in an descending way, from largest to smallest. + + + + + Represents an that holds a subquery. The subquery is held by in its parsed form. + + + + + Provides a fluent interface to recursively fetch related objects of objects which themselves are eager-fetched. All query methods + are implemented as extension methods. + + The type of the objects returned by the query. + The type of object from which the recursive fetch operation should be made. + + + + Acts as a common base class for implementations based on re-linq. In a specific LINQ provider, a custom queryable + class should be derived from which supplies an implementation of that is used to + execute the query. This is then used as an entry point (the main data source) of a LINQ query. + + The type of the result items yielded by this query. + + + + Initializes a new instance of the class with a and the given + . This constructor should be used by subclasses to begin a new query. The generated by + this constructor is a pointing back to this . + + The used to parse queries. Specify an instance of + for default behavior. See also . + The used to execute the query represented by this . + + + + Initializes a new instance of the class with a specific . This constructor + should only be used to begin a query when does not fit the requirements. + + The provider used to execute the query represented by this and to construct + queries around this . + + + + Initializes a new instance of the class with a given and + . This is an infrastructure constructor that must be exposed on subclasses because it is used by + to construct queries around this when a query method (e.g. of the + class) is called. + + The provider used to execute the query represented by this and to construct + queries around this . + The expression representing the query. + + + + Executes the query via the and returns an enumerator that iterates through the items returned by the query. + + + A that can be used to iterate through the query result. + + + + + Gets the expression tree that is associated with the instance of . 
This expression describes the + query represented by this . + + + + The that is associated with this instance of . + + + + + Gets the query provider that is associated with this data source. The provider is used to execute the query. By default, a + is used that parses the query and passes it on to an implementation of . + + + + The that is associated with this data source. + + + + + Gets the type of the element(s) that are returned when the expression tree associated with this instance of is executed. + + + + A that represents the type of the element(s) that are returned when the expression tree associated with this object is executed. + + + + + Represents a being bound to an associated instance. This binding's + method returns only for the same the expression is bound to. + + + + + + Represents a being bound to an associated instance. This is used by the + to represent assignments in constructor calls such as new AnonymousType (a = 5), + where a is the member of AnonymousType and 5 is the associated expression. + The method can be used to check whether the member bound to an expression matches a given + (considering read access). See the subclasses for details. + + + + + Represents a being bound to an associated instance. + + This binding's + method returns for the same the expression is bound to or for a + whose getter method is the the expression is bound to. + + + + + Represents a being bound to an associated instance. + + This binding's + method returns for the same the expression is bound to + or for its getter method's . + + + + + Represents grouping the items returned by a query according to some key retrieved by a , applying by an + to the grouped items. This is a result operator, operating on the whole result set of the query. + + + In C#, the "group by" clause in the following sample corresponds to a . "s" (a reference to the query source + "s", see ) is the expression, "s.Country" is the + expression: + + var query = from s in Students + where s.First == "Hugo" + group s by s.Country; + + + + + + Initializes a new instance of the class. + + A name associated with the items generated by the result operator. + The selector retrieving the key by which to group items. + The selector retrieving the elements to group. + + + + Clones this clause, adjusting all instances held by it as defined by + . + + The clones of all query source clauses are registered with this . + A clone of this clause. + + + + Transforms all the expressions in this clause and its child objects via the given delegate. + + The transformation object. This delegate is called for each within this + clause, and those expressions will be replaced with what the delegate returns. + + + + Gets or sets the name of the items generated by this . + + + Item names are inferred when a query expression is parsed, and they usually correspond to the variable names present in that expression. + However, note that names are not necessarily unique within a . Use names only for readability and debugging, not for + uniquely identifying objects. To match an with its references, use the + property rather than the . + + + + + Gets or sets the type of the items generated by this . The item type is an instantiation of + derived from the types of and . + + + + + Gets or sets the selector retrieving the key by which to group items. + This is a resolved version of the body of the that would be + passed to . + + The key selector. + + + + Gets or sets the selector retrieving the elements to group. 
+ This is a resolved version of the body of the that would be + passed to . + + The element selector. + + + + Replaces all nodes that equal a given with a replacement node. Expressions are also replaced within subqueries; the + is changed by the replacement operations, it is not copied. The replacement node is not recursively searched for + occurrences of the to be replaced. + + + + + Maps the objects used in objects to the respective + types. This is used by when a is encountered to instantiate the + right for the given method. + + + + + Creates a and automatically registers all types implementing + from a given type sequence that offer a public static SupportedMethods field. + + A with all types with a SupportedMethods + field registered. + + + + Gets the registerable method definition from a given . A registerable method is a object + that can be registered via a call to . When the given is passed to + and its corresponding registerable method was registered, the correct node type is returned. + + The method for which the registerable method should be retrieved. + itself, unless it is a closed generic method or declared in a closed generic type. In the latter cases, + the corresponding generic method definition respectively the method declared in a generic type definition is returned. + + + + Registers the specific with the given . The given methods must either be non-generic + or open generic method definitions. If a method has already been registered before, the later registration overwrites the earlier one. + + + + + Determines whether the specified method was registered with this . + + + + + Gets the type of registered with this instance that + matches the given , returning if none can be found. + + + + + Returns the count of the registered s. + + + + + Parses an expression tree into a chain of objects after executing a sequence of + objects. + + + + + Creates a default that already has all expression node parser defined by the re-linq assembly + registered. Users can add inner providers to register their own expression node parsers. + + A default that already has all expression node parser defined by the re-linq assembly + registered. + + + + Creates a default that already has the expression tree processing steps defined by the re-linq assembly + registered. Users can insert additional processing steps. + + The tranformation provider to be used by the included + in the result set. Use to create a default provider. + + A default that already has all expression tree processing steps defined by the re-linq assembly + registered. + + + The following steps are included: + + + (parameterized with ) + + + + + + Initializes a new instance of the class with a custom and + implementation. + + The to use when parsing trees. Use + to create an instance of that already includes all + default node types. (The can be customized as needed by adding or removing + ). + The to apply to trees before parsing their nodes. Use + to create an instance of that already includes + the default steps. (The can be customized as needed by adding or removing + ). + + + + Parses the given into a chain of instances, using + to convert expressions to nodes. + + The expression tree to parse. + A chain of instances representing the . + + + + Gets the query operator represented by . If + is already a , that is the assumed query operator. If is a + and the member's getter is registered with , a corresponding + is constructed and returned. Otherwise, is returned. + + The expression to get a query operator expression for. 
+ A to be parsed as a query operator, or if the expression does not represent + a query operator. + + + + Infers the associated identifier for the source expression node contained in methodCallExpression.Arguments[0]. For example, for the + call chain "source.Where (i => i > 5)" (which actually reads "Where (source, i => i > 5"), the identifier "i" is associated + with the node generated for "source". If no identifier can be inferred, is returned. + + + + + Gets the node type provider used to parse instances in . + + The node type provider. + + + + Gets the processing steps used by to process the tree before analyzing its structure. + + The processing steps. + + + + Creates instances of classes implementing the interface via Reflection. + + + The classes implementing instantiated by this factory must implement a single constructor. The source and + constructor parameters handed to the method are passed on to the constructor; for each argument where no + parameter is passed, is passed to the constructor. + + + + + Represents the first expression in a LINQ query, which acts as the main query source. + It is generated by when an tree is parsed. + This node usually marks the end (i.e. the first node) of an chain that represents a query. + + + + + Represents an expression tree node that points to a query source represented by a . These expressions should always + point back, to a clause defined prior to the clause holding a . Otherwise, exceptions might be + thrown at runtime. + + + This particular expression overrides , i.e. it can be compared to another based + on the . + + + + + Determines whether the specified is equal to the current by + comparing the properties for reference equality. + + The to compare with the current . + + if the specified is a that points to the + same ; otherwise, false. + + + + + Gets the query source referenced by this expression. + + The referenced query source. + + + + Represents a for + . + It is generated by when an tree is parsed. + + + + + Represents a for + . + It is generated by when an tree is parsed. + When this node is used, it follows an , an , + a , or a . + + + + + Represents a for + . + It is generated by when an tree is parsed. + When this node is used, it follows an , an , + a , or a . + + + + + Represents a for . + It is generated by when an tree is parsed. + When this node is used, it usually follows (or replaces) a of an chain that + represents a query. + + + + + Represents a for the different overloads of . + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for , + , + or + . + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for + . + It is generated by when an tree is parsed. + + + + + Represents a for + . + It is generated by when an tree is parsed. + + + + + Represents a for , + , + for the Count properties of , , , and , + and for the property of arrays. + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for or . + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for or . + It is generated by when an tree is parsed. 
+ When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for , + , + or + . + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for , + , + or + . + It is generated by when an tree is parsed. + When this node is used, it marks the beginning (i.e. the last node) of an chain that represents a query. + + + + + Represents a for . + It is generated by when an tree is parsed. + When this node is used, it usually follows (or replaces) a of an chain that + represents a query. + + + + + Represents a for + . + It is generated by when an tree is parsed. + This node represents an additional query source introduced to the query. + + + + + Constitutes the bridge between re-linq and a concrete query provider implementation. Concrete providers implement this interface + and calls the respective method of the interface implementation when a query is to be executed. + + + + + Executes the given as a scalar query, i.e. as a query returning a scalar value of type . + The query ends with a scalar result operator, for example a or a . + + The type of the scalar value returned by the query. + The representing the query to be executed. Analyze this via an + . + A scalar value of type that represents the query's result. + + The difference between and is in the kind of object that is returned. + is used when a query that would otherwise return a collection result set should pick a single value from the + set, for example the first, last, minimum, maximum, or only value in the set. is used when a value is + calculated or aggregated from all the values in the collection result set. This applies to, for example, item counts, average calculations, + checks for the existence of a specific item, and so on. + + + + + Executes the given as a single object query, i.e. as a query returning a single object of type + . + The query ends with a single result operator, for example a or a . + + The type of the single value returned by the query. + The representing the query to be executed. Analyze this via an + . + If , the executor must return a default value when its result set is empty; + if , it should throw an when its result set is empty. + A single value of type that represents the query's result. + + The difference between and is in the kind of object that is returned. + is used when a query that would otherwise return a collection result set should pick a single value from the + set, for example the first, last, minimum, maximum, or only value in the set. is used when a value is + calculated or aggregated from all the values in the collection result set. This applies to, for example, item counts, average calculations, + checks for the existence of a specific item, and so on. + + + + + Executes the given as a collection query, i.e. as a query returning objects of type . + The query does not end with a scalar result operator, but it can end with a single result operator, for example + or . In such a case, the returned enumerable must yield exactly + one object (or none if the last result operator allows empty result sets). + + The type of the items returned by the query. + The representing the query to be executed. Analyze this via an + . + A scalar value of type that represents the query's result. + + + + Represents the join part of a query, adding new data items and joining them with data items from previous clauses. 
This can either + be part of or of . The semantics of the + is that of an inner join, i.e. only combinations where both an input item and a joined item exist are returned. + + + In C#, the "join" clause in the following sample corresponds to a . The adds a new + query source to the query, selecting addresses (called "a") from the source "Addresses". It associates addresses and students by + comparing the students' "AddressID" properties with the addresses' "ID" properties. "a" corresponds to and + , "Addresses" is and the left and right side of the "equals" operator are held by + and , respectively: + + var query = from s in Students + join a in Addresses on s.AdressID equals a.ID + select new { s, a }; + + + + + + Initializes a new instance of the class. + + A name describing the items generated by this . + The type of the items generated by this . + The expression that generates the inner sequence, i.e. the items of this . + An expression that selects the left side of the comparison by which source items and inner items are joined. + An expression that selects the right side of the comparison by which source items and inner items are joined. + + + + Accepts the specified visitor by calling its + method. + + The visitor to accept. + The query model in whose context this clause is visited. + The index of this clause in the 's collection. + + + + Accepts the specified visitor by calling its + method. This overload is used when visiting a that is held by a . + + The visitor to accept. + The query model in whose context this clause is visited. + The holding this instance. + + + + Clones this clause, registering its clone with the . + + The clones of all query source clauses are registered with this . + A clone of this clause. + + + + Transforms all the expressions in this clause and its child objects via the given delegate. + + The transformation object. This delegate is called for each within this + clause, and those expressions will be replaced with what the delegate returns. + + + + Gets or sets the type of the items generated by this . + + + Changing the of a can make all objects that + point to that invalid, so the property setter should be used with care. + + + + + Gets or sets a name describing the items generated by this . + + + Item names are inferred when a query expression is parsed, and they usually correspond to the variable names present in that expression. + However, note that names are not necessarily unique within a . Use names only for readability and debugging, not for + uniquely identifying objects. To match an with its references, use the + property rather than the . + + + + + Gets or sets the inner sequence, the expression that generates the inner sequence, i.e. the items of this . + + The inner sequence. + + + + Gets or sets the outer key selector, an expression that selects the right side of the comparison by which source items and inner items are joined. + + The outer key selector. + + + + Gets or sets the inner key selector, an expression that selects the left side of the comparison by which source items and inner items are joined. + + The inner key selector. + + + + Represents the orderby part of a query, ordering data items according to some . + + + In C#, the whole "orderby" clause in the following sample (including two orderings) corresponds to an : + + var query = from s in Students + orderby s.Last, s.First + select s; + + + + + + Initializes a new instance of the class. + + + + + Accepts the specified visitor by calling its method. + + The visitor to accept. 
+ The query model in whose context this clause is visited. + The index of this clause in the 's collection. + + + + Transforms all the expressions in this clause and its child objects via the given delegate. + + The transformation object. This delegate is called for each within this + clause, and those expressions will be replaced with what the delegate returns. + + + + Clones this clause. + + The clones of all query source clauses are registered with this . + A clone of this clause. + + + + Gets the instances that define how to sort the items coming from previous clauses. The order of the + in the collection defines their priorities. For example, { LastName, FirstName } would sort all items by + LastName, and only those items that have equal LastName values would be sorted by FirstName. + + + + + Represents a single ordering instruction in an . + + + + + Initializes a new instance of the class. + + The expression used to order the data items returned by the query. + The to use for sorting. + + + + Accepts the specified visitor by calling its method. + + The visitor to accept. + The query model in whose context this clause is visited. + The in whose context this item is visited. + The index of this item in the 's collection. + + + + Clones this item. + + The clones of all query source clauses are registered with this . + A clone of this item. + + + + Transforms all the expressions in this item via the given delegate. + + The transformation object. This delegate is called for each within this + item, and those expressions will be replaced with what the delegate returns. + + + + Gets or sets the expression used to order the data items returned by the query. + + The expression. + + + + Gets or sets the direction to use for ordering data items. + + + + + Preprocesses an expression tree for parsing. The preprocessing involves detection of sub-queries and VB-specific expressions. + + + + + Parses a and creates an from it. This is used by + for parsing whole expression trees. + + + + + Replaces expression patterns of the form new T { x = 1, y = 2 }.x () or + new T ( x = 1, y = 2 ).x () to 1 (or 2 if y is accessed instead of x). + Expressions are also replaced within subqueries; the is changed by the replacement operations, it is not copied. + + + + + Takes an tree and parses it into a by use of an . + It first transforms the tree into a chain of instances, and then calls + and in order to instantiate all the + s. With those, a is created and returned. + + + + + Initializes a new instance of the class, using default parameters for parsing. + The used has all relevant methods of the class + automatically registered, and the comprises partial evaluation, and default + expression transformations. See , + , and + for details. + + + + + Initializes a new instance of the class, using the given to + convert instances into s. Use this constructor if you wish to customize the + parser. To use a default parser (with the possibility to register custom node types), use the method. + + The expression tree parser. + + + + Gets the of the given . + + The expression tree to parse. + A that represents the query defined in . + + + + Applies all nodes to a , which is created by the trailing in the + chain. + + The entry point to the chain. + The clause generation context collecting context information during the parsing process. + A created by the training and transformed by each node in the + chain. + + + + Gets the used by to parse instances. + + The node type registry. 
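Editorial note, not part of the shipped XML documentation: taken together, the entry points documented in this section (a queryable derived from the re-linq base class, a query executor, and the query parser) form a minimal provider skeleton roughly like the one below. SampleQueryable and SampleQueryExecutor are invented names, the executor bodies are placeholders rather than a real translation, and the constructors mirror the ones documented above.

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Linq.Expressions;
    using Remotion.Linq;
    using Remotion.Linq.Parsing.Structure;

    // Placeholder executor: a real provider translates the QueryModel into its target
    // query language here (see the scalar/single/collection execution docs above).
    public class SampleQueryExecutor : IQueryExecutor
    {
      public T ExecuteScalar<T> (QueryModel queryModel)
      {
        throw new NotImplementedException ("translate and execute a scalar query here");
      }

      public T ExecuteSingle<T> (QueryModel queryModel, bool returnDefaultWhenEmpty)
      {
        var sequence = ExecuteCollection<T> (queryModel);
        return returnDefaultWhenEmpty ? sequence.SingleOrDefault () : sequence.Single ();
      }

      public IEnumerable<T> ExecuteCollection<T> (QueryModel queryModel)
      {
        yield break; // placeholder: no items
      }
    }

    // Entry point for queries; the two constructors mirror the ones documented above.
    public class SampleQueryable<T> : QueryableBase<T>
    {
      public SampleQueryable (IQueryParser queryParser, IQueryExecutor executor)
          : base (queryParser, executor)
      {
      }

      // Used by re-linq when query operators (Where, Select, ...) build new queries around this one.
      public SampleQueryable (IQueryProvider provider, Expression expression)
          : base (provider, expression)
      {
      }
    }

    // Usage:
    //   var db = new SampleQueryable<Student> (QueryParser.CreateDefault (), new SampleQueryExecutor ());
    //   var names = (from s in db where s.First == "Hugo" select s.Last).ToList ();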
+ + + + Gets the used by to process the tree + before analyzing its structure. + + The processor. + + + + Represents the main data source in a query, producing data items that are filtered, aggregated, projected, or otherwise processed by + subsequent clauses. + + + In C#, the first "from" clause in the following sample corresponds to the : + + var query = from s in Students + from f in s.Friends + select f; + + + + + + Initializes a new instance of the class. + + A name describing the items generated by the from clause. + The type of the items generated by the from clause. + The generating data items for this from clause. + + + + Accepts the specified visitor by calling its method. + + The visitor to accept. + The query model in whose context this clause is visited. + + + + Clones this clause, registering its clone with the . + + The clones of all query source clauses are registered with this . + A clone of this clause. + + + + Provides an abstraction of an expression tree created for a LINQ query. instances are passed to LINQ providers based + on re-linq via , but you can also use to parse an expression tree by hand or construct + a manually via its constructor. + + + The different parts of the query are mapped to clauses, see , , and + . The simplest way to process all the clauses belonging to a is by implementing + (or deriving from ) and calling . + + + + + Initializes a new instance of + + The of the query. This is the starting point of the query, generating items + that are filtered and projected by the query. + The of the query. This is the end point of + the query, it defines what is actually returned for each of the items coming from the and passing the + . After it, only the modify the result of the query. + + + + Gets an object describing the data streaming out of this . If a query ends with + the , this corresponds to . If a query has + , the data is further modified by those operators. + + Gets a object describing the data streaming out of this . + + The data streamed from a is often of type instantiated + with a specific item type, unless the + query ends with a . For example, if the query ends with a , the + result type will be . + + + + + Gets the which is used by the . + + + + + + Accepts an implementation of or , as defined by the Visitor pattern. + + + + + Returns a representation of this . + + + + + Clones this , returning a new equivalent to this instance, but with its clauses being + clones of this instance's clauses. Any in the cloned clauses that points back to another clause + in this (including its subqueries) is adjusted to point to the respective clones in the cloned + . Any subquery nested in the is also cloned. + + + + + Clones this , returning a new equivalent to this instance, but with its clauses being + clones of this instance's clauses. Any in the cloned clauses that points back to another clause + in this (including its subqueries) is adjusted to point to the respective clones in the cloned + . Any subquery nested in the is also cloned. + + The defining how to adjust instances of + in the cloned . If there is a + that points out of the being cloned, specify its replacement via this parameter. At the end of the cloning process, + this object maps all the clauses in this original to the clones created in the process. + + + + + Transforms all the expressions in this 's clauses via the given delegate. + + The transformation object. This delegate is called for each within this + , and those expressions will be replaced with what the delegate returns. 
+ + + + Returns a new name with the given prefix. The name is different from that of any added + in the . Note that clause names that are changed after the clause is added as well as names of other clauses + than from clauses are not considered when determining "unique" names. Use names only for readability and debugging, not + for uniquely identifying clauses. + + + + + Executes this via the given . By default, this indirectly calls + , but this can be modified by the . + + The to use for executing this query. + + + + Determines whether this represents an identity query. An identity query is a query without any body clauses + whose selects exactly the items produced by its . An identity query can have + . + + + if this represents an identity query; otherwise, . + + + An example for an identity query is the subquery in that is produced for the in the following + query: + + from order in ... + select order.OrderItems.Count() + + In this query, the will become a because + is treated as a query operator. The + in that has no and a trivial , + so its method returns . The outer , on the other hand, does not + have a trivial , so its method returns . + + + + + Creates a new that has this as a sub-query in its . + + The name of the new 's . + A new whose 's is a + that holds this instance. + + + + Gets or sets the query's . This is the starting point of the query, generating items that are processed by + the and projected or grouped by the . + + + + + Gets or sets the query's select clause. This is the end point of the query, it defines what is actually returned for each of the + items coming from the and passing the . After it, only the + modify the result of the query. + + + + + Gets a collection representing the query's body clauses. Body clauses take the items generated by the , + filtering (), ordering (), augmenting (), or otherwise + processing them before they are passed to the . + + + + + Gets the result operators attached to this . Result operators modify the query's result set, aggregating, + filtering, or otherwise processing the result before it is returned. + + + + + Represents the select part of a query, projecting data items according to some . + + + In C#, the "select" clause in the following sample corresponds to a . "s" (a reference to the query source "s", see + ) is the expression: + + var query = from s in Students + where s.First == "Hugo" + select s; + + + + + + Initializes a new instance of the class. + + The selector that projects the data items. + + + + Accepts the specified visitor by calling its method. + + The visitor to accept. + The query model in whose context this clause is visited. + + + + Clones this clause. + + The clones of all query source clauses are registered with this . + A clone of this clause. + + + + Transforms all the expressions in this clause and its child objects via the given delegate. + + The transformation object. This delegate is called for each within this + clause, and those expressions will be replaced with what the delegate returns. + + + + Gets an object describing the data streaming out of this . If a query ends with + the , this corresponds to the query's output data. If a query has , the data + is further modified by those operators. Use to obtain the real result type of + a query model, including the . + + Gets a object describing the data streaming out of this . + + The data streamed from a is always of type instantiated + with the type of as its generic parameter. Its corresponds to the + . 
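Editorial note, not part of the shipped XML documentation: the Accept method and visitor-based processing mentioned above are typically used with the visitor base class the docs refer to, roughly as sketched here. ClauseDumpingVisitor is an invented name, and the override signatures follow the clause Accept documentation in this section; verify them against the bundled assembly before relying on them.

    using System;
    using Remotion.Linq;
    using Remotion.Linq.Clauses;

    // Walks a parsed QueryModel and prints its main clauses.
    public class ClauseDumpingVisitor : QueryModelVisitorBase
    {
      public override void VisitMainFromClause (MainFromClause fromClause, QueryModel queryModel)
      {
        Console.WriteLine ("from {0} in {1}", fromClause.ItemName, fromClause.FromExpression);
        base.VisitMainFromClause (fromClause, queryModel);
      }

      public override void VisitWhereClause (WhereClause whereClause, QueryModel queryModel, int index)
      {
        Console.WriteLine ("where {0}", whereClause.Predicate);
        base.VisitWhereClause (whereClause, queryModel, index);
      }

      public override void VisitSelectClause (SelectClause selectClause, QueryModel queryModel)
      {
        Console.WriteLine ("select {0}", selectClause.Selector);
        base.VisitSelectClause (selectClause, queryModel);
      }
    }

    // Usage: queryModel.Accept (new ClauseDumpingVisitor ());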
+ + + + + Gets the selector defining what parts of the data items are returned by the query. + + + + + Takes an expression tree and first analyzes it for evaluatable subtrees (using ), i.e. + subtrees that can be pre-evaluated before actually generating the query. Examples for evaluatable subtrees are operations on constant + values (constant folding), access to closure variables (variables used by the LINQ query that are defined in an outer scope), or method + calls on known objects or their members. In a second step, it replaces all of the evaluatable subtrees (top-down and non-recursive) by + their evaluated counterparts. + + + This visitor visits each tree node at most twice: once via the for analysis and once + again to replace nodes if possible (unless the parent node has already been replaced). + + + + + Takes an expression tree and finds and evaluates all its evaluatable subtrees. + + + + + Evaluates an evaluatable subtree, i.e. an independent expression tree that is compilable and executable + without any data being passed in. The result of the evaluation is returned as a ; if the subtree + is already a , no evaluation is performed. + + The subtree to be evaluated. + A holding the result of the evaluation. + + + + Represents the where part of a query, filtering data items according to some . + + + In C#, the "where" clause in the following sample corresponds to a : + + var query = from s in Students + where s.First == "Hugo" + select s; + + + + + + Initializes a new instance of the class. + + The predicate used to filter data items. + + + + Accepts the specified visitor by calling its method. + + The visitor to accept. + The query model in whose context this clause is visited. + The index of this clause in the 's collection. + + + + Transforms all the expressions in this clause and its child objects via the given delegate. + + The transformation object. This delegate is called for each within this + clause, and those expressions will be replaced with what the delegate returns. + + + + Clones this clause. + + The clones of all query source clauses are registered with this . + + + + + Gets the predicate, the expression representing the where condition by which the data items are filtered + + + + + TODO: add summary + + + + + This exception is thrown if an argument is empty although it must have a content. + + + + + This exception is thrown if an argument has an invalid type. + + + + + This utility class provides methods for checking arguments. + + + Some methods of this class return the value of the parameter. In some cases, this is useful because the value will be converted to another + type: + ("o", o); + } + ]]> + In some other cases, the input value is returned unmodified. This makes it easier to use the argument checks in calls to base class constructors + or property setters: + + + + + Returns the value itself if it is not and of the specified value type. + The type that must have. + The is a . + The is an instance of another type. + + + Checks whether can be assigned to . + The cannot be assigned to . + + + + Provides functionality to calculate hash codes from values and sequences. + + + + + provides code which is common in all registry classes. + + + + + Indicates the condition parameter of the assertion method. + The method itself should be marked by attribute. + The mandatory argument of the attribute is the assertion type. 
+ + + + + + Initializes new instance of AssertionConditionAttribute + + Specifies condition type + + + + Gets condition type + + + + + Specifies assertion type. If the assertion method argument satisifes the condition, then the execution continues. + Otherwise, execution is assumed to be halted + + + + + Indicates that the marked parameter should be evaluated to true + + + + + Indicates that the marked parameter should be evaluated to false + + + + + Indicates that the marked parameter should be evaluated to null value + + + + + Indicates that the marked parameter should be evaluated to not null value + + + + + Indicates that the marked method is assertion method, i.e. it halts control flow if one of the conditions is satisfied. + To set the condition, mark one of the parameters with attribute + + + + + + Only entity marked with attribute considered used + + + + + Indicates implicit assignment to a member + + + + + Indicates implicit instantiation of a type with fixed constructor signature. + That means any unused constructor parameters won't be reported as such. + + + + + Indicates implicit instantiation of a type + + + + + Specify what is considered used implicitly when marked with or + + + + + Members of entity marked with attribute are considered used + + + + + Entity marked with attribute and all its members considered used + + + + + Indicates that the function argument should be string literal and match one of the parameters of the caller function. + For example, has such parameter. + + + + + Should be used on attributes and causes ReSharper to not mark symbols marked with such attributes as unused (as well as by other usage inspections) + + + + + Gets value indicating what is meant to be used + + + + + Indicates that IEnumarable, passed as parameter, is not enumerated. + + + + + + Indicates that the marked symbol is used implicitly (e.g. via reflection, in external library), + so this symbol will not be marked as unused (as well as by other usage inspections) + + + + + Gets value indicating what is meant to be used + + + + + Builds a string from a sequence, separating each item with a given separator string. + + + + diff --git a/packages/SharpZipLib.0.86.0/SharpZipLib.0.86.0.nupkg b/packages/SharpZipLib.0.86.0/SharpZipLib.0.86.0.nupkg new file mode 100644 index 0000000..2771748 Binary files /dev/null and b/packages/SharpZipLib.0.86.0/SharpZipLib.0.86.0.nupkg differ diff --git a/packages/SharpZipLib.0.86.0/SharpZipLib.0.86.0.nuspec b/packages/SharpZipLib.0.86.0/SharpZipLib.0.86.0.nuspec new file mode 100644 index 0000000..41019de --- /dev/null +++ b/packages/SharpZipLib.0.86.0/SharpZipLib.0.86.0.nuspec @@ -0,0 +1,19 @@ + + + + SharpZipLib + 0.86.0 + SharpZipLib + http://www.icsharpcode.net/ + http://www.icsharpcode.net/ + http://www.icsharpcode.net/OpenSource/SharpZipLib/Default.aspx + false + #ziplib (SharpZipLib, formerly NZipLib) is a Zip, GZip, Tar and BZip2 library written entirely in C# for the .NET platform. It is implemented as an assembly (installable in the GAC), and thus can easily be incorporated into other projects (in any .NET language). + #ziplib (SharpZipLib, formerly NZipLib) is a Zip, GZip, Tar and BZip2 library written entirely in C# for the .NET platform. It is implemented as an assembly (installable in the GAC), and thus can easily be incorporated into other projects (in any .NET language). 
+ + + + + + + \ No newline at end of file diff --git a/packages/SharpZipLib.0.86.0/lib/11/ICSharpCode.SharpZipLib.dll b/packages/SharpZipLib.0.86.0/lib/11/ICSharpCode.SharpZipLib.dll new file mode 100644 index 0000000..60ef0c9 Binary files /dev/null and b/packages/SharpZipLib.0.86.0/lib/11/ICSharpCode.SharpZipLib.dll differ diff --git a/packages/SharpZipLib.0.86.0/lib/20/ICSharpCode.SharpZipLib.dll b/packages/SharpZipLib.0.86.0/lib/20/ICSharpCode.SharpZipLib.dll new file mode 100644 index 0000000..fe643eb Binary files /dev/null and b/packages/SharpZipLib.0.86.0/lib/20/ICSharpCode.SharpZipLib.dll differ diff --git a/packages/SharpZipLib.0.86.0/lib/SL3/SharpZipLib.Silverlight3.dll b/packages/SharpZipLib.0.86.0/lib/SL3/SharpZipLib.Silverlight3.dll new file mode 100644 index 0000000..e3fd962 Binary files /dev/null and b/packages/SharpZipLib.0.86.0/lib/SL3/SharpZipLib.Silverlight3.dll differ diff --git a/packages/SharpZipLib.0.86.0/lib/SL4/SharpZipLib.Silverlight4.dll b/packages/SharpZipLib.0.86.0/lib/SL4/SharpZipLib.Silverlight4.dll new file mode 100644 index 0000000..f915af4 Binary files /dev/null and b/packages/SharpZipLib.0.86.0/lib/SL4/SharpZipLib.Silverlight4.dll differ