Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Initial commit

  • Loading branch information...
commit 682d4032591a033060e3683c3f0c300c9de4d87e 1 parent 1cd1252
mchampan authored
Showing with 7,732 additions and 0 deletions.
  1. +70 −0 blocks/search/block_search.php
  2. +19 −0 blocks/search/config_global.html
  3. +120 −0 mod/wiki/lib.php
  4. +22 −0 search/README.txt
  5. +30 −0 search/Zend/Exception.php
  6. +15 −0 search/Zend/IMPORTANT.txt
  7. +27 −0 search/Zend/LICENSE.txt
  8. +36 −0 search/Zend/Search/Exception.php
  9. +614 −0 search/Zend/Search/Lucene.php
  10. +96 −0 search/Zend/Search/Lucene/Analysis/Analyzer.php
  11. +75 −0 search/Zend/Search/Lucene/Analysis/Analyzer/Common.php
  12. +78 −0 search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
  13. +46 −0 search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php
  14. +171 −0 search/Zend/Search/Lucene/Analysis/Token.php
  15. +47 −0 search/Zend/Search/Lucene/Analysis/TokenFilter.php
  16. +57 −0 search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
  17. +111 −0 search/Zend/Search/Lucene/Document.php
  18. +32 −0 search/Zend/Search/Lucene/EncodingConverter.php
  19. +36 −0 search/Zend/Search/Lucene/Exception.php
  20. +161 −0 search/Zend/Search/Lucene/Field.php
  21. +45 −0 search/Zend/Search/Lucene/Index/FieldInfo.php
  22. +575 −0 search/Zend/Search/Lucene/Index/SegmentInfo.php
  23. +519 −0 search/Zend/Search/Lucene/Index/SegmentWriter.php
  24. +72 −0 search/Zend/Search/Lucene/Index/Term.php
  25. +79 −0 search/Zend/Search/Lucene/Index/TermInfo.php
  26. +331 −0 search/Zend/Search/Lucene/Index/Writer.php
  27. +100 −0 search/Zend/Search/Lucene/Search/Query.php
  28. +439 −0 search/Zend/Search/Lucene/Search/Query/MultiTerm.php
  29. +426 −0 search/Zend/Search/Lucene/Search/Query/Phrase.php
  30. +128 −0 search/Zend/Search/Lucene/Search/Query/Term.php
  31. +108 −0 search/Zend/Search/Lucene/Search/QueryHit.php
  32. +142 −0 search/Zend/Search/Lucene/Search/QueryParser.php
  33. +104 −0 search/Zend/Search/Lucene/Search/QueryToken.php
  34. +164 −0 search/Zend/Search/Lucene/Search/QueryTokenizer.php
  35. +553 −0 search/Zend/Search/Lucene/Search/Similarity.php
  36. +105 −0 search/Zend/Search/Lucene/Search/Similarity/Default.php
  37. +61 −0 search/Zend/Search/Lucene/Search/Weight.php
  38. +135 −0 search/Zend/Search/Lucene/Search/Weight/MultiTerm.php
  39. +141 −0 search/Zend/Search/Lucene/Search/Weight/Phrase.php
  40. +146 −0 search/Zend/Search/Lucene/Search/Weight/Term.php
  41. +120 −0 search/Zend/Search/Lucene/Storage/Directory.php
  42. +272 −0 search/Zend/Search/Lucene/Storage/Directory/Filesystem.php
  43. +371 −0 search/Zend/Search/Lucene/Storage/File.php
  44. +171 −0 search/Zend/Search/Lucene/Storage/File/Filesystem.php
  45. +14 −0 search/Zend/Search/TODO.txt
  46. +15 −0 search/db/mysql.sql
  47. +21 −0 search/db/postgres7.sql
  48. +12 −0 search/documents/document.php
  49. +28 −0 search/documents/wiki_document.php
  50. +10 −0 search/index.php
  51. +152 −0 search/indexer.php
  52. +44 −0 search/indexersplash.php
  53. +59 −0 search/lib.php
  54. +116 −0 search/query.php
  55. +91 −0 search/stats.php
70 blocks/search/block_search.php
View
@@ -0,0 +1,70 @@
+<?php
+
+ /* This is the global search shortcut block - a single query can be entered, and
+ the user will be redirected to the query page where they can enter more
+ advanced queries, and view the results of their search. When searching from
+ this block, the broadest possible selection of documents is searched.
+
+ Author: Michael Champanis (mchampan)
+ Date: 2006 06 23
+
+ Todo: make strings -> get_string()
+ */
+
+ class block_search extends block_base {
+
+     // Sets the block title and version number.
+     function init() {
+         $this->title = "Global Search"; //get_string()
+         $this->version = 20060625;
+     } //init
+
+     // only one instance of this block is required
+     function instance_allow_multiple() {
+         return false;
+     } //instance_allow_multiple
+
+     // label and button values can be set in admin
+     function has_config() {
+         return true;
+     } //has_config
+
+     // Builds the block content once and returns the cached object on later calls.
+     // The content is a one-field search form posting to search/query.php, or a
+     // notice when the running PHP is older than 5.0.0.
+     function get_content() {
+         global $CFG;
+
+         //cache block contents
+         if ($this->content !== NULL) {
+             return $this->content;
+         } //if
+
+         $this->content = new stdClass;
+
+         //lazy check for the moment
+         if (check_php_version("5.0.0")) {
+             //fetch values if defined in admin, otherwise use defaults
+             $label  = (isset($CFG->block_search_text)) ? $CFG->block_search_text : "Search Moodle";
+             $button = (isset($CFG->block_search_button)) ? $CFG->block_search_button : "Go";
+
+             //both values are free text typed by an admin and end up in HTML
+             //text/attribute context, so escape them before output
+             $label  = htmlspecialchars($label, ENT_QUOTES);
+             $button = htmlspecialchars($button, ENT_QUOTES);
+
+             //the block is rendered on pages in many directories, so a relative
+             //action would only resolve correctly from the site root
+             $action = htmlspecialchars($CFG->wwwroot.'/search/query.php', ENT_QUOTES);
+
+             //basic search form (the former length="50" attribute was dropped:
+             //it is not a valid input attribute and browsers ignored it)
+             $this->content->text =
+                 '<form name="query" method="post" action="'.$action.'">'
+                 . '<label for="block_search_query">'.$label.'</label>'
+                 . '<input type="text" id="block_search_query" name="query_string" value=""/>'
+                 . '<input type="submit" value="'.$button.'"/>'
+                 . '</form>';
+         } else {
+             $this->content->text = "Sorry folks, PHP 5 is needed for the new search module.";
+         } //else
+
+         //no footer, thanks
+         $this->content->footer = '';
+
+         return $this->content;
+     } //get_content
+
+     // Intentionally empty: this block has no per-instance setup.
+     function specialisation() {
+         //empty!
+     } //specialisation
+
+ } //block_search
+
+?>
19 blocks/search/config_global.html
View
@@ -0,0 +1,19 @@
+<div style="text-align:center;">
+ <label for="block_search_text">Search label</label>
+ <input type="text" id="block_search_text" name="block_search_text" value="<?php
+ // show the stored label, falling back to the default the block itself uses
+ if(isset($CFG->block_search_text)) {
+ p($CFG->block_search_text);
+ } else {
+ p("Search Moodle");
+ } ?>"/><br>
+
+ <label for="block_search_button">Button label</label>
+ <input type="text" id="block_search_button" name="block_search_button" value="<?php
+ // show the stored button caption, falling back to the block's default
+ if(isset($CFG->block_search_button)) {
+ p($CFG->block_search_button);
+ } else {
+ p("Go");
+ } ?>"/><br><br>
+
+ <input type="submit" value="<?php print_string('savechanges'); ?>" />
+</div>
120 mod/wiki/lib.php
View
@@ -352,6 +352,126 @@ function wiki_get_entries(&$wiki, $byindex=NULL) {
}
}
+
+/*==== Global search modifications
+ * Author: Michael Champanis (mchampan)
+ * Last date: 2006 06 25
+ * These modifications allow wiki documents to be indexed in the new
+ * search engine module - they are probably not final, and as such
+ * shouldn't be used by other stuff for the time being
+ **/
+
+/**
+ * Looks up a single wiki page record (rescued and converted from ewikimoodlelib.php).
+ *
+ * Without a usable $version the row with the highest version number is
+ * returned, because the query is sorted by "version DESC" and limited to one row.
+ *
+ * @param object $entry    wiki entry record; only $entry->id is read
+ * @param string $pagename page name to match
+ * @param int    $version  specific version to fetch; honoured only when it is
+ *                         an integer greater than zero
+ * @return object|false    the wiki_pages row with ->meta unserialized, or false
+ *                         when nothing matched
+ */
+function wiki_get_latest_page(&$entry, $pagename, $version=0) {
+    global $CFG;
+
+    //need something like this in datalib.php?
+    if ($CFG->dbtype == 'mysql') {
+        $escape = 'mysql_real_escape_string';
+    } else if ($CFG->dbtype == 'postgres7') {
+        $escape = 'pg_escape_string';
+    } else {
+        $escape = 'addslashes';
+    } //else
+
+    $quotedname = "'".$escape($pagename)."'";
+
+    $versionclause = '';
+    if (is_int($version) and $version > 0) {
+        $versionclause = "AND (version=$version)";
+    } //if
+
+    $select = "(pagename=$quotedname) AND wiki=".$entry->id." $versionclause ";
+
+    //change this to recordset_select, as per http://docs.moodle.org/en/Datalib_Notes
+    $records = get_records_select('wiki_pages', $select, 'version DESC', '*', 0, 1);
+    if (!$records) {
+        return false;
+    } //if
+
+    //keep the last row of the (one-row) result
+    $page = end($records);
+    $page->meta = @unserialize($page->meta);
+
+    return $page;
+} //wiki_get_latest_page
+
+/**
+ * Fetches all wiki_pages rows whose "wiki" field equals the entry id,
+ * which includes old versions of each page.
+ *
+ * @param object $entry wiki entry record; only $entry->id is read
+ * @return mixed        whatever get_records() returns for that lookup
+ */
+function wiki_get_pages(&$entry) {
+    $allversions = get_records('wiki_pages', 'wiki', $entry->id);
+
+    return $allversions;
+} //wiki_get_pages
+
+/**
+ * Fetches the latest version of every page belonging to a wiki entry.
+ *
+ * The distinct page names are read first, then each one is resolved through
+ * wiki_get_latest_page(). Names whose lookup fails are left out, so every
+ * element of the returned array is a page object.
+ *
+ * @param object $entry wiki entry record; only $entry->id is read here
+ * @return array|false  list of page objects (possibly empty), or false when
+ *                      the page-name recordset could not be obtained
+ */
+function wiki_get_latest_pages(&$entry) {
+    //== (My)SQL for this
+    /* select * from wiki_pages
+       inner join
+       (select wiki_pages.pagename, max(wiki_pages.version) as ver
+       from wiki_pages group by pagename) as a
+       on ((wiki_pages.version = a.ver) and
+       (wiki_pages.pagename like a.pagename)) */
+
+    $pages = array();
+
+    //http://moodle.org/bugs/bug.php?op=show&bugid=5877&pos=0
+    //if ($ids = get_records('wiki_pages', 'wiki', $entry->id, '', 'distinct pagename')) {
+    if ($rs = get_recordset('wiki_pages', 'wiki', $entry->id, '', 'distinct pagename')) {
+        $ids = $rs->GetRows();
+        //--
+        if (is_array($ids)) {
+            foreach ($ids as $id) {
+                $page = wiki_get_latest_page($entry, $id[0]);
+
+                //wiki_get_latest_page() returns false when nothing matched;
+                //storing that would make callers dereference a non-object
+                if ($page !== false) {
+                    $pages[] = $page;
+                } //if
+            } //foreach
+        } //if
+    } else {
+        return false;
+    } //else
+
+    return $pages;
+} //wiki_get_latest_pages
+
+/**
+ * Returns the result of get_all_instances_in_courses() for module "wiki"
+ * over the course list obtained from get_courses().
+ */
+function wiki_iterator() {
+    $courses = get_courses();
+
+    return get_all_instances_in_courses("wiki", $courses);
+} //wiki_iterator
+
+/**
+ * Builds the list of search documents for one wiki.
+ *
+ * For every entry of the wiki the latest version of each page is fetched and
+ * wrapped in a WikiSearchDocument, skipping pages with empty content.
+ * (wiki_get_pages() could be used instead to index old versions as well.)
+ *
+ * @param object $wiki wiki record, handed to wiki_get_entries()
+ * @return array       list of WikiSearchDocument objects, empty when the wiki
+ *                     has no entries or no non-empty pages
+ */
+function wiki_get_content_for_index(&$wiki) {
+    $documents = array();
+
+    $entries = wiki_get_entries($wiki);
+
+    //nothing to iterate when the lookup did not produce an array
+    if (!is_array($entries)) {
+        return $documents;
+    } //if
+
+    foreach ($entries as $entry) {
+        //latest pages only
+        $pages = wiki_get_latest_pages($entry);
+
+        //false means the page list could not be read for this entry
+        if (!is_array($pages)) {
+            continue;
+        } //if
+
+        foreach ($pages as $page) {
+            if (is_object($page) and strlen($page->content) > 0) {
+                $documents[] = new WikiSearchDocument($page, $entry->wikiid, $entry->course, $entry->userid, $entry->groupid);
+            } //if
+        } //foreach
+    } //foreach
+
+    return $documents;
+} //wiki_get_content_for_index
+
+/*==== Global search modifications end */
+
+
function wiki_get_default_entry(&$wiki, &$course, $userid=0, $groupid=0) {
/// Returns the wiki entry according to the wiki type.
/// Optionally, will return wiki entry for $userid student wiki, or
22 search/README.txt
View
@@ -0,0 +1,22 @@
+This is the initial release (prototype) of Moodle's new search module -
+so basically watch out for sharp edges.
+
+The structure has not been finalised, but this is what is working at the
+moment; once I start looking at other content to index, it will most likely
+change. I don't recommend trying to make your own content modules indexable,
+at least not until the whole flow is finalised. I will be implementing the
+functions needed to index all of the default content modules on Moodle, so
+expect that around mid-August.
+
+Wiki pages were my goal for this release: they can be indexed and searched,
+but not updated or deleted at this stage (I was waiting for ZF 0.14, actually).
+
+I still need to check the PostgreSQL SQL file: I don't have a PG7 install lying
+around to test on, so the script is untested.
+
+To index for the first time, login as an admin user and browse to /search/index.php
+or /search/stats.php - there will be a message and a link telling you to go index.
+
+-- Michael Champanis (mchampan)
+ cynnical@gmail.com
+ Summer of Code 2006
30 search/Zend/Exception.php
View
@@ -0,0 +1,30 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/**
+ * Base exception class of the framework: an empty subclass of PHP's
+ * built-in Exception that adds no members of its own.
+ *
+ * @category Zend
+ * @package Zend
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Exception extends Exception
+{}
+
15 search/Zend/IMPORTANT.txt
View
@@ -0,0 +1,15 @@
+We are running cutting-edge (i.e. HEAD) Zend Framework:
+ URL: http://framework.zend.com/svn/framework/trunk
+ Revision: 696
+ Last Changed Rev: 696
+ Last Changed Date: 2006-06-23 02:14:54 +0200 (Fri, 23 Jun 2006)
+
+The Zend Framework copy present in this directory contains only the minimum
+needed to run Zend_Search_Lucene - I don't foresee any problems, since the
+license is new BSD...
+
+To obtain a full Zend Framework package, please visit:
+ http://framework.zend.com/
+
+Or alternatively check it out from SVN:
+ svn checkout http://framework.zend.com/svn/framework/trunk
27 search/Zend/LICENSE.txt
View
@@ -0,0 +1,27 @@
+Copyright (c) 2006, Zend Technologies USA, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of Zend Technologies USA, Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 search/Zend/Search/Exception.php
View
@@ -0,0 +1,36 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/**
+ * Framework base exception
+ */
+require_once 'Zend/Exception.php';
+
+
+/**
+ * Exception type for the Zend_Search package: an empty subclass of
+ * Zend_Exception that adds no members of its own.
+ *
+ * @category Zend
+ * @package Zend_Search
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Exception extends Zend_Exception
+{}
+
614 search/Zend/Search/Lucene.php
View
@@ -0,0 +1,614 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Document */
+require_once 'Zend/Search/Lucene/Document.php';
+
+/** Zend_Search_Lucene_Storage_Directory */
+require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
+
+/** Zend_Search_Lucene_Index_Term */
+require_once 'Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Index_TermInfo */
+require_once 'Zend/Search/Lucene/Index/TermInfo.php';
+
+/** Zend_Search_Lucene_Index_SegmentInfo */
+require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
+
+/** Zend_Search_Lucene_Index_FieldInfo */
+require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
+
+/** Zend_Search_Lucene_Index_Writer */
+require_once 'Zend/Search/Lucene/Index/Writer.php';
+
+/** Zend_Search_Lucene_Search_QueryParser */
+require_once 'Zend/Search/Lucene/Search/QueryParser.php';
+
+/** Zend_Search_Lucene_Search_QueryHit */
+require_once 'Zend/Search/Lucene/Search/QueryHit.php';
+
+/** Zend_Search_Lucene_Search_Similarity */
+require_once 'Zend/Search/Lucene/Search/Similarity.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene
+{
+ /**
+ * File system adapter.
+ *
+ * @var Zend_Search_Lucene_Storage_Directory
+ */
+ private $_directory = null;
+
+ /**
+ * File system adapter closing option
+ *
+ * @var boolean
+ */
+ private $_closeDirOnExit = true;
+
+ /**
+ * Writer for this index, not instantiated unless required.
+ *
+ * @var Zend_Search_Lucene_Index_Writer
+ */
+ private $_writer = null;
+
+ /**
+ * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
+ *
+ * @var array Zend_Search_Lucene_Index_SegmentInfo
+ */
+ private $_segmentInfos = array();
+
+ /**
+ * Number of documents in this index.
+ *
+ * @var integer
+ */
+ private $_docCount = 0;
+
+ /**
+ * Flag for index changes
+ *
+ * @var boolean
+ */
+ private $_hasChanges = false;
+
+ /**
+ * Opens the index.
+ *
+ * IndexReader constructor needs Directory as a parameter. It should be
+ * a string with a path to the index folder or a Directory object.
+ *
+ * A Zend_Search_Lucene_Storage_Directory_Filesystem instance is used as is
+ * and is not closed by the destructor; any other value is passed to a new
+ * Filesystem directory object, which the destructor closes.
+ *
+ * The 'segments' file is then read in this order: format marker, version,
+ * counter, number of segments, and one (name, size) pair per segment. The
+ * sizes are summed into the document count.
+ *
+ * NOTE(review): (int)0xFFFFFFFF equals -1 only on 32-bit PHP builds;
+ * confirm that readInt() yields a matching value on 64-bit builds.
+ *
+ * @param mixed $directory path string or Filesystem directory object
+ * @param boolean $create when true, an index writer is instantiated with
+ * true as its second argument before the segments file is read
+ * (presumably "create a new index" - confirm in Index_Writer)
+ * @throws Zend_Search_Exception when $directory is null
+ * @throws Zend_Search_Lucene_Exception when the format marker is wrong
+ */
+ public function __construct($directory = null, $create = false)
+ {
+ if ($directory === null) {
+ throw new Zend_Search_Exception('No index directory specified');
+ }
+
+ if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
+ $this->_directory = $directory;
+ $this->_closeDirOnExit = false;
+ } else {
+ $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
+ $this->_closeDirOnExit = true;
+ }
+
+ if ($create) {
+ $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, true);
+ } else {
+ $this->_writer = null;
+ }
+
+ $this->_segmentInfos = array();
+
+ $segmentsFile = $this->_directory->getFileObject('segments');
+
+ $format = $segmentsFile->readInt();
+
+ if ($format != (int)0xFFFFFFFF) {
+ throw new Zend_Search_Lucene_Exception('Wrong segments file format');
+ }
+
+ // read version
+ $segmentsFile->readLong();
+
+ // read counter
+ $segmentsFile->readInt();
+
+ $segments = $segmentsFile->readInt();
+
+ $this->_docCount = 0;
+
+ // read segmentInfos
+ for ($count = 0; $count < $segments; $count++) {
+ $segName = $segmentsFile->readString();
+ $segSize = $segmentsFile->readInt();
+ $this->_docCount += $segSize;
+
+ $this->_segmentInfos[$count] =
+ new Zend_Search_Lucene_Index_SegmentInfo($segName,
+ $segSize,
+ $this->_directory);
+ }
+ }
+
+
+ /**
+ * Object destructor.
+ *
+ * Commits pending changes, then closes the directory object when it was
+ * created by the constructor (a directory object supplied by the caller
+ * is left open).
+ */
+ public function __destruct()
+ {
+ $this->commit();
+
+ if ($this->_closeDirOnExit) {
+ $this->_directory->close();
+ }
+ }
+
+    /**
+     * Returns the Zend_Search_Lucene_Index_Writer of this index, creating
+     * it on the index directory the first time it is needed.
+     *
+     * @return Zend_Search_Lucene_Index_Writer
+     */
+    public function getIndexWriter()
+    {
+        if ($this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
+            return $this->_writer;
+        }
+
+        $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory);
+
+        return $this->_writer;
+    }
+
+
+ /**
+ * Returns the Zend_Search_Lucene_Storage_Directory instance for this index,
+ * i.e. the object set up by the constructor.
+ *
+ * @return Zend_Search_Lucene_Storage_Directory
+ */
+ public function getDirectory()
+ {
+ return $this->_directory;
+ }
+
+
+ /**
+ * Returns the total number of documents in this index.
+ *
+ * The value is the sum of the segment sizes read from the segments file,
+ * increased by the size of every segment reported by the writer on commit.
+ *
+ * @return integer
+ */
+ public function count()
+ {
+ return $this->_docCount;
+ }
+
+
+    /**
+     * Performs a query against the index and returns an array
+     * of Zend_Search_Lucene_Search_QueryHit objects.
+     * Input is a string or Zend_Search_Lucene_Search_Query.
+     *
+     * Pending changes are committed first. Every document number below
+     * count() is scored; documents with a non-zero score become hits. Hits
+     * are ordered by descending score, and by ascending document id when
+     * scores are equal.
+     *
+     * @param mixed $query query string or Zend_Search_Lucene_Search_Query
+     * @return array list of Zend_Search_Lucene_Search_QueryHit, best first
+     * @throws Zend_Search_Lucene_Exception when $query is neither a string
+     *         nor a Zend_Search_Lucene_Search_Query
+     */
+    public function find($query)
+    {
+        if (is_string($query)) {
+            $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
+        }
+
+        if (!$query instanceof Zend_Search_Lucene_Search_Query) {
+            throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
+        }
+
+        $this->commit();
+
+        $hits   = array();
+        $scores = array();
+        $ids    = array();
+
+        $docNum = $this->count();
+        for ($count = 0; $count < $docNum; $count++) {
+            $docScore = $query->score($count, $this);
+            if ($docScore != 0) {
+                $hit = new Zend_Search_Lucene_Search_QueryHit($this);
+                $hit->id = $count;
+                $hit->score = $docScore;
+
+                $hits[]   = $hit;
+                $scores[] = $docScore;
+                $ids[]    = $count;
+            }
+        }
+
+        if (count($hits) == 0) {
+            return $hits;
+        }
+
+        // The unique document ids act as the tie-breaker, so array_multisort
+        // never has to compare the hit objects themselves on equal scores.
+        array_multisort($scores, SORT_DESC, SORT_REGULAR,
+                        $ids,    SORT_ASC,  SORT_NUMERIC,
+                        $hits);
+
+        return $hits;
+    }
+
+
+    /**
+     * Collects the field names of every segment of this index by merging
+     * the arrays returned by each segment's getFields().
+     *
+     * NOTE(review): the result is only free of duplicates if getFields()
+     * returns arrays keyed by field name, since array_merge() collapses
+     * duplicate string keys but not duplicate values - confirm.
+     *
+     * @param boolean $indexed passed through to each segment's getFields()
+     * @return array
+     */
+    public function getFieldNames($indexed = false)
+    {
+        $names = array();
+
+        foreach ($this->_segmentInfos as $segment) {
+            $segmentFields = $segment->getFields($indexed);
+            $names = array_merge($names, $segmentFields);
+        }
+
+        return $names;
+    }
+
+
+    /**
+     * Returns a Zend_Search_Lucene_Document object for the document
+     * number $id in this index.
+     *
+     * The segment holding the document is found by walking the segment
+     * sizes. The document's entry in that segment's '.fdx' file gives the
+     * position of its field data in the '.fdt' file, from which every field
+     * is read and added to the returned document.
+     *
+     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
+     * @return Zend_Search_Lucene_Document
+     * @throws Zend_Search_Lucene_Exception when $id is negative or not
+     *         smaller than the number of documents in the index
+     */
+    public function getDocument($id)
+    {
+        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
+            /* @var $id Zend_Search_Lucene_Search_QueryHit */
+            $id = $id->id;
+        }
+
+        // A negative id would otherwise be mapped to the first segment and
+        // produce a negative seek offset below.
+        if ($id < 0 || $id >= $this->_docCount) {
+            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
+        }
+
+        // Locate the segment whose id range contains $id.
+        $segCount = 0;
+        $nextSegmentStartId = $this->_segmentInfos[ 0 ]->count();
+        while ($nextSegmentStartId <= $id) {
+            $segCount++;
+            $nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count();
+        }
+        $segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count();
+
+        // One 8-byte entry per document: position of its data in '.fdt'.
+        $fdxFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdx');
+        $fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR );
+        $fieldValuesPosition = $fdxFile->readLong();
+
+        $fdtFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdt');
+        $fdtFile->seek( $fieldValuesPosition, SEEK_CUR );
+        $fieldCount = $fdtFile->readVInt();
+
+        $doc = new Zend_Search_Lucene_Document();
+        for ($count = 0; $count < $fieldCount; $count++) {
+            $fieldNum = $fdtFile->readVInt();
+            $bits = $fdtFile->readByte();
+
+            $fieldInfo = $this->_segmentInfos[ $segCount ]->getField($fieldNum);
+
+            // Bit 2 selects binary vs. text data; bit 1 is handed to the
+            // field constructor as its last argument.
+            if (!($bits & 2)) { // Text data
+                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
+                                                      $fdtFile->readString(),
+                                                      true,
+                                                      $fieldInfo->isIndexed,
+                                                      $bits & 1 );
+            } else {
+                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
+                                                      $fdtFile->readBinary(),
+                                                      true,
+                                                      $fieldInfo->isIndexed,
+                                                      $bits & 1 );
+            }
+
+            $doc->addField($field);
+        }
+
+        return $doc;
+    }
+
+
+ /**
+ * Returns an array of all the documents which contain term.
+ *
+ * For each segment that knows the term, docFreq entries are read from the
+ * segment's '.frq' file starting at the term's freqPointer. Each entry is
+ * a VInt: an odd value encodes a document delta of (value-1)/2 with no
+ * further data, an even value encodes a delta of value/2 followed by one
+ * more VInt (the frequency), which is read and discarded here. Deltas are
+ * accumulated into segment-local ids and offset by the number of
+ * documents in the preceding segments.
+ *
+ * @param Zend_Search_Lucene_Index_Term $term
+ * @return array list of index-wide document ids
+ */
+ public function termDocs(Zend_Search_Lucene_Index_Term $term)
+ {
+ $result = array();
+ $segmentStartDocId = 0;
+
+ foreach ($this->_segmentInfos as $segInfo) {
+ $termInfo = $segInfo->getTermInfo($term);
+
+ if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
+ $segmentStartDocId += $segInfo->count();
+ continue;
+ }
+
+ $frqFile = $segInfo->openCompoundFile('.frq');
+ $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
+ $docId = 0;
+ for( $count=0; $count < $termInfo->docFreq; $count++ ) {
+ $docDelta = $frqFile->readVInt();
+ if( $docDelta % 2 == 1 ) {
+ $docId += ($docDelta-1)/2;
+ } else {
+ $docId += $docDelta/2;
+ // read freq
+ $frqFile->readVInt();
+ }
+
+ $result[] = $segmentStartDocId + $docId;
+ }
+
+ $segmentStartDocId += $segInfo->count();
+ }
+
+ return $result;
+ }
+
+
+ /**
+ * Returns an array of all term positions in the documents.
+ * Return array structure: array( docId => array( pos1, pos2, ...), ...)
+ *
+ * Works in two passes per segment. First the '.frq' file is read from the
+ * term's freqPointer to build a map of segment-local document id to term
+ * frequency (an odd VInt means delta (value-1)/2 and frequency 1, an even
+ * VInt means delta value/2 followed by a VInt frequency). Then the '.prx'
+ * file is read from the term's proxPointer: for each document, in the
+ * same order, "frequency" VInts are read and accumulated into positions.
+ * Keys of the result are index-wide document ids.
+ *
+ * @param Zend_Search_Lucene_Index_Term $term
+ * @return array
+ */
+ public function termPositions(Zend_Search_Lucene_Index_Term $term)
+ {
+ $result = array();
+ $segmentStartDocId = 0;
+ foreach( $this->_segmentInfos as $segInfo ) {
+ $termInfo = $segInfo->getTermInfo($term);
+
+ if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
+ $segmentStartDocId += $segInfo->count();
+ continue;
+ }
+
+ $frqFile = $segInfo->openCompoundFile('.frq');
+ $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
+ $freqs = array();
+ $docId = 0;
+
+ for( $count = 0; $count < $termInfo->docFreq; $count++ ) {
+ $docDelta = $frqFile->readVInt();
+ if( $docDelta % 2 == 1 ) {
+ $docId += ($docDelta-1)/2;
+ $freqs[ $docId ] = 1;
+ } else {
+ $docId += $docDelta/2;
+ $freqs[ $docId ] = $frqFile->readVInt();
+ }
+ }
+
+ $prxFile = $segInfo->openCompoundFile('.prx');
+ $prxFile->seek($termInfo->proxPointer,SEEK_CUR);
+ foreach ($freqs as $docId => $freq) {
+ $termPosition = 0;
+ $positions = array();
+
+ for ($count = 0; $count < $freq; $count++ ) {
+ $termPosition += $prxFile->readVInt();
+ $positions[] = $termPosition;
+ }
+
+ $result[ $segmentStartDocId + $docId ] = $positions;
+ }
+
+ $segmentStartDocId += $segInfo->count();
+ }
+
+ return $result;
+ }
+
+
+    /**
+     * Counts the documents of this index that contain $term by adding up
+     * the docFreq of the term in every segment that has a term info for it.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @return integer
+     */
+    public function docFreq(Zend_Search_Lucene_Index_Term $term)
+    {
+        $total = 0;
+
+        foreach ($this->_segmentInfos as $segment) {
+            $info = $segment->getTermInfo($term);
+            if ($info === null) {
+                continue;
+            }
+
+            $total += $info->docFreq;
+        }
+
+        return $total;
+    }
+
+
+ /**
+ * Retrieve the similarity used by the index reader.
+ *
+ * Always the value of Zend_Search_Lucene_Search_Similarity::getDefault().
+ *
+ * @return Zend_Search_Lucene_Search_Similarity
+ */
+ public function getSimilarity()
+ {
+ return Zend_Search_Lucene_Search_Similarity::getDefault();
+ }
+
+
+ /**
+ * Returns a normalization factor for "field, document" pair.
+ *
+ * The segment containing $id is located by walking the segment sizes and
+ * the request is delegated to that segment's norm() with the
+ * segment-local document number.
+ *
+ * @param integer $id index-wide document number
+ * @param string $fieldName
+ * @return mixed null when $id is not below the document count, 0 when the
+ * document is marked as deleted in its segment, otherwise the value
+ * returned by the segment's norm()
+ */
+ public function norm( $id, $fieldName )
+ {
+ if ($id >= $this->_docCount) {
+ return null;
+ }
+
+ $segmentStartId = 0;
+ foreach ($this->_segmentInfos as $segInfo) {
+ if ($segmentStartId + $segInfo->count() > $id) {
+ break;
+ }
+
+ $segmentStartId += $segInfo->count();
+ }
+
+ if ($segInfo->isDeleted($id - $segmentStartId)) {
+ return 0;
+ }
+
+ return $segInfo->norm($id - $segmentStartId, $fieldName);
+ }
+
+    /**
+     * Tells whether at least one segment of this index reports deletions.
+     *
+     * @return boolean true as soon as one segment's hasDeletions() is true
+     */
+    public function hasDeletions()
+    {
+        $found = false;
+
+        foreach ($this->_segmentInfos as $segment) {
+            if ($segment->hasDeletions()) {
+                $found = true;
+                break;
+            }
+        }
+
+        return $found;
+    }
+
+
+    /**
+     * Deletes a document from the index.
+     * $id is an internal document id
+     *
+     * The segment holding the document is found by walking the segment
+     * sizes; the deletion is recorded on that segment and the index is
+     * flagged as changed so that commit() writes it out.
+     *
+     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
+     * @throws Zend_Search_Lucene_Exception when $id is negative or not
+     *         smaller than the number of documents in the index
+     */
+    public function delete($id)
+    {
+        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
+            /* @var $id Zend_Search_Lucene_Search_QueryHit */
+            $id = $id->id;
+        }
+
+        // A negative id would otherwise be passed on to the first segment
+        // as a negative segment-local document number.
+        if ($id < 0 || $id >= $this->_docCount) {
+            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
+        }
+
+        // Locate the segment whose id range contains $id.
+        $segCount = 0;
+        $nextSegmentStartId = $this->_segmentInfos[ 0 ]->count();
+        while ($nextSegmentStartId <= $id) {
+            $segCount++;
+            $nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count();
+        }
+
+        $this->_hasChanges = true;
+        $segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count();
+        $this->_segmentInfos[ $segCount ]->delete($id - $segmentStartId);
+    }
+
+
+
+    /**
+     * Hands a document to the index writer, which is created on first use.
+     *
+     * @param Zend_Search_Lucene_Document $document
+     */
+    public function addDocument(Zend_Search_Lucene_Document $document)
+    {
+        $writer = $this->getIndexWriter();
+        $writer->addDocument($document);
+    }
+
+
+    /**
+     * Commit changes resulting from delete() or undeleteAll() operations,
+     * and fold the index writer's committed segments into this reader.
+     *
+     * The writer's commit() result is treated as a map of segment name to
+     * segment info: a non-null info is a segment to add, a null value means
+     * the segment of that name is to be dropped. The document count follows
+     * both additions and removals, and the segment list is re-indexed after
+     * a removal because getDocument() and delete() address it by
+     * consecutive integer keys starting at 0.
+     *
+     * @todo delete() and undeleteAll processing.
+     */
+    public function commit()
+    {
+        if ($this->_hasChanges) {
+            foreach ($this->_segmentInfos as $segInfo) {
+                $segInfo->writeChanges();
+            }
+
+            $this->_hasChanges = false;
+        }
+
+        if ($this->_writer !== null) {
+            $segmentsRemoved = false;
+
+            foreach ($this->_writer->commit() as $segmentName => $segmentInfo) {
+                if ($segmentInfo !== null) {
+                    $this->_segmentInfos[] = $segmentInfo;
+                    $this->_docCount += $segmentInfo->count();
+                } else {
+                    foreach ($this->_segmentInfos as $segId => $segInfo) {
+                        if ($segInfo->getName() == $segmentName) {
+                            // mirror of the addition above: documents of a
+                            // dropped segment no longer count
+                            $this->_docCount -= $segInfo->count();
+                            unset($this->_segmentInfos[$segId]);
+                            $segmentsRemoved = true;
+                        }
+                    }
+                }
+            }
+
+            if ($segmentsRemoved) {
+                // close the gaps left by unset(), keeping segment order
+                $this->_segmentInfos = array_values($this->_segmentInfos);
+            }
+        }
+    }
+
+
+ /*************************************************************************
+ @todo UNIMPLEMENTED
+ *************************************************************************/
+
+ /**
+ * Returns an array of all terms in this index.
+ *
+ * Not implemented yet: currently always returns an empty array.
+ *
+ * @todo Implementation
+ * @return array
+ */
+ public function terms()
+ {
+ return array();
+ }
+
+
+ /**
+ * Undeletes all documents currently marked as deleted in this index.
+ *
+ * Not implemented yet: the body is empty, so calling it has no effect.
+ *
+ * @todo Implementation
+ */
+ public function undeleteAll()
+ {}
+}
96 search/Zend/Search/Lucene/Analysis/Analyzer.php
View
@@ -0,0 +1,96 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Token */
+require_once 'Zend/Search/Lucene/Analysis/Token.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';
+
+
+
+/**
+ * An Analyzer is used to analyze text.
+ * It thus represents a policy for extracting index terms from text.
+ *
+ * Note:
+ * The Java Lucene implementation is stream-oriented, which lets it work
+ * efficiently with huge documents (more than 20Mb).
+ * This engine itself is not geared towards such documents.
+ * Thus Zend_Search_Lucene analysis API works with data strings and sets (arrays).
+ *
+ * The class also holds the process-wide default analyzer, see setDefault()
+ * and getDefault().
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+abstract class Zend_Search_Lucene_Analysis_Analyzer
+{
+ /**
+ * The Analyzer implementation used by default.
+ * Stays unset until setDefault() or getDefault() is called.
+ *
+ * @var Zend_Search_Lucene_Analysis_Analyzer
+ */
+ static private $_defaultImpl;
+
+ /**
+ * Tokenize text into terms.
+ * Returns array of Zend_Search_Lucene_Analysis_Token objects
+ *
+ * @param string $data
+ * @return array
+ */
+ abstract public function tokenize($data);
+
+
+ /**
+ * Set the default Analyzer implementation used by indexing code.
+ *
+ * @param Zend_Search_Lucene_Analysis_Analyzer $analyzer
+ */
+ static public function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer)
+ {
+ self::$_defaultImpl = $analyzer;
+ }
+
+
+ /**
+ * Return the default Analyzer implementation used by indexing code.
+ *
+ * When none has been set yet, a
+ * Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive
+ * instance is created, stored and returned.
+ *
+ * @return Zend_Search_Lucene_Analysis_Analyzer
+ */
+ static public function getDefault()
+ {
+ if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
+ self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();
+ }
+
+ return self::$_defaultImpl;
+ }
+
+}
+
75 search/Zend/Search/Lucene/Analysis/Analyzer/Common.php
View
@@ -0,0 +1,75 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
+
+
+/**
+ * Common implementation of the Zend_Search_Lucene_Analysis_Analyzer interface.
+ * There are several standard subclasses provided by Zend_Search_Lucene/Analysis
+ * subpackage: Zend_Search_Lucene_Analysis_Analyzer_Common_Text, ZSearchHTMLAnalyzer, ZSearchXMLAnalyzer.
+ *
+ * @todo ZSearchHTMLAnalyzer and ZSearchXMLAnalyzer implementation
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+abstract class Zend_Search_Lucene_Analysis_Analyzer_Common extends Zend_Search_Lucene_Analysis_Analyzer
+{
+    /**
+     * The set of Token filters applied to the Token stream, in the order
+     * they were added. Array of Zend_Search_Lucene_Analysis_TokenFilter objects.
+     *
+     * @var array
+     */
+    private $_filters = array();
+
+    /**
+     * Add Token filter to the Analyzer. Filters run in the order they are added.
+     *
+     * @param Zend_Search_Lucene_Analysis_TokenFilter $filter
+     */
+    public function addFilter(Zend_Search_Lucene_Analysis_TokenFilter $filter)
+    {
+        $this->_filters[] = $filter;
+    }
+
+    /**
+     * Apply filters to the token.
+     *
+     * Each filter receives the result of the previous one. A filter removes
+     * the token by returning null; the chain then stops and null is returned.
+     *
+     * @param Zend_Search_Lucene_Analysis_Token $token
+     * @return Zend_Search_Lucene_Analysis_Token|null  normalized token, or null when a filter removed it
+     */
+    public function normalize(Zend_Search_Lucene_Analysis_Token $token)
+    {
+        foreach ($this->_filters as $filter) {
+            $token = $filter->normalize($token);
+
+            // A removed token must not be handed to the next filter: its
+            // normalize() is type-hinted and would fail on null.
+            if ($token === null) {
+                return null;
+            }
+        }
+
+        return $token;
+    }
+}
+
78 search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
View
@@ -0,0 +1,78 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
+{
+    /**
+     * Tokenize text to terms.
+     *
+     * A term is a maximal run of characters accepted by ctype_alpha(); every
+     * other character acts as a separator. Each term is wrapped in a
+     * Zend_Search_Lucene_Analysis_Token carrying its start and end offsets
+     * and passed through normalize(). Tokens removed by a filter (null
+     * result) are left out of the returned stream.
+     *
+     * @param string $data
+     * @return array  list of Zend_Search_Lucene_Analysis_Token objects
+     */
+    public function tokenize($data)
+    {
+        $tokenStream = array();
+
+        // The length does not change while scanning, so compute it once.
+        $length   = strlen($data);
+        $position = 0;
+
+        while ($position < $length) {
+            // skip separators (anything that is not a letter)
+            while ($position < $length && !ctype_alpha($data[$position])) {
+                $position++;
+            }
+
+            $termStartPosition = $position;
+
+            // read token
+            while ($position < $length && ctype_alpha($data[$position])) {
+                $position++;
+            }
+
+            // Empty token, end of stream.
+            if ($position == $termStartPosition) {
+                break;
+            }
+
+            $token = new Zend_Search_Lucene_Analysis_Token(
+                substr($data, $termStartPosition, $position - $termStartPosition),
+                $termStartPosition,
+                $position
+            );
+
+            // Filters signal removal with null; such tokens are not emitted.
+            $token = $this->normalize($token);
+            if ($token !== null) {
+                $tokenStream[] = $token;
+            }
+        }
+
+        return $tokenStream;
+    }
+}
+
46 search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php
View
@@ -0,0 +1,46 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
+
+/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCase */
+require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+class Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_Text
+{
+    /**
+     * Text analyzer with a lower-casing token filter registered up front.
+     */
+    public function __construct()
+    {
+        $lowerCaseFilter = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCase();
+
+        $this->addFilter($lowerCaseFilter);
+    }
+}
+
171 search/Zend/Search/Lucene/Analysis/Token.php
View
@@ -0,0 +1,171 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Analysis_Token
+{
+    /**
+     * Term text carried by this token.
+     *
+     * @var string
+     */
+    private $_termText;
+
+    /**
+     * Offset in the source text at which the token starts.
+     *
+     * @var integer
+     */
+    private $_startOffset;
+
+    /**
+     * Offset in the source text at which the token ends.
+     *
+     * @var integer
+     */
+    private $_endOffset;
+
+    /**
+     * Lexical type of the token.
+     *
+     * @var string
+     */
+    private $_type;
+
+    /**
+     * Position of this token relative to the previous token. The
+     * constructor sets it to one.
+     *
+     * Zero places several terms at the same position. That is useful when a
+     * word has multiple stems: a phrase search containing any of the stems
+     * then matches. The first stem keeps an increment of one and the others
+     * get zero. Repeating a token with a zero increment can also be used to
+     * boost the score of matches on that token.
+     *
+     * Values above one inhibit exact phrase matches. A stop word filter can
+     * remove stop words and set the increment of each following word to the
+     * number of removed words, so that exact phrase queries only match when
+     * the terms occur with no intervening stop words.
+     *
+     * @var integer
+     */
+    private $_positionIncrement;
+
+    /**
+     * Create a token; its position increment starts at one.
+     *
+     * @param string  $text   term text
+     * @param integer $start  start offset in the source text
+     * @param integer $end    end offset in the source text
+     * @param string  $type   lexical type
+     */
+    public function __construct($text, $start, $end, $type = 'word')
+    {
+        $this->_positionIncrement = 1;
+
+        $this->_type        = $type;
+        $this->_endOffset   = $end;
+        $this->_startOffset = $start;
+        $this->_termText    = $text;
+    }
+
+    /**
+     * Change the position increment of this token.
+     *
+     * @param integer $positionIncrement
+     */
+    public function setPositionIncrement($positionIncrement)
+    {
+        $this->_positionIncrement = $positionIncrement;
+    }
+
+    /**
+     * Position increment of this token.
+     *
+     * @return integer
+     */
+    public function getPositionIncrement()
+    {
+        return $this->_positionIncrement;
+    }
+
+    /**
+     * Term text of this token.
+     *
+     * @return string
+     */
+    public function getTermText()
+    {
+        return $this->_termText;
+    }
+
+    /**
+     * Starting offset: the position in the source text of the first
+     * character corresponding to this token.
+     *
+     * Note: getEndOffset() - getStartOffset() is not necessarily equal to
+     * strlen(getTermText()), because a stemmer or another filter may have
+     * altered the term text.
+     *
+     * @return integer
+     */
+    public function getStartOffset()
+    {
+        return $this->_startOffset;
+    }
+
+    /**
+     * Ending offset: one greater than the position in the source text of
+     * the last character corresponding to this token.
+     *
+     * @return integer
+     */
+    public function getEndOffset()
+    {
+        return $this->_endOffset;
+    }
+
+    /**
+     * Lexical type of this token; 'word' unless another type was given to
+     * the constructor.
+     *
+     * @return string
+     */
+    public function getType()
+    {
+        return $this->_type;
+    }
+}
+
47 search/Zend/Search/Lucene/Analysis/TokenFilter.php
View
@@ -0,0 +1,47 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Token */
+require_once 'Zend/Search/Lucene/Analysis/Token.php';
+
+
+/**
+ * Token filter converts (normalizes) a Token or removes it from a token stream.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+abstract class Zend_Search_Lucene_Analysis_TokenFilter
+{
+ /**
+ * Normalize a Token, or remove it from the token stream.
+ *
+ * Implementations return the Token to keep (possibly a new object);
+ * returning null signals that the Token is to be removed.
+ *
+ * @param Zend_Search_Lucene_Analysis_Token $srcToken
+ * @return Zend_Search_Lucene_Analysis_Token|null
+ */
+ abstract public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken);
+}
+
57 search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
View
@@ -0,0 +1,57 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_TokenFilter */
+require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
+
+
+/**
+ * Lower case Token filter.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+class Zend_Search_Lucene_Analysis_TokenFilter_LowerCase extends Zend_Search_Lucene_Analysis_TokenFilter
+{
+    /**
+     * Produce a lower-cased copy of the token.
+     *
+     * The term text goes through strtolower(); offsets, type and position
+     * increment are carried over unchanged. This filter always returns a
+     * token, it never removes one.
+     *
+     * @param Zend_Search_Lucene_Analysis_Token $srcToken
+     * @return Zend_Search_Lucene_Analysis_Token
+     */
+    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
+    {
+        $loweredText = strtolower($srcToken->getTermText());
+
+        $newToken = new Zend_Search_Lucene_Analysis_Token(
+            $loweredText,
+            $srcToken->getStartOffset(),
+            $srcToken->getEndOffset(),
+            $srcToken->getType()
+        );
+
+        $increment = $srcToken->getPositionIncrement();
+        $newToken->setPositionIncrement($increment);
+
+        return $newToken;
+    }
+}
+
111 search/Zend/Search/Lucene/Document.php
View
@@ -0,0 +1,111 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Document
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Field */
+require_once 'Zend/Search/Lucene/Field.php';
+
+
+/**
+ * A Document is a set of fields. Each field has a name and a textual value.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Document
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Document
+{
+
+    /**
+     * Associative array of Zend_Search_Lucene_Field objects where the keys
+     * to the array are the names of the fields.
+     *
+     * @var array
+     */
+    protected $_fields = array();
+
+    /**
+     * Boost value attached to the document. Initialised to 1.0 and not read
+     * anywhere inside this class.
+     *
+     * @var float
+     */
+    public $boost = 1.0;
+
+
+    /**
+     * Proxy method for getFieldValue(), provides more convenient access to
+     * the string value of a field ($document->fieldName).
+     *
+     * @param string $offset  field name
+     * @return string
+     * @throws Zend_Search_Lucene_Exception  when the document has no such field
+     */
+    public function __get($offset)
+    {
+        return $this->getFieldValue($offset);
+    }
+
+
+    /**
+     * Add a field object to this document. A field added under a name that
+     * is already present replaces the earlier one.
+     *
+     * @param Zend_Search_Lucene_Field $field
+     */
+    public function addField(Zend_Search_Lucene_Field $field)
+    {
+        $this->_fields[$field->name] = $field;
+    }
+
+
+    /**
+     * Return an array with the names of the fields in this document.
+     *
+     * @return array
+     */
+    public function getFieldNames()
+    {
+        return array_keys($this->_fields);
+    }
+
+
+    /**
+     * Returns Zend_Search_Lucene_Field object for a named field in this document.
+     *
+     * @param string $fieldName
+     * @return Zend_Search_Lucene_Field
+     * @throws Zend_Search_Lucene_Exception  when the document has no such field
+     */
+    public function getField($fieldName)
+    {
+        if (!array_key_exists($fieldName, $this->_fields)) {
+            // This file only requires Field.php, so load the exception class
+            // here, on the failure path, before instantiating it.
+            require_once 'Zend/Search/Lucene/Exception.php';
+
+            throw new Zend_Search_Lucene_Exception("Field name \"$fieldName\" not found in document.");
+        }
+        return $this->_fields[$fieldName];
+    }
+
+
+    /**
+     * Returns the string value of a named field in this document.
+     *
+     * @see __get()
+     * @param string $fieldName
+     * @return string
+     * @throws Zend_Search_Lucene_Exception  when the document has no such field
+     */
+    public function getFieldValue($fieldName)
+    {
+        return $this->getField($fieldName)->stringValue;
+    }
+
+}
32 search/Zend/Search/Lucene/EncodingConverter.php
View
@@ -0,0 +1,32 @@
+<?php
+
+class EncodingConverter {
+    /**
+     * Message captured by handleError() during the latest convert() call;
+     * FALSE when that call raised nothing.
+     */
+    private $last_error;
+
+    /** Encoding the input strings are expected to be in. */
+    private $in_encoding;
+
+    /** Encoding the strings are converted to. */
+    private $out_encoding;
+
+    /**
+     * Remember the source and target encodings used by convert().
+     *
+     * @param string $in_encoding
+     * @param string $out_encoding
+     */
+    public function __construct($in_encoding, $out_encoding) {
+        $this->out_encoding = $out_encoding;
+        $this->in_encoding  = $in_encoding;
+    }
+
+    /**
+     * Error handler installed while iconv() runs; keeps only the message.
+     *
+     * @param integer $err  error level
+     * @param string  $msg  error message
+     */
+    public function handleError($err, $msg) {
+        $this->last_error = $msg;
+    }
+
+    /**
+     * Convert a string from the input encoding to the output encoding.
+     * Errors raised by iconv() are routed to handleError() instead of the
+     * previously active handler, which is restored afterwards.
+     *
+     * @param string $str
+     * @return string|false  whatever iconv() returned
+     */
+    public function convert($str) {
+        $this->last_error = FALSE;
+
+        set_error_handler(array($this, 'handleError'));
+        $converted = iconv($this->in_encoding, $this->out_encoding, $str);
+        restore_error_handler();
+
+        return $converted;
+    }
+
+    /**
+     * Message recorded by the latest convert() call, or FALSE if it raised
+     * no error.
+     *
+     * @return string|false
+     */
+    public function getLastError() {
+        return $this->last_error;
+    }
+}
+
+?>
36 search/Zend/Search/Lucene/Exception.php
View
@@ -0,0 +1,36 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/**
+ * Framework base exception
+ */
+require_once 'Zend/Search/Exception.php';
+
+
+/**
+ * Exception type for the Zend_Search_Lucene package. The class body is
+ * empty: it adds nothing to Zend_Search_Exception except its own type.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Exception extends Zend_Search_Exception
+{}
+
161 search/Zend/Search/Lucene/Field.php
View
@@ -0,0 +1,161 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Document
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/**
+ * A field is a section of a Document. Each field has two parts,
+ * a name and a value. Values may be free text or they may be atomic
+ * keywords, which are not further processed. Such keywords may
+ * be used to represent dates, urls, etc. Fields are optionally
+ * stored in the index, so that they may be returned with hits
+ * on the document.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Document
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+//require_once('EncodingConverter.php');
+
+class Zend_Search_Lucene_Field
+{
+    /** Not assigned anywhere in this class. */
+    public $kind;
+
+    /** Field name. */
+    public $name = 'body';
+
+    /** Field value; transliterated to ASCII unless the field is binary. */
+    public $stringValue = null;
+
+    /** Whether the value is stored in the index. */
+    public $isStored = false;
+
+    /** Whether the field is indexed. */
+    public $isIndexed = true;
+
+    /** Whether the value is tokenized. */
+    public $isTokenized = true;
+
+    /** Whether the value is a binary string (kept untouched). */
+    public $isBinary = false;
+
+    /** Always reset to false by the constructor. */
+    public $storeTermVector = false;
+
+    /** Always reset to 1.0 by the constructor. */
+    public $boost = 1.0;
+
+    /**
+     * Build a field from its name, value and flags.
+     *
+     * Non-binary values are converted with iconv() from ISO-8859-1 to
+     * ASCII//TRANSLIT; binary values are stored as given.
+     *
+     * @param string  $name
+     * @param string  $stringValue
+     * @param boolean $isStored
+     * @param boolean $isIndexed
+     * @param boolean $isTokenized
+     * @param boolean $isBinary
+     */
+    public function __construct($name, $stringValue, $isStored, $isIndexed, $isTokenized, $isBinary = false)
+    {
+        $this->name = $name;
+
+        if ($isBinary) {
+            $this->stringValue = $stringValue;
+        } else {
+            /**
+             * @todo Correct UTF-8 string should be required in future.
+             * Until full UTF-8 support is completed, the string is
+             * normalized to ASCII.
+             *
+             * An earlier variant detected the source encoding with
+             * mb_detect_encoding() and converted through EncodingConverter;
+             * it is disabled and the input is assumed to be ISO-8859-1.
+             */
+            $this->stringValue = iconv('ISO-8859-1', 'ASCII//TRANSLIT', $stringValue);
+        }
+
+        $this->isBinary    = $isBinary;
+        $this->isTokenized = $isTokenized;
+        $this->isIndexed   = $isIndexed;
+        $this->isStored    = $isStored;
+
+        $this->boost           = 1.0;
+        $this->storeTermVector = false;
+    }
+
+
+    /**
+     * Constructs a String-valued Field that is indexed and stored but not
+     * tokenized. Useful for non-text fields, e.g. date or url.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function Keyword($name, $value)
+    {
+        $isStored    = true;
+        $isIndexed   = true;
+        $isTokenized = false;
+
+        return new self($name, $value, $isStored, $isIndexed, $isTokenized);
+    }
+
+
+    /**
+     * Constructs a String-valued Field that is neither tokenized nor
+     * indexed, but is stored in the index, for return with hits.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function UnIndexed($name, $value)
+    {
+        $isStored    = true;
+        $isIndexed   = false;
+        $isTokenized = false;
+
+        return new self($name, $value, $isStored, $isIndexed, $isTokenized);
+    }
+
+
+    /**
+     * Constructs a Binary String valued Field that is neither tokenized nor
+     * indexed, but is stored in the index, for return with hits.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function Binary($name, $value)
+    {
+        $isStored    = true;
+        $isIndexed   = false;
+        $isTokenized = false;
+        $isBinary    = true;
+
+        return new self($name, $value, $isStored, $isIndexed, $isTokenized, $isBinary);
+    }
+
+    /**
+     * Constructs a String-valued Field that is tokenized, indexed and
+     * stored in the index, for return with hits. Useful for short text
+     * fields, like "title" or "subject". Term vector will not be stored for
+     * this field.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function Text($name, $value)
+    {
+        $isStored    = true;
+        $isIndexed   = true;
+        $isTokenized = true;
+
+        return new self($name, $value, $isStored, $isIndexed, $isTokenized);
+    }
+
+
+    /**
+     * Constructs a String-valued Field that is tokenized and indexed, but
+     * that is not stored in the index.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function UnStored($name, $value)
+    {
+        $isStored    = false;
+        $isIndexed   = true;
+        $isTokenized = true;
+
+        return new self($name, $value, $isStored, $isIndexed, $isTokenized);
+    }
+
+}
+
45 search/Zend/Search/Lucene/Index/FieldInfo.php
View
@@ -0,0 +1,45 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Index_FieldInfo
+{
+    /** Field name. */
+    public $name;
+
+    /** Indexed flag, stored exactly as passed to the constructor. */
+    public $isIndexed;
+
+    /** Field number. */
+    public $number;
+
+    /** Term vector flag, stored exactly as passed to the constructor. */
+    public $storeTermVector;
+
+    /**
+     * Plain value holder: every argument is copied to the property of the
+     * same name without conversion.
+     *
+     * @param string  $name
+     * @param mixed   $isIndexed
+     * @param integer $number
+     * @param mixed   $storeTermVector
+     */
+    public function __construct($name, $isIndexed, $number, $storeTermVector)
+    {
+        $this->storeTermVector = $storeTermVector;
+        $this->number          = $number;
+        $this->isIndexed       = $isIndexed;
+        $this->name            = $name;
+    }
+}
+
575 search/Zend/Search/Lucene/Index/SegmentInfo.php
View
@@ -0,0 +1,575 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Index_SegmentInfo
+{
+ /**
+ * Number of docs in a segment
+ *
+ * @var integer
+ */
+ private $_docCount;
+
+ /**
+ * Segment name
+ *
+ * @var string
+ */
+ private $_name;
+
+ /**
+ * Term Dictionary Index
+ * Array of the Zend_Search_Lucene_Index_Term objects
+ * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
+ *
+ * @var array
+ */
+ private $_termDictionary;
+
+ /**
+ * Term Dictionary Index TermInfos
+ * Array of the Zend_Search_Lucene_Index_TermInfo objects
+ *
+ * @var array
+ */
+ private $_termDictionaryInfos;
+
+ /**
+ * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
+ *
+ * @var array
+ */
+ private $_fields;
+
+ /**
+ * Field positions in a dictionary.
+ * (Term dictionary contains fields ordered by names)
+ *
+ * @var array
+ */
+ private $_fieldsDicPositions;
+
+
+ /**
+ * Associative array where the key is the file name and the value is data offset
+ * in a compound segment file (.cfs).
+ *
+ * @var array
+ */
+ private $_segFiles;
+
+ /**
+ * File system adapter.
+ *
+ * @var Zend_Search_Lucene_Storage_Directory_Filesystem
+ */
+ private $_directory;
+
+ /**
+ * Normalization factors.
+ * An array fieldName => normVector
+ * normVector is a binary string.
+ * Each byte corresponds to an indexed document in a segment and
+ * encodes normalization factor (float value, encoded by
+ * Zend_Search_Lucene_Search_Similarity::encodeNorm())
+ *
+ * @var array
+ */
+ private $_norms = array();
+
+ /**
+ * List of deleted documents.
+ * bitset if bitset extension is loaded or array otherwise.
+ *
+ * @var mixed
+ */
+ private $_deleted;
+
+ /**
+ * $this->_deleted update flag
+ *
+ * @var boolean
+ */
+ private $_deletedDirty = false;
+
+ /**
+  * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
+  * Documents count and Directory as a parameter.
+  *
+  * Reads, in this order:
+  *  - the table of contents of the "<name>.cfs" compound file, if present;
+  *  - the field list from the ".fnm" file;
+  *  - the deleted-documents bit vector from the ".del" file, if present.
+  *
+  * @param string $name
+  * @param integer $docCount
+  * @param Zend_Search_Lucene_Storage_Directory $directory
+  * @throws Zend_Search_Exception  any error other than a missing ".del" file
+  */
+ public function __construct($name, $docCount, $directory)
+ {
+     $this->_name = $name;
+     $this->_docCount = $docCount;
+     $this->_directory = $directory;
+     $this->_termDictionary = null;
+
+     // Compound file directory: file name => data offset inside the .cfs file
+     $this->_segFiles = array();
+     if ($this->_directory->fileExists($name . '.cfs')) {
+         $cfsFile = $this->_directory->getFileObject($name . '.cfs');
+         $segFilesCount = $cfsFile->readVInt();
+
+         for ($count = 0; $count < $segFilesCount; $count++) {
+             $dataOffset = $cfsFile->readLong();
+             $fileName = $cfsFile->readString();
+             $this->_segFiles[$fileName] = $dataOffset;
+         }
+     }
+
+     $fnmFile = $this->openCompoundFile('.fnm');
+     $fieldsCount = $fnmFile->readVInt();
+     $fieldNames = array();
+     $fieldNums = array();
+     $this->_fields = array();
+     for ($count=0; $count < $fieldsCount; $count++) {
+         $fieldName = $fnmFile->readString();
+         $fieldBits = $fnmFile->readByte();
+         $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
+                                                                         $fieldBits & 1,
+                                                                         $count,
+                                                                         $fieldBits & 2 );
+         if ($fieldBits & 0x10) {
+             // norms are omitted for the indexed field
+             $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
+         }
+
+         $fieldNums[$count] = $count;
+         $fieldNames[$count] = $fieldName;
+     }
+     // Sort the field numbers by field name, then map number => sorted position
+     array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
+     $this->_fieldsDicPositions = array_flip($fieldNums);
+
+     try {
+         $delFile = $this->openCompoundFile('.del');
+
+         $byteCount = $delFile->readInt();
+         $byteCount = ceil($byteCount/8);
+         $bitCount = $delFile->readInt();
+
+         if ($bitCount == 0) {
+             $delBytes = '';
+         } else {
+             $delBytes = $delFile->readBytes($byteCount);
+         }
+
+         if (extension_loaded('bitset')) {
+             $this->_deleted = $delBytes;
+         } else {
+             $this->_deleted = array();
+
+             // Walk only the bytes that were actually read: $delBytes is
+             // empty when no bit is set, and indexing past its end would
+             // raise "uninitialized string offset" errors.
+             $delBytesLength = strlen($delBytes);
+             for ($count = 0; $count < $delBytesLength; $count++) {
+                 $byte = ord($delBytes[$count]);
+                 for ($bit = 0; $bit < 8; $bit++) {
+                     if ($byte & (1<<$bit)) {
+                         $this->_deleted[$count*8 + $bit] = 1;
+                     }
+                 }
+             }
+
+         }
+     } catch(Zend_Search_Exception $e) {
+         // A missing .del file only means that nothing has been deleted;
+         // it is recognised by the message openCompoundFile() throws.
+         if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false ) {
+             $this->_deleted = null;
+         } else {
+             throw $e;
+         }
+     }
+ }
+
+ /**
+  * Opens an index file of this segment, either as a stand-alone file or
+  * as an entry stored within the compound index file.
+  *
+  * A stand-alone "<segment name><extension>" file takes precedence.
+  * Otherwise the "<segment name>.cfs" file is opened and positioned at the
+  * offset recorded for the requested entry.
+  *
+  * @param string $extension
+  * @throws Zend_Search_Lucene_Exception  when the file exists in neither form
+  * @return Zend_Search_Lucene_Storage_File
+  */
+ public function openCompoundFile($extension)
+ {
+     $filename = $this->_name . $extension;
+
+     // Stand-alone file first
+     if ($this->_directory->fileExists($filename)) {
+         return $this->_directory->getFileObject($filename);
+     }
+
+     // Then the entry inside the compound file
+     if (isset($this->_segFiles[$filename])) {
+         $compoundFile = $this->_directory->getFileObject($this->_name . '.cfs');
+         $compoundFile->seek($this->_segFiles[$filename]);
+
+         return $compoundFile;
+     }
+
+     throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
+                                          . $filename . ' file.' );
+ }
+
+ /**
+  * Look up the number of the field with the given name.
+  *
+  * @param string $fieldName
+  * @return integer  field number, or -1 when the segment has no such field
+  */
+ public function getFieldNum($fieldName)
+ {
+     foreach ($this->_fields as $fieldInfo) {
+         if ($fieldInfo->name != $fieldName) {
+             continue;
+         }
+
+         return $fieldInfo->number;
+     }
+
+     return -1;
+ }
+
+ /**
+ * Returns field info for specified field
+ *
+ * The lookup is a plain array access on the segment's field list; no check
+ * is made that $fieldNum exists.
+ *
+ * @param integer $fieldNum
+ * @return Zend_Search_Lucene_Index_FieldInfo
+ */
+ public function getField($fieldNum)
+ {
+ return $this->_fields[$fieldNum];
+ }
+
+ /**
+  * List the names of the segment's fields.
+  *
+  * @param boolean $indexed  when true, only indexed fields are listed
+  * @return array  field names, keyed by those same names
+  */
+ public function getFields($indexed = false)
+ {
+     $names = array();
+
+     foreach ($this->_fields as $fieldInfo) {
+         // Non-indexed fields are left out only when the caller asked for
+         // indexed fields.
+         if ($indexed && !$fieldInfo->isIndexed) {
+             continue;
+         }
+
+         $names[$fieldInfo->name] = $fieldInfo->name;
+     }
+
+     return $names;
+ }
+
+ /**
+ * Returns the total number of documents in this segment.
+ *
+ * This is the $docCount value passed to the constructor, returned as-is;
+ * the deleted-documents list is not consulted here.
+ *
+ * @return integer
+ */
+ public function count()
+ {
+ return $this->_docCount;
+ }
+
+ /**
+  * Translate a field number into its position in the fields dictionary.
+  *
+  * @param integer $fieldNum
+  * @return integer  position from the translation table, or $fieldNum
+  *                  itself when the table has no entry for it
+  */
+ private function _getFieldPosition($fieldNum)
+ {
+     if (isset($this->_fieldsDicPositions[$fieldNum])) {
+         return $this->_fieldsDicPositions[$fieldNum];
+     }
+
+     // Values missing from the translation table are used directly
+     return $fieldNum;
+ }
+
+ /**
+ * Loads Term dictionary from TermInfoIndex file
+ *
+ * Does nothing when the dictionary is already loaded ($_termDictionary is
+ * not null). Otherwise the '.tii' file is opened through
+ * openCompoundFile() and read sequentially:
+ *
+ * - header: format marker (must equal (int)0xFFFFFFFE), number of index
+ * terms, index interval (read and discarded), skip interval;
+ * - one record per index term: prefix length and suffix (the term text is
+ * the first "prefix length" bytes of the previous term followed by the
+ * suffix), field number, document frequency, frequency pointer delta,
+ * proximity pointer delta, skip delta (present only when the document
+ * frequency is at least the skip interval, otherwise taken as 0) and
+ * index pointer delta.
+ *
+ * The frequency, proximity and index pointers are running sums of the
+ * deltas read so far. Each record appends one entry to $_termDictionary
+ * and one to $_termDictionaryInfos, so both arrays share the same indexes.
+ *
+ * @throws Zend_Search_Lucene_Exception when the format marker does not match
+ */
+ protected function _loadDictionary()
+ {
+ if ($this->_termDictionary !== null) {
+ return;
+ }
+
+ $this->_termDictionary = array();
+ $this->_termDictionaryInfos = array();
+
+ $tiiFile = $this->openCompoundFile('.tii');
+ $tiVersion = $tiiFile->readInt();
+ if ($tiVersion != (int)0xFFFFFFFE) {
+ throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
+ }
+
+ $indexTermCount = $tiiFile->readLong();
+ $tiiFile->readInt(); // IndexInterval
+ $skipInterval = $tiiFile->readInt();
+
+ $prevTerm = '';
+ $freqPointer = 0;
+ $proxPointer = 0;
+ $indexPointer = 0;
+ for ($count = 0; $count < $indexTermCount; $count++) {
+ $termPrefixLength = $tiiFile->readVInt();
+ $termSuffix = $tiiFile->readString();
+ $termValue = substr( $prevTerm, 0, $termPrefixLength ) . $termSuffix;
+
+ $termFieldNum = $tiiFile->readVInt();
+ $docFreq = $tiiFile->readVInt();
+ $freqPointer += $tiiFile->readVInt();
+ $proxPointer += $tiiFile->readVInt();
+ if( $docFreq >= $skipInterval ) {
+ $skipDelta = $tiiFile->readVInt();
+ } else {
+ $skipDelta = 0;
+ }
+
+ $indexPointer += $tiiFile->readVInt();
+
+ $this->_termDictionary[] = new Zend_Search_Lucene_Index_Term($termValue,$termFieldNum);
+ $this->_termDictionaryInfos[] =
+ new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);
+ $prevTerm = $termValue;
+ }
+ }
+
+
+ /**
+ * Return segment name
+ *
+ * @return string
+ */
+ public function getName()
+ {
+     // Segment name as held by this object.
+     $segmentName = $this->_name;
+
+     return $segmentName;
+ }
+
+
+ /**
+ * Scans terms dictionary and returns term info
+ *
+ * @param Zend_Search_Lucene_Index_Term $term
+ * @return Zend_Search_Lucene_Index_TermInfo
+ */
+ public function getTermInfo($term)
+ {
+ $this->_loadDictionary();
+
+ $searchField = $this->getFieldNum($term->field);
+
+ if ($searchField == -1) {
+ return null;
+ }
+ $searchDicField = $this->_getFieldPosition($searchField);
+
+ // search for appropriate value in dictionary
+ $lowIndex = 0;
+ $highIndex = count($this->_termDictionary)-1;
+ while ($highIndex >= $lowIndex) {
+ // $mid = ($highIndex - $lowIndex)/2;
+ $mid = ($highIndex + $lowIndex) >> 1;
+ $midTerm = $this->_termDictionary[$mid];
+
+ $fieldNum = $this->_getFieldPosition($midTerm->field);
+ $delta = $searchDicField - $fieldNum;
+ if ($delta == 0) {
+ $delta = strcmp($term->text, $midTerm->text);
+ }
+
+ if ($delta < 0) {
+ $highIndex = $mid-1;
+ } elseif ($delta > 0) {
+ $lowIndex = $mid+1;
+ } else {
+ return $this->_termDictionaryInfos[$mid]; // We got it!
+ }
+ }
+
+ if ($highIndex == -1) {
+ // Term is out of the dictionary range
+ return null;
+ }
+
+ $prevPosition = $highIndex;
+ $prevTerm<