Permalink
Browse files

Comments, update logic improved, rest of module add/delete functions …

…added.
  • Loading branch information...
mchampan
mchampan committed Aug 21, 2006
1 parent 0513f3b commit 791a4cece1078dc5c48f9d9c69a00ba93f8fad0d
View
@@ -1,3 +1,25 @@
+2006/08/21
+----------
+Fixed index document count, and created new config variable to store
+the size. (Search now has 3 global vars in $CFG, date, size and complete,
+see indexer.php for var names). Index size is cached to provide an always
+current value for the index - this is to take into account the fact that
+deleted documents are in fact not removed from the index, but instead just
+marked as deleted and not returned in search results. The actual document
+still features in the index, and skews sizes. When the index optimiser is
+completed in ZFS, then these deleted documents will be pruned, thus
+correctly modifying the index size.
+
+Additional commenting added.
+
+Query page logic very slightly modified to clean up GET string a bit (removed
+'p' variable).
+
+Add/delete functions added to other document types.
+
+A few TODO fields added to source, indicating changes still to come (or at
+least to be considered).
+
2006/08/16
----------
Add/delete/update cron functions finished - can be called separately
View
@@ -22,11 +22,18 @@
$dbcontrol = new IndexDBControl();
$addition_count = 0;
+ $indexdate = $CFG->search_indexer_run_date;
+
mtrace('<pre>Starting index update (additions)...');
- mtrace('Index size before: '.$index->count()."\n");
+ mtrace('Index size before: '.$CFG->search_index_size."\n");
+ //get all modules
if ($mods = get_records_select('modules')) {
+ //append virtual modules onto array
+ $mods = array_merge($mods, search_get_additional_modules());
+
foreach ($mods as $mod) {
+ //build include file and function names
$class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
$db_names_function = $mod->name.'_db_names';
$get_document_function = $mod->name.'_single_document';
@@ -35,22 +42,29 @@
if (file_exists($class_file)) {
require_once($class_file);
+ //if both required functions exist
if (function_exists($db_names_function) and function_exists($get_document_function)) {
mtrace("Checking $mod->name module for additions.");
$values = $db_names_function();
+ $where = (isset($values[4])) ? $values[4] : '';
- $sql = "select id, ".$values[0]." as docid from ".$values[1]."
- where id not in
- (select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')";
+ //select records in MODULE table, but not in SEARCH_DATABASE_TABLE
+ $sql = "select id, ".$values[0]." as docid from ".$values[1].
+ " where id not in".
+ " (select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')".
+ " and ".$values[2]." > $indexdate".
+ " $where";
$records = get_records_sql($sql);
+ //foreach record, build a module specific search document using the get_document function
if (is_array($records)) {
foreach($records as $record) {
$additions[] = $get_document_function($record->id);
} //foreach
} //if
+ //foreach document, add it to the index and database table
foreach ($additions as $add) {
++$addition_count;
@@ -74,9 +88,11 @@
//commit changes
$index->commit();
- //update index date
+ //update index date and size
set_config("search_indexer_run_date", time());
+ set_config("search_index_size", (int)$CFG->search_index_size + (int)$addition_count);
+ //print some additional info
mtrace("Added $addition_count documents.");
mtrace('Index size after: '.$index->count().'</pre>');
View
@@ -1,5 +1,17 @@
<?php
+ /* cron script to perform all the periodic search tasks
+ *
+ * delete.php
+ * updates the index by pruning deleted documents
+ *
+ * update.php
+ * updates document info in the index if the document has been modified since indexing
+ *
+ * add.php
+ * adds documents created since the last index run
+ */
+
require_once('../config.php');
require_once("$CFG->dirroot/search/lib.php");
View
@@ -23,10 +23,13 @@
$deletion_count = 0;
mtrace('<pre>Starting clean-up of removed records...');
- mtrace('Index size before: '.$index->count()."\n");
+ mtrace('Index size before: '.$CFG->search_index_size."\n");
if ($mods = get_records_select('modules')) {
+ $mods = array_merge($mods, search_get_additional_modules());
+
foreach ($mods as $mod) {
+ //build function names
$class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
$delete_function = $mod->name.'_delete';
$db_names_function = $mod->name.'_db_names';
@@ -39,27 +42,30 @@
mtrace("Checking $mod->name module for deletions.");
$values = $db_names_function();
- $sql = "select id, docid from ".SEARCH_DATABASE_TABLE."
- where doctype like '$mod->name'
- and docid not in
- (select ".$values[0]." from ".$values[1].")";
+ $sql = "select id, docid from ".SEARCH_DATABASE_TABLE.
+ " where doctype like '$mod->name'".
+ " and docid not in".
+ " (select ".$values[0]." from ".$values[1].")";
$records = get_records_sql($sql);
+ //build an array of all the deleted records
if (is_array($records)) {
foreach($records as $record) {
$deletions[] = $delete_function($record->docid);
} //foreach
} //if
foreach ($deletions as $delete) {
+ //find the specific document in the index, using its docid and doctype as keys
$doc = $index->find("+docid:$delete +doctype:$mod->name");
//get the record, should only be one
foreach ($doc as $thisdoc) {
++$deletion_count;
mtrace(" Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
+ //remove it from index and database table
$dbcontrol->delDocument($thisdoc);
$index->delete($thisdoc->id);
} //foreach
@@ -74,8 +80,9 @@
//commit changes
$index->commit();
- //update index date
+ //update index date and index size
set_config("search_indexer_run_date", time());
+ set_config("search_index_size", (int)$CFG->search_index_size - (int)$deletion_count);
mtrace("Finished $deletion_count removals.");
mtrace('Index size after: '.$index->count().'</pre>');
@@ -12,6 +12,7 @@ public function __construct(&$doc, &$data, $document_type, $course_id, $group_id
$this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $doc->url));
$this->addField(Zend_Search_Lucene_Field::UnIndexed('date', $doc->date));
+ //additional data added on a per-module basis
$this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data)));
$this->addField(Zend_Search_Lucene_Field::Keyword('doctype', $document_type));
@@ -68,6 +68,30 @@ function forum_get_content_for_index(&$forum) {
return $documents;
} //forum_get_content_for_index
+ //builds the search document for one forum post, looked up by its forum_posts id
+ //walks post -> discussion -> forum to collect the ids the document constructor needs
+ function forum_single_document($id) {
+     //the post row itself
+     $post_rs = get_recordset('forum_posts', 'id', $id);
+     $post_row = $post_rs->fields;
+
+     //the discussion that the post belongs to
+     $discussion_rs = get_recordset('forum_discussions', 'id', $post_row['discussion']);
+     $discussion_row = $discussion_rs->fields;
+
+     //the forum that owns the discussion
+     $forum_rs = get_recordset('forum', 'id', $discussion_row['forum']);
+     $forum_row = $forum_rs->fields;
+
+     //NOTE(review): groupid is read from the post row here - confirm forum_posts really carries that column
+     $forum_id = $forum_row['id'];
+     $course_id = $forum_row['course'];
+     $group_id = $post_row['groupid'];
+
+     return new ForumSearchDocument($post_row, $forum_id, $course_id, $group_id);
+ } //forum_single_document
+
+ //maps a docid taken from the search table onto the id used to find the document in the index
+ //for forum posts the two are the same value, so the argument comes back unchanged
+ function forum_delete($info) {
+     $docid = $info;
+     return $docid;
+ } //forum_delete
+
+ //describes the forum post table to the add/delete scripts, which build their sql from these names
+ function forum_db_names() {
+     //positions: [0] primary id, [1] table name, [2] time created field name, [3] time modified field name
+     $names = array('id', 'forum_posts', 'created', 'modified');
+     return $names;
+ } //forum_db_names
+
//reworked faster version from /mod/forum/lib.php
function forum_get_discussions_fast($forum) {
global $CFG, $USER;
@@ -7,7 +7,6 @@
* */
require_once("$CFG->dirroot/search/documents/document.php");
- //require_once("$CFG->dirroot/mod/glossary/lib.php");
class GlossarySearchDocument extends SearchDocument {
public function __construct(&$entry, $glossary_id, $course_id, $group_id) {
@@ -63,6 +62,7 @@ function glossary_get_content_for_index(&$glossary) {
return $documents;
} //glossary_get_content_for_index
+ //returns a single glossary search document based on a glossary_entry id
function glossary_single_document($id) {
$entries = get_recordset('glossary_entries', 'id', $id);
$entry = $entries->fields;
@@ -73,12 +73,16 @@ function glossary_single_document($id) {
return new GlossarySearchDocument($entry, $entry['glossaryid'], $glossary['course'], -1);
} //glossary_single_document
+ //dummy delete function that converts a docid from the search table to itself.
+ //the delete script builds the function name as <module>_delete, calls it with the docid of
+ //each search-table row whose source record is gone, and uses the returned value as the docid
+ //when looking the document up in the index; for glossary entries no translation is needed.
function glossary_delete($info) {
return $info;
} //glossary_delete
+ //returns the var names needed to build a sql query for addition/deletions
function glossary_db_names() {
- return array('id', 'glossary_entries', 'timemodified');
+ //[primary id], [table name], [time created field name], [time modified field name]
+ return array('id', 'glossary_entries', 'timecreated', 'timemodified');
} //glossary_db_names
?>
@@ -58,4 +58,29 @@ function resource_get_content_for_index(&$notneeded) {
return $documents;
} //resource_get_content_for_index
+ //returns a single resource search document based on a resource id
+ //only rows with non-blank alltext and a type other than 'file' are selected, the same
+ //extra conditions that resource_db_names() hands to the add script
+ function resource_single_document($id) {
+     //the id is concatenated straight into the statement, so force it to an integer first
+     $resource_id = (int)$id;
+
+     //fixed: the statement used to have a stray comma after the TYPE condition, which made it
+     //invalid sql; string literals are now single-quoted, as in resource_db_names()
+     $sql = "SELECT *".
+            " FROM `resource`".
+            " WHERE alltext NOT LIKE ''".
+            " AND alltext NOT LIKE ' '".
+            " AND alltext NOT LIKE '&nbsp;'".
+            " AND TYPE != 'file'".
+            " AND id = ".$resource_id;
+
+     $resources = get_recordset_sql($sql);
+
+     //fields of the current (first) row of the recordset
+     $resource = $resources->fields;
+
+     return new ResourceSearchDocument($resource);
+ } //resource_single_document
+
+ //maps a docid taken from the search table onto the id used to find the document in the index
+ //for resources the two are the same value, so the argument comes back unchanged
+ function resource_delete($info) {
+     $docid = $info;
+     return $docid;
+ } //resource_delete
+
+ //returns the var names needed to build a sql query for addition/deletions
+ function resource_db_names() {
+     //[primary id], [table name], [time created field name], [time modified field name], [additional where conditions for sql]
+     //timemodified is given for both time fields
+     //fixed: the additional conditions are appended by the add script after its own
+     //"where ... and <time field> > <index date>" clause, so they have to start with AND;
+     //a second WHERE keyword at that point produced invalid sql
+     return array('id', 'resource', 'timemodified', 'timemodified', "AND alltext NOT LIKE '' AND alltext NOT LIKE ' ' AND alltext NOT LIKE '&nbsp;' AND TYPE != 'file'");
+ } //resource_db_names
+
?>
@@ -134,4 +134,25 @@ function wiki_get_content_for_index(&$wiki) {
return $documents;
} //wiki_get_content_for_index
+ //builds the search document for one wiki page, looked up by its wiki_pages id
+ function wiki_single_document($id) {
+     //the page row itself
+     $page_rs = get_recordset('wiki_pages', 'id', $id);
+     $page_row = $page_rs->fields;
+
+     //the wiki entry the page points at through its 'wiki' field
+     $entry_rs = get_recordset('wiki_entries', 'id', $page_row['wiki']);
+     $entry_row = $entry_rs->fields;
+
+     //wiki, course and group ids all come from the entry row
+     $wiki_id = $entry_row['wikiid'];
+     $course_id = $entry_row['course'];
+     $group_id = $entry_row['groupid'];
+
+     return new WikiSearchDocument($page_row, $wiki_id, $course_id, $group_id);
+ } //wiki_single_document
+
+ //maps a docid taken from the search table onto the id used to find the document in the index
+ //for wiki pages the two are the same value, so the argument comes back unchanged
+ function wiki_delete($info) {
+     $docid = $info;
+     return $docid;
+ } //wiki_delete
+
+ //describes the wiki page table to the add/delete scripts, which build their sql from these names
+ function wiki_db_names() {
+     //positions: [0] primary id, [1] table name, [2] time created field name, [3] time modified field name
+     $names = array('id', 'wiki_pages', 'created', 'lastmodified');
+     return $names;
+ } //wiki_db_names
+
?>
View
@@ -92,15 +92,17 @@
// * mod_get_content_for_index
//are the sole basis for including a module in the index at the moment.
- if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
- $mods = array_merge($mods, search_get_additional_modules());
+ if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
+ //add virtual modules onto the back of the array
+ $mods = array_merge($mods, search_get_additional_modules());
- foreach ($mods as $mod) {
+ foreach ($mods as $mod) {
$class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
if (file_exists($class_file)) {
include_once($class_file);
+ //build function names
$iter_function = $mod->name.'_iterator';
$index_function = $mod->name.'_get_content_for_index';
@@ -163,5 +165,8 @@
//mark the time we last updated
set_config("search_indexer_run_date", time());
-
+
+ //and the index size
+ set_config("search_index_size", (int)$index->count());
+
?>
Oops, something went wrong.

0 comments on commit 791a4ce

Please sign in to comment.