Navigation Menu

Skip to content

Commit

Permalink
Comments, update logic improved, rest of module add/delete functions …
Browse files Browse the repository at this point in the history
…added.
  • Loading branch information
mchampan committed Aug 21, 2006
1 parent 0513f3b commit 791a4ce
Show file tree
Hide file tree
Showing 15 changed files with 217 additions and 33 deletions.
22 changes: 22 additions & 0 deletions search/README.txt
@@ -1,3 +1,25 @@
2006/08/21
----------
Fixed index document count, and created new config variable to store
the size. (Search now has 3 global vars in $CFG, date, size and complete,
see indexer.php for var names). Index size is cached to provide an always
current value for the index - this is to take into account the fact that
deleted documents are in fact not removed from the index, but instead just
marked as deleted and not returned in search results. The actual document
still features in the index, and skews sizes. When the index optimiser is
completed in ZFS, then these deleted documents will be pruned, thus
correctly modifying the index size.

Additional commenting added.

Query page logic very slightly modified to clean up GET string a bit (removed
'p' variable).

Add/delete functions added to other document types.

A few TODO fields added to source, indicating changes still to come (or at
least to be considered).

2006/08/16
----------
Add/delete/update cron functions finished - can be called separately
Expand Down
26 changes: 21 additions & 5 deletions search/add.php
Expand Up @@ -22,11 +22,18 @@
$dbcontrol = new IndexDBControl();
$addition_count = 0;

$indexdate = $CFG->search_indexer_run_date;

mtrace('<pre>Starting index update (additions)...');
mtrace('Index size before: '.$index->count()."\n");
mtrace('Index size before: '.$CFG->search_index_size."\n");

//get all modules
if ($mods = get_records_select('modules')) {
//append virtual modules onto array
$mods = array_merge($mods, search_get_additional_modules());

foreach ($mods as $mod) {
//build include file and function names
$class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
$db_names_function = $mod->name.'_db_names';
$get_document_function = $mod->name.'_single_document';
Expand All @@ -35,22 +42,29 @@
if (file_exists($class_file)) {
require_once($class_file);

//if both required functions exist
if (function_exists($db_names_function) and function_exists($get_document_function)) {
mtrace("Checking $mod->name module for additions.");
$values = $db_names_function();
$where = (isset($values[4])) ? $values[4] : '';

$sql = "select id, ".$values[0]." as docid from ".$values[1]."
where id not in
(select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')";
//select records in MODULE table, but not in SEARCH_DATABASE_TABLE
$sql = "select id, ".$values[0]." as docid from ".$values[1].
" where id not in".
" (select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')".
" and ".$values[2]." > $indexdate".
" $where";

$records = get_records_sql($sql);

//foreach record, build a module specific search document using the get_document function
if (is_array($records)) {
foreach($records as $record) {
$additions[] = $get_document_function($record->id);
} //foreach
} //if

//foreach document, add it to the index and database table
foreach ($additions as $add) {
++$addition_count;

Expand All @@ -74,9 +88,11 @@
//commit changes
$index->commit();

//update index date
//update index date and size
set_config("search_indexer_run_date", time());
set_config("search_index_size", (int)$CFG->search_index_size + (int)$addition_count);

//print some additional info
mtrace("Added $addition_count documents.");
mtrace('Index size after: '.$index->count().'</pre>');

Expand Down
12 changes: 12 additions & 0 deletions search/cron.php
@@ -1,5 +1,17 @@
<?php

/* cron script to perform all the periodic search tasks
*
* delete.php
* updates the index by pruning deleted documents
*
* update.php
* updates document info in the index if the document has been modified since indexing
*
* add.php
* adds documents created since the last index run
*/

require_once('../config.php');
require_once("$CFG->dirroot/search/lib.php");

Expand Down
19 changes: 13 additions & 6 deletions search/delete.php
Expand Up @@ -23,10 +23,13 @@
$deletion_count = 0;

mtrace('<pre>Starting clean-up of removed records...');
mtrace('Index size before: '.$index->count()."\n");
mtrace('Index size before: '.$CFG->search_index_size."\n");

if ($mods = get_records_select('modules')) {
$mods = array_merge($mods, search_get_additional_modules());

foreach ($mods as $mod) {
//build function names
$class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
$delete_function = $mod->name.'_delete';
$db_names_function = $mod->name.'_db_names';
Expand All @@ -39,27 +42,30 @@
mtrace("Checking $mod->name module for deletions.");
$values = $db_names_function();

$sql = "select id, docid from ".SEARCH_DATABASE_TABLE."
where doctype like '$mod->name'
and docid not in
(select ".$values[0]." from ".$values[1].")";
$sql = "select id, docid from ".SEARCH_DATABASE_TABLE.
" where doctype like '$mod->name'".
" and docid not in".
" (select ".$values[0]." from ".$values[1].")";

$records = get_records_sql($sql);

//build an array of all the deleted records
if (is_array($records)) {
foreach($records as $record) {
$deletions[] = $delete_function($record->docid);
} //foreach
} //if

foreach ($deletions as $delete) {
//find the specific document in the index, using its docid and doctype as keys
$doc = $index->find("+docid:$delete +doctype:$mod->name");

//get the record, should only be one
foreach ($doc as $thisdoc) {
++$deletion_count;
mtrace(" Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");

//remove it from index and database table
$dbcontrol->delDocument($thisdoc);
$index->delete($thisdoc->id);
} //foreach
Expand All @@ -74,8 +80,9 @@
//commit changes
$index->commit();

//update index date
//update index date and index size
set_config("search_indexer_run_date", time());
set_config("search_index_size", (int)$CFG->search_index_size - (int)$deletion_count);

mtrace("Finished $deletion_count removals.");
mtrace('Index size after: '.$index->count().'</pre>');
Expand Down
1 change: 1 addition & 0 deletions search/documents/document.php
Expand Up @@ -12,6 +12,7 @@ public function __construct(&$doc, &$data, $document_type, $course_id, $group_id
$this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $doc->url));
$this->addField(Zend_Search_Lucene_Field::UnIndexed('date', $doc->date));

//additional data added on a per-module basis
$this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data)));

$this->addField(Zend_Search_Lucene_Field::Keyword('doctype', $document_type));
Expand Down
24 changes: 24 additions & 0 deletions search/documents/forum_document.php
Expand Up @@ -68,6 +68,30 @@ function forum_get_content_for_index(&$forum) {
return $documents;
} //forum_get_content_for_index

//builds the search document for one forum post
//  $id - id of a row in the forum_posts table
//returns a ForumSearchDocument built from the post and its parent forum
function forum_single_document($id) {
    //the post itself
    $post_set = get_recordset('forum_posts', 'id', $id);
    $post = $post_set->fields;

    //walk up: post -> discussion -> forum
    $discussion_set = get_recordset('forum_discussions', 'id', $post['discussion']);
    $parent_discussion = $discussion_set->fields;

    $forum_set = get_recordset('forum', 'id', $parent_discussion['forum']);
    $parent_forum = $forum_set->fields;

    //NOTE(review): nothing here checks that the lookups returned a row - confirm callers only pass existing ids
    return new ForumSearchDocument($post, $parent_forum['id'], $parent_forum['course'], $post['groupid']);
} //forum_single_document

//delete hook for forum documents: takes the value handed in by the deletion
//script and gives it back unchanged (no translation is needed for forum posts)
//  $info - value to map
//returns $info as-is
function forum_delete($info) {
    $unchanged = $info;

    return $unchanged;
} //forum_delete

//describes the forum_posts table for the sql queries used by additions/deletions
//returns array([primary id], [table name], [time created field name], [time modified field name])
function forum_db_names() {
    $primary_id     = 'id';
    $table_name     = 'forum_posts';
    $created_field  = 'created';
    $modified_field = 'modified';

    return array($primary_id, $table_name, $created_field, $modified_field);
} //forum_db_names

//reworked faster version from /mod/forum/lib.php
function forum_get_discussions_fast($forum) {
global $CFG, $USER;
Expand Down
8 changes: 6 additions & 2 deletions search/documents/glossary_document.php
Expand Up @@ -7,7 +7,6 @@
* */

require_once("$CFG->dirroot/search/documents/document.php");
//require_once("$CFG->dirroot/mod/glossary/lib.php");

class GlossarySearchDocument extends SearchDocument {
public function __construct(&$entry, $glossary_id, $course_id, $group_id) {
Expand Down Expand Up @@ -63,6 +62,7 @@ function glossary_get_content_for_index(&$glossary) {
return $documents;
} //glossary_get_content_for_index

//returns a single glossary search document based on a glossary_entry id
function glossary_single_document($id) {
$entries = get_recordset('glossary_entries', 'id', $id);
$entry = $entries->fields;
Expand All @@ -73,12 +73,16 @@ function glossary_single_document($id) {
return new GlossarySearchDocument($entry, $entry['glossaryid'], $glossary['course'], -1);
} //glossary_single_document

//placeholder delete hook for glossary documents: the docid taken from the
//search table is handed back exactly as it was received
//  $info - docid to map
//returns $info as-is
function glossary_delete($info) {
    $unchanged = $info;

    return $unchanged;
} //glossary_delete

//returns the var names needed to build a sql query for addition/deletions
//layout: [primary id], [table name], [time created field name], [time modified field name]
function glossary_db_names() {
    //a leftover three-element return used to sit here and shadowed this one,
    //so the time created field was never reported; only the four-element
    //layout documented above is returned now
    return array('id', 'glossary_entries', 'timecreated', 'timemodified');
} //glossary_db_names

?>
25 changes: 25 additions & 0 deletions search/documents/resource_document.php
Expand Up @@ -58,4 +58,29 @@ function resource_get_content_for_index(&$notneeded) {
return $documents;
} //resource_get_content_for_index

//returns a single resource search document based on a resource id
//  $id - id of a row in the resource table
//only rows whose alltext is not blank ('', ' ' or '&nbsp;') and whose type
//is not 'file' are selected
function resource_single_document($id) {
    //force the id to a number so nothing else can be spliced into the query
    $id = (int)$id;

    //conditions are chained with AND only - a comma between two of them is a
    //syntax error; string literals use single quotes, as in resource_db_names()
    $sql = "SELECT *
              FROM `resource`
             WHERE alltext NOT LIKE ''
               AND alltext NOT LIKE ' '
               AND alltext NOT LIKE '&nbsp;'
               AND TYPE != 'file'
               AND id = $id";

    $resources = get_recordset_sql($sql);

    //NOTE(review): nothing checks that a row was actually found - confirm callers only pass ids that match the filter
    $resource = $resources->fields;

    return new ResourceSearchDocument($resource);
} //resource_single_document

//delete hook for resource documents: takes the value handed in by the deletion
//script and gives it back unchanged
//  $info - value to map
//returns $info as-is
function resource_delete($info) {
    $unchanged = $info;

    return $unchanged;
} //resource_delete

//returns the var names needed to build a sql query for addition/deletions
//layout: [primary id], [table name], [time created field name], [time modified field name], [additional conditions for sql]
//the resource table reports 'timemodified' for both time fields
function resource_db_names() {
    //the additional conditions are appended to a query that already has a
    //WHERE clause, so they must continue it with AND rather than open a second WHERE
    $extra_conditions = "AND alltext NOT LIKE '' AND alltext NOT LIKE ' ' AND alltext NOT LIKE '&nbsp;' AND TYPE != 'file'";

    return array('id', 'resource', 'timemodified', 'timemodified', $extra_conditions);
} //resource_db_names

?>
21 changes: 21 additions & 0 deletions search/documents/wiki_document.php
Expand Up @@ -134,4 +134,25 @@ function wiki_get_content_for_index(&$wiki) {
return $documents;
} //wiki_get_content_for_index

//builds the search document for one wiki page
//  $id - id of a row in the wiki_pages table
//returns a WikiSearchDocument built from the page and its wiki_entries row
function wiki_single_document($id) {
    //the page itself
    $page_set = get_recordset('wiki_pages', 'id', $id);
    $page = $page_set->fields;

    //the wiki entry the page hangs off (looked up through the page's 'wiki' field)
    $entry_set = get_recordset('wiki_entries', 'id', $page['wiki']);
    $owner = $entry_set->fields;

    //NOTE(review): nothing here checks that the lookups returned a row - confirm callers only pass existing ids
    return new WikiSearchDocument($page, $owner['wikiid'], $owner['course'], $owner['groupid']);
} //wiki_single_document

//delete hook for wiki documents: takes the value handed in by the deletion
//script and gives it back unchanged
//  $info - value to map
//returns $info as-is
function wiki_delete($info) {
    $unchanged = $info;

    return $unchanged;
} //wiki_delete

//describes the wiki_pages table for the sql queries used by additions/deletions
//returns array([primary id], [table name], [time created field name], [time modified field name])
function wiki_db_names() {
    $names = array();

    $names[] = 'id';           //primary id
    $names[] = 'wiki_pages';   //table name
    $names[] = 'created';      //time created field
    $names[] = 'lastmodified'; //time modified field

    return $names;
} //wiki_db_names

?>
13 changes: 9 additions & 4 deletions search/indexer.php
Expand Up @@ -92,15 +92,17 @@
// * mod_get_content_for_index
//are the sole basis for including a module in the index at the moment.

if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
$mods = array_merge($mods, search_get_additional_modules());
if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
//add virtual modules onto the back of the array
$mods = array_merge($mods, search_get_additional_modules());

foreach ($mods as $mod) {
foreach ($mods as $mod) {
$class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';

if (file_exists($class_file)) {
include_once($class_file);

//build function names
$iter_function = $mod->name.'_iterator';
$index_function = $mod->name.'_get_content_for_index';

Expand Down Expand Up @@ -163,5 +165,8 @@

//mark the time we last updated
set_config("search_indexer_run_date", time());


//and the index size
set_config("search_index_size", (int)$index->count());

?>

0 comments on commit 791a4ce

Please sign in to comment.