Skip to content

Commit

Permalink
MDL-55356 core_search: New manager API index_context
Browse files Browse the repository at this point in the history
New function which uses the get_document_recordset function in
search areas to support full and partial indexing of a given
context.
  • Loading branch information
sammarshallou committed Oct 11, 2017
1 parent 66e3702 commit 4ba11aa
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 2 deletions.
3 changes: 3 additions & 0 deletions search/classes/base.php
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,9 @@ public function get_document_recordset($modifiedfrom = 0, \context $context = nu
* indexfiles => File indexing is enabled if true.
* lastindexedtime => The last time this area was indexed. 0 if never indexed.
*
* The lastindexedtime value is not set if indexing a specific context rather than the whole
* system.
*
* @param \stdClass $record A record containing, at least, the indexed document id and a modified timestamp
* @param array $options Options for document creation
* @return \core_search\document
Expand Down
2 changes: 1 addition & 1 deletion search/classes/engine.php
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ public function add_documents($iterator, $searcharea, $options) {
continue;
}

if ($options['lastindexedtime'] == 0) {
if (isset($options['lastindexedtime']) && $options['lastindexedtime'] == 0) {
// If we have never indexed this area before, it must be new.
$document->set_is_new(true);
}
Expand Down
146 changes: 145 additions & 1 deletion search/classes/manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ public function optimize_index() {
*
* @param bool $fullindex Whether we should reindex everything or not.
* @param float $timelimit Time limit in seconds (0 = no time limit)
* @param \progress_trace $progress Optional class for tracking progress
* @param \progress_trace|null $progress Optional class for tracking progress
* @throws \moodle_exception
* @return bool Whether there was any updated document or not.
*/
Expand Down Expand Up @@ -775,6 +775,150 @@ public function index($fullindex = false, $timelimit = 0, \progress_trace $progr
return (bool)$sumdocs;
}

/**
 * Indexes or reindexes a specific context of the system, e.g. one course.
 *
 * The function returns an object with field 'complete' (true or false). When 'complete' is
 * false the object also carries the fields 'startfromarea' and 'startfromtime'.
 *
 * This function supports partial indexing via the time limit parameter. If the time limit
 * expires, the returned 'startfromarea' and 'startfromtime' values can be passed as
 * $startfromarea and $startfromtime next time to continue indexing.
 *
 * @param \context $context Context to restrict index.
 * @param string $singleareaid If specified, indexes only the given area.
 * @param float $timelimit Time limit in seconds (0 = no time limit)
 * @param \progress_trace|null $progress Optional class for tracking progress
 * @param string $startfromarea Area to start from
 * @param int $startfromtime Timestamp to start from
 * @return \stdClass Object indicating success
 */
public function index_context($context, $singleareaid = '', $timelimit = 0,
        \progress_trace $progress = null, $startfromarea = '', $startfromtime = 0) {
    if (!$progress) {
        $progress = new \null_progress_trace();
    }

    // Work out time to stop, if limited.
    if ($timelimit) {
        // Decide time to stop.
        $stopat = microtime(true) + $timelimit;
    }

    // No PHP time limit.
    \core_php_time_limit::raise();

    // Notify the engine that an index is starting.
    $this->engine->index_starting(false);

    // Running total of documents indexed across all areas in this call.
    $sumdocs = 0;

    // Defaults used for the result if no area ends up being processed (for example when
    // every area is skipped), so that the code after the loop never reads undefined values.
    $partial = false;
    $lastindexeddoc = 0;

    // Get all search areas, in consistent order.
    $searchareas = $this->get_search_areas_list(true);
    ksort($searchareas);

    // Are we skipping past some that were handled previously?
    $skipping = $startfromarea ? true : false;

    foreach ($searchareas as $areaid => $searcharea) {
        // If we're only processing one area id, skip all the others.
        if ($singleareaid && $singleareaid !== $areaid) {
            continue;
        }

        // If we're skipping to a later area, continue through the loop.
        $referencestarttime = 0;
        if ($skipping) {
            if ($areaid !== $startfromarea) {
                continue;
            }
            // Stop skipping and note the reference start time.
            $skipping = false;
            $referencestarttime = $startfromtime;
        }

        $progress->output('Processing area: ' . $searcharea->get_visible_name());

        $elapsed = microtime(true);

        // Get the recordset of all documents from the area for this context.
        $recordset = $searcharea->get_document_recordset($referencestarttime, $context);
        if (!$recordset) {
            if ($recordset === null) {
                $progress->output('Skipping (not relevant to context).', 1);
            } else {
                $progress->output('Skipping (does not support context indexing).', 1);
            }
            continue;
        }

        // Notify the engine that an area is starting.
        $this->engine->area_index_starting($searcharea, false);

        // Work out search options.
        $options = [];
        $options['indexfiles'] = $this->engine->file_indexing_enabled() &&
                $searcharea->uses_file_indexing();
        if ($timelimit) {
            $options['stopat'] = $stopat;
        }

        // Construct iterator which will use get_document on the recordset results.
        $iterator = new \core\dml\recordset_walk($recordset,
                array($searcharea, 'get_document'), $options);

        // Use this iterator to add documents.
        $result = $this->engine->add_documents($iterator, $searcharea, $options);

        // The recordset has been handed to the engine and is no longer needed here; release
        // it explicitly rather than leaving the database resource open.
        $recordset->close();

        if (count($result) === 5) {
            list($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial) = $result;
        } else {
            // Backward compatibility for engines that don't support partial adding.
            list($numrecords, $numdocs, $numdocsignored, $lastindexeddoc) = $result;
            debugging('engine::add_documents() should return $partial (4-value return is deprecated)',
                    DEBUG_DEVELOPER);
            $partial = false;
        }

        // Accumulate the total so the event and index_complete() below see the real count.
        $sumdocs += $numdocs;

        if ($numdocs > 0) {
            $elapsed = round((microtime(true) - $elapsed), 3);
            $progress->output('Processed ' . $numrecords . ' records containing ' . $numdocs .
                    ' documents, in ' . $elapsed . ' seconds' .
                    ($partial ? ' (not complete)' : '') . '.', 1);
        } else {
            $progress->output('No documents to index.', 1);
        }

        // Notify the engine this area is complete, but don't store any times as this is not
        // part of the 'normal' search index.
        if (!$this->engine->area_index_complete($searcharea, $numdocs, false)) {
            $progress->output('Engine reported error.', 1);
        }

        if ($partial && $timelimit && (microtime(true) >= $stopat)) {
            $progress->output('Stopping indexing due to time limit.');
            break;
        }
    }

    if ($sumdocs > 0) {
        $event = \core\event\search_indexed::create(
                array('context' => $context));
        $event->trigger();
    }

    $this->engine->index_complete($sumdocs, false);

    // Indicate in result whether we completed indexing, or only part of it.
    $result = new \stdClass();
    if ($partial) {
        // $areaid still holds the area the loop stopped on, which is where to resume.
        $result->complete = false;
        $result->startfromarea = $areaid;
        $result->startfromtime = $lastindexeddoc;
    } else {
        $result->complete = true;
    }
    return $result;
}

/**
* Resets areas config.
*
Expand Down
100 changes: 100 additions & 0 deletions search/tests/manager_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,106 @@ public function test_partial_indexing() {
$this->assertFalse(get_config($componentname, $varname . '_partial'));
}

/**
 * Tests that indexing a specified context works correctly.
 *
 * Covers indexing a single module context, a whole course context, a whole course restricted
 * to one search area, and time-limited indexing that is resumed using the returned
 * startfromarea / startfromtime values.
 */
public function test_context_indexing() {
    global $USER;

    $this->resetAfterTest();
    $this->setAdminUser();

    // Create a course containing three forums (two of them with discussions) and a page.
    $generator = $this->getDataGenerator();
    $course = $generator->create_course();
    $now = time();
    $forum1 = $generator->create_module('forum', ['course' => $course->id]);
    $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id,
            'forum' => $forum1->id, 'userid' => $USER->id, 'timemodified' => $now,
            'name' => 'Frog']);
    $this->waitForSecond();
    $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id,
            'forum' => $forum1->id, 'userid' => $USER->id, 'timemodified' => $now + 2,
            'name' => 'Zombie']);
    $forum2 = $generator->create_module('forum', ['course' => $course->id]);
    $this->waitForSecond();
    $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id,
            'forum' => $forum2->id, 'userid' => $USER->id, 'timemodified' => $now + 1,
            'name' => 'Toad']);
    $generator->create_module('page', ['course' => $course->id]);
    $generator->create_module('forum', ['course' => $course->id]);

    // Index forum 1 only.
    $search = testable_core_search::instance();
    $buffer = new progress_trace_buffer(new text_progress_trace(), false);
    $result = $search->index_context(\context_module::instance($forum1->cmid), '', 0, $buffer);
    $this->assertTrue($result->complete);
    $log = $buffer->get_buffer();
    $buffer->reset_buffer();

    // Confirm that output only processed 1 forum activity and 2 posts.
    $this->assertNotFalse(strpos($log, "area: Forum - activity information\n Processed 1 "));
    $this->assertNotFalse(strpos($log, "area: Forum - posts\n Processed 2 "));

    // Confirm that some areas for different types of context were skipped.
    $this->assertNotFalse(strpos($log, "area: Users\n Skipping"));
    $this->assertNotFalse(strpos($log, "area: My courses\n Skipping"));

    // Confirm that another module area had no results.
    $this->assertNotFalse(strpos($log, "area: Page\n No documents"));

    // Index whole course.
    $result = $search->index_context(\context_course::instance($course->id), '', 0, $buffer);
    $this->assertTrue($result->complete);
    $log = $buffer->get_buffer();
    $buffer->reset_buffer();

    // Confirm that output processed 3 forum activities and 3 posts.
    $this->assertNotFalse(strpos($log, "area: Forum - activity information\n Processed 3 "));
    $this->assertNotFalse(strpos($log, "area: Forum - posts\n Processed 3 "));

    // The course area was also included this time.
    $this->assertNotFalse(strpos($log, "area: My courses\n Processed 1 "));

    // Confirm that another module area had results too.
    $this->assertNotFalse(strpos($log, "area: Page\n Processed 1 "));

    // Index whole course, but only forum posts.
    $result = $search->index_context(\context_course::instance($course->id), 'mod_forum-post',
            0, $buffer);
    $this->assertTrue($result->complete);
    $log = $buffer->get_buffer();
    $buffer->reset_buffer();

    // Confirm that output processed 3 posts but not forum activities.
    $this->assertFalse(strpos($log, "area: Forum - activity information"));
    $this->assertNotFalse(strpos($log, "area: Forum - posts\n Processed 3 "));

    // Set time limit and retry index of whole course, taking 3 tries to complete it.
    $search->get_engine()->set_add_delay(0.4);
    $result = $search->index_context(\context_course::instance($course->id), '', 1, $buffer);
    $log = $buffer->get_buffer();
    $buffer->reset_buffer();
    $this->assertFalse($result->complete);
    $this->assertNotFalse(strpos($log, "area: Forum - activity information\n Processed 2 "));

    // Second try: resume from where the first try stopped.
    $result = $search->index_context(\context_course::instance($course->id), '', 1, $buffer,
            $result->startfromarea, $result->startfromtime);
    $log = $buffer->get_buffer();
    $buffer->reset_buffer();
    $this->assertNotFalse(strpos($log, "area: Forum - activity information\n Processed 2 "));
    $this->assertNotFalse(strpos($log, "area: Forum - posts\n Processed 2 "));
    $this->assertFalse($result->complete);

    // Third try: resume again; this one is expected to finish.
    $result = $search->index_context(\context_course::instance($course->id), '', 1, $buffer,
            $result->startfromarea, $result->startfromtime);
    $log = $buffer->get_buffer();
    $buffer->reset_buffer();
    $this->assertNotFalse(strpos($log, "area: Forum - posts\n Processed 2 "));
    $this->assertTrue($result->complete);
}

/**
* Adding this test here as get_areas_user_accesses process is the same, results just depend on the context level.
*
Expand Down

0 comments on commit 4ba11aa

Please sign in to comment.