Skip to content
Permalink
Browse files

MDL-60981 core_search: Add get_contexts_to_reindex API

This new API returns a list of contexts for each search area. This
allows the areas to be reindexed in a sensible order (roughly
speaking, newest first) and also allows this to be controlled by
each area.

An implementation in the forum module means that forums are ordered
by the date of the most recent discussion, so that active forums
will be reindexed early even if they were created a long time ago.
  • Loading branch information...
sammarshallou committed Dec 7, 2017
1 parent 9993c1d commit 25564a784b9da372ad1147d638ff5d0990e53286
@@ -292,4 +292,17 @@ protected function get_discussion($discussionid) {
}
return $this->discussionsdata[$discussionid];
}
/**
* Changes the context ordering so that the forums with most recent discussions are indexed
* first.
*
* @return string[] SQL join and ORDER BY
*/
protected function get_contexts_to_reindex_extra_sql() {
return [
'JOIN {forum_discussions} fd ON fd.course = cm.course AND fd.forum = cm.instance',
'MAX(fd.timemodified) DESC'
];
}
}
@@ -390,4 +390,62 @@ public function test_attach_files() {
$recordset->close();
$this->assertEquals(3, $nrecords);
}
/**
* Tests that reindexing works in order starting from the forum with most recent discussion.
*/
public function test_posts_get_contexts_to_reindex() {
global $DB;
$generator = $this->getDataGenerator();
$adminuser = get_admin();
$course1 = $generator->create_course();
$course2 = $generator->create_course();
$time = time() - 1000;
// Create 3 forums (two in course 1, one in course 2 - doesn't make a difference).
$forum1 = $generator->create_module('forum', ['course' => $course1->id]);
$forum2 = $generator->create_module('forum', ['course' => $course1->id]);
$forum3 = $generator->create_module('forum', ['course' => $course2->id]);
$forum4 = $generator->create_module('forum', ['course' => $course2->id]);
// Hack added time for the course_modules entries. These should not be used (they would
// be used by the base class implementation). We are setting this so that the order would
// be 4, 3, 2, 1 if this ordering were used (newest first).
$DB->set_field('course_modules', 'added', $time + 100, ['id' => $forum1->cmid]);
$DB->set_field('course_modules', 'added', $time + 110, ['id' => $forum2->cmid]);
$DB->set_field('course_modules', 'added', $time + 120, ['id' => $forum3->cmid]);
$DB->set_field('course_modules', 'added', $time + 130, ['id' => $forum4->cmid]);
$forumgenerator = $generator->get_plugin_generator('mod_forum');
// Create one discussion in forums 1 and 3, three in forum 2, and none in forum 4.
$forumgenerator->create_discussion(['course' => $course1->id,
'forum' => $forum1->id, 'userid' => $adminuser->id, 'timemodified' => $time + 20]);
$forumgenerator->create_discussion(['course' => $course1->id,
'forum' => $forum2->id, 'userid' => $adminuser->id, 'timemodified' => $time + 10]);
$forumgenerator->create_discussion(['course' => $course1->id,
'forum' => $forum2->id, 'userid' => $adminuser->id, 'timemodified' => $time + 30]);
$forumgenerator->create_discussion(['course' => $course1->id,
'forum' => $forum2->id, 'userid' => $adminuser->id, 'timemodified' => $time + 11]);
$forumgenerator->create_discussion(['course' => $course2->id,
'forum' => $forum3->id, 'userid' => $adminuser->id, 'timemodified' => $time + 25]);
// Get the contexts in reindex order.
$area = \core_search\manager::get_search_area($this->forumpostareaid);
$contexts = iterator_to_array($area->get_contexts_to_reindex(), false);
// We expect them in order of newest discussion. Forum 4 is not included at all (which is
// correct because it has no content).
$expected = [
\context_module::instance($forum2->cmid),
\context_module::instance($forum3->cmid),
\context_module::instance($forum1->cmid)
];
$this->assertEquals($expected, $contexts);
}
}
@@ -285,6 +285,14 @@ public function get_recordset_by_timestamp($modifiedfrom = 0) {
* The default implementation returns false, indicating that this facility is not supported and
* the older get_recordset_by_timestamp function should be used.
*
* This function must accept all possible values for the $context parameter. For example, if
* you are implementing this function for the forum module, it should still operate correctly
* if called with the context for a glossary module, or for the HTML block. (In these cases
* where it will not return any data, it may return null.)
*
* The $context parameter can also be null or the system context; both of these indicate that
* all data, without context restriction, should be returned.
*
* @param int $modifiedfrom Return only records modified after this date
* @param \context|null $context Context (null means no context restriction)
* @return \moodle_recordset|null|false Recordset / null if no results / false if not supported
@@ -474,4 +482,18 @@ protected function get_course_level_context_restriction_sql(\context $context =
return [$sql, $params];
}
/**
* Gets a list of all contexts to reindex when reindexing this search area. The list should be
* returned in an order that is likely to be suitable when reindexing, for example with newer
* contexts first.
*
* The default implementation simply returns the system context, which will result in
* reindexing everything in normal date order (oldest first).
*
* @return \Iterator Iterator of contexts to reindex
*/
public function get_contexts_to_reindex() {
return new \ArrayIterator([\context_system::instance()]);
}
}
@@ -343,4 +343,59 @@ protected function get_context_restriction_sql(\context $context = null, $blockt
return [$sql, $params];
}
/**
* This can be used in subclasses to change ordering within the get_contexts_to_reindex
* function.
*
* It returns 2 values:
* - Extra SQL joins (tables block_instances 'bi' and context 'x' already exist).
* - An ORDER BY value which must use aggregate functions, by default 'MAX(bi.timemodified) DESC'.
*
* Note the query already includes a GROUP BY on the context fields, so if your joins result
* in multiple rows, you can use aggregate functions in the ORDER BY. See forum for an example.
*
* @return string[] Array with 2 elements; extra joins for the query, and ORDER BY value
*/
protected function get_contexts_to_reindex_extra_sql() {
return ['', 'MAX(bi.timemodified) DESC'];
}
/**
* Gets a list of all contexts to reindex when reindexing this search area.
*
* For blocks, the default is to return all contexts for blocks of that type, that are on a
* course page, in order of time added (most recent first).
*
* @return \Iterator Iterator of contexts to reindex
* @throws \moodle_exception If any DB error
*/
public function get_contexts_to_reindex() {
global $DB;
list ($extrajoins, $dborder) = $this->get_contexts_to_reindex_extra_sql();
$contexts = [];
$selectcolumns = \context_helper::get_preload_record_columns_sql('x');
$groupbycolumns = '';
foreach (\context_helper::get_preload_record_columns('x') as $column => $thing) {
if ($groupbycolumns !== '') {
$groupbycolumns .= ',';
}
$groupbycolumns .= $column;
}
$rs = $DB->get_recordset_sql("
SELECT $selectcolumns
FROM {block_instances} bi
JOIN {context} x ON x.instanceid = bi.id AND x.contextlevel = ?
JOIN {context} parent ON parent.id = bi.parentcontextid
$extrajoins
WHERE bi.blockname = ? AND parent.contextlevel = ?
GROUP BY $groupbycolumns
ORDER BY $dborder", [CONTEXT_BLOCK, $this->get_block_name(), CONTEXT_COURSE]);
return new \core\dml\recordset_walk($rs, function($rec) {
$id = $rec->ctxid;
\context_helper::preload_from_record($rec);
return \context::instance_by_id($id);
});
}
}
@@ -192,4 +192,57 @@ protected function get_context_restriction_sql(\context $context = null, $modnam
return [$sql, $params];
}
/**
* This can be used in subclasses to change ordering within the get_contexts_to_reindex
* function.
*
* It returns 2 values:
* - Extra SQL joins (tables course_modules 'cm' and context 'x' already exist).
* - An ORDER BY value which must use aggregate functions, by default 'MAX(cm.added) DESC'.
*
* Note the query already includes a GROUP BY on the context fields, so if your joins result
* in multiple rows, you can use aggregate functions in the ORDER BY. See forum for an example.
*
* @return string[] Array with 2 elements; extra joins for the query, and ORDER BY value
*/
protected function get_contexts_to_reindex_extra_sql() {
return ['', 'MAX(cm.added) DESC'];
}
/**
* Gets a list of all contexts to reindex when reindexing this search area.
*
* For modules, the default is to return all contexts for modules of that type, in order of
* time added (most recent first).
*
* @return \Iterator Iterator of contexts to reindex
* @throws \moodle_exception If any DB error
*/
public function get_contexts_to_reindex() {
global $DB;
list ($extrajoins, $dborder) = $this->get_contexts_to_reindex_extra_sql();
$contexts = [];
$selectcolumns = \context_helper::get_preload_record_columns_sql('x');
$groupbycolumns = '';
foreach (\context_helper::get_preload_record_columns('x') as $column => $thing) {
if ($groupbycolumns !== '') {
$groupbycolumns .= ',';
}
$groupbycolumns .= $column;
}
$rs = $DB->get_recordset_sql("
SELECT $selectcolumns
FROM {course_modules} cm
JOIN {context} x ON x.instanceid = cm.id AND x.contextlevel = ?
$extrajoins
WHERE cm.module = (SELECT id FROM {modules} WHERE name = ?)
GROUP BY $groupbycolumns
ORDER BY $dborder", [CONTEXT_MODULE, $this->get_module_name()]);
return new \core\dml\recordset_walk($rs, function($rec) {
$id = $rec->ctxid;
\context_helper::preload_from_record($rec);
return \context::instance_by_id($id);
});
}
}
@@ -334,4 +334,43 @@ public function test_check_access() {
// that context in the list to search. (This is because the $cm->uservisible access flag
// is only valid if the user is known to be able to access the course.)
}
/**
* Tests the module version of get_contexts_to_reindex, which is supposed to return all the
* activity contexts in order of date added.
*/
public function test_get_contexts_to_reindex() {
global $DB;
$this->resetAfterTest();
// Set up a course with two URLs and a Page.
$generator = $this->getDataGenerator();
$course = $generator->create_course(['fullname' => 'TCourse']);
$url1 = $generator->create_module('url', ['course' => $course->id, 'name' => 'TURL1']);
$url2 = $generator->create_module('url', ['course' => $course->id, 'name' => 'TURL2']);
$page = $generator->create_module('page', ['course' => $course->id, 'name' => 'TPage1']);
// Hack the items so they have different added times.
$now = time();
$DB->set_field('course_modules', 'added', $now - 3, ['id' => $url2->cmid]);
$DB->set_field('course_modules', 'added', $now - 2, ['id' => $url1->cmid]);
$DB->set_field('course_modules', 'added', $now - 1, ['id' => $page->cmid]);
// Check the URL contexts are in date order.
$urlarea = new \mod_url\search\activity();
$contexts = iterator_to_array($urlarea->get_contexts_to_reindex(), false);
$this->assertEquals([\context_module::instance($url1->cmid),
\context_module::instance($url2->cmid)], $contexts);
// Check the Page contexts.
$pagearea = new \mod_page\search\activity();
$contexts = iterator_to_array($pagearea->get_contexts_to_reindex(), false);
$this->assertEquals([\context_module::instance($page->cmid)], $contexts);
// Check another module area that has no instances.
$glossaryarea = new \mod_glossary\search\activity();
$contexts = iterator_to_array($glossaryarea->get_contexts_to_reindex(), false);
$this->assertEquals([], $contexts);
}
}
@@ -384,6 +384,52 @@ public function test_check_access() {
$this->assertEquals(\core_search\manager::ACCESS_DELETED, $area->check_access($blockid));
}
/**
* Tests the block version of get_contexts_to_reindex, which is supposed to return all the
* block contexts in order of date added.
*/
public function test_get_contexts_to_reindex() {
global $DB;
$this->resetAfterTest();
// Create course and activity module.
$generator = $this->getDataGenerator();
$course = $generator->create_course();
$coursecontext = \context_course::instance($course->id);
$page = $generator->create_module('page', ['course' => $course->id]);
$pagecontext = \context_module::instance($page->cmid);
// Create blocks on course page, with time modified non-sequential.
$configdata = base64_encode(serialize(new \stdClass()));
$instance = (object)['blockname' => 'mockblock', 'parentcontextid' => $coursecontext->id,
'showinsubcontexts' => 0, 'pagetypepattern' => 'course-view-*', 'defaultweight' => 0,
'timecreated' => 1, 'timemodified' => 100, 'configdata' => $configdata];
$blockid1 = $DB->insert_record('block_instances', $instance);
$context1 = \context_block::instance($blockid1);
$instance->timemodified = 120;
$blockid2 = $DB->insert_record('block_instances', $instance);
$context2 = \context_block::instance($blockid2);
$instance->timemodified = 110;
$blockid3 = $DB->insert_record('block_instances', $instance);
$context3 = \context_block::instance($blockid3);
// Create another block on the activity page (not included).
$instance->parentcontextid = $pagecontext->id;
$blockid4 = $DB->insert_record('block_instances', $instance);
\context_block::instance($blockid4);
// Check list of contexts.
$area = new block_mockblock\search\area();
$contexts = iterator_to_array($area->get_contexts_to_reindex(), false);
$expected = [
$context2,
$context3,
$context1
];
$this->assertEquals($expected, $contexts);
}
/**
* Gets a search document object from the fake search area.
*
@@ -130,4 +130,13 @@ public function test_attach_files_base() {
$this->assertEquals(1, count($files));
$this->assertEquals($content, $file->get_content());
}
/**
* Tests the base version (stub) of get_contexts_to_reindex.
*/
public function test_get_contexts_to_reindex() {
$area = new core_mocksearch\search\mock_search_area();
$this->assertEquals([\context_system::instance()],
iterator_to_array($area->get_contexts_to_reindex(), false));
}
}
@@ -1,6 +1,13 @@
This files describes API changes in /search/*,
information provided here is intended especially for developers.

=== 3.5 ===

* Search areas may now optionally implement the get_contexts_to_reindex function (for modules and
blocks, see also get_contexts_to_reindex_extra_sql). This allows a search area to customise the
order in which it is reindexed when doing a gradual reindex, so as to reindex the most important
contexts first.

=== 3.4 ===

* Search indexing now supports time limits to make the scheduled task run more neatly. In order for

0 comments on commit 25564a7

Please sign in to comment.
You can’t perform that action at this time.