Permalink
Browse files

MDL-59694 analytics: Track processed analysables

  • Loading branch information...
dmonllao committed Sep 29, 2017
1 parent d8e9a23 commit dd13fc222cc67b27abce1106a81622b870c12c34
@@ -101,5 +101,10 @@
// Disable web interface evaluation and get predictions.
$settings->add(new admin_setting_configcheckbox('analytics/onlycli', new lang_string('onlycli', 'analytics'),
new lang_string('onlycliinfo', 'analytics'), 1));
// Training and prediction time limit per model.
$settings->add(new admin_setting_configduration('analytics/modeltimelimit', new lang_string('modeltimelimit', 'analytics'),
new lang_string('modeltimelimitinfo', 'analytics'), 20 * MINSECS));
}
}
@@ -118,7 +118,7 @@ public function __construct($modelid, \core_analytics\local\target\base $target,
* \core_analytics\local\analyser\by_course and \core_analytics\local\analyser\sitewide are implementing
* this method returning site courses (by_course) and the whole system (sitewide) as analysables.
*
* @return \core_analytics\analysable[]
* @return \core_analytics\analysable[] Array of analysable elements using the analysable id as array key.
*/
abstract public function get_analysables();
@@ -180,10 +180,16 @@ public function __construct($modelid, \core_analytics\local\target\base $target,
* @return \stored_file[]
*/
public function get_analysable_data($includetarget) {
global $DB;
// Time limit control.
$modeltimelimit = intval(get_config('analytics', 'modeltimelimit'));
$filesbytimesplitting = array();
$analysables = $this->get_analysables();
list($analysables, $processedanalysables) = $this->get_sorted_analysables($includetarget);
$inittime = time();
foreach ($analysables as $analysable) {
$files = $this->process_analysable($analysable, $includetarget);
@@ -192,6 +198,16 @@ public function get_analysable_data($includetarget) {
foreach ($files as $timesplittingid => $file) {
$filesbytimesplitting[$timesplittingid][$analysable->get_id()] = $file;
}
$this->update_analysable_analysed_time($processedanalysables, $analysable->get_id(), $includetarget);
// Apply time limit.
if (!$this->options['evaluation']) {
$timespent = time() - $inittime;
if ($modeltimelimit <= $timespent) {
break;
}
}
}
// We join the datasets by time splitting method.
@@ -721,4 +737,86 @@ protected function add_model_metadata(&$data) {
$data[1][] = $value;
}
}
/**
 * Builds the processing queue of analysables for this run.
 *
 * Analysables that have no analytics_used_analysables record for this model and
 * action are placed first. The ones that were already processed follow, keeping the
 * order in which get_processed_analysables() returned their records (timeanalysed DESC).
 *
 * @param bool $includetarget Forwarded to get_processed_analysables() to select the action.
 * @return array(0 => \core_analytics\analysable[], 1 => \stdClass[]) The sorted analysables and the
 *               processed analysables records, both keyed by analysable id.
 */
protected function get_sorted_analysables($includetarget) {
    $all = $this->get_analysables();
    $processedrecords = $this->get_processed_analysables($includetarget);

    // Never processed analysables go first, in the order get_analysables() returned them.
    $sorted = array_diff_key($all, $processedrecords);

    // Then the analysables that still exist and have a record, following the records order.
    // Each record is swapped for the analysable object it refers to.
    foreach ($processedrecords as $analysableid => $unusedrecord) {
        if (array_key_exists($analysableid, $all)) {
            $sorted[$analysableid] = $all[$analysableid];
        }
    }

    return array($sorted, $processedrecords);
}
/**
 * Fetches this model's analytics_used_analysables records for one action.
 *
 * @param bool $includetarget True selects the 'training' records, false the 'prediction' ones.
 * @return \stdClass[] Records sorted by timeanalysed DESC. The table row id is exposed as 'primarykey'.
 */
protected function get_processed_analysables($includetarget) {
    global $DB;

    $action = 'prediction';
    if ($includetarget) {
        $action = 'training';
    }
    $conditions = array('modelid' => $this->modelid, 'action' => $action);
    $where = 'modelid = :modelid and action = :action';

    // The analysableid field is deliberately listed first so it can be used for key matching
    // (it is unique by modelid); that is why the real row id is aliased to primarykey.
    $fields = 'analysableid, modelid, action, timeanalysed, id AS primarykey';

    return $DB->get_records_select('analytics_used_analysables', $where,
        $conditions, 'timeanalysed DESC', $fields);
}
/**
 * Stores the current time as the analysis time of an analysable.
 *
 * If $processedanalysables already contains a record for the analysable, that row is
 * updated; otherwise a new analytics_used_analysables row is inserted.
 *
 * The records in $processedanalysables are not modified: they are objects shared with
 * the caller, and rewriting them in place (primarykey -> id) would break any later call
 * for the same analysable.
 *
 * @param array $processedanalysables Records as returned by get_processed_analysables(), keyed by
 *              analysable id, with the table row id available as 'primarykey'.
 * @param int $analysableid
 * @param bool $includetarget True for the 'training' action, false for 'prediction'.
 * @return void
 */
protected function update_analysable_analysed_time($processedanalysables, $analysableid, $includetarget) {
    global $DB;

    $now = time();

    if (!empty($processedanalysables[$analysableid])) {
        // Only timeanalysed changes, so a minimal record with the row id is enough.
        $record = new \stdClass();
        $record->id = $processedanalysables[$analysableid]->primarykey;
        $record->timeanalysed = $now;
        $DB->update_record('analytics_used_analysables', $record);
        return;
    }

    $record = new \stdClass();
    $record->modelid = $this->modelid;
    $record->action = ($includetarget) ? 'training' : 'prediction';
    $record->analysableid = $analysableid;
    $record->timeanalysed = $now;
    $DB->insert_record('analytics_used_analysables', $record);
}
}
@@ -55,7 +55,7 @@ public function get_analysables() {
foreach ($courses as $course) {
// Skip the frontpage course.
$analysable = \core_analytics\course::instance($course);
$analysables[] = $analysable;
$analysables[$analysable->get_id()] = $analysable;
}
if (empty($analysables)) {
@@ -42,6 +42,6 @@
*/
public function get_analysables() {
    // The returned array must be keyed by analysable id so it can be matched by key against
    // the already processed analysables. A plain array($analysable) would key it by 0.
    // NOTE(review): SYSCONTEXTID is assumed to match \core_analytics\site::get_id() - confirm.
    $analysable = new \core_analytics\site();
    return array(SYSCONTEXTID => $analysable);
}
}
@@ -1446,6 +1446,7 @@ private function clear_model() {
$DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
$DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
$DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
$DB->delete_records('analytics_used_analysables', array('modelid' => $this->model->id));
// Purge all generated files.
\core_analytics\dataset_manager::clear_model_files($this->model->id);
@@ -0,0 +1,48 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Test analyser.
*
* @package core_analytics
* @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
defined('MOODLE_INTERNAL') || die();
/**
 * Analyser fixture that makes the processing of every analysable artificially slow.
 *
 * @package core_analytics
 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class test_analyser extends \core\analytics\analyser\courses {

    /**
     * Waits 1.1 seconds and then delegates to the parent implementation.
     *
     * @param \core_analytics\analysable $analysable
     * @param bool $includetarget
     * @return mixed Whatever the parent implementation returns.
     */
    public function process_analysable($analysable, $includetarget) {
        // Slightly over one second, expressed in microseconds.
        $delaymicrosecs = 1100000;
        usleep($delaymicrosecs);

        $result = parent::process_analysable($analysable, $includetarget);
        return $result;
    }
}
@@ -28,6 +28,8 @@
require_once(__DIR__ . '/fixtures/test_indicator_min.php');
require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
require_once(__DIR__ . '/fixtures/test_target_shortname.php');
require_once(__DIR__ . '/fixtures/test_target_course_level_shortname.php');
require_once(__DIR__ . '/fixtures/test_analyser.php');
/**
* Unit tests for the model.
@@ -256,6 +258,64 @@ public function test_exists() {
$this->assertTrue(\core_analytics\model::exists($target));
}
/**
 * Checks how the analytics/modeltimelimit setting limits get_analysable_data() runs.
 *
 * Uses the slow test_analyser fixture and counts the analytics_used_analysables records
 * left after consecutive runs, for both the prediction and the training actions.
 *
 * @return void
 */
public function test_model_timelimit() {
global $DB;
$this->resetAfterTest(true);
// Time limit value used by get_analysable_data() for each run.
set_config('modeltimelimit', 2, 'analytics');
// Courses indexed by their analysable id, so used analysables records can be mapped back to courses.
$courses = array();
for ($i = 0; $i < 5; $i++) {
$course = $this->getDataGenerator()->create_course();
$analysable = new \core_analytics\course($course);
$courses[$analysable->get_id()] = $course;
}
$target = new test_target_course_level_shortname();
// The first argument is the model id the used analysables records are stored under.
$analyser = new test_analyser(1, $target, [], [], []);
// Each analysable element takes 1.1 secs, so the max (and likely) number of analysable
// elements that will be processed is 2.
$analyser->get_analysable_data(false);
$params = array('modelid' => 1, 'action' => 'prediction');
$this->assertLessThanOrEqual(2, $DB->count_records('analytics_used_analysables', $params));
// A second run can add, at most, 2 more records.
$analyser->get_analysable_data(false);
$this->assertLessThanOrEqual(4, $DB->count_records('analytics_used_analysables', $params));
// Check that analysable elements have been processed following the analyser order
// (course->sortorder here). We can not check this nicely after next get_analysable_data round
// because the first analysed element will be analysed again.
$analysedelems = $DB->get_records('analytics_used_analysables', $params, 'timeanalysed ASC');
// Just a default for the first checked element.
$last = (object)['sortorder' => PHP_INT_MAX];
foreach ($analysedelems as $analysed) {
// Walking the records by timeanalysed ASC, the course sortorder must never increase.
if ($courses[$analysed->analysableid]->sortorder > $last->sortorder) {
$this->fail('Analysable elements have not been analysed sorted by course sortorder.');
}
$last = $courses[$analysed->analysableid];
}
// After the third run all 5 courses are expected to have a record.
$analyser->get_analysable_data(false);
$this->assertGreaterThanOrEqual(5, $DB->count_records('analytics_used_analysables', $params));
// New analysable elements are immediately pulled.
$this->getDataGenerator()->create_course();
$analyser->get_analysable_data(false);
$this->assertGreaterThanOrEqual(6, $DB->count_records('analytics_used_analysables', $params));
// Training and prediction data do not get mixed.
$analyser->get_analysable_data(true);
$params = array('modelid' => 1, 'action' => 'training');
$this->assertLessThanOrEqual(2, $DB->count_records('analytics_used_analysables', $params));
}
/**
* Generates a model log record.
*/
View
@@ -64,6 +64,8 @@
$string['nocourses'] = 'No courses to analyse';
$string['modeloutputdir'] = 'Models output directory';
$string['modeloutputdirinfo'] = 'Directory where prediction processors store all evaluation info. Useful for debugging and research.';
$string['modeltimelimit'] = 'Analysis time limit per model';
$string['modeltimelimitinfo'] = 'This setting limits the time each model spends analysing the site contents.';
$string['noevaluationbasedassumptions'] = 'Models based on assumptions cannot be evaluated.';
$string['nodata'] = 'No data to analyse';
$string['noinsightsmodel'] = 'This model does not generate insights';
View
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" ?>
<XMLDB PATH="lib/db" VERSION="20170921" COMMENT="XMLDB file for core Moodle tables"
<XMLDB PATH="lib/db" VERSION="20170929" COMMENT="XMLDB file for core Moodle tables"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="../../lib/xmldb/xmldb.xsd"
>
@@ -3720,5 +3720,21 @@
<INDEX NAME="predictionidanduseridandactionname" UNIQUE="false" FIELDS="predictionid, userid, actionname"/>
</INDEXES>
</TABLE>
<TABLE NAME="analytics_used_analysables" COMMENT="List of analysables used by each model">
<FIELDS>
<FIELD NAME="id" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="true"/>
<FIELD NAME="modelid" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="action" TYPE="char" LENGTH="50" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="analysableid" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="timeanalysed" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="false"/>
</FIELDS>
<KEYS>
<KEY NAME="primary" TYPE="primary" FIELDS="id"/>
<KEY NAME="modelid" TYPE="foreign" FIELDS="modelid" REFTABLE="analytics_models" REFFIELDS="id"/>
</KEYS>
<INDEXES>
<INDEX NAME="modelid-action" UNIQUE="false" FIELDS="modelid, action"/>
</INDEXES>
</TABLE>
</TABLES>
</XMLDB>
</XMLDB>
View
@@ -2601,5 +2601,33 @@ function xmldb_main_upgrade($oldversion) {
upgrade_main_savepoint(true, 2017092900.00);
}
if ($oldversion < 2017100600.02) {
// Define table analytics_used_analysables to be created.
$table = new xmldb_table('analytics_used_analysables');
// Adding fields to table analytics_used_analysables.
$table->add_field('id', XMLDB_TYPE_INTEGER, '10', null, XMLDB_NOTNULL, XMLDB_SEQUENCE, null);
$table->add_field('modelid', XMLDB_TYPE_INTEGER, '10', null, XMLDB_NOTNULL, null, null);
$table->add_field('action', XMLDB_TYPE_CHAR, '50', null, XMLDB_NOTNULL, null, null);
$table->add_field('analysableid', XMLDB_TYPE_INTEGER, '10', null, XMLDB_NOTNULL, null, null);
$table->add_field('timeanalysed', XMLDB_TYPE_INTEGER, '10', null, XMLDB_NOTNULL, null, null);
// Adding keys to table analytics_used_analysables.
$table->add_key('primary', XMLDB_KEY_PRIMARY, array('id'));
$table->add_key('modelid', XMLDB_KEY_FOREIGN, array('modelid'), 'analytics_models', array('id'));
// Adding indexes to table analytics_used_analysables.
$table->add_index('modelid-action', XMLDB_INDEX_NOTUNIQUE, array('modelid', 'action'));
// Conditionally launch create table for analytics_used_analysables.
if (!$dbman->table_exists($table)) {
$dbman->create_table($table);
}
// Main savepoint reached.
upgrade_main_savepoint(true, 2017100600.02);
}
return true;
}
View
@@ -29,7 +29,7 @@
defined('MOODLE_INTERNAL') || die();
$version = 2017100600.00; // YYYYMMDD = weekly release date of this DEV branch.
$version = 2017100600.02; // YYYYMMDD = weekly release date of this DEV branch.
// RR = release increments - 00 in DEV branches.
// .XX = incremental changes.

0 comments on commit dd13fc2

Please sign in to comment.