Commit

Merge branch 'MDL-66498_master' of git://github.com/dmonllao/moodle
andrewnicols committed Sep 12, 2019
2 parents b75e563 + abc745f commit 82704b0
Showing 5 changed files with 189 additions and 63 deletions.
67 changes: 40 additions & 27 deletions analytics/classes/manager.php
@@ -553,30 +553,32 @@ public static function add_builtin_models() {
public static function cleanup() {
global $DB;

// Clean up stuff that depends on contexts that do not exist anymore.
$sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
LEFT JOIN {context} ctx ON ap.contextid = ctx.id
WHERE ctx.id IS NULL";
$apcontexts = $DB->get_records_sql($sql);
$DB->execute("DELETE FROM {analytics_prediction_actions} WHERE predictionid IN
(SELECT ap.id FROM {analytics_predictions} ap
LEFT JOIN {context} ctx ON ap.contextid = ctx.id
WHERE ctx.id IS NULL)");

$sql = "SELECT DISTINCT aic.contextid FROM {analytics_indicator_calc} aic
LEFT JOIN {context} ctx ON aic.contextid = ctx.id
WHERE ctx.id IS NULL";
$indcalccontexts = $DB->get_records_sql($sql);

$contexts = $apcontexts + $indcalccontexts;
if ($contexts) {
list($sql, $params) = $DB->get_in_or_equal(array_keys($contexts));
$DB->execute("DELETE FROM {analytics_prediction_actions} WHERE predictionid IN
(SELECT ap.id FROM {analytics_predictions} ap WHERE ap.contextid $sql)", $params);

$DB->delete_records_select('analytics_predictions', "contextid $sql", $params);
$DB->delete_records_select('analytics_indicator_calc', "contextid $sql", $params);
}
$contextsql = "SELECT id FROM {context} ctx";
$DB->delete_records_select('analytics_predictions', "contextid NOT IN ($contextsql)");
$DB->delete_records_select('analytics_indicator_calc', "contextid NOT IN ($contextsql)");

// Clean up stuff that depends on analysable ids that do not exist anymore.

$models = self::get_all_models();
foreach ($models as $model) {

+ // We first dump into memory the list of analysables we have in the database (we could probably do this with a single
+ // query for the 3 tables, but it may be safer to do it separately).
+ $predictsamplesanalysableids = $DB->get_fieldset_select('analytics_predict_samples', 'DISTINCT analysableid',
+ 'modelid = :modelid', ['modelid' => $model->get_id()]);
+ $predictsamplesanalysableids = array_flip($predictsamplesanalysableids);
+ $trainsamplesanalysableids = $DB->get_fieldset_select('analytics_train_samples', 'DISTINCT analysableid',
+ 'modelid = :modelid', ['modelid' => $model->get_id()]);
+ $trainsamplesanalysableids = array_flip($trainsamplesanalysableids);
+ $usedanalysablesanalysableids = $DB->get_fieldset_select('analytics_used_analysables', 'DISTINCT analysableid',
+ 'modelid = :modelid', ['modelid' => $model->get_id()]);
+ $usedanalysablesanalysableids = array_flip($usedanalysablesanalysableids);

$analyser = $model->get_analyser(array('notimesplitting' => true));
$analysables = $analyser->get_analysables_iterator();

@@ -585,17 +587,28 @@ public static function cleanup() {
if (!$analysable) {
continue;
}
- $analysableids[] = $analysable->get_id();
- }
- if (empty($analysableids)) {
- continue;
+ unset($predictsamplesanalysableids[$analysable->get_id()]);
+ unset($trainsamplesanalysableids[$analysable->get_id()]);
+ unset($usedanalysablesanalysableids[$analysable->get_id()]);
}

- list($notinsql, $params) = $DB->get_in_or_equal($analysableids, SQL_PARAMS_NAMED, 'param', false);
- $params['modelid'] = $model->get_id();
+ $param = ['modelid' => $model->get_id()];

- $DB->delete_records_select('analytics_predict_samples', "modelid = :modelid AND analysableid $notinsql", $params);
- $DB->delete_records_select('analytics_train_samples', "modelid = :modelid AND analysableid $notinsql", $params);
+ if ($predictsamplesanalysableids) {
+ list($idssql, $idsparams) = $DB->get_in_or_equal(array_flip($predictsamplesanalysableids), SQL_PARAMS_NAMED);
+ $DB->delete_records_select('analytics_predict_samples', "modelid = :modelid AND analysableid $idssql",
+ $param + $idsparams);
+ }
+ if ($trainsamplesanalysableids) {
+ list($idssql, $idsparams) = $DB->get_in_or_equal(array_flip($trainsamplesanalysableids), SQL_PARAMS_NAMED);
+ $DB->delete_records_select('analytics_train_samples', "modelid = :modelid AND analysableid $idssql",
+ $param + $idsparams);
+ }
+ if ($usedanalysablesanalysableids) {
+ list($idssql, $idsparams) = $DB->get_in_or_equal(array_flip($usedanalysablesanalysableids), SQL_PARAMS_NAMED);
+ $DB->delete_records_select('analytics_used_analysables', "modelid = :modelid AND analysableid $idssql",
+ $param + $idsparams);
+ }
}
}
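
The rewritten cleanup() inverts the old query shape: rather than collecting every analysable id that still exists and running a NOT IN (...) delete whose parameter list grows with the site, it loads the ids already stored for each model, drops the ones the analyser still reports, and deletes only the leftovers with a bounded IN (...). A minimal plain-PHP sketch of that set-difference idea (illustrative only, not the Moodle API; the arrays stand in for database rows):

<?php
// Ids currently stored for a model, flipped so they become array keys and
// membership checks/unsets are O(1) (mirrors the array_flip() calls above).
$storedids = array_flip([3, 7, 12, 19, 25]);

// Ids the analyser still reports as existing.
$existingids = [7, 19];

// Remove every id that still exists; whatever remains is stale.
foreach ($existingids as $id) {
    unset($storedids[$id]);
}

$staleids = array_keys($storedids);
var_dump($staleids); // [3, 12, 25]: only these need a bounded "IN (...)" delete.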

116 changes: 88 additions & 28 deletions analytics/classes/model.php
@@ -1030,7 +1030,7 @@ protected function get_static_predictions(&$indicatorcalculations) {
}

// Get all samples data.
- list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);
+ list($sampleids, $samplesdata) = $this->get_samples($sampleids);

// Calculate the targets.
$predictions = array();
@@ -1344,7 +1344,7 @@ public function get_predictions(\context $context, $skiphidden = true, $page = f
return $prediction->sampleid;
}, $predictions);

- list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);
+ list($unused, $samplesdata) = $this->get_samples($sampleids);

$current = 0;

@@ -1410,7 +1410,7 @@ public function get_prediction_actions(?\context $context): \moodle_recordset {
*/
public function prediction_sample_data($predictionobj) {

- list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($predictionobj->sampleid));
+ list($unused, $samplesdata) = $this->get_samples(array($predictionobj->sampleid));

if (empty($samplesdata[$predictionobj->sampleid])) {
throw new \moodle_exception('errorsamplenotavailable', 'analytics');
@@ -1722,12 +1722,8 @@ public function clear() {
$predictor->clear_model($this->get_unique_id(), $this->get_output_dir());
}

- $predictionids = $DB->get_fieldset_select('analytics_predictions', 'id', 'modelid = :modelid',
- array('modelid' => $this->get_id()));
- if ($predictionids) {
- list($sql, $params) = $DB->get_in_or_equal($predictionids);
- $DB->delete_records_select('analytics_prediction_actions', "predictionid $sql", $params);
- }
+ $DB->delete_records_select('analytics_prediction_actions', "predictionid IN
+ (SELECT id FROM {analytics_predictions} WHERE modelid = :modelid)", ['modelid' => $this->get_id()]);

$DB->delete_records('analytics_predictions', array('modelid' => $this->model->id));
$DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
@@ -1833,30 +1829,94 @@ private function add_prediction_ids($predictionrecords) {
$contextids = array_map(function($predictionobj) {
return $predictionobj->contextid;
}, $predictionrecords);
- list($contextsql, $contextparams) = $DB->get_in_or_equal($contextids, SQL_PARAMS_NAMED);

- // We select the fields that will allow us to map ids to $predictionrecords. Given that we already filter by modelid
- // we have enough with sampleid and rangeindex. The reason is that the sampleid relation to a site is N - 1.
- $fields = 'id, sampleid, rangeindex';

- // We include the contextid and the timecreated filter to reduce the number of records in $dbpredictions. We can not
- // add as many OR conditions as records in $predictionrecords.
- $sql = "SELECT $fields
- FROM {analytics_predictions}
- WHERE modelid = :modelid
- AND contextid $contextsql
- AND timecreated >= :firsttimecreated";
- $params = $contextparams + ['modelid' => $this->model->id, 'firsttimecreated' => $firstprediction->timecreated];
- $dbpredictions = $DB->get_recordset_sql($sql, $params);
- foreach ($dbpredictions as $id => $dbprediction) {
- // The append_rangeindex implementation is the same regardless of the time splitting method in use.
- $uniqueid = $this->get_time_splitting()->append_rangeindex($dbprediction->sampleid, $dbprediction->rangeindex);
- $predictionrecords[$uniqueid]->id = $dbprediction->id;

+ // Limited to 30000 records as a middle point between the ~65000 params limit in pgsql and the size limit for mysql which
+ // can be increased if required up to a reasonable point.
+ $chunks = array_chunk($contextids, 30000);
+ foreach ($chunks as $contextidschunk) {
+ list($contextsql, $contextparams) = $DB->get_in_or_equal($contextidschunk, SQL_PARAMS_NAMED);

+ // We select the fields that will allow us to map ids to $predictionrecords. Given that we already filter by modelid
+ // we have enough with sampleid and rangeindex. The reason is that the sampleid relation to a site is N - 1.
+ $fields = 'id, sampleid, rangeindex';

+ // We include the contextid and the timecreated filter to reduce the number of records in $dbpredictions. We can not
+ // add as many OR conditions as records in $predictionrecords.
+ $sql = "SELECT $fields
+ FROM {analytics_predictions}
+ WHERE modelid = :modelid
+ AND contextid $contextsql
+ AND timecreated >= :firsttimecreated";
+ $params = $contextparams + ['modelid' => $this->model->id, 'firsttimecreated' => $firstprediction->timecreated];
+ $dbpredictions = $DB->get_recordset_sql($sql, $params);
+ foreach ($dbpredictions as $id => $dbprediction) {
+ // The append_rangeindex implementation is the same regardless of the time splitting method in use.
+ $uniqueid = $this->get_time_splitting()->append_rangeindex($dbprediction->sampleid, $dbprediction->rangeindex);
+ $predictionrecords[$uniqueid]->id = $dbprediction->id;
+ }
}

return $predictionrecords;
}

+ /**
+ * Wrapper around analyser's get_samples to skip DB's max-number-of-params exception.
+ *
+ * @param array $sampleids
+ * @return array
+ */
+ public function get_samples(array $sampleids): array {

+ if (empty($sampleids)) {
+ throw new \coding_exception('No sample ids provided');
+ }

+ $chunksize = count($sampleids);

+ // We start with just 1 chunk; if it is too large for the db we split the list of sampleids in 2 and we
+ // try again. We repeat this process until the chunk is small enough for the db engine to process. The
+ // >= has been added in case there are other \dml_read_exceptions unrelated to the max number of params.
+ while (empty($done) && $chunksize >= 1) {

+ $chunks = array_chunk($sampleids, $chunksize);
+ $allsampleids = [];
+ $allsamplesdata = [];

+ foreach ($chunks as $index => $chunk) {

+ try {
+ list($chunksampleids, $chunksamplesdata) = $this->get_analyser()->get_samples($chunk);
+ } catch (\dml_read_exception $e) {

+ // Reduce the chunksize; we use floor() so the $chunksize is always less than the previous $chunksize value.
+ $chunksize = floor($chunksize / 2);
+ break;
+ }

+ // We can sum as these two arrays are indexed by sampleid and there are no collisions.
+ $allsampleids = $allsampleids + $chunksampleids;
+ $allsamplesdata = $allsamplesdata + $chunksamplesdata;

+ if ($index === count($chunks) - 1) {
+ // We successfully processed all the samples in all chunks; we are done.
+ $done = true;
+ }
+ }
+ }

+ if (empty($done)) {
+ if (!empty($e)) {
+ // Throw the last exception we caught; the \dml_read_exception we have been catching is unrelated to the max number
+ // of params.
+ throw new \dml_read_exception($e);
+ } else {
+ throw new \coding_exception('We should never reach this point, there is a bug in ' .
+ 'core_analytics\\model::get_samples\'s code');
+ }
+ }
+ return [$allsampleids, $allsamplesdata];
+ }

/**
* Purges the insights cache.
*/
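
The new model::get_samples() wrapper above is the heart of the change: it asks the analyser for everything at once and, when the driver throws a \dml_read_exception (typically the max-number-of-bound-parameters limit), halves the chunk size and retries until every chunk goes through, while add_prediction_ids() settles for a fixed 30000-id chunk. A self-contained plain-PHP sketch of the halving strategy; fake_fetch() and its 1000-parameter cap are invented stand-ins for the analyser call and the real driver limit:

<?php
// Stand-in for the analyser: rejects requests that would bind too many parameters.
function fake_fetch(array $ids): array {
    if (count($ids) > 1000) { // Invented cap; real limits depend on the DB driver.
        throw new RuntimeException('too many parameters');
    }
    return array_combine($ids, array_fill(0, count($ids), 'data'));
}

$sampleids = range(1, 5000);
$chunksize = count($sampleids);
$done = false;
$all = [];

while (!$done && $chunksize >= 1) {
    $chunks = array_chunk($sampleids, $chunksize);
    $all = [];
    foreach ($chunks as $index => $chunk) {
        try {
            $all += fake_fetch($chunk); // '+' is safe: chunks never share sample ids.
        } catch (RuntimeException $e) {
            $chunksize = (int) floor($chunksize / 2); // Halve and start over.
            break;
        }
        $done = ($index === count($chunks) - 1); // True only once the last chunk succeeds.
    }
}

echo count($all) . " samples fetched using chunks of $chunksize\n"; // 5000 ... 625

Fixed-size chunking (the 30000 used in add_prediction_ids()) is simpler but has to guess a size that suits every backend; the halving approach only pays the retry cost when a limit is actually hit.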
8 changes: 2 additions & 6 deletions analytics/classes/privacy/provider.php
@@ -306,13 +306,9 @@ public static function delete_data_for_all_users_in_context(\context $context) {
$idssql = "SELECT ap.id FROM {analytics_predictions} ap
WHERE ap.contextid = :contextid AND ap.modelid = :modelid";
$idsparams = ['contextid' => $context->id, 'modelid' => $modelid];
- $predictionids = $DB->get_fieldset_sql($idssql, $idsparams);
- if ($predictionids) {
- list($predictionidssql, $params) = $DB->get_in_or_equal($predictionids, SQL_PARAMS_NAMED);

- $DB->delete_records_select('analytics_prediction_actions', "predictionid IN ($idssql)", $idsparams);
- $DB->delete_records_select('analytics_predictions', "id $predictionidssql", $params);
- }
+ $DB->delete_records_select('analytics_prediction_actions', "predictionid IN ($idssql)", $idsparams);
+ $DB->delete_records_select('analytics_predictions', "contextid = :contextid AND modelid = :modelid", $idsparams);
}

// We delete them all; this table is just a cache and we don't know which model filled it.
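
The privacy provider follows the same pattern: instead of fetching prediction ids into PHP and expanding them with get_in_or_equal(), both deletes now reuse the subquery, so the number of bound parameters stays constant no matter how many predictions a context holds. A rough, database-free PHP illustration of the difference (the 80000-row figure is hypothetical; the ~65000 figure is the pgsql limit quoted in the model.php comment above):

<?php
// Hypothetical number of predictions in one context.
$predictionids = range(1, 80000);

// Old shape: one placeholder per id, which can blow past driver limits (~65000 in pgsql).
$placeholders = implode(',', array_fill(0, count($predictionids), '?'));
$oldsql = "DELETE FROM analytics_prediction_actions WHERE predictionid IN ($placeholders)";

// New shape: the id list never leaves the database, so only two named parameters are bound.
$newsql = "DELETE FROM analytics_prediction_actions WHERE predictionid IN
               (SELECT ap.id FROM analytics_predictions ap
                 WHERE ap.contextid = :contextid AND ap.modelid = :modelid)";

echo 'old query bound parameters: ' . count($predictionids) . "\n"; // 80000
echo 'new query bound parameters: 2' . "\n";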
6 changes: 4 additions & 2 deletions analytics/tests/manager_test.php
@@ -134,8 +134,9 @@ public function test_deleted_analysable() {
$model->train();
$model->predict();

- $npredictsamples = $DB->count_records('analytics_predict_samples');
- $ntrainsamples = $DB->count_records('analytics_train_samples');
+ $this->assertNotEmpty($DB->count_records('analytics_predict_samples'));
+ $this->assertNotEmpty($DB->count_records('analytics_train_samples'));
+ $this->assertNotEmpty($DB->count_records('analytics_used_analysables'));

// Now we delete an analysable, stored predict and training samples should be deleted.
$deletedcontext = \context_course::instance($coursepredict1->id);
@@ -145,6 +146,7 @@

$this->assertEmpty($DB->count_records('analytics_predict_samples', array('analysableid' => $coursepredict1->id)));
$this->assertEmpty($DB->count_records('analytics_train_samples', array('analysableid' => $coursepredict1->id)));
+ $this->assertEmpty($DB->count_records('analytics_used_analysables', array('analysableid' => $coursepredict1->id)));

set_config('enabled_stores', '', 'tool_log');
get_log_manager(true);
55 changes: 55 additions & 0 deletions analytics/tests/model_test.php
@@ -505,6 +505,61 @@ public function test_potential_timesplittings() {
$this->assertArrayHasKey('\core\analytics\time_splitting\quarters', $this->model->get_potential_timesplittings());
}

+ /**
+ * Tests model::get_samples()
+ *
+ * @return null
+ */
+ public function test_get_samples() {
+ $this->resetAfterTest();

+ if (!PHPUNIT_LONGTEST) {
+ $this->markTestSkipped('PHPUNIT_LONGTEST is not defined');
+ }

+ // 10000 should be enough to make oracle and mssql fail; if we want pgsql to fail we need around 70000
+ // users, which is a few minutes just to create the users.
+ $nusers = 10000;

+ $userids = [];
+ for ($i = 0; $i < $nusers; $i++) {
+ $user = $this->getDataGenerator()->create_user();
+ $userids[] = $user->id;
+ }

+ $upcomingactivities = null;
+ foreach (\core_analytics\manager::get_all_models() as $model) {
+ if (get_class($model->get_target()) === 'core_user\\analytics\\target\\upcoming_activities_due') {
+ $upcomingactivities = $model;
+ }
+ }

+ list($sampleids, $samplesdata) = $upcomingactivities->get_samples($userids);
+ $this->assertCount($nusers, $sampleids);
+ $this->assertCount($nusers, $samplesdata);

+ $subset = array_slice($userids, 0, 100);
+ list($sampleids, $samplesdata) = $upcomingactivities->get_samples($subset);
+ $this->assertCount(100, $sampleids);
+ $this->assertCount(100, $samplesdata);

+ $subset = array_slice($userids, 0, 2);
+ list($sampleids, $samplesdata) = $upcomingactivities->get_samples($subset);
+ $this->assertCount(2, $sampleids);
+ $this->assertCount(2, $samplesdata);

+ $subset = array_slice($userids, 0, 1);
+ list($sampleids, $samplesdata) = $upcomingactivities->get_samples($subset);
+ $this->assertCount(1, $sampleids);
+ $this->assertCount(1, $samplesdata);

+ // Non-existing, so nothing returned, but still 2 arrays.
+ list($sampleids, $samplesdata) = $upcomingactivities->get_samples([1231231231231231]);
+ $this->assertEmpty($sampleids);
+ $this->assertEmpty($samplesdata);

+ }

/**
* Generates a model log record.
*/
