Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent possible errors when exporting datasets in GDPR tool #19056

Merged
merged 2 commits into from Apr 12, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
129 changes: 79 additions & 50 deletions plugins/PrivacyManager/Model/DataSubjects.php
@@ -1,11 +1,13 @@
<?php

/**
* Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/

namespace Piwik\Plugins\PrivacyManager\Model;

use Piwik\Columns\Dimension;
Expand All @@ -21,6 +23,7 @@
use Piwik\Site;
use Piwik\Tracker\LogTable;
use Piwik\Tracker\PageUrl;
use Psr\Log\LoggerInterface;

class DataSubjects
{
Expand Down Expand Up @@ -49,7 +52,7 @@ private function getDistinctIdSitesInTable($tableName, $maxIdSite)
public function deleteDataSubjectsForDeletedSites($allExistingIdSites)
{
if (empty($allExistingIdSites)) {
return array();
return [];
}

$allExistingIdSites = array_map('intval', $allExistingIdSites);
Expand All @@ -66,15 +69,15 @@ public function deleteDataSubjectsForDeletedSites($allExistingIdSites)
if (empty($idSitesNoLongerExisting)) {
// nothing to be deleted... if there is no entry for that table in log_visit or log_link_visit_action
// then there shouldn't be anything to be deleted in other tables either
return array();
return [];
}

$logTables = $this->getLogTablesToDeleteFrom();
// It's quicker to call the delete queries one site at a time instead of using the IN operator and potentially
// creating a huge result set
foreach ($idSitesNoLongerExisting as $idSiteNoLongerExisting) {
$r = $this->deleteLogDataFrom($logTables, function($tableToSelectFrom) use ($idSiteNoLongerExisting) {
return [$tableToSelectFrom . '.idsite = '. $idSiteNoLongerExisting, []];
$r = $this->deleteLogDataFrom($logTables, function ($tableToSelectFrom) use ($idSiteNoLongerExisting) {
return [$tableToSelectFrom . '.idsite = ' . $idSiteNoLongerExisting, []];
});
foreach ($r as $k => $v) {
if (!array_key_exists($k, $results)) {
Expand All @@ -91,16 +94,16 @@ public function deleteDataSubjectsForDeletedSites($allExistingIdSites)
public function deleteDataSubjects($visits)
{
if (empty($visits)) {
return array();
return [];
}

$results = array();
$results = [];

/**
* Lets you delete data subjects to make your plugin GDPR compliant.
* This can be useful if you have developed a plugin which stores any data for visits but doesn't
* use any core logic to store this data. If core API's are used, for example log tables, then the data may
* be deleted automatically.
* be deleted automatically.
*
* **Example**
*
Expand All @@ -114,7 +117,7 @@ public function deleteDataSubjects($visits)
* @param array &$visits An array with multiple visit entries containing an idvisit and idsite each. The data
* for these visits is requested to be deleted.
*/
Piwik::postEvent('PrivacyManager.deleteDataSubjects', array(&$results, $visits));
Piwik::postEvent('PrivacyManager.deleteDataSubjects', [&$results, $visits]);

$datesToInvalidateByIdSite = $this->getDatesToInvalidate($visits);

Expand All @@ -141,16 +144,16 @@ private function invalidateArchives($datesToInvalidateByIdSite)

private function getDatesToInvalidate($visits)
{
$idVisitsByIdSites = array();
$idVisitsByIdSites = [];
foreach ($visits as $visit) {
$idSite = (int)$visit['idsite'];
if (!isset($idVisitsByIdSites[$idSite])) {
$idVisitsByIdSites[$idSite] = array();
$idVisitsByIdSites[$idSite] = [];
}
$idVisitsByIdSites[$idSite][] = (int)$visit['idvisit'];
}

$datesToInvalidate = array();
$datesToInvalidate = [];
foreach ($idVisitsByIdSites as $idSite => $idVisits) {
$timezone = Site::getTimezoneFor($idSite);

Expand All @@ -159,7 +162,7 @@ private function getDatesToInvalidate($visits)
. ' AND idvisit IN (' . implode(',', $idVisits) . ')';

$resultSet = Db::fetchAll($sql);
$dates = array();
$dates = [];
foreach ($resultSet as $row) {
$date = Date::factory($row['visit_last_action_time'], $timezone);
$dates[$date->toString('Y-m-d')] = 1;
Expand Down Expand Up @@ -194,14 +197,14 @@ private function deleteLogDataFrom($logTables, callable $generateWhere)
foreach ($logTables as $logTable) {
$logTableName = $logTable->getName();

$from = array($logTableName);
$from = [$logTableName];
$tableToSelect = $this->findNeededTables($logTable, $from);

if (!$tableToSelect) {
throw new \Exception('Cannot join table ' . $logTable->getName());
}

list($where, $bind) = $generateWhere($tableToSelect);
[$where, $bind] = $generateWhere($tableToSelect);

$sql = "DELETE $logTableName FROM " . $this->makeFromStatement($from) . " WHERE $where";

Expand Down Expand Up @@ -229,11 +232,11 @@ private function sortLogTablesToEnsureDataErasureFromAllTablesIsPossible($logTab
$bName = $b->getName();
if ($bName === 'log_visit') {
return -1;
} else if ($aName === 'log_visit') {
} elseif ($aName === 'log_visit') {
return 1;
} else if ($bName === 'log_link_visit_action') {
} elseif ($bName === 'log_link_visit_action') {
return -1;
} else if ($aName === 'log_link_visit_action') {
} elseif ($aName === 'log_link_visit_action') {
return 1;
}

Expand Down Expand Up @@ -267,7 +270,7 @@ private function sortLogTablesToEnsureDataErasureFromAllTablesIsPossible($logTab
public function exportDataSubjects($visits)
{
if (empty($visits)) {
return array();
return [];
}

$logTables = $this->logTablesProvider->getAllLogTables();
Expand All @@ -277,15 +280,15 @@ public function exportDataSubjects($visits)

$dimensions = Dimension::getAllDimensions();

$results = array();
$results = [];

foreach ($logTables as $logTable) {
$logTableName = $logTable->getName();
if ('log_action' === $logTableName) {
continue; // we export these entries further below
}

$from = array($logTableName);
$from = [$logTableName];
$tableToSelect = $this->findNeededTables($logTable, $from);

if (!$tableToSelect) {
Expand All @@ -294,17 +297,21 @@ public function exportDataSubjects($visits)
continue;
}

list($where, $bind) = $this->visitsToWhereAndBind($tableToSelect, $visits);
[$where, $bind] = $this->visitsToWhereAndBind($tableToSelect, $visits);

$select = array();
$select = [];
$cols = DbHelper::getTableColumns(Common::prefixTable($logTableName));
ksort($cols); // make sure test results will be always in same order

$binaryFields = array();
$dimensionPerCol = array();
$binaryFields = [];
$dimensionPerCol = [];
foreach ($cols as $col => $config) {
foreach ($dimensions as $dimension) {
if ($dimension->getDbTableName() === $logTableName && $dimension->getColumnName() === $col) {
if (
$dimension->getDbTableName() === $logTableName
&& $dimension->getColumnName() === $col
&& $dimension->getSqlSegment() === $logTableName . '.' . $col
) {
if ($dimension->getType() === Dimension::TYPE_BINARY) {
$binaryFields[] = $col;
}
Expand All @@ -328,7 +335,7 @@ public function exportDataSubjects($visits)
$idFields = $logTable->getIdColumn();
if (!empty($idFields)) {
if (!is_array($idFields)) {
$idFields = array($idFields);
$idFields = [$idFields];
}
$sql .= ' ORDER BY ';
foreach ($idFields as $field) {
Expand All @@ -348,8 +355,27 @@ public function exportDataSubjects($visits)
}
foreach ($result[$index] as $rowColumn => $rowValue) {
if (isset($dimensionPerCol[$rowColumn])) {
$result[$index][$rowColumn] = $dimensionPerCol[$rowColumn]->formatValue($rowValue, $result[$index]['idsite'], new Formatter());
} else if (!empty($rowValue)) {
try {
$result[$index][$rowColumn] = $dimensionPerCol[$rowColumn]->formatValue(
$rowValue,
$result[$index]['idsite'],
new Formatter()
);
} catch (\Exception $e) {
// if formatting fails for some reason, use the raw value
StaticContainer::get(LoggerInterface::class)->error(
'Failed to format column {column} with dimension {dimension}: {exception}',
[
'column' => $rowColumn,
'dimension' => get_class($dimensionPerCol[$rowColumn]),
'exception' => $e,
'ignoreInScreenWriter' => true,
]
);

$result[$index][$rowColumn] = $rowValue;
}
} elseif (!empty($rowValue)) {
// we try to auto-detect compressed values so plugins have to do less themselves; this makes it a bit slower but should be fine
$testValue = @gzuncompress($rowValue);
if ($testValue !== false) {
Expand All @@ -372,10 +398,10 @@ public function exportDataSubjects($visits)
$dimensionLogTable = $this->logTablesProvider->getLogTable($dimensionTable);

if ($join && $join instanceof ActionNameJoin && $dimensionColumn && $dimensionTable && $dimensionLogTable && $dimensionLogTable->getColumnToJoinOnIdVisit()) {
$from = array('log_action', array('table' => $dimensionTable, 'joinOn' => "log_action.idaction = `$dimensionTable`.`$dimensionColumn`"));
$from = ['log_action', ['table' => $dimensionTable, 'joinOn' => "log_action.idaction = `$dimensionTable`.`$dimensionColumn`"]];

$tableToSelect = $this->findNeededTables($dimensionLogTable, $from);
list($where, $bind) = $this->visitsToWhereAndBind($tableToSelect, $visits);
[$where, $bind] = $this->visitsToWhereAndBind($tableToSelect, $visits);
$from = $this->makeFromStatement($from);

$sql = "SELECT log_action.idaction, log_action.name, log_action.url_prefix FROM $from WHERE $where";
Expand All @@ -394,7 +420,7 @@ public function exportDataSubjects($visits)
usort($result, function ($a1, $a2) {
return $a1['idaction'] > $a2['idaction'] ? 1 : -1;
});
$results['log_action_' . $dimensionTable.'_' . $dimensionColumn] = $result;
$results['log_action_' . $dimensionTable . '_' . $dimensionColumn] = $result;
}
}
}
Expand All @@ -419,7 +445,7 @@ public function exportDataSubjects($visits)
* @param array &$visits An array with multiple visit entries containing an idvisit and idsite each. The data
* for these visits is requested to be exported.
*/
Piwik::postEvent('PrivacyManager.exportDataSubjects', array(&$results, $visits));
Piwik::postEvent('PrivacyManager.exportDataSubjects', [&$results, $visits]);

krsort($results); // make sure test results are always in same order

Expand All @@ -433,12 +459,12 @@ private function findNeededTables(LogTable $logTable, &$from)
if ($logTable->getColumnToJoinOnIdVisit()) {
$tableToSelect = 'log_visit';
if ($logTableName !== 'log_visit') {
$from[] = array('table' => 'log_visit', 'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $logTable->getColumnToJoinOnIdVisit(), 'log_visit', 'idvisit'));
$from[] = ['table' => 'log_visit', 'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $logTable->getColumnToJoinOnIdVisit(), 'log_visit', 'idvisit')];
}
} elseif ($logTable->getColumnToJoinOnIdAction()) {
$tableToSelect = 'log_link_visit_action';
if ($logTableName !== 'log_link_visit_action') {
$from[] = array('table' => 'log_link_visit_action', 'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $logTable->getColumnToJoinOnIdAction(), 'log_link_visit_action', 'idaction_url'));
$from[] = ['table' => 'log_link_visit_action', 'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $logTable->getColumnToJoinOnIdAction(), 'log_link_visit_action', 'idaction_url')];
}
} else {
$tableToSelect = $this->joinNonCoreTable($logTable, $from);
Expand All @@ -463,26 +489,31 @@ private function makeFromStatement($from)

/**
 * Builds the SQL WHERE condition that selects the given visits in the given table.
 *
 * Visits that carry a non-empty `idsite` are matched on the (idsite, idvisit) pair; visits without one are
 * collected into a single `idvisit in (...)` list. All conditions are combined with OR.
 *
 * @param string $tableToSelect Table name used to qualify the `idsite` and `idvisit` columns. It is inserted
 *                              into the SQL as-is, so it must be a trusted identifier.
 * @param array $visits List of visit entries, each with an `idvisit` and optionally an `idsite`.
 * @return array Two elements: the WHERE condition string (empty when $visits is empty) and the bind
 *               parameters, which are always an empty array because every ID is cast to int and inlined.
 */
private function visitsToWhereAndBind($tableToSelect, $visits)
{
    $where = [];
    $bind = [];
    $in = [];

    foreach ($visits as $visit) {
        if (empty($visit['idsite'])) {
            // no site given: the visit can only be matched by its idvisit
            $in[] = (int) $visit['idvisit'];
        } else {
            // exactly one clause per visit; adding it twice would only bloat the query
            $where[] = sprintf(
                '(%s.idsite = %d AND %s.idvisit = %d)',
                $tableToSelect,
                (int) $visit['idsite'],
                $tableToSelect,
                (int) $visit['idvisit']
            );
        }
    }

    $where = implode(' OR ', $where);

    if (!empty($in)) {
        if (!empty($where)) {
            $where .= ' OR ';
        }
        // appended exactly once; a second, unseparated copy would produce invalid SQL
        $where .= $tableToSelect . '.idvisit in (' . implode(',', $in) . ')';
    }

    return [$where, $bind];
}

private function joinNonCoreTable(LogTable $logTable, &$from)
Expand All @@ -498,34 +529,33 @@ private function joinNonCoreTable(LogTable $logTable, &$from)
$joinTable = $this->logTablesProvider->getLogTable($tableName);

if ($joinTable->getColumnToJoinOnIdVisit()) {
$from[] = array(
$from[] = [
'table' => $joinTable->getName(),
'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $joinColumn, $joinTable->getName(), $joinColumn)
);
];
if ($joinTable->getName() !== 'log_visit') {
$from[] = array(
$from[] = [
'table' => 'log_visit',
'joinOn' => sprintf('%s.%s = %s.%s', $joinTable->getName(), $joinTable->getColumnToJoinOnIdVisit(), 'log_visit', $joinTable->getColumnToJoinOnIdVisit())
);
];
}
$tableToSelect = 'log_visit';
return $tableToSelect;
} else {
$subFroms = array();
$subFroms = [];
$tableToSelect = $this->joinNonCoreTable($joinTable, $subFroms);
if ($tableToSelect) {
$from[] = array(
$from[] = [
'table' => $joinTable->getName(),
'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $joinColumn, $joinTable->getName(), $joinColumn)
);
];
foreach ($subFroms as $subFrom) {
$from[] = $subFrom;
}
return $tableToSelect;
}
}
}

}

/**
Expand All @@ -541,5 +571,4 @@ public function deleteDataSubjectsWithoutInvalidatingArchives($visits): array
});
return $deleteCounts;
}

}