Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make archiving process to respect process_new_segments_from settings #17351

Merged
merged 17 commits into from May 7, 2021
41 changes: 41 additions & 0 deletions core/ArchiveProcessor/Loader.php
Expand Up @@ -27,6 +27,7 @@
use Piwik\SettingsServer;
use Piwik\Site;
use Psr\Log\LoggerInterface;
use Piwik\CronArchive\SegmentArchiving;

/**
* This class uses PluginsArchiver class to trigger data aggregation and create archives.
Expand Down Expand Up @@ -389,12 +390,52 @@ public function canSkipThisArchive()
$hasSiteVisitsBetweenTimeframe = $this->hasSiteVisitsBetweenTimeframe($idSite, $params->getPeriod());
$hasChildArchivesInPeriod = $this->dataAccessModel->hasChildArchivesInPeriod($idSite, $params->getPeriod());

if ($this->canSkipArchiveForSegment()) {
return true;
}
flamisz marked this conversation as resolved.
Show resolved Hide resolved

return $isWebsiteUsingTracker
&& !$isArchivingForcedWhenNoVisits
&& !$hasSiteVisitsBetweenTimeframe
&& !$hasChildArchivesInPeriod;
}

public function canSkipArchiveForSegment()
{
$params = $this->params;

if ($params->getSegment()->isEmpty()) {
return false;
}

/** @var SegmentArchiving */
$segmentArchiving = StaticContainer::get(SegmentArchiving::class);
$segmentInfo = $segmentArchiving->findSegmentForHash($params->getSegment()->getHash(), $params->getSite()->getId());

if (!$segmentInfo) {
return false;
}

$segmentArchiveStartDate = $segmentArchiving->getReArchiveSegmentStartDate($segmentInfo);

if ($segmentArchiveStartDate !==null && $segmentArchiveStartDate->isLater($params->getPeriod()->getDateEnd()->getEndOfDay())) {
$doneFlag = Rules::getDoneStringFlagFor(
[$params->getSite()->getId()],
$params->getSegment(),
$params->getPeriod()->getLabel(),
$params->getRequestedPlugin()
);

// if there is no invalidation where the report is null, we can skip
// if we have invalidations for the period and name, but only for a specific reports, we can skip
// if the report is not null we only want to rearchive if we have invalidation for that report
// if we don't find invalidation for that report, we can skip
return !$this->dataAccessModel->hasInvalidationForPeriodAndName($params->getSite()->getId(), $params->getPeriod(), $doneFlag, $params->getArchiveOnlyReport());
}

return false;
}

private function isWebsiteUsingTheTracker($idSite)
{
$idSitesNotUsingTracker = self::getSitesNotUsingTracker();
Expand Down
17 changes: 17 additions & 0 deletions core/CronArchive.php
Expand Up @@ -894,6 +894,23 @@ private function invalidateWithSegments($idSites, $date, $period, $_forceInvalid
if ($this->canWeSkipInvalidatingBecauseThereIsAUsablePeriod($params, $doNotIncludeTtlInExistingArchiveCheck)) {
$this->logger->debug(' Found usable archive for {archive}, skipping invalidation.', ['archive' => $params]);
} else {
if (empty($this->segmentArchiving)) {
// might not be initialised if init is not called
$this->segmentArchiving = StaticContainer::get(SegmentArchiving::class);
}
flamisz marked this conversation as resolved.
Show resolved Hide resolved

$segmentInfo = $this->segmentArchiving->findSegmentForHash($params->getSegment()->getHash(), $idSite);

if ($segmentInfo) {
$segmentArchiveStartDate = $this->segmentArchiving->getReArchiveSegmentStartDate($segmentInfo);

if ($segmentArchiveStartDate !== null && $segmentArchiveStartDate->isLater($params->getPeriod()->getDateEnd()->getEndOfDay())) {
// the system is not allowed to invalidate reports for this period
// automatically, only a user can specifically invalidate
continue;
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code (the function calls and ifs) looks very similar to the above code. Can it be moved to a method to reduce duplication? Maybe a method in the SegmentArchiving class?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@flamisz this wasn't addressed, did you discuss it w/ @tsteur or something?

Copy link
Contributor Author

@flamisz flamisz May 6, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't find any useful way to introduce a new function, but refactored both files, mostly get rid off temporary variables.

Copy link
Member

@diosmosis diosmosis May 7, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@flamisz I was thinking something like:

SegmentArchiving.php

public function getSegmentReArchiveDateIfPastEndOfArchivingPeriod($params)
{
    $segmentInfo = $this->findSegmentForHash($params->getSegment()->getHash(), $params->getSite()->getId());

        if (!$segmentInfo) {
            return false;
        }

        $segmentArchiveStartDate = $this->getReArchiveSegmentStartDate($segmentInfo);

    if ($segmentArchiveStartDate !==null && $segmentArchiveStartDate->isLater($params->getPeriod()->getDateEnd()->getEndOfDay())) {
        return $segmentArchiveDate;
    }

    return false;
}

then in Loader.php:

    public function canSkipArchiveForSegment()
    {
        /** @var SegmentArchiving */
        $segmentArchiving = StaticContainer::get(SegmentArchiving::class);
        $segmentReArchiveDate = $segmentArchiving->getSegmentReArchiveDateIfPastEndOfArchivingPeriod($this->params);

        if (!empty($segmentReArchiveDate)) {
            $doneFlag = Rules::getDoneStringFlagFor(
                [$params->getSite()->getId()],
                $params->getSegment(),
                $params->getPeriod()->getLabel(),
                $params->getRequestedPlugin()
            );

            // if there is no invalidation where the report is null, we can skip
            // if we have invalidations for the period and name, but only for a specific reports, we can skip
            // if the report is not null we only want to rearchive if we have invalidation for that report
            // if we don't find invalidation for that report, we can skip
            return !$this->dataAccessModel->hasInvalidationForPeriodAndName($idSite, $params->getPeriod(), $doneFlag, $params->getArchiveOnlyReport());
        }

        return false;
    }

and above in CronArchive.php:

$segmentReArchiveDate = $segmentArchiving->getSegmentReArchiveDateIfPastEndOfArchivingPeriod($this->params);
if (!empty($segmentReArchiveDate)) {
    // the system is not allowed to invalidate reports for this period
    // automatically, only a user can specifically invalidate
    continue;
}

Maybe this doesn't work or it's not a good idea for another reason, I'll just take your word for it since it's not a huge deal.


$this->getApiToInvalidateArchivedReport()->invalidateArchivedReports($idSite, $date, $period, $segmentDefinition,
$cascadeDown = false, $_forceInvalidateNonexistant);
}
Expand Down
42 changes: 42 additions & 0 deletions core/DataAccess/Model.php
Expand Up @@ -864,6 +864,48 @@ public function hasChildArchivesInPeriod($idSite, Period $period)
return false;
}

/**
* Returns true if any invalidations exists for the given
* $idsite and $doneFlag (name column) for the $period.
*
* @param mixed $idSite
* @param Period $period
* @param mixed $doneFlag
* @param mixed $report
* @return bool
* @throws Exception
*/
public function hasInvalidationForPeriodAndName($idSite, Period $period, $doneFlag, $report = null)
{
$table = Common::prefixTable('archive_invalidations');

if (empty($report)) {
$sql = "SELECT idinvalidation FROM `$table` WHERE idsite = ? AND date1 = ? AND date2 = ? AND `period` = ? AND `name` = ? AND `report` IS NULL LIMIT 1";
} else {
$sql = "SELECT idinvalidation FROM `$table` WHERE idsite = ? AND date1 = ? AND date2 = ? AND `period` = ? AND `name` = ? AND `report` = ? LIMIT 1";
}

$bind = [
$idSite,
$period->getDateStart()->toString(),
$period->getDateEnd()->toString(),
$period->getId(),
$doneFlag
];

if (!empty($report)) {
$bind[] = $report;
}

$idInvalidation = Db::fetchOne($sql, $bind);

if (empty($idInvalidation)) {
return false;
}

return true;
}

public function deleteInvalidationsForSites(array $idSites)
{
$idSites = array_map('intval', $idSites);
Expand Down
10 changes: 7 additions & 3 deletions plugins/CoreConsole/Commands/CoreArchiver.php
Expand Up @@ -23,6 +23,11 @@ protected function configure()

protected function execute(InputInterface $input, OutputInterface $output)
{
if($input->getOption('force-date-last-n')) {
$message = '"force-date-last-n" is deprecated. Please use the "process_new_segments_from" INI configuration option instead.';
$output->writeln('<comment>' . $message .'</comment>');
}

$archiver = self::makeArchiver($input->getOption('url'), $input);
$archiver->main();
}
Expand All @@ -38,7 +43,6 @@ public static function makeArchiver($url, InputInterface $input)
$archiver->shouldArchiveSpecifiedSites = self::getSitesListOption($input, "force-idsites");
$archiver->shouldSkipSpecifiedSites = self::getSitesListOption($input, "skip-idsites");
$archiver->phpCliConfigurationOptions = $input->getOption("php-cli-options");
$archiver->dateLastForced = $input->getOption('force-date-last-n');
$archiver->concurrentRequestsPerWebsite = $input->getOption('concurrent-requests-per-website');
$archiver->maxConcurrentArchivers = $input->getOption('concurrent-archivers');
$archiver->shouldArchiveAllSites = $input->getOption('force-all-websites');
Expand Down Expand Up @@ -91,8 +95,8 @@ public static function configureArchiveCommand(ConsoleCommand $command)
'If specified, segments will be only archived for yesterday, but not today. If the segment was created or changed recently, then it will still be archived for today and the setting will be ignored for this segment.');
$command->addOption('force-periods', null, InputOption::VALUE_OPTIONAL,
"If specified, archiving will be processed only for these Periods (comma separated eg. day,week,month,year,range)");
$command->addOption('force-date-last-n', null, InputOption::VALUE_REQUIRED,
"This last N number of years of data to invalidate when a recently created or updated segment is found.", 7);
$command->addOption('force-date-last-n', null, InputOption::VALUE_OPTIONAL,
"Deprecated. Please use the \"process_new_segments_from\" INI configuration option instead.");
$command->addOption('force-date-range', null, InputOption::VALUE_OPTIONAL,
"If specified, archiving will be processed only for periods included in this date range. Format: YYYY-MM-DD,YYYY-MM-DD");
$command->addOption('force-idsegments', null, InputOption::VALUE_REQUIRED,
Expand Down
5 changes: 4 additions & 1 deletion plugins/CoreConsole/tests/System/ArchiveCronTest.php
Expand Up @@ -432,7 +432,7 @@ private function runArchivePhpCron($options = array(), $archivePhpScript = false
$urlToProxy = Fixture::getRootUrl() . 'tests/PHPUnit/proxy/index.php';

// create the command
$cmd = "php \"$archivePhpScript\" --url=\"$urlToProxy\" --force-date-last-n=10";
$cmd = "php \"$archivePhpScript\" --url=\"$urlToProxy\"";
foreach ($options as $name => $value) {
$cmd .= " $name";
if ($value !== null) {
Expand Down Expand Up @@ -464,6 +464,9 @@ public static function provideContainerConfigBeforeClass()
},

'Tests.log.allowAllHandlers' => true,

CronArchive\SegmentArchiving::class => \DI\object()
->constructorParameter('beginningOfTimeLastNInYears', 10)
);
}

Expand Down
Expand Up @@ -17,6 +17,7 @@
use Piwik\Plugins\VisitsSummary;
use Piwik\Tests\Fixtures\OneVisitorTwoVisits;
use Piwik\Tests\Framework\TestCase\IntegrationTestCase;
use Piwik\CronArchive\SegmentArchiving;

/**
* @group SegmentEditor
Expand Down Expand Up @@ -257,6 +258,9 @@ public function provideContainerConfig()
$previous->General['browser_archiving_disabled_enforce'] = 1;
return $previous;
}),

SegmentArchiving::class => \DI\object()
->constructorParameter('beginningOfTimeLastNInYears', 15)
];
}

Expand Down
6 changes: 5 additions & 1 deletion tests/PHPUnit/Fixtures/UITestFixture.php
Expand Up @@ -43,6 +43,7 @@
use Piwik\Tests\Framework\XssTesting;
use Piwik\Plugins\ScheduledReports\API as APIScheduledReports;
use Psr\Container\ContainerInterface;
use Piwik\CronArchive\SegmentArchiving;

/**
* Fixture for UI tests.
Expand Down Expand Up @@ -508,6 +509,9 @@ public function provideContainerConfig()
$previous[] = $c->get(WebNotificationHandler::class);
return $previous;
}),

SegmentArchiving::class => \DI\object()
->constructorParameter('beginningOfTimeLastNInYears', 15)
];
}

Expand Down Expand Up @@ -648,4 +652,4 @@ public function isExistingApiAction($pluginName, $apiAction)
}
return parent::isExistingApiAction($pluginName, $apiAction);
}
}
}
131 changes: 131 additions & 0 deletions tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php
Expand Up @@ -29,6 +29,10 @@
use Piwik\Site;
use Piwik\Tests\Framework\Fixture;
use Piwik\Tests\Framework\TestCase\IntegrationTestCase;
use Piwik\Plugins\SegmentEditor\API as SegmentApi;
use Piwik\Option;
use Piwik\ArchiveProcessor\Rules;
use ReflectionClass;

class LoaderTest extends IntegrationTestCase
{
Expand Down Expand Up @@ -1345,6 +1349,120 @@ public function test_canSkipThisArchive_ignoresSegments()
$this->assertFalse($loader->canSkipThisArchive());
}

public function test_canSkipArchiveForSegment_returnsFalseIfNoSegments()
{
$params = new Parameters(new Site(1), Factory::build('year', '2016-02-03'), new Segment('', []));
$loader = new Loader($params);

$this->assertFalse($loader->canSkipArchiveForSegment());
}

public function test_canSkipArchiveForSegment_returnsFalseIfPeriodEndLaterThanSegmentArchiveStartDate()
{
Rules::setBrowserTriggerArchiving(false);
$definition = 'browserCode==ch';

SegmentApi::getInstance()->add('segment', $definition, 1, true, true);
$params = new Parameters(new Site(1), Factory::build('year', '2021-04-23'), new Segment($definition, [1]));
$loader = new Loader($params);

$this->assertFalse($loader->canSkipArchiveForSegment());
}

public function test_canSkipArchiveForSegment_returnsTrueIfPeriodEndEarlierThanSegmentArchiveStartDate()
{
Rules::setBrowserTriggerArchiving(false);

$definition = 'browserCode==ch';
SegmentApi::getInstance()->add('segment', $definition, 1, true, true);
$params = new Parameters(new Site(1), Factory::build('year', '2010-04-23'), new Segment($definition, [1]));
$loader = new Loader($params);

$this->assertTrue($loader->canSkipArchiveForSegment());
}

public function test_canSkipArchiveForSegment_returnsFalseIfHasInvalidationForThePeriod()
{
Rules::setBrowserTriggerArchiving(false);

$date = '2010-04-23';
$definition = 'browserCode==ch';
$segment = new Segment($definition, [1]);
$doneFlag = Rules::getDoneStringFlagFor([1], $segment, 'day', null);

$this->insertInvalidations([
['date1' => $date, 'date2' => $date, 'period' => 1, 'name' => $doneFlag],
]);

SegmentApi::getInstance()->add('segment', $definition, 1, true, true);
$params = new Parameters(new Site(1), Factory::build('day', $date), $segment);
$loader = new Loader($params);

$this->assertFalse($loader->canSkipArchiveForSegment());
}

public function test_canSkipArchiveForSegment_returnsTrueIfHasInvalidationForReportButWeDonSpecifyReport()
{
Rules::setBrowserTriggerArchiving(false);

$date = '2010-04-23';
$definition = 'browserCode==ch';
$segment = new Segment($definition, [1]);
$doneFlag = Rules::getDoneStringFlagFor([1], $segment, 'day', null);

$this->insertInvalidations([
['date1' => $date, 'date2' => $date, 'period' => 1, 'name' => $doneFlag, 'report' => 'myReport'],
]);

SegmentApi::getInstance()->add('segment', $definition, 1, true, true);
$params = new Parameters(new Site(1), Factory::build('day', $date), $segment);
$loader = new Loader($params);

$this->assertTrue($loader->canSkipArchiveForSegment());
}

public function test_canSkipArchiveForSegment_returnsFalseIfHasInvalidationForReportWeAskedFor()
{
Rules::setBrowserTriggerArchiving(false);

$date = '2010-04-23';
$definition = 'browserCode==ch';
$segment = new Segment($definition, [1]);
$doneFlag = Rules::getDoneStringFlagFor([1], $segment, 'day', null);

$this->insertInvalidations([
['date1' => $date, 'date2' => $date, 'period' => 1, 'name' => $doneFlag, 'report' => 'myReport'],
]);

SegmentApi::getInstance()->add('segment', $definition, 1, true, true);
$params = new Parameters(new Site(1), Factory::build('day', $date), $segment);
$params->setArchiveOnlyReport('myReport');
$loader = new Loader($params);

$this->assertFalse($loader->canSkipArchiveForSegment());
}

public function test_canSkipArchiveForSegment_returnsTrueIfHasNoInvalidationForReportWeAskedFor()
{
Rules::setBrowserTriggerArchiving(false);

$date = '2010-04-23';
$definition = 'browserCode==ch';
$segment = new Segment($definition, [1]);
$doneFlag = Rules::getDoneStringFlagFor([1], $segment, 'day', null);

$this->insertInvalidations([
['date1' => $date, 'date2' => $date, 'period' => 1, 'name' => $doneFlag, 'report' => 'myReport'],
]);

SegmentApi::getInstance()->add('segment', $definition, 1, true, true);
$params = new Parameters(new Site(1), Factory::build('day', $date), $segment);
$params->setArchiveOnlyReport('otherReport');
$loader = new Loader($params);

$this->assertTrue($loader->canSkipArchiveForSegment());
}

public function test_forcePluginArchiving_createsPluginSpecificArchive()
{
$_GET['trigger'] = 'archivephp';
Expand Down Expand Up @@ -1417,4 +1535,17 @@ private function getArchives()
}
return $results;
}

private function insertInvalidations(array $invalidations)
{
$table = Common::prefixTable('archive_invalidations');
$now = Date::now()->getDatetime();
foreach ($invalidations as $invalidation) {
$sql = "INSERT INTO `$table` (idsite, date1, date2, period, `name`, status, ts_invalidated, ts_started, report) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";
Db::query($sql, [
$invalidation['idsite'] ?? 1, $invalidation['date1'], $invalidation['date2'], $invalidation['period'], $invalidation['name'],
$invalidation['status'] ?? 0, $invalidation['ts_invalidated'] ?? $now, $invalidation['ts_started'] ?? null, $invalidation['report'] ?? null
]);
}
}
}