Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX Unpublising parent pages should include child pages #89

Merged
merged 9 commits into from
Apr 12, 2024
28 changes: 16 additions & 12 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
<phpunit bootstrap="vendor/silverstripe/framework/tests/bootstrap.php" colors="true">
<testsuite name="Default">
<directory>tests/</directory>
</testsuite>
<filter>
<whitelist processUncoveredFilesFromWhitelist="true">
<directory suffix=".php">src/</directory>
<exclude>
<directory suffix=".php">tests/</directory>
</exclude>
</whitelist>
</filter>
<?xml version="1.0"?>
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" bootstrap="vendor/silverstripe/framework/tests/bootstrap.php" colors="true" xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/9.3/phpunit.xsd">
<coverage processUncoveredFiles="true">
<include>
<directory suffix=".php">src/</directory>
</include>
<exclude>
<directory suffix=".php">tests/</directory>
</exclude>
</coverage>
<testsuite name="Default">
<directory>tests/</directory>
</testsuite>
<php>
<get name="flush" value="1"/>
</php>
</phpunit>
2 changes: 2 additions & 0 deletions src/DataObject/DataObjectBatchProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ public function removeDocuments(array $documents): array
$this->run($job);

foreach ($documents as $doc) {
// Indexer::METHOD_ADD as default parameter make sure we check first its related documents
// and decide whether we should delete or update them automatically.
$childJob = RemoveDataObjectJob::create($doc, $timestamp);
$this->run($childJob);
}
Expand Down
22 changes: 22 additions & 0 deletions src/DataObject/DataObjectDocument.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use Exception;
use InvalidArgumentException;
use LogicException;
use SilverStripe\CMS\Model\SiteTree;
use SilverStripe\Core\Config\Configurable;
use SilverStripe\Core\Extensible;
use SilverStripe\Core\Injector\Injectable;
Expand Down Expand Up @@ -371,6 +372,10 @@ public function getFieldValue(Field $field): mixed
}

/**
* Collects related documents based on the underlying relationship of their DataObjects.
* These data are typically joined by $has_many or $many_many configured on searcheable classes
* defined by their fields to be indexed. Other relationships are considered a rare case.
*
ssmarco marked this conversation as resolved.
Show resolved Hide resolved
* @return DocumentInterface[]
*/
public function getDependentDocuments(): array
Expand Down Expand Up @@ -455,6 +460,10 @@ public function getDependentDocuments(): array
}
}

if ($ownedDataObject instanceof SiteTree && SiteTree::config()->get('enforce_strict_hierarchy')) {
$docs = array_merge($docs, $this->getChildDocuments($ownedDataObject));
}

$dependentDocs = array_values($docs);
$this->getDataObject()->invokeWithExtensions('updateSearchDependentDocuments', $dependentDocs);

Expand Down Expand Up @@ -636,4 +645,17 @@ public function onRemoveFromSearchIndexes(string $event): void
}
}

public function getChildDocuments(SiteTree $page): array
{
$docs = [];

foreach ($page->AllChildren() as $record) {
$document = DataObjectDocument::create($record);
$docs[$document->getIdentifier()] = $document;
$docs = array_merge($docs, $document->getDependentDocuments());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can calling getDependentDocuments() here ever result in an infinite loop?

Copy link
Contributor Author

@ssmarco ssmarco Oct 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If my theory is correct, then the CMS will prevent us from this happening especially on a SiteTree which I used as a guide.

https://github.com/silverstripe/silverstripe-cms/blob/668744e728154818873bc24cb0b8d45f73728b95/code/Model/SiteTree.php#L1752
The delete method on each child page executes on before delete as well which is kind of the same thing which is to recursively call the delete function.

My initial change was to account only the immediate children (SiteTree) of a certain page but @michalkleiner made a valid point to account other relationships on each child pages hence made the changes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am thinking of asking other vendors to stress test this implementation since this issue came from them since they have more content.

}

return $docs;
}

}
46 changes: 36 additions & 10 deletions src/Jobs/RemoveDataObjectJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
use SilverStripe\Versioned\Versioned;

/**
* By virture of the default parameter, Index::METHOD_ADD, this does not remove the documents straight away.
* It checks first the status of the underlaying DataObjects and decides whether to remove or add them to the index.
* Then pass on to the parent's process() method to handle the job.
*
* @property DataObjectDocument|null $document
* @property int|null $timestamp
*/
Expand All @@ -19,6 +23,8 @@ class RemoveDataObjectJob extends IndexJob

public function __construct(?DataObjectDocument $document = null, ?int $timestamp = null, ?int $batchSize = null)
{
// Indexer::METHOD_ADD as default parameter make sure we check first its related documents
// whether we should delete or update them automatically.
parent::__construct([], Indexer::METHOD_ADD, $batchSize);

if ($document !== null) {
Expand Down Expand Up @@ -46,26 +52,46 @@ public function getTitle(): string
*/
public function setup(): void
{
// Set the documents in setup to ensure async
/** @var DBDatetime $datetime - set the documents in setup to ensure async */
$datetime = DBField::create_field('Datetime', $this->getTimestamp());
$archiveDate = $datetime->format($datetime->getISOFormat());
$documents = Versioned::withVersionedMode(function () use ($archiveDate) {
Versioned::reading_archived_date($archiveDate);

$currentDocument = $this->getDocument();
// Go back in time to find out what the owners were before unpublish
$dependentDocs = $this->document->getDependentDocuments();
$dependentDocs = $currentDocument->getDependentDocuments();

// refetch everything on the live stage
Versioned::set_stage(Versioned::LIVE);

return array_map(function (DataObjectDocument $doc) {
return DataObjectDocument::create(
DataObject::get_by_id(
$doc->getSourceClass(),
$doc->getDataObject()->ID
)
);
}, $dependentDocs);
return array_reduce(
$dependentDocs,
function (array $carry, DataObjectDocument $doc) {
$record = DataObject::get_by_id($doc->getSourceClass(), $doc->getDataObject()->ID);

// Since SiteTree::onBeforeDelete recursively deletes the child pages,
// they end up not found on a live environment which breaks DataObjectDocument::_constructor
if ($record) {
$document = DataObjectDocument::create($record);
$carry[$document->getIdentifier()] = $document;

return $carry;
}

// Taking into account that this queued job has a reference of existing child pages
// We need to make sure that we are able to send these pages to ElasticSearch etc. for removal
$oldRecord = $doc->getDataObject();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__unserialise does this a bit differently by getting the 'latest' version - do we want to do that to be consistent? I"m assuming this is about getting documents that have been removed (ie were not picked up by the previous condition because of stage=Live)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tested your __unserialize PR on top of this and your fix works as expected. As discussed, this job only checks for relationships between dataobjects and not their contents. Sending the live data is what the other PR has resolved a seemingly module wide issue which this PR also needs.

Copy link
Collaborator

@blueo blueo Apr 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good to know they work together - I was thinking though - should these two code blocks work the same way?

  1. From this Job
                    $oldRecord = $doc->getDataObject();

                    if ($oldRecord->isArchived() || $oldRecord->isOnDraft()) {
                        $document = DataObjectDocument::create($oldRecord);
                        $carry[$document->getIdentifier()] = $document;
                    }
  1. from the unserialise function
if (!$dataObject && DataObject::has_extension($data['className'], Versioned::class) && $data['fallback']) {
            // get the latest version - usually this is an object that has been deleted
            $dataObject = Versioned::get_latest_version(
                $data['className'],
                $data['id']
            );
        }

eg your one does a check for isArchived + isOnDraft if there is no DataObject - where the unserialize does a Versioned::get_latest_version call. OR does it not matter and we get the same result in the end anyway?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as discussed - the __unserialize function will look after returning a draft/deleted version of the dataobject - so we don't need this check $oldRecord->isArchived() || $oldRecord->isOnDraft() - we can simply add the current dataobject - the job will then look after either indexing or removing it from the index


if ($oldRecord->isArchived() || $oldRecord->isOnDraft()) {
$document = DataObjectDocument::create($oldRecord);
$carry[$document->getIdentifier()] = $document;
}

return $carry;
},
[]
);
});

$this->setDocuments($documents);
Expand Down
2 changes: 2 additions & 0 deletions src/Service/Indexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ function (DocumentInterface $dependentDocument) use ($document) {
);

if ($dependentDocs) {
// Indexer::METHOD_ADD as default parameter make sure we check first its related documents
// and decide whether we should delete or update them automatically.
$child = Indexer::create($dependentDocs, self::METHOD_ADD, $this->getBatchSize());

while (!$child->finished()) {
Expand Down
59 changes: 59 additions & 0 deletions tests/DataObject/DataObjectDocumentTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace SilverStripe\SearchService\Tests\DataObject;

use Page;
use SilverStripe\ORM\ArrayList;
use SilverStripe\ORM\DataObject;
use SilverStripe\ORM\FieldType\DBDatetime;
use SilverStripe\ORM\RelationList;
Expand All @@ -16,6 +17,7 @@
use SilverStripe\SearchService\Tests\Fake\DataObjectFakeVersioned;
use SilverStripe\SearchService\Tests\Fake\DataObjectSubclassFake;
use SilverStripe\SearchService\Tests\Fake\ImageFake;
use SilverStripe\SearchService\Tests\Fake\PageFake;
use SilverStripe\SearchService\Tests\Fake\TagFake;
use SilverStripe\SearchService\Tests\SearchServiceTest;
use SilverStripe\Security\Member;
Expand Down Expand Up @@ -44,6 +46,7 @@ class DataObjectDocumentTest extends SearchServiceTest
DataObjectSubclassFake::class,
DataObjectFakeVersioned::class,
DataObjectFakePrivate::class,
PageFake::class,
];

public function testGetIdentifier(): void
Expand Down Expand Up @@ -531,6 +534,7 @@ public function testGetDependentDocuments(): void
$config->set('getSearchableClasses', [
DataObjectFake::class,
ImageFake::class,
Page::class,
]);
$config->set('getFieldsForClass', [
DataObjectFake::class => [
Expand All @@ -543,6 +547,10 @@ public function testGetDependentDocuments(): void
ImageFake::class => [
new Field('tagtitles', 'Tags.Title'),
],
Page::class => [
new Field('title'),
new Field('content'),
],
]);

$dataobject = $this->objFromFixture(TagFake::class, 'one');
Expand Down Expand Up @@ -576,6 +584,33 @@ public function testGetDependentDocuments(): void
}

$this->assertEqualsCanonicalizing($expectedDocuments, $resultDocuments);

$pageOne = $this->objFromFixture(Page::class, 'page1');
$pageDoc = DataObjectDocument::create($pageOne);
/** @var DataObjectDocument[] $dependentPages */
$dependentPages = $pageDoc->getDependentDocuments();

// Grab all the expected pages
$pageTwo = $this->objFromFixture(Page::class, 'page2');
$pageThree = $this->objFromFixture(Page::class, 'page3');
$pageSeven = $this->objFromFixture(Page::class, 'page7');
$pageEight = $this->objFromFixture(Page::class, 'page8');

$expectedPages = [
sprintf('%s-%s', Page::class, $pageTwo->ID),
sprintf('%s-%s', Page::class, $pageThree->ID),
sprintf('%s-%s', Page::class, $pageSeven->ID),
sprintf('%s-%s', Page::class, $pageEight->ID),
];

$resultPages = [];

// Now let's check that each Document represents the Pages that we expected
foreach ($dependentPages as $document) {
$resultPages[] = sprintf('%s-%s', $document->getSourceClass(), $document->getDataObject()?->ID);
}

$this->assertEqualsCanonicalizing($expectedPages, $resultPages);
}

public function testExtensionRequired(): void
Expand Down Expand Up @@ -626,4 +661,28 @@ public function testDeletedDataObject(): void
unserialize(serialize($doc));
}

public function testGetChildDocuments(): void
{
$pageOne = $this->objFromFixture(Page::class, 'page1');
$pageTwo = $this->objFromFixture(Page::class, 'page2');
$pageThree = $this->objFromFixture(Page::class, 'page3');
$pageSeven = $this->objFromFixture(Page::class, 'page7');
$pageEight = $this->objFromFixture(Page::class, 'page8');

$parentDocument = DataObjectDocument::create($pageOne);
$identifierPrefix = preg_replace('/\d$/', '', $parentDocument->getIdentifier());
$childDocs = $parentDocument->getChildDocuments($pageOne);

$expectedIdentifiers = [
$identifierPrefix . $pageTwo->ID,
$identifierPrefix . $pageThree->ID,
$identifierPrefix . $pageSeven->ID,
$identifierPrefix . $pageEight->ID,
];

$resultIdentifiers = ArrayList::create($childDocs)->column('getIdentifier');

$this->assertEqualsCanonicalizing($expectedIdentifiers, $resultIdentifiers);
}

}
8 changes: 8 additions & 0 deletions tests/Fake/ImageFake.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ class ImageFake extends DataObject implements TestOnly
{

private static array $db = [
'Title' => 'Varchar',
'URL' => 'Varchar',
];

private static array $has_one = [
'Parent' => DataObjectFake::class,
'PageFake' => PageFake::class,
];

private static array $many_many = [
Expand All @@ -27,4 +29,10 @@ class ImageFake extends DataObject implements TestOnly

private static string $table_name = 'ImageFake';

// For DataObjects that are not versioned, canView is needed and must return true
public function canView($member = null) // phpcs:ignore SlevomatCodingStandard.TypeHints
{
return true;
}

}
31 changes: 31 additions & 0 deletions tests/Fake/PageFake.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

namespace SilverStripe\SearchService\Tests\Fake;

use Page;
use SilverStripe\Dev\TestOnly;
use SilverStripe\SearchService\Extensions\SearchServiceExtension;

class PageFake extends Page implements TestOnly
{

private static array $many_many = [
'Tags' => TagFake::class,
];

private static array $has_many = [
'Images' => ImageFake::class,
];

private static array $owns = [
'Tags',
'Images',
];

private static string $table_name = 'PageFake';

private static array $extensions = [
SearchServiceExtension::class,
];

}