Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX Unpublising parent pages should include child pages #89

Merged
merged 9 commits into from
Apr 12, 2024
3 changes: 2 additions & 1 deletion _config/extensions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@ Only:
---
SilverStripe\CMS\Model\SiteTree:
extensions:
- SilverStripe\SearchService\Extensions\SearchServiceExtension
SearchServiceExtension: SilverStripe\SearchService\Extensions\SearchServiceExtension
SiteTreeHierarchyExtension: SilverStripe\SearchService\Extensions\SiteTreeHierarchyExtension
28 changes: 16 additions & 12 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
<phpunit bootstrap="vendor/silverstripe/framework/tests/bootstrap.php" colors="true">
<testsuite name="Default">
<directory>tests/</directory>
</testsuite>
<filter>
<whitelist processUncoveredFilesFromWhitelist="true">
<directory suffix=".php">src/</directory>
<exclude>
<directory suffix=".php">tests/</directory>
</exclude>
</whitelist>
</filter>
<?xml version="1.0"?>
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" bootstrap="vendor/silverstripe/framework/tests/bootstrap.php" colors="true" xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/9.3/phpunit.xsd">
<coverage processUncoveredFiles="true">
<include>
<directory suffix=".php">src/</directory>
</include>
<exclude>
<directory suffix=".php">tests/</directory>
</exclude>
</coverage>
<testsuite name="Default">
<directory>tests/</directory>
</testsuite>
<php>
<get name="flush" value="1"/>
</php>
</phpunit>
2 changes: 2 additions & 0 deletions src/DataObject/DataObjectBatchProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ public function removeDocuments(array $documents): array
$this->run($job);

foreach ($documents as $doc) {
// Indexer::METHOD_ADD as default parameter make sure we check first its related documents
// and decide whether we should delete or update them automatically.
$childJob = RemoveDataObjectJob::create($doc, $timestamp);
$this->run($childJob);
}
Expand Down
10 changes: 8 additions & 2 deletions src/DataObject/DataObjectDocument.php
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,12 @@ public function getFieldValue(Field $field): mixed
}

/**
* Collects documents that depend on the current DataObject for indexing.
* It will inspect the search index configuration for anything using this object in a field or, if the
* current object is an instance of SiteTree, it will respect `enforce_strict_hierarchy`
* and add any child objects.
*
* @see [the dependency tracking docs](docs/en/usage.md#dependency-tracking)
* @return DocumentInterface[]
*/
public function getDependentDocuments(): array
Expand Down Expand Up @@ -440,15 +446,15 @@ public function getDependentDocuments(): array

/** @var DataObjectDocument $candidateDocument */
foreach ($chunker->chunk() as $candidateDocument) {
$relatedObj = $candidateDocument->getFieldValue($field);
$relatedObj = $candidateDocument->getFieldDependency($field);

// Singleton returned a dataobject, but this record did not. Rare, but possible.
if (!$relatedObj instanceof $objectClass) {
continue;
}

if ($relatedObj->ID === $ownedDataObject->ID) {
$docs[$document->getIdentifier()] = $document;
$docs[$candidateDocument->getIdentifier()] = $candidateDocument;
}
}
}
Expand Down
27 changes: 27 additions & 0 deletions src/Extensions/SiteTreeHierarchyExtension.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php

namespace SilverStripe\SearchService\Extensions;

use SilverStripe\CMS\Model\SiteTree;
use SilverStripe\Core\Extension;
use SilverStripe\SearchService\DataObject\DataObjectDocument;

class SiteTreeHierarchyExtension extends Extension
{

public function updateSearchDependentDocuments(array &$dependentDocs): void
{
if (!SiteTree::config()->get('enforce_strict_hierarchy')) {
return;
}

$page = $this->getOwner();

foreach ($page->AllChildren() as $record) {
$document = DataObjectDocument::create($record);
$dependentDocs[$document->getIdentifier()] = $document;
$dependentDocs = array_merge($dependentDocs, $document->getDependentDocuments());
}
}

}
43 changes: 33 additions & 10 deletions src/Jobs/RemoveDataObjectJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
use SilverStripe\Versioned\Versioned;

/**
* By virture of the default parameter, Index::METHOD_ADD, this does not remove the documents straight away.
* It checks first the status of the underlaying DataObjects and decides whether to remove or add them to the index.
* Then pass on to the parent's process() method to handle the job.
*
* @property DataObjectDocument|null $document
* @property int|null $timestamp
*/
Expand All @@ -19,6 +23,8 @@ class RemoveDataObjectJob extends IndexJob

public function __construct(?DataObjectDocument $document = null, ?int $timestamp = null, ?int $batchSize = null)
{
// Indexer::METHOD_ADD as default parameter make sure we check first its related documents
// whether we should delete or update them automatically.
parent::__construct([], Indexer::METHOD_ADD, $batchSize);

if ($document !== null) {
Expand Down Expand Up @@ -46,26 +52,43 @@ public function getTitle(): string
*/
public function setup(): void
{
// Set the documents in setup to ensure async
/** @var DBDatetime $datetime - set the documents in setup to ensure async */
$datetime = DBField::create_field('Datetime', $this->getTimestamp());
$archiveDate = $datetime->format($datetime->getISOFormat());
$documents = Versioned::withVersionedMode(function () use ($archiveDate) {
Versioned::reading_archived_date($archiveDate);

$currentDocument = $this->getDocument();
// Go back in time to find out what the owners were before unpublish
$dependentDocs = $this->document->getDependentDocuments();
$dependentDocs = $currentDocument->getDependentDocuments();

// refetch everything on the live stage
Versioned::set_stage(Versioned::LIVE);

return array_map(function (DataObjectDocument $doc) {
return DataObjectDocument::create(
DataObject::get_by_id(
$doc->getSourceClass(),
$doc->getDataObject()->ID
)
);
}, $dependentDocs);
return array_reduce(
$dependentDocs,
function (array $carry, DataObjectDocument $doc) {
$record = DataObject::get_by_id($doc->getSourceClass(), $doc->getDataObject()->ID);

// Since SiteTree::onBeforeDelete recursively deletes the child pages,
// they end up not found on a live environment which breaks DataObjectDocument::_constructor
if ($record) {
$document = DataObjectDocument::create($record);
$carry[$document->getIdentifier()] = $document;

return $carry;
}

// Taking into account that this queued job has a reference of existing child pages
// We need to make sure that we are able to send these pages to ElasticSearch etc. for removal
$oldRecord = $doc->getDataObject();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__unserialise does this a bit differently by getting the 'latest' version - do we want to do that to be consistent? I"m assuming this is about getting documents that have been removed (ie were not picked up by the previous condition because of stage=Live)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tested your __unserialize PR on top of this and your fix works as expected. As discussed, this job only checks for relationships between dataobjects and not their contents. Sending the live data is what the other PR has resolved a seemingly module wide issue which this PR also needs.

Copy link
Collaborator

@blueo blueo Apr 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good to know they work together - I was thinking though - should these two code blocks work the same way?

  1. From this Job
                    $oldRecord = $doc->getDataObject();

                    if ($oldRecord->isArchived() || $oldRecord->isOnDraft()) {
                        $document = DataObjectDocument::create($oldRecord);
                        $carry[$document->getIdentifier()] = $document;
                    }
  1. from the unserialise function
if (!$dataObject && DataObject::has_extension($data['className'], Versioned::class) && $data['fallback']) {
            // get the latest version - usually this is an object that has been deleted
            $dataObject = Versioned::get_latest_version(
                $data['className'],
                $data['id']
            );
        }

eg your one does a check for isArchived + isOnDraft if there is no DataObject - where the unserialize does a Versioned::get_latest_version call. OR does it not matter and we get the same result in the end anyway?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as discussed - the __unserialize function will look after returning a draft/deleted version of the dataobject - so we don't need this check $oldRecord->isArchived() || $oldRecord->isOnDraft() - we can simply add the current dataobject - the job will then look after either indexing or removing it from the index

$document = DataObjectDocument::create($oldRecord);
$carry[$document->getIdentifier()] = $document;

return $carry;
},
[]
);
});

$this->setDocuments($documents);
Expand Down
2 changes: 2 additions & 0 deletions src/Service/Indexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ function (DocumentInterface $dependentDocument) use ($document) {
);

if ($dependentDocs) {
// Indexer::METHOD_ADD as default parameter make sure we check first its related documents
// and decide whether we should delete or update them automatically.
$child = Indexer::create($dependentDocs, self::METHOD_ADD, $this->getBatchSize());

while (!$child->finished()) {
Expand Down
36 changes: 35 additions & 1 deletion tests/DataObject/DataObjectDocumentTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use SilverStripe\SearchService\Tests\Fake\DataObjectFakeVersioned;
use SilverStripe\SearchService\Tests\Fake\DataObjectSubclassFake;
use SilverStripe\SearchService\Tests\Fake\ImageFake;
use SilverStripe\SearchService\Tests\Fake\PageFake;
use SilverStripe\SearchService\Tests\Fake\TagFake;
use SilverStripe\SearchService\Tests\SearchServiceTest;
use SilverStripe\Security\Member;
Expand Down Expand Up @@ -44,6 +45,7 @@ class DataObjectDocumentTest extends SearchServiceTest
DataObjectSubclassFake::class,
DataObjectFakeVersioned::class,
DataObjectFakePrivate::class,
PageFake::class,
];

public function testGetIdentifier(): void
Expand Down Expand Up @@ -531,6 +533,7 @@ public function testGetDependentDocuments(): void
$config->set('getSearchableClasses', [
DataObjectFake::class,
ImageFake::class,
Page::class,
]);
$config->set('getFieldsForClass', [
DataObjectFake::class => [
Expand All @@ -543,6 +546,10 @@ public function testGetDependentDocuments(): void
ImageFake::class => [
new Field('tagtitles', 'Tags.Title'),
],
Page::class => [
new Field('title'),
new Field('content'),
],
]);

$dataobject = $this->objFromFixture(TagFake::class, 'one');
Expand Down Expand Up @@ -576,6 +583,33 @@ public function testGetDependentDocuments(): void
}

$this->assertEqualsCanonicalizing($expectedDocuments, $resultDocuments);

$pageOne = $this->objFromFixture(Page::class, 'page1');
$pageDoc = DataObjectDocument::create($pageOne);
/** @var DataObjectDocument[] $dependentPages */
$dependentPages = $pageDoc->getDependentDocuments();

// Grab all the expected pages
$pageTwo = $this->objFromFixture(Page::class, 'page2');
$pageThree = $this->objFromFixture(Page::class, 'page3');
$pageSeven = $this->objFromFixture(Page::class, 'page7');
$pageEight = $this->objFromFixture(Page::class, 'page8');

$expectedPages = [
sprintf('%s-%s', Page::class, $pageTwo->ID),
sprintf('%s-%s', Page::class, $pageThree->ID),
sprintf('%s-%s', Page::class, $pageSeven->ID),
sprintf('%s-%s', Page::class, $pageEight->ID),
];

$resultPages = [];

// Now let's check that each Document represents the Pages that we expected
foreach ($dependentPages as $document) {
$resultPages[] = sprintf('%s-%s', $document->getSourceClass(), $document->getDataObject()?->ID);
}

$this->assertEqualsCanonicalizing($expectedPages, $resultPages);
}

public function testExtensionRequired(): void
Expand Down Expand Up @@ -612,7 +646,7 @@ public function testDeletedDataObject(): void
$id = $dataObject->ID;

$doc = DataObjectDocument::create($dataObject)->setShouldFallbackToLatestVersion(true);
$dataObject->delete();
$dataObject->doArchive();

/** @var DataObjectDocument $serialDoc */
$serialDoc = unserialize(serialize($doc));
Expand Down
47 changes: 47 additions & 0 deletions tests/Extensions/SiteTreeHierarchyExtensionTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?php

namespace SilverStripe\SearchService\Tests\Extensions;

use Page;
use SilverStripe\Dev\SapphireTest;
use SilverStripe\ORM\ArrayList;
use SilverStripe\SearchService\DataObject\DataObjectDocument;
use SilverStripe\SearchService\Extensions\SiteTreeHierarchyExtension;

class SiteTreeHierarchyExtensionTest extends SapphireTest
{

protected static $fixture_file = [ // phpcs:ignore
'../fixtures.yml',
'../pages.yml',
];

public function testGetChildDocuments(): void
{
$pageOne = $this->objFromFixture(Page::class, 'page1');
$pageTwo = $this->objFromFixture(Page::class, 'page2');
$pageThree = $this->objFromFixture(Page::class, 'page3');
$pageSeven = $this->objFromFixture(Page::class, 'page7');
$pageEight = $this->objFromFixture(Page::class, 'page8');

$extension = new SiteTreeHierarchyExtension();
$extension->setOwner($pageOne);

$parentDocument = DataObjectDocument::create($pageOne);
$identifierPrefix = preg_replace('/\d+$/', '', $parentDocument->getIdentifier());
$childDocs = [];
$extension->updateSearchDependentDocuments($childDocs);

$expectedIdentifiers = [
$identifierPrefix . $pageTwo->ID,
$identifierPrefix . $pageThree->ID,
$identifierPrefix . $pageSeven->ID,
$identifierPrefix . $pageEight->ID,
];

$resultIdentifiers = ArrayList::create($childDocs)->column('getIdentifier');

$this->assertEqualsCanonicalizing($expectedIdentifiers, $resultIdentifiers);
}

}
8 changes: 8 additions & 0 deletions tests/Fake/ImageFake.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ class ImageFake extends DataObject implements TestOnly
{

private static array $db = [
'Title' => 'Varchar',
'URL' => 'Varchar',
];

private static array $has_one = [
'Parent' => DataObjectFake::class,
'PageFake' => PageFake::class,
];

private static array $many_many = [
Expand All @@ -27,4 +29,10 @@ class ImageFake extends DataObject implements TestOnly

private static string $table_name = 'ImageFake';

// For DataObjects that are not versioned, canView is needed and must return true
public function canView($member = null) // phpcs:ignore SlevomatCodingStandard.TypeHints
{
return true;
}

}
31 changes: 31 additions & 0 deletions tests/Fake/PageFake.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

namespace SilverStripe\SearchService\Tests\Fake;

use Page;
use SilverStripe\Dev\TestOnly;
use SilverStripe\SearchService\Extensions\SearchServiceExtension;

class PageFake extends Page implements TestOnly
{

private static array $many_many = [
'Tags' => TagFake::class,
];

private static array $has_many = [
'Images' => ImageFake::class,
];

private static array $owns = [
'Tags',
'Images',
];

private static string $table_name = 'PageFake';

private static array $extensions = [
SearchServiceExtension::class,
];

}
Loading
Loading