Skip to content

Commit

Permalink
ENH Optimise site search
Browse files Browse the repository at this point in the history
  • Loading branch information
emteknetnz committed Jul 3, 2023
1 parent 96c81fe commit 8549742
Show file tree
Hide file tree
Showing 9 changed files with 278 additions and 9 deletions.
30 changes: 30 additions & 0 deletions docs/en/searching-blocks.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,33 @@ to make it clear in search results where one piece of content ends and another b
Page:
search_index_element_delimiter: ' ... '
```

## CMS page search

CMS page search will include search results for pages with elements that match the search query.

By default it uses the same method as the search indexing where it will fully render every element that is
being searched. This is an expensive operation and can cause performance issues if you have a large site with a lot of elements.

To increase performance by a large amount, likely more than doubling it, you can disable the rendering of elements and instead just look at the database values of the elements directly.

```yml
DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch:
render_elements: false
```

If `render_elements` is `false`, then all fields that have stored as a Varchar or Text like are searched. Individual fields on blocks can be excluded from the search by adding fields to the `exclude_fields_from_cms_search` array config variable on the element class. e.g.

```yml
App\MyElement:
fields_excluded_from_cms_search:
- MyFieldToExclude
- AnotherFieldToExclude
```

If the above is still not performant enough, searching elements for content in CMS page search can be disabled entirely can be disabled:

```yml
DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch:
search_for_term_in_content: false
```
16 changes: 14 additions & 2 deletions src/Controllers/ElementSiteTreeFilterSearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ class ElementSiteTreeFilterSearch extends CMSSiteTreeFilter_Search
*/
private static $search_for_term_in_content = true;

/**
* Whether to render elements with templates when doing a CMS SiteTree search
*
* @todo Change to false for CMS6?
*/
private static bool $render_elements = true;

/**
* @var array
*/
Expand Down Expand Up @@ -47,8 +54,13 @@ protected function applyDefaultFilters($query)
return false;
}

// Check whether the search term exists in the nested page content
$pageContent = $siteTree->getElementsForSearch();
if ($this->config()->get('render_elements') === true) {
// Check whether the search term exists in the nested page content
$pageContent = $siteTree->getElementsForSearch();
} else {
$pageContent = $siteTree->getContentFromElementsForCmsSearch();
}

return stripos($pageContent ?? '', $this->params['Term'] ?? '') !== false;
});

Expand Down
72 changes: 65 additions & 7 deletions src/Extensions/ElementalPageExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ class ElementalPageExtension extends ElementalAreasExtension
*/
private static $search_index_element_delimiter = ' ';

/**
* Used to cache all ElementalArea's prior to eager loading elements
*
* @internal
*/
private static ?array $elementalAreas = null;

/**
* Returns the contents of each ElementalArea has_one's markup for use in Solr or Elastic search indexing
*
Expand All @@ -49,14 +56,17 @@ public function getElementsForSearch()
SSViewer::set_themes(SSViewer::config()->get('themes'));
try {
$output = [];
$this->loopThroughElements(function (BaseElement $element) use (&$output) {
if ($element->getSearchIndexable()) {
$content = $element->getContentForSearchIndex();
if ($content) {
$output[] = $content;
}
$elements = $this->getEagerLoadedElements();
/** @var BaseElement $element */
foreach ($elements as $element) {
if (!$element->getSearchIndexable()) {
continue;
}
});
$content = $element->getContentForSearchIndex();
if ($content) {
$output[] = $content;
}
}
} finally {
// Reset theme if an exception occurs, if you don't have a
// try / finally around code that might throw an Exception,
Expand All @@ -66,6 +76,28 @@ public function getElementsForSearch()
return implode($this->owner->config()->get('search_index_element_delimiter') ?? '', $output);
}

/**
* Returns the contents of all Elements on the pages ElementalAreas for use in CMS search
*/
public function getContentFromElementsForCmsSearch(): string
{
$output = [];
$elements = $this->getEagerLoadedElements();
/** @var BaseElement $element */
foreach ($elements as $element) {
if (!$element->getSearchIndexable()) {
continue;
}
$content = $element->getContentForCmsSearch();
if ($content) {
$output[] = $content;
}
}
// Use |%| to delimite different elements rather than space so that you don't
// accidentally join results of two elements that are next to each other in a table
return implode('|%|', $output);
}

/**
* @see SiteTree::getAnchorsOnPage()
*/
Expand Down Expand Up @@ -98,6 +130,32 @@ public function MetaTags(&$tags)
}
}

private function getEagerLoadedElements(): array
{
$elements = [];
if (is_null(self::$elementalAreas)) {
self::$elementalAreas = [];
foreach (ElementalArea::get()->eagerLoad('Elements') as $elementalArea) {
self::$elementalAreas[$elementalArea->ID] = $elementalArea;
}
}
foreach ($this->owner->hasOne() as $relation => $class) {
if (!is_a($class, ElementalArea::class, true)) {
continue;
}
$elementalAreaID = $this->owner->{"{$relation}ID"};
if ($elementalAreaID && array_key_exists($elementalAreaID, self::$elementalAreas)) {
$elementalArea = self::$elementalAreas[$elementalAreaID];
} else {
$elementalArea = $this->owner->$relation();
}
foreach ($elementalArea->Elements() as $element) {
$elements[] = $element;
}
}
return $elements;
}

/**
* Call some function over all elements belonging to this page
*/
Expand Down
87 changes: 87 additions & 0 deletions src/Models/BaseElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
use SilverStripe\View\Parsers\URLSegmentFilter;
use SilverStripe\View\Requirements;
use SilverStripe\ORM\CMSPreviewable;
use SilverStripe\Core\Config\Config;
use SilverStripe\ORM\DataObjectSchema;

/**
* Class BaseElement
Expand Down Expand Up @@ -66,6 +68,12 @@ class BaseElement extends DataObject implements CMSPreviewable
*/
private static $description = 'Base element class';

/**
* List of fields to exclude from CMS SiteTree seatch
* @see ElementSiteTreeFilterSearch::applyDefaultFilters()
*/
private static array $fields_excluded_from_cms_search = [];

private static $db = [
'Title' => 'Varchar(255)',
'ShowTitle' => 'Boolean',
Expand Down Expand Up @@ -528,6 +536,85 @@ public function getContentForSearchIndex(): string
return $content;
}

/**
* Provides content for CMS search
*/
public function getContentForCmsSearch(): string
{
$textualDatabaseFields = $this->getTextualDatabaseFields();
$excludedFields = $this->getFieldsExcludedFromCmsSearch();
$contents = [];
foreach ($this->config()->get('db') as $fieldName => $fieldType) {
if (!in_array($fieldType, $textualDatabaseFields) || in_array($fieldName, $excludedFields)) {
continue;
}
$contents[] = $this->$fieldName;
}
// Use |#| to delimit different fields rather than space so that you don't
// accidentally join results of two columns that are next to each other in a table
$delimiter = '|#|';
$content = implode($delimiter, $contents);

// Strips tags and be sure there's a space between words.
$content = trim(strip_tags(str_replace('<', ' <', $content)));

// Allow projects to update content of third-party elements.
$content .= $delimiter;
$this->extend('updateContentForCmsSearch', $content);

// Remove delimiters that aren't required
$content = $this->removeExtraDelimiters($content, $delimiter);

return $content;
}
/**
* Get fields that have a Varchar or Text like type in the database
*/
private function getTextualDatabaseFields(): array
{
$textualDatabaseFields = singleton(DataObjectSchema::class)->databaseFields($this);
foreach ($textualDatabaseFields as $fieldName => $databaseFieldType) {
$lcType = strtolower($databaseFieldType);
if (!str_contains($lcType, 'varchar') && !str_contains($lcType, 'text')) {
unset($textualDatabaseFields[$fieldName]);
}
}
return $textualDatabaseFields;
}

private function getFieldsExcludedFromCmsSearch(): array
{
return [
// `fixed_fields` contains ['ID', 'ClassName', 'LastEdited', 'Created'] and possibly more
...array_keys(static::config()->get('fixed_fields')),
// generic field names that are not useful for search
'ExtraClass',
// manually excluded fields
...static::config()->get('fields_excluded_from_cms_search')
];
}

private function removeExtraDelimiters(string $content, string $delimiter): string
{
$len = strlen($delimiter);
if ($len === 0) {
return $content;
}
// Remove duplicate delimiters
while (str_contains($content, $delimiter . $delimiter)) {
$content = str_replace($delimiter . $delimiter, $delimiter, $content);
}
// ltrim() delimter
if (substr($content, 0, $len) === $delimiter) {
$content = substr($content, $len);
}
// rtrim() delimiter
if (substr($content, -$len) === $delimiter) {
$content = substr($content, 0, -$len);
}
return $content;
}

/**
* Default way to render element in templates. Note that all blocks should
* be rendered through their {@link ElementController} class as this
Expand Down
8 changes: 8 additions & 0 deletions tests/BaseElementTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -495,4 +495,12 @@ public function testPreviewLink(string $class, string $elementIdentifier, ?strin
$this->assertSame($link, $previewLink);
}
}

public function testGetContentForCmsSearch()
{
$element = $this->objFromFixture(ElementContent::class, 'content1');
$this->assertSame('Test Content', $element->getContentForCmsSearch());
$element = $this->objFromFixture(TestElement::class, 'elementDataObject3');
$this->assertSame('Element 3|#|Hello Test', $element->getContentForCmsSearch());
}
}
54 changes: 54 additions & 0 deletions tests/Controllers/ElementSiteTreeFilterSearchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
use DNADesign\Elemental\Tests\Src\TestPage;
use SilverStripe\CMS\Controllers\CMSSiteTreeFilter_Search;
use SilverStripe\Dev\SapphireTest;
use DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch;
use ReflectionMethod;
use SilverStripe\ORM\DataObject;
use SilverStripe\Core\Config\Config;
use SilverStripe\ORM\DataObjectSchema;
use DNADesign\Elemental\Models\ElementContent;
use DNADesign\Elemental\Tests\Src\TestElementContentExtension;

class ElementSiteTreeFilterSearchTest extends SapphireTest
{
Expand All @@ -15,6 +22,9 @@ class ElementSiteTreeFilterSearchTest extends SapphireTest
TestPage::class => [
ElementalPageExtension::class,
],
ElementContent::class => [
TestElementContentExtension::class,
]
];

protected static $extra_dataobjects = [
Expand Down Expand Up @@ -52,4 +62,48 @@ public function searchProvider()
]],
];
}

/**
* @dataProvider provideApplyDefaultFilters
*/
public function testApplyDefaultFilters(bool $renderElements, string $term, array $expected): void
{
// TODO delete
// $textualDatabaseFields = singleton(DataObjectSchema::class)->databaseFields($this);

// set protected method visibility - note applyDefaultFilters() is essentially an
// extension methods that's called by silverstripe/cms, so use of reflection here
// is pretty much required
$method = new ReflectionMethod(ElementSiteTreeFilterSearch::class, 'applyDefaultFilters');
Config::modify()->set(ElementSiteTreeFilterSearch::class, 'render_elements', $renderElements);
$filterSearch = new ElementSiteTreeFilterSearch(['Term' => $term]);
$ret = $method->invoke($filterSearch, DataObject::get(TestPage::class));
$this->assertSame($expected, $ret->column('Title'));
}

public function provideApplyDefaultFilters(): array
{
return [
'render_elements true - title search' => [
'render_elements' => true,
'term' => 'This content is rendered',
'expected' => ['Content blocks page']
],
'render_elements true - unrendered search' => [
'render_elements' => true,
'term' => 'This field is unrendered',
'expected' => []
],
'render_elements false - title search' => [
'render_elements' => false,
'term' => 'This content is rendered',
'expected' => ['Content blocks page']
],
'render_elements false - unrendered search' => [
'render_elements' => false,
'term' => 'This field is unrendered',
'expected' => ['Content blocks page']
],
];
}
}
5 changes: 5 additions & 0 deletions tests/Controllers/ElementSiteTreeFilterSearchTest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ DNADesign\Elemental\Models\ElementContent:
Title: Content
HTML: Specifically blocks page content
ParentID: =>DNADesign\Elemental\Models\ElementalArea.blocks_page_area
blocks_page_unrendered_content:
Title: My title
HTML: This content is rendered
ParentID: =>DNADesign\Elemental\Models\ElementalArea.blocks_page_area
UnrenderedField: This field is unrendered
2 changes: 2 additions & 0 deletions tests/Src/TestContentForSearchIndexExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ public function updateContentForSearchIndex(&$content)
{
$content = 'This is the updated content.';
}

// TODO the other extension hook
}
13 changes: 13 additions & 0 deletions tests/Src/TestElementContentExtension.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?php

namespace DNADesign\Elemental\Tests\Src;

use SilverStripe\ORM\DataExtension;
use SilverStripe\Dev\TestOnly;

class TestElementContentExtension extends DataExtension implements TestOnly
{
private static $db = [
'UnrenderedField' => 'Varchar(255)'
];
}

0 comments on commit 8549742

Please sign in to comment.