Skip to content

Commit

Permalink
Implement Asset Sanitizer Queue & Preview Check (#16053)
Browse files Browse the repository at this point in the history
* Improve: add sanitizing pdf

* Improve: add sanitizing pdf

* Review changes - use scan instead of sanitizing

* Review changes - generate a version after scanning

* Review changes

* Update doc/23_Installation_and_Upgrade/09_Upgrade_Notes/README.md

* Apply suggestions from code review

Co-authored-by: Jacob Dreesen <jacob@hdreesen.de>

* Update models/Asset/Document.php

Co-authored-by: Jacob Dreesen <jacob@hdreesen.de>

* Review changes

* Update doc/23_Installation_and_Upgrade/09_Upgrade_Notes/README.md

Co-authored-by: aryaantony92 <97134765+aryaantony92@users.noreply.github.com>

---------

Co-authored-by: Divesh Pahuja <divesh.pahuja@pimcore.com>
Co-authored-by: Jacob Dreesen <jacob@hdreesen.de>
Co-authored-by: aryaantony92 <97134765+aryaantony92@users.noreply.github.com>
  • Loading branch information
4 people committed Oct 9, 2023
1 parent a789673 commit 7573756
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 0 deletions.
4 changes: 4 additions & 0 deletions bundles/CoreBundle/src/DependencyInjection/Configuration.php
Expand Up @@ -553,6 +553,10 @@ private function addAssetNode(ArrayNodeDefinition $rootNode): void
->defaultTrue()
->info('Process text for Asset documents (e.g. used by backend search).')
->end()
->booleanNode('scan_pdf')
->defaultTrue()
->info('Scan PDF documents for unsafe JavaScript.')
->end()
->end()
->end()
->arrayNode('versions')
Expand Down
1 change: 1 addition & 0 deletions doc/04_Assets/README.md
Expand Up @@ -28,6 +28,7 @@ pimcore:
enabled: false #disable generating thumbnail for asset documents
process_page_count: false #disable processing page count
process_text: false #disable processing text extraction
scan_pdf: false #disable scanning PDF documents for unsafe JavaScript
```

The sub chapters of this chapter provide insight into details for
Expand Down
2 changes: 2 additions & 0 deletions doc/23_Installation_and_Upgrade/09_Upgrade_Notes/README.md
Expand Up @@ -20,6 +20,8 @@ pimcore:
enabled: false #disable generating thumbnail for asset documents
process_page_count: false #disable processing page count
process_text: false #disable processing text extraction
scan_pdf: false #disable scanning PDF documents for unsafe JavaScript

```
- [Elements] Properties are now only updated in the database with dirty state (when calling `setProperties` or `setProperty`).
- `Pimcore\Helper\CsvFormulaFormatter` has been deprecated. Use `League\Csv\EscapeFormula` instead.
Expand Down
4 changes: 4 additions & 0 deletions lib/Messenger/Handler/AssetUpdateTasksHandler.php
Expand Up @@ -62,6 +62,10 @@ private function saveAsset(Asset $asset): void

private function processDocument(Asset\Document $asset): void
{
if ($asset->getMimeType() === 'application/pdf' && $asset->checkIfPdfContainsJS()) {
$asset->save(['versionNote' => 'PDF scan result']);
}

$pageCount = $asset->getCustomSetting('document_page_count');
if (!$pageCount || $pageCount === 'failed') {
if ($asset->processPageCount()) {
Expand Down
54 changes: 54 additions & 0 deletions models/Asset/Document.php
Expand Up @@ -26,6 +26,8 @@
*/
class Document extends Model\Asset
{
public const CUSTOM_SETTING_PDF_SCAN_STATUS = 'document_pdf_scan_status';

protected string $type = 'document';

protected function update(array $params = []): void
Expand Down Expand Up @@ -163,6 +165,53 @@ public function getText(int $page = null): ?string
return null;
}

/**
 * Scans the asset's binary content for PDF JavaScript markers and records the
 * outcome in the `document_pdf_scan_status` custom setting.
 *
 * The status is set to IN_PROGRESS before reading, to UNSAFE as soon as a
 * `/JS` or `/JavaScript` marker is found, and to SAFE when the whole stream
 * was read without a match.
 *
 * Note on the return value: it reports whether a scan was performed, not
 * whether JavaScript was found. Use getScanStatus() to read the result.
 *
 * @return bool false when PDF scanning is disabled by configuration (no
 *              custom setting is touched), true once a scan has been run
 */
public function checkIfPdfContainsJS(): bool
{
    if (!$this->isPdfScanningEnabled()) {
        return false;
    }

    $this->setCustomSetting(
        self::CUSTOM_SETTING_PDF_SCAN_STATUS,
        Model\Asset\Enum\PdfScanStatus::IN_PROGRESS->value
    );

    $chunkSize = 1024;
    // NOTE(review): getStream() is assumed to return a readable stream resource — confirm
    // how assets without binary data should be handled.
    $filePointer = $this->getStream();

    // Number of trailing bytes carried over into the next read, so that a marker split
    // across two chunks is still seen as one contiguous string.
    $overlapLength = strlen('/JavaScript') - 1;
    $carryOver = '';

    // Compare explicitly against false/'' instead of relying on truthiness: a chunk
    // consisting of the single character "0" is falsy and would end the scan early.
    while (($chunk = fread($filePointer, $chunkSize)) !== false && $chunk !== '') {
        $window = $carryOver . $chunk;

        if (str_contains($window, '/JS') || str_contains($window, '/JavaScript')) {
            $this->setCustomSetting(
                self::CUSTOM_SETTING_PDF_SCAN_STATUS,
                Model\Asset\Enum\PdfScanStatus::UNSAFE->value
            );

            return true;
        }

        // substr() with a negative offset larger than the string returns the whole string,
        // so short windows are carried over in full.
        $carryOver = substr($window, -$overlapLength);
    }

    $this->setCustomSetting(
        self::CUSTOM_SETTING_PDF_SCAN_STATUS,
        Model\Asset\Enum\PdfScanStatus::SAFE->value
    );

    return true;
}

/**
 * Returns the PDF scan status stored in the `document_pdf_scan_status` custom setting.
 *
 * @return Model\Asset\Enum\PdfScanStatus|null null when no (truthy) status is stored or
 *                                             the stored value matches no enum case
 */
public function getScanStatus(): ?Model\Asset\Enum\PdfScanStatus
{
    $storedStatus = $this->getCustomSetting(self::CUSTOM_SETTING_PDF_SCAN_STATUS);

    return $storedStatus
        ? Model\Asset\Enum\PdfScanStatus::tryFrom($storedStatus)
        : null;
}

private function isThumbnailsEnabled(): bool
{
return Config::getSystemConfiguration('assets')['document']['thumbnails']['enabled'];
Expand All @@ -177,4 +226,9 @@ private function isTextProcessingEnabled(): bool
{
return Config::getSystemConfiguration('assets')['document']['process_text'];
}

/**
 * Reads the `assets.document.scan_pdf` flag from the system configuration.
 */
private function isPdfScanningEnabled(): bool
{
    $assetsConfig = Config::getSystemConfiguration('assets');
    $documentConfig = $assetsConfig['document'];

    return $documentConfig['scan_pdf'];
}
}
24 changes: 24 additions & 0 deletions models/Asset/Enum/PdfScanStatus.php
@@ -0,0 +1,24 @@
<?php
declare(strict_types=1);

/**
* Pimcore
*
* This source file is available under two different licenses:
* - GNU General Public License version 3 (GPLv3)
* - Pimcore Commercial License (PCL)
* Full copyright and license information is available in
* LICENSE.md which is distributed with this source code.
*
* @copyright Copyright (c) Pimcore GmbH (http://www.pimcore.org)
* @license http://www.pimcore.org/license GPLv3 and PCL
*/

namespace Pimcore\Model\Asset\Enum;

/**
 * Outcome states of the PDF JavaScript scan.
 *
 * The backing string of each case is what Asset\Document stores in its
 * `document_pdf_scan_status` custom setting, so the values must stay stable.
 */
enum PdfScanStatus: string
{
// The content was read completely without finding a `/JS` or `/JavaScript` marker.
case SAFE = 'safe';
// A `/JS` or `/JavaScript` marker was found in the content.
case UNSAFE = 'unsafe';
// Set right before the content is read; replaced by SAFE or UNSAFE when the scan ends.
case IN_PROGRESS = 'inProgress';
}
4 changes: 4 additions & 0 deletions models/Element/Service.php
Expand Up @@ -731,6 +731,10 @@ public static function renewReferences(mixed $data, bool $initial = true, string
return $data;
}
if (is_object($data)) {
if ($data instanceof \UnitEnum) {
return $data;
}

if ($data instanceof ElementInterface && !$initial) {
return self::getElementById(self::getElementType($data), $data->getId());
}
Expand Down

0 comments on commit 7573756

Please sign in to comment.