diff --git a/src/DjotConverter.php b/src/DjotConverter.php index a47012b..c7fbd9c 100644 --- a/src/DjotConverter.php +++ b/src/DjotConverter.php @@ -27,13 +27,43 @@ class DjotConverter * @param bool $xhtml Whether to use XHTML-compatible output * @param bool $warnings Whether to collect warnings during parsing * @param bool $strict Whether to throw exceptions on parse errors + * @param \Djot\SafeMode|bool|null $safeMode Enable safe mode (true for defaults, SafeMode instance for custom config) */ - public function __construct(bool $xhtml = false, bool $warnings = false, bool $strict = false) - { + public function __construct( + bool $xhtml = false, + bool $warnings = false, + bool $strict = false, + bool|SafeMode|null $safeMode = null, + ) { $this->collectWarnings = $warnings; $this->strictMode = $strict; $this->parser = new BlockParser($warnings, $strict); $this->renderer = new HtmlRenderer($xhtml); + + // Configure safe mode + if ($safeMode === true) { + $this->renderer->setSafeMode(SafeMode::defaults()); + } elseif ($safeMode instanceof SafeMode) { + $this->renderer->setSafeMode($safeMode); + } + } + + /** + * Enable or disable safe mode + * + * @param \Djot\SafeMode|bool|null $safeMode True for defaults, SafeMode for custom, null/false to disable + */ + public function setSafeMode(bool|SafeMode|null $safeMode): self + { + if ($safeMode === true) { + $this->renderer->setSafeMode(SafeMode::defaults()); + } elseif ($safeMode instanceof SafeMode) { + $this->renderer->setSafeMode($safeMode); + } else { + $this->renderer->setSafeMode(null); + } + + return $this; } /** diff --git a/src/Renderer/HtmlRenderer.php b/src/Renderer/HtmlRenderer.php index 53a988e..466ae8e 100644 --- a/src/Renderer/HtmlRenderer.php +++ b/src/Renderer/HtmlRenderer.php @@ -44,6 +44,7 @@ use Djot\Node\Inline\Symbol; use Djot\Node\Inline\Text; use Djot\Node\Node; +use Djot\SafeMode; /** * Renders AST to HTML @@ -52,6 +53,11 @@ class HtmlRenderer { protected bool $softBreakAsNewline = true; + /** + * Safe mode configuration (null = disabled) + */ + protected ?SafeMode $safeMode = null; + /** * @var array> */ @@ -99,6 +105,32 @@ public function __construct(protected bool $xhtml = false) { } + /** + * Enable safe mode with the given configuration + */ + public function setSafeMode(?SafeMode $safeMode): self + { + $this->safeMode = $safeMode; + + return $this; + } + + /** + * Get the current safe mode configuration + */ + public function getSafeMode(): ?SafeMode + { + return $this->safeMode; + } + + /** + * Check if safe mode is enabled + */ + public function isSafeModeEnabled(): bool + { + return $this->safeMode !== null; + } + public function setSoftBreakAsNewline(bool $value): void { $this->softBreakAsNewline = $value; @@ -312,6 +344,11 @@ protected function renderAttributesExcluding(Node $node, array $exclude): string return ''; } + // Filter dangerous attributes in safe mode + if ($this->safeMode !== null) { + $attrs = $this->safeMode->filterAttributes($attrs); + } + // Sort attributes: id first, then others in source order uksort($attrs, function (string $a, string $b): int { if ($a === 'id') { @@ -625,6 +662,11 @@ protected function renderLink(Link $node): string $href = $node->getDestination(); $title = $node->getTitle(); + // Sanitize URL in safe mode + if ($this->safeMode !== null && $href !== null) { + $href = $this->safeMode->sanitizeUrl($href); + } + $html = 'renderAttributes($node); $alt = $this->escape($node->getAlt()); - $src = $this->escape($node->getSource()); + $src = $node->getSource(); $title = $node->getTitle(); - $html = '' . $alt . 'safeMode !== null) { + $src = $this->safeMode->sanitizeUrl($src); + } + + $html = '' . $alt . 'escape($title) . '"'; } @@ -720,6 +767,11 @@ protected function renderAttributes(Node $node): string return ''; } + // Filter dangerous attributes in safe mode + if ($this->safeMode !== null) { + $attrs = $this->safeMode->filterAttributes($attrs); + } + // Sort attributes: id first, then others in source order uksort($attrs, function (string $a, string $b): int { if ($a === 'id') { @@ -768,21 +820,47 @@ protected function escapeAttribute(string $text): string protected function renderRawBlock(RawBlock $node): string { // Only output if format is HTML - if ($node->getFormat() === 'html') { - return $node->getContent() . "\n"; + if ($node->getFormat() !== 'html') { + return ''; } - return ''; + $content = $node->getContent(); + + // Handle raw HTML according to safe mode + if ($this->safeMode !== null) { + $mode = $this->safeMode->getRawHtmlMode(); + if ($mode === SafeMode::RAW_HTML_STRIP) { + return ''; + } + if ($mode === SafeMode::RAW_HTML_ESCAPE) { + return $this->escape($content) . "\n"; + } + } + + return $content . "\n"; } protected function renderRawInline(RawInline $node): string { // Only output if format is HTML - if ($node->getFormat() === 'html') { - return $node->getContent(); + if ($node->getFormat() !== 'html') { + return ''; } - return ''; + $content = $node->getContent(); + + // Handle raw HTML according to safe mode + if ($this->safeMode !== null) { + $mode = $this->safeMode->getRawHtmlMode(); + if ($mode === SafeMode::RAW_HTML_STRIP) { + return ''; + } + if ($mode === SafeMode::RAW_HTML_ESCAPE) { + return $this->escape($content); + } + } + + return $content; } protected function renderDefinitionList(DefinitionList $node): string diff --git a/src/SafeMode.php b/src/SafeMode.php new file mode 100644 index 0000000..f0e558b --- /dev/null +++ b/src/SafeMode.php @@ -0,0 +1,284 @@ + + */ + protected array $dangerousSchemes = [ + 'javascript', + 'vbscript', + 'data', + 'file', + ]; + + /** + * Allowed URL schemes (if set, only these are allowed) + * + * @var array|null + */ + protected ?array $allowedSchemes = null; + + /** + * Attribute prefixes to block (e.g., 'on' blocks onclick, onload, etc.) + * + * @var array + */ + protected array $blockedAttributePrefixes = ['on']; + + /** + * Specific attributes to block + * + * @var array + */ + protected array $blockedAttributes = ['srcdoc', 'formaction']; + + /** + * How to handle raw HTML blocks and inline + */ + protected string $rawHtmlMode = self::RAW_HTML_ESCAPE; + + /** + * Create a safe mode configuration with sensible defaults + */ + public static function defaults(): self + { + return new self(); + } + + /** + * Create a strict safe mode (strips raw HTML completely) + */ + public static function strict(): self + { + return (new self())->setRawHtmlMode(self::RAW_HTML_STRIP); + } + + /** + * Get dangerous URL schemes + * + * @return array + */ + public function getDangerousSchemes(): array + { + return $this->dangerousSchemes; + } + + /** + * Set dangerous URL schemes to block + * + * @param array $schemes + */ + public function setDangerousSchemes(array $schemes): self + { + $this->dangerousSchemes = $schemes; + + return $this; + } + + /** + * Add a dangerous URL scheme + */ + public function addDangerousScheme(string $scheme): self + { + $this->dangerousSchemes[] = strtolower($scheme); + + return $this; + } + + /** + * Get allowed URL schemes (null means all non-dangerous schemes allowed) + * + * @return array|null + */ + public function getAllowedSchemes(): ?array + { + return $this->allowedSchemes; + } + + /** + * Set allowed URL schemes (whitelist approach) + * + * @param array|null $schemes Null to allow all non-dangerous schemes + */ + public function setAllowedSchemes(?array $schemes): self + { + $this->allowedSchemes = $schemes; + + return $this; + } + + /** + * Get blocked attribute prefixes + * + * @return array + */ + public function getBlockedAttributePrefixes(): array + { + return $this->blockedAttributePrefixes; + } + + /** + * Set blocked attribute prefixes + * + * @param array $prefixes + */ + public function setBlockedAttributePrefixes(array $prefixes): self + { + $this->blockedAttributePrefixes = $prefixes; + + return $this; + } + + /** + * Get blocked attributes + * + * @return array + */ + public function getBlockedAttributes(): array + { + return $this->blockedAttributes; + } + + /** + * Set blocked attributes + * + * @param array $attributes + */ + public function setBlockedAttributes(array $attributes): self + { + $this->blockedAttributes = $attributes; + + return $this; + } + + /** + * Get raw HTML handling mode + */ + public function getRawHtmlMode(): string + { + return $this->rawHtmlMode; + } + + /** + * Set raw HTML handling mode + * + * @param string $mode One of RAW_HTML_STRIP, RAW_HTML_ESCAPE, RAW_HTML_ALLOW + */ + public function setRawHtmlMode(string $mode): self + { + $this->rawHtmlMode = $mode; + + return $this; + } + + /** + * Check if a URL is safe + */ + public function isUrlSafe(string $url): bool + { + $url = trim($url); + + // Empty URLs are safe + if ($url === '') { + return true; + } + + // Check for dangerous schemes + $colonPos = strpos($url, ':'); + if ($colonPos !== false) { + $scheme = strtolower(substr($url, 0, $colonPos)); + + // Check against dangerous schemes + if (in_array($scheme, $this->dangerousSchemes, true)) { + return false; + } + + // If we have an allowlist, check against it + if ($this->allowedSchemes !== null) { + return in_array($scheme, $this->allowedSchemes, true); + } + } + + return true; + } + + /** + * Sanitize a URL, returning empty string if unsafe + */ + public function sanitizeUrl(string $url): string + { + return $this->isUrlSafe($url) ? $url : ''; + } + + /** + * Check if an attribute name is safe + */ + public function isAttributeSafe(string $name): bool + { + $nameLower = strtolower($name); + + // Check blocked attributes + if (in_array($nameLower, $this->blockedAttributes, true)) { + return false; + } + + // Check blocked prefixes + foreach ($this->blockedAttributePrefixes as $prefix) { + if (str_starts_with($nameLower, strtolower($prefix))) { + return false; + } + } + + return true; + } + + /** + * Filter attributes, removing unsafe ones + * + * @param array $attributes + * + * @return array + */ + public function filterAttributes(array $attributes): array + { + return array_filter( + $attributes, + fn (string $key) => $this->isAttributeSafe($key), + ARRAY_FILTER_USE_KEY, + ); + } +} diff --git a/tests/TestCase/SafeModeTest.php b/tests/TestCase/SafeModeTest.php new file mode 100644 index 0000000..a2b9162 --- /dev/null +++ b/tests/TestCase/SafeModeTest.php @@ -0,0 +1,305 @@ +convert($djot); + + $this->assertStringContainsString('href=""', $result); + $this->assertStringNotContainsString('javascript:', $result); + } + + public function testJavascriptUrlInImageIsBlocked(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '![alt](javascript:alert(1))'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('src=""', $result); + $this->assertStringNotContainsString('javascript:', $result); + } + + public function testDataUrlIsBlocked(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '![img](data:text/html,)'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('src=""', $result); + $this->assertStringNotContainsString('data:', $result); + } + + public function testVbscriptUrlIsBlocked(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[click](vbscript:msgbox(1))'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('href=""', $result); + $this->assertStringNotContainsString('vbscript:', $result); + } + + public function testFileUrlIsBlocked(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[secret](file:///etc/passwd)'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('href=""', $result); + $this->assertStringNotContainsString('file:', $result); + } + + public function testHttpUrlIsAllowed(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[link](https://example.com)'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('href="https://example.com"', $result); + } + + public function testRelativeUrlIsAllowed(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[link](/path/to/page)'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('href="/path/to/page"', $result); + } + + public function testMailtoUrlIsAllowed(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[email](mailto:test@example.com)'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('href="mailto:test@example.com"', $result); + } + + // ==================== Attribute Filtering ==================== + + public function testOnclickAttributeIsFiltered(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[text]{onclick="alert(1)"}'; + $result = $converter->convert($djot); + + $this->assertStringNotContainsString('onclick', $result); + } + + public function testOnloadAttributeIsFiltered(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '![img](image.png){onload="alert(1)"}'; + $result = $converter->convert($djot); + + $this->assertStringNotContainsString('onload', $result); + } + + public function testOnerrorAttributeIsFiltered(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '![img](x){onerror="alert(1)"}'; + $result = $converter->convert($djot); + + $this->assertStringNotContainsString('onerror', $result); + } + + public function testOnmouseoverAttributeIsFiltered(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[hover me]{onmouseover="alert(1)"}'; + $result = $converter->convert($djot); + + $this->assertStringNotContainsString('onmouseover', $result); + } + + public function testClassAttributeIsAllowed(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[text]{.highlight}'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('class="highlight"', $result); + } + + public function testIdAttributeIsAllowed(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[text]{#myid}'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('id="myid"', $result); + } + + public function testDataAttributeIsAllowed(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '[text]{data-value="123"}'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('data-value="123"', $result); + } + + // ==================== Raw HTML Handling ==================== + + public function testRawHtmlIsEscapedByDefault(): void + { + $converter = new DjotConverter(safeMode: true); + $djot = '``{=html}'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('<script>', $result); + $this->assertStringNotContainsString('\n```"; + $result = $converter->convert($djot); + + $this->assertStringContainsString('<script>', $result); + $this->assertStringNotContainsString('`{=html}'; + $result = $converter->convert($djot); + + $this->assertStringNotContainsString('script', $result); + } + + public function testRawHtmlAllowedWhenConfigured(): void + { + $safeMode = SafeMode::defaults()->setRawHtmlMode(SafeMode::RAW_HTML_ALLOW); + $converter = new DjotConverter(safeMode: $safeMode); + $djot = '`bold`{=html}'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('bold', $result); + } + + // ==================== Safe Mode Configuration ==================== + + public function testSafeModeDisabledByDefault(): void + { + $converter = new DjotConverter(); + $djot = '[click](javascript:alert(1))'; + $result = $converter->convert($djot); + + // Without safe mode, dangerous URLs are allowed + $this->assertStringContainsString('javascript:alert(1)', $result); + } + + public function testSafeModeCanBeEnabledAfterConstruction(): void + { + $converter = new DjotConverter(); + $converter->setSafeMode(true); + + $djot = '[click](javascript:alert(1))'; + $result = $converter->convert($djot); + + $this->assertStringNotContainsString('javascript:', $result); + } + + public function testSafeModeCanBeDisabledAfterConstruction(): void + { + $converter = new DjotConverter(safeMode: true); + $converter->setSafeMode(false); + + $djot = '[click](javascript:alert(1))'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('javascript:alert(1)', $result); + } + + public function testCustomSafeModeConfiguration(): void + { + // Create safe mode that also blocks mailto: + $safeMode = SafeMode::defaults()->addDangerousScheme('mailto'); + $converter = new DjotConverter(safeMode: $safeMode); + + $djot = '[email](mailto:test@example.com)'; + $result = $converter->convert($djot); + + $this->assertStringContainsString('href=""', $result); + } + + public function testAllowedSchemesWhitelist(): void + { + // Only allow https + $safeMode = SafeMode::defaults()->setAllowedSchemes(['https']); + $converter = new DjotConverter(safeMode: $safeMode); + + $djot1 = '[link](https://example.com)'; + $result1 = $converter->convert($djot1); + $this->assertStringContainsString('href="https://example.com"', $result1); + + $djot2 = '[link](http://example.com)'; + $result2 = $converter->convert($djot2); + $this->assertStringContainsString('href=""', $result2); + } + + // ==================== SafeMode Class Tests ==================== + + public function testSafeModeIsUrlSafe(): void + { + $safeMode = SafeMode::defaults(); + + $this->assertTrue($safeMode->isUrlSafe('https://example.com')); + $this->assertTrue($safeMode->isUrlSafe('/relative/path')); + $this->assertTrue($safeMode->isUrlSafe('mailto:test@example.com')); + $this->assertFalse($safeMode->isUrlSafe('javascript:alert(1)')); + $this->assertFalse($safeMode->isUrlSafe('data:text/html,