From 8dc94460c39d1c334039f84bbf009a4eb1ea9e3e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 27 Mar 2026 18:03:32 +0100 Subject: [PATCH 01/44] Add css token processor This is based on https://github.com/WordPress/php-toolkit de13df1465d5b685dbd77ca0337aac017dcf606e --- .../css-api/class-wp-css-token-processor.php | 1813 ++++++ .../phpunit/data/css-api/css-test-cases.json | 4923 +++++++++++++++++ .../tests/css-api/wp-css-token-processor.php | 1540 ++++++ 3 files changed, 8276 insertions(+) create mode 100644 src/wp-includes/css-api/class-wp-css-token-processor.php create mode 100644 tests/phpunit/data/css-api/css-test-cases.json create mode 100644 tests/phpunit/tests/css-api/wp-css-token-processor.php diff --git a/src/wp-includes/css-api/class-wp-css-token-processor.php b/src/wp-includes/css-api/class-wp-css-token-processor.php new file mode 100644 index 0000000000000..f680fc3b890ff --- /dev/null +++ b/src/wp-includes/css-api/class-wp-css-token-processor.php @@ -0,0 +1,1813 @@ + Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) + * > code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE + * > FEED (LF) in input by a single U+000A LINE FEED (LF) code point. + * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT + * > CHARACTER (�). + * + * This processor delays normalization as much as possible. That keeps the raw byte + * positions intact for accurate rewrites while still letting consumers ask for a + * normalized token when they need one. + * + * ### No EOF token + * + * The EOF token is a CSS parsing concept, not CSS tokenization concept. Therefore, + * this processor does not produce it. + * + * ### UTF-8 handling + * + * Only UTF-8 strings are supported. Invalid sequences are replaced with U+FFFD (�) + * using the maximal subpart approach described in + * https://www.unicode.org/versions/Unicode9.0.0/ch03.pdf, section 3.9 Best Practices + * for Using U+FFFD. 
+ * + * ## Usage + * + * Basic iteration: + * + * $css = 'width: 10px;'; + * $processor = WP_CSS_Token_Processor::create( $css ); + * while ( $processor->next_token() ) { + * echo $processor->get_normalized_token(); + * } + * // Outputs: + * // width: 10px; + * + * Rewriting a URL while keeping the rest of the stylesheet intact: + * + * $css = 'background: url(old.jpg) center / cover;'; + * $processor = WP_CSS_Token_Processor::create( $css ); + * while ( $processor->next_token() ) { + * if ( WP_CSS_Token_Processor::TOKEN_URL === $processor->get_token_type() ) { + * $processor->set_value( 'uploads/new.jpg' ); + * } + * } + * $result = $processor->get_updated_css(); + * // background: url(uploads/new.jpg) center / cover; + * + * Gathering diagnostics with byte offsets: + * + * $css = "color: red;\ncolor: re\nd;"; + * $processor = WP_CSS_Token_Processor::create( $css ); + * $bad_strings = array(); + * while ( $processor->next_token() ) { + * if ( WP_CSS_Token_Processor::TOKEN_BAD_STRING === $processor->get_token_type() ) { + * $bad_strings[] = array( + * 'start' => $processor->get_token_start(), + * 'length' => $processor->get_token_length(), + * 'value' => $processor->get_unnormalized_token(), + * ); + * } + * } + * + * @see https://www.w3.org/TR/css-syntax-3/#tokenization + */ +class WP_CSS_Token_Processor { + /** + * Token type constants matching the CSS Syntax Level 3 specification. + * + * @see https://www.w3.org/TR/css-syntax-3/#tokenization + */ + public const TOKEN_WHITESPACE = 'whitespace-token'; + public const TOKEN_COMMENT = 'comment'; + public const TOKEN_STRING = 'string-token'; + + /** + * BAD-STRING tokens occur when a string contains an unescaped newline. + * + * Valid strings: "hello", 'world', "line1\Aline2" (escaped newline) + * Invalid (produces bad-string): "hello + * world" (literal newline breaks the string) + * + * The processor stops at the newline and produces a bad-string token for error recovery. 
+ * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-bad-string-token + */ + public const TOKEN_BAD_STRING = 'bad-string-token'; + public const TOKEN_HASH = 'hash-token'; + public const TOKEN_DELIM = 'delim-token'; + public const TOKEN_NUMBER = 'number-token'; + public const TOKEN_PERCENTAGE = 'percentage-token'; + public const TOKEN_DIMENSION = 'dimension-token'; + public const TOKEN_AT_KEYWORD = 'at-keyword-token'; + public const TOKEN_COLON = 'colon-token'; + public const TOKEN_SEMICOLON = 'semicolon-token'; + public const TOKEN_COMMA = 'comma-token'; + public const TOKEN_LEFT_PAREN = '(-token'; + public const TOKEN_RIGHT_PAREN = ')-token'; + public const TOKEN_LEFT_BRACKET = '[-token'; + public const TOKEN_RIGHT_BRACKET = ']-token'; + public const TOKEN_LEFT_BRACE = '{-token'; + public const TOKEN_RIGHT_BRACE = '}-token'; + public const TOKEN_FUNCTION = 'function-token'; + + /** + * URL tokens represent unquoted URLs in url() notation. + * + * Valid: url(image.jpg), url(https://example.com) + * Quoted URLs are parsed as url( + string-token + ), not url-token. + * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-url-token + */ + public const TOKEN_URL = 'url-token'; + + /** + * BAD-URL tokens occur when a URL contains invalid characters. + * + * Invalid characters: quotes ("), apostrophes ('), parentheses (() + * Example invalid: url(image(.jpg) or url(image".jpg) + * + * When detected, the processor consumes everything up to ) or EOF. + * This prevents the bad URL from breaking subsequent tokens. + * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-bad-url-token + */ + public const TOKEN_BAD_URL = 'bad-url-token'; + + /** + * Identifier tokens, such as `color`, `margin-top`, `red`, + * `inherit`, `--my-var`, `\escaped`, `über` (Unicode), etc. + * + * They can contain: letters, digits, hyphens, underscores, non-ASCII, escapes + * and cannot start with a digit (unless preceded by a hyphen). 
+ * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-ident-token + */ + public const TOKEN_IDENT = 'ident-token'; + + /** + * CDC (Comment Delimiter Close) token: --> + * + * Legacy token from when CSS was embedded in HTML + * + * Modern CSS no longer needs these, but they're preserved for compatibility. + * In stylesheets, they're typically treated like whitespace. + * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-CDC-token + */ + public const TOKEN_CDC = 'CDC-token'; + + /** + * CDO (Comment Delimiter Open) token: ) + * + * Comment Delimiter Close - legacy HTML comment syntax in CSS. + * + * @see https://www.w3.org/TR/css-syntax-3/#CDC-token-diagram + */ + if ( + $this->at + 2 < $this->length && + '-' === $this->css[ $this->at + 1 ] && + '>' === $this->css[ $this->at + 2 ] + ) { + // Consume them and return a . + $this->at += 3; + $this->token_type = self::TOKEN_CDC; + $this->token_length = 3; + return true; + } + + // Otherwise, if the input stream starts with an ident sequence, + // reconsume the current input code point, consume an ident-like + // token, and return it. + if ( $this->check_if_3_code_points_start_an_ident_sequence( $this->at ) ) { + return $this->consume_ident_like(); + } + + // Otherwise, return a with its value set to the current input code point. 
+ ++$this->at; + $this->token_type = self::TOKEN_DELIM; + $this->token_length = 1; + return true; + } + + /* + * U+003C LESS-THAN SIGN (<) + * If followed by !--, this is a CDO token (\n", + "tokens": [ + { + "type": "CDC-token", + "raw": "-->", + "startIndex": 0, + "endIndex": 3, + "normalized": "-->", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0001": { + "css": "foo\n", + "tokens": [ + { + "type": "ident-token", + "raw": "foo", + "startIndex": 0, + "endIndex": 3, + "normalized": "foo", + "value": "foo" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0002": { + "css": "--\n", + "tokens": [ + { + "type": "ident-token", + "raw": "--", + "startIndex": 0, + "endIndex": 2, + "normalized": "--", + "value": "--" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 2, + "endIndex": 3, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0003": { + "css": "--0\n", + "tokens": [ + { + "type": "ident-token", + "raw": "--0", + "startIndex": 0, + "endIndex": 3, + "normalized": "--0", + "value": "--0" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0004": { + "css": "-\\\n", + "tokens": [ + { + "type": "delim-token", + "raw": "-", + "startIndex": 0, + "endIndex": 1, + "normalized": "-", + "value": "-" + }, + { + "type": "delim-token", + "raw": "\\", + "startIndex": 1, + "endIndex": 2, + "normalized": "\\", + "value": "\\" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 2, + "endIndex": 3, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0005": { + "css": "-\\ \n", + "tokens": [ + { + "type": "ident-token", + "raw": "-\\ ", + "startIndex": 0, + "endIndex": 3, + 
"normalized": "- ", + "value": "- " + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0006": { + "css": "--💅\n", + "tokens": [ + { + "type": "ident-token", + "raw": "--💅", + "startIndex": 0, + "endIndex": 6, + "normalized": "--💅", + "value": "--💅" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 6, + "endIndex": 7, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0007": { + "css": "-§\n", + "tokens": [ + { + "type": "ident-token", + "raw": "-§", + "startIndex": 0, + "endIndex": 3, + "normalized": "-§", + "value": "-§" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0008": { + "css": "-×\n", + "tokens": [ + { + "type": "ident-token", + "raw": "-×", + "startIndex": 0, + "endIndex": 3, + "normalized": "-×", + "value": "-×" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0009": { + "css": "--a𐀀\n", + "tokens": [ + { + "type": "ident-token", + "raw": "--a𐀀", + "startIndex": 0, + "endIndex": 7, + "normalized": "--a𐀀", + "value": "--a𐀀" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 7, + "endIndex": 8, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0001": { + "css": "url(foo)\n", + "tokens": [ + { + "type": "url-token", + "raw": "url(foo)", + "startIndex": 0, + "endIndex": 8, + "normalized": "url(foo)", + "value": "foo" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 8, + "endIndex": 9, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0002": { + "css": "\\75 Rl(foo)\n", + "tokens": [ + { + "type": "url-token", + "raw": "\\75 Rl(foo)", + "startIndex": 0, + "endIndex": 11, + "normalized": "uRl(foo)", + "value": "foo" + }, + { + 
"type": "whitespace-token", + "raw": "\n", + "startIndex": 11, + "endIndex": 12, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0003": { + "css": "uR\\6c (foo)\n", + "tokens": [ + { + "type": "url-token", + "raw": "uR\\6c (foo)", + "startIndex": 0, + "endIndex": 11, + "normalized": "uRl(foo)", + "value": "foo" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 11, + "endIndex": 12, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0004": { + "css": "url('foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "url(", + "startIndex": 0, + "endIndex": 4, + "normalized": "url(", + "value": "url" + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 4, + "endIndex": 9, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 9, + "endIndex": 10, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 10, + "endIndex": 11, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0005": { + "css": "url( 'foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "url(", + "startIndex": 0, + "endIndex": 4, + "normalized": "url(", + "value": "url" + }, + { + "type": "whitespace-token", + "raw": " ", + "startIndex": 4, + "endIndex": 5, + "normalized": " ", + "value": null + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 5, + "endIndex": 10, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 10, + "endIndex": 11, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 11, + "endIndex": 12, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0006": { + "css": "url( 'foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "url(", + "startIndex": 0, + "endIndex": 4, + "normalized": "url(", + "value": "url" + }, + { + "type": 
"whitespace-token", + "raw": " ", + "startIndex": 4, + "endIndex": 6, + "normalized": " ", + "value": null + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 6, + "endIndex": 11, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 11, + "endIndex": 12, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 12, + "endIndex": 13, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0007": { + "css": "url( 'foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "url(", + "startIndex": 0, + "endIndex": 4, + "normalized": "url(", + "value": "url" + }, + { + "type": "whitespace-token", + "raw": " ", + "startIndex": 4, + "endIndex": 7, + "normalized": " ", + "value": null + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 7, + "endIndex": 12, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 12, + "endIndex": 13, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 13, + "endIndex": 14, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0008": { + "css": "not-url( 'foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "not-url(", + "startIndex": 0, + "endIndex": 8, + "normalized": "not-url(", + "value": "not-url" + }, + { + "type": "whitespace-token", + "raw": " ", + "startIndex": 8, + "endIndex": 11, + "normalized": " ", + "value": null + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 11, + "endIndex": 16, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 16, + "endIndex": 17, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 17, + "endIndex": 18, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0009": { + "css": "url( 
foo)\n", + "tokens": [ + { + "type": "url-token", + "raw": "url( foo)", + "startIndex": 0, + "endIndex": 11, + "normalized": "url( foo)", + "value": "foo" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 11, + "endIndex": 12, + "normalized": "\n", + "value": null + } + ] + }, + "tests/left-curly-bracket/0001": { + "css": "{\n", + "tokens": [ + { + "type": "{-token", + "raw": "{", + "startIndex": 0, + "endIndex": 1, + "normalized": "{", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 1, + "endIndex": 2, + "normalized": "\n", + "value": null + } + ] + }, + "tests/left-parenthesis/0001": { + "css": "(\n", + "tokens": [ + { + "type": "(-token", + "raw": "(", + "startIndex": 0, + "endIndex": 1, + "normalized": "(", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 1, + "endIndex": 2, + "normalized": "\n", + "value": null + } + ] + }, + "tests/left-square-bracket/0001": { + "css": "[\n", + "tokens": [ + { + "type": "[-token", + "raw": "[", + "startIndex": 0, + "endIndex": 1, + "normalized": "[", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 1, + "endIndex": 2, + "normalized": "\n", + "value": null + } + ] + }, + "tests/less-than/0001": { + "css": "<\n", + "tokens": [ + { + "type": "delim-token", + "raw": "<", + "startIndex": 0, + "endIndex": 1, + "normalized": "<", + "value": "<" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 1, + "endIndex": 2, + "normalized": "\n", + "value": null + } + ] + }, + "tests/less-than/0002": { + "css": " + + + + +wpCssTokenProcessor.php +wpCssBuilder.php + + + + + From 22d7a3986d7b56b186f093be1d67e3777cc5707a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 1 Apr 2026 12:35:30 +0200 Subject: [PATCH 34/44] Scaffold CSS processor --- src/wp-includes/css-api/class-wp-css-processor.php | 4 ++++ tests/phpunit/tests/css-api/wpCssProcessor.php | 9 +++++++++ 2 files changed, 13 
insertions(+) create mode 100644 src/wp-includes/css-api/class-wp-css-processor.php create mode 100644 tests/phpunit/tests/css-api/wpCssProcessor.php diff --git a/src/wp-includes/css-api/class-wp-css-processor.php b/src/wp-includes/css-api/class-wp-css-processor.php new file mode 100644 index 0000000000000..d9fbcb91fd859 --- /dev/null +++ b/src/wp-includes/css-api/class-wp-css-processor.php @@ -0,0 +1,4 @@ + Date: Wed, 1 Apr 2026 12:39:36 +0200 Subject: [PATCH 35/44] DROPME: update dev files --- tests/phpunit/tests/css-api/bootstrap-css-api.php | 3 ++- tests/phpunit/tests/css-api/phpunit.xml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/phpunit/tests/css-api/bootstrap-css-api.php b/tests/phpunit/tests/css-api/bootstrap-css-api.php index d55266724afdd..e4d80d277319d 100644 --- a/tests/phpunit/tests/css-api/bootstrap-css-api.php +++ b/tests/phpunit/tests/css-api/bootstrap-css-api.php @@ -21,8 +21,9 @@ require_once __DIR__ . '/../../../../src/wp-includes/html-api/class-wp-html-processor.php'; require_once __DIR__ . '/../../../../src/wp-includes/compat-utf8.php'; -require_once __DIR__ . '/../../../../src/wp-includes/css-api/class-wp-css-token-processor.php'; require_once __DIR__ . '/../../../../src/wp-includes/css-api/class-wp-css-builder.php'; +require_once __DIR__ . '/../../../../src/wp-includes/css-api/class-wp-css-token-processor.php'; +require_once __DIR__ . '/../../../../src/wp-includes/css-api/class-wp-css-processor.php'; if ( ! 
function_exists( 'wp_kses_uri_attributes' ) ) { diff --git a/tests/phpunit/tests/css-api/phpunit.xml b/tests/phpunit/tests/css-api/phpunit.xml index b74249b997f5f..7823b1d382f8b 100644 --- a/tests/phpunit/tests/css-api/phpunit.xml +++ b/tests/phpunit/tests/css-api/phpunit.xml @@ -16,6 +16,7 @@ +wpCssProcessor.php wpCssTokenProcessor.php wpCssBuilder.php From 10b4eadbf1ec9ae6d2dc01dd1596728cb22bdce8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 1 Apr 2026 21:49:37 +0200 Subject: [PATCH 36/44] ident + font family style normaliztion i1 --- .../css-api/class-wp-css-builder.php | 73 ++++++++++ src/wp-includes/fonts/class-wp-font-utils.php | 129 ++++++++++++++++++ tests/phpunit/tests/css-api/wpCssBuilder.php | 44 ++++++ 3 files changed, 246 insertions(+) diff --git a/src/wp-includes/css-api/class-wp-css-builder.php b/src/wp-includes/css-api/class-wp-css-builder.php index 045cae9b56211..aeae2be1a2145 100644 --- a/src/wp-includes/css-api/class-wp-css-builder.php +++ b/src/wp-includes/css-api/class-wp-css-builder.php @@ -1,6 +1,79 @@ = 0x80 ) { + $result .= $value[ $i ]; + continue; + } + + // ASCII letters and underscore: always valid in idents. + if ( + ( $byte >= 0x41 && $byte <= 0x5A ) || // A-Z + ( $byte >= 0x61 && $byte <= 0x7A ) || // a-z + 0x5F === $byte // _ + ) { + $result .= $value[ $i ]; + continue; + } + + // Hyphen: valid in idents, but check for hyphen-digit at start. + if ( 0x2D === $byte ) { + // Hyphen at position 0 followed by a digit at position 1: escape the digit. + if ( 0 === $i && $i + 1 < $length && ord( $value[ $i + 1 ] ) >= 0x30 && ord( $value[ $i + 1 ] ) <= 0x39 ) { + $result .= '-'; + ++$i; + $result .= sprintf( '\\%X ', ord( $value[ $i ] ) ); + continue; + } + $result .= '-'; + continue; + } + + // Digits: valid except at position 0. + if ( $byte >= 0x30 && $byte <= 0x39 ) { + if ( 0 === $i ) { + $result .= sprintf( '\\%X ', $byte ); + } else { + $result .= $value[ $i ]; + } + continue; + } + + // Everything else: hex-escape. 
+ $result .= sprintf( '\\%X ', $byte ); + } + + return $result; + } + /** * Create a quoted CSS string from a plain PHP string value. * diff --git a/src/wp-includes/fonts/class-wp-font-utils.php b/src/wp-includes/fonts/class-wp-font-utils.php index 586fa75306fb8..58fcd3b7933d6 100644 --- a/src/wp-includes/fonts/class-wp-font-utils.php +++ b/src/wp-includes/fonts/class-wp-font-utils.php @@ -122,6 +122,135 @@ public static function normalize_css_font_face_font_family( string $font_family return WP_CSS_Builder::string( implode( ' ', $plaintext_font_ident_parts ) ); } + /** + * Normalize a CSS qualified rule font-family value. + * + * Warning! This function is unsuitable for `@font-face` `font-family` values. {@see WP_Font_Utils::normalize_css_font_face_font_family()} should be used for @font-face. + * > Value: + * > [ | ]# + * > Computed value: + * > list, each item a string and/or keywords + * + * @see https://drafts.csswg.org/css-fonts/#font-family-prop + * @see https://www.w3.org/TR/css-syntax-3/#parse-comma-list + */ + public static function normalize_css_font_family( string $font_family ): string { + // Scrub and CSS trim whitespace. + $font_family = trim( wp_scrub_utf8( $font_family ), "\t\n\f\r " ); + $processor = WP_CSS_Token_Processor::create( $font_family ); + assert( null !== $processor, 'A valid processor must be created' ); + + /* + * States for the parser: + * 0 = ITEM_START: expecting start of a new comma-separated item + * 1 = AFTER_STRING: saw a string, expecting comma or EOF + * 2 = IN_IDENTS: collecting ident tokens + * 3 = IN_GENERIC: inside generic() function, collecting idents + * 4 = AFTER_GENERIC: after closing ) of generic(), expecting comma or EOF + */ + $state = 0; + $items = array(); + $ident_parts = array(); + + while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + + // Whitespace and comments are skipped in all states. 
+ if ( + WP_CSS_Token_Processor::TOKEN_WHITESPACE === $type || + WP_CSS_Token_Processor::TOKEN_COMMENT === $type + ) { + continue; + } + + switch ( $state ) { + case 0: // ITEM_START + if ( WP_CSS_Token_Processor::TOKEN_STRING === $type ) { + $items[] = WP_CSS_Builder::string( $processor->get_token_value() ); + $state = 1; + } elseif ( WP_CSS_Token_Processor::TOKEN_IDENT === $type ) { + $ident_parts = array( $processor->get_token_value() ); + $state = 2; + } elseif ( WP_CSS_Token_Processor::TOKEN_FUNCTION === $type && 'generic' === strtolower( $processor->get_token_value() ) ) { + $ident_parts = array(); + $state = 3; + } else { + return ''; + } + break; + + case 1: // AFTER_STRING + if ( WP_CSS_Token_Processor::TOKEN_COMMA === $type ) { + $state = 0; + } else { + return ''; + } + break; + + case 2: // IN_IDENTS + if ( WP_CSS_Token_Processor::TOKEN_IDENT === $type ) { + $ident_parts[] = $processor->get_token_value(); + } elseif ( WP_CSS_Token_Processor::TOKEN_COMMA === $type ) { + $items[] = implode( ' ', array_map( array( 'WP_CSS_Builder', 'ident' ), $ident_parts ) ); + $state = 0; + } else { + return ''; + } + break; + + case 3: // IN_GENERIC + if ( WP_CSS_Token_Processor::TOKEN_IDENT === $type ) { + $ident_parts[] = $processor->get_token_value(); + } elseif ( WP_CSS_Token_Processor::TOKEN_RIGHT_PAREN === $type ) { + if ( empty( $ident_parts ) ) { + return ''; + } + $items[] = 'generic(' . implode( ' ', array_map( array( 'WP_CSS_Builder', 'ident' ), $ident_parts ) ) . ')'; + $state = 4; + } else { + return ''; + } + break; + + case 4: // AFTER_GENERIC + if ( WP_CSS_Token_Processor::TOKEN_COMMA === $type ) { + $state = 0; + } else { + return ''; + } + break; + } + } + + // Finalize last item based on state at EOF. + switch ( $state ) { + case 0: + // EOF at ITEM_START: either empty input or trailing comma. + if ( empty( $items ) ) { + return ''; + } + // Trailing comma — last item was followed by comma but no next item. 
+ return ''; + + case 1: // String at EOF — already added to items. + case 4: // After generic close at EOF — already added to items. + break; + + case 2: // Ident sequence at EOF — finalize. + $items[] = implode( ' ', array_map( array( 'WP_CSS_Builder', 'ident' ), $ident_parts ) ); + break; + + case 3: // Inside unclosed generic() — invalid. + return ''; + } + + if ( empty( $items ) ) { + return ''; + } + + return implode( ', ', $items ); + } + /** * Sanitizes and formats font family names. * diff --git a/tests/phpunit/tests/css-api/wpCssBuilder.php b/tests/phpunit/tests/css-api/wpCssBuilder.php index 967b323cad8d8..91bae79b0bec7 100644 --- a/tests/phpunit/tests/css-api/wpCssBuilder.php +++ b/tests/phpunit/tests/css-api/wpCssBuilder.php @@ -103,4 +103,48 @@ public function test_mixed_newlines(): void { $this->assertSame( $expected, WP_CSS_Builder::string( $input ) ); } + + /** + * Tests WP_CSS_Builder::ident() produces valid CSS ident tokens. + * + * @ticket TBD + * + * @dataProvider data_ident + * + * @covers ::ident + */ + public function test_ident( string $input, string $expected ): void { + $this->assertSame( $expected, WP_CSS_Builder::ident( $input ) ); + } + + /** + * Data provider for ident() tests. + */ + public static function data_ident(): Generator { + // Simple idents — no escaping needed. + yield 'Simple alpha ident' => array( 'serif', 'serif' ); + yield 'Hyphenated ident' => array( 'sans-serif', 'sans-serif' ); + yield 'Underscore prefix' => array( '_foo', '_foo' ); + yield 'Single char' => array( 'a', 'a' ); + yield 'Custom property prefix' => array( '--custom', '--custom' ); + + // Invalid ident starts — must be escaped. 
+ yield 'Leading digits' => array( '123', '\\31 23' ); + yield 'Leading digit with alpha' => array( '5foo', '\\35 foo' ); + yield 'Hyphen then digit' => array( '-5px', '-\\35 px' ); + yield 'Leading space' => array( ' leading-space', '\\20 leading-space' ); + yield 'Leading tab' => array( "\tleading-tab", '\\9 leading-tab' ); + + // Whitespace within ident. + yield 'Space within' => array( 'My Font', 'My\\20 Font' ); + yield 'Multiple spaces within' => array( 'a b c', 'a\\20 b\\20 c' ); + yield 'Tab within' => array( "has\ttab", 'has\\9 tab' ); + yield 'Newline within' => array( "has\nnewline", 'has\\A newline' ); + + // Special characters. + yield 'Apostrophe' => array( "Font's", 'Font\\27 s' ); + yield 'Angle brackets' => array( '', '\\3C html\\3E ' ); + yield 'Comma' => array( 'a,b', 'a\\2C b' ); + yield 'Semicolon' => array( 'a;b', 'a\\3B b' ); + } } From ff0e1803fd22e2d1d9281977a4b05e1bfadba5c4 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 6 Apr 2026 15:53:48 +0200 Subject: [PATCH 37/44] Add font normalization tests --- .../wpFontUtils/normalizeCssFontFamily.php | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tests/phpunit/tests/fonts/font-library/wpFontUtils/normalizeCssFontFamily.php diff --git a/tests/phpunit/tests/fonts/font-library/wpFontUtils/normalizeCssFontFamily.php b/tests/phpunit/tests/fonts/font-library/wpFontUtils/normalizeCssFontFamily.php new file mode 100644 index 0000000000000..0c1136b70ec54 --- /dev/null +++ b/tests/phpunit/tests/fonts/font-library/wpFontUtils/normalizeCssFontFamily.php @@ -0,0 +1,70 @@ +assertSame( $expected, $result ); + + // Idempotency check — normalizing the result should produce the same result. + $this->assertSame( $result, WP_Font_Utils::normalize_css_font_family( $result ), 'Normalization must be idempotent.' ); + } + + /** + * Data provider. + */ + public static function data_provider(): Generator { + // Single idents. 
+ yield 'Simple ident' => array( 'serif', 'serif' ); + yield 'Hyphenated ident' => array( 'sans-serif', 'sans-serif' ); + yield 'Custom ident' => array( 'Font', 'Font' ); + yield 'CamelCase ident' => array( 'MyFont', 'MyFont' ); + yield 'Ident with underscore' => array( 'My_Font', 'My_Font' ); + yield 'Ident with backslash escape' => array( 'Font\\\'s', 'Font\\27 s' ); + yield 'Ident with hex escape' => array( 'F\\6fnt', 'Font' ); + + // Multi-ident sequences (each ident normalized, joined by single space). + yield 'Two idents' => array( 'Font Name', 'Font Name' ); + yield 'Three idents' => array( 'Times New Roman', 'Times New Roman' ); + yield 'Extra whitespace between idents' => array( 'Font Name', 'Font Name' ); + + // Strings (re-encoded via WP_CSS_Builder::string()). + yield 'Double-quoted string' => array( '"Font"', '"Font"' ); + yield 'Double-quoted string with spaces' => array( '"Times New Roman"', '"Times New Roman"' ); + yield 'Single-quoted string' => array( "'Font'", '"Font"' ); + + // generic() function. + yield 'generic function' => array( 'generic(fangsong)', 'generic(fangsong)' ); + yield 'generic function with whitespace' => array( 'generic( fangsong )', 'generic(fangsong)' ); + + // Comma-separated lists. + yield 'Two generic families' => array( 'serif, sans-serif', 'serif, sans-serif' ); + yield 'String and generic' => array( '"Times New Roman", serif', '"Times New Roman", serif' ); + yield 'Idents and generic' => array( 'Times New Roman, serif', 'Times New Roman, serif' ); + yield 'Whitespace around commas' => array( ' serif , sans-serif ', 'serif, sans-serif' ); + yield 'Mixed types' => array( 'serif, "My Font", generic(fangsong)', 'serif, "My Font", generic(fangsong)' ); + yield 'Escaped ident in list' => array( 'Font\\\'s, serif', 'Font\\27 s, serif' ); + + // Invalid inputs — return empty string. 
+ yield 'Empty string' => array( '', '' ); + yield 'Whitespace only' => array( ' ', '' ); + yield 'Trailing comma' => array( 'serif,', '' ); + yield 'Leading comma' => array( ',serif', '' ); + yield 'Double comma' => array( 'serif,,sans-serif', '' ); + yield 'Empty generic function' => array( 'generic()', '' ); + } +} From 417d7a0a3614a146d2227ef29b7c7b8b29b246d5 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 6 Apr 2026 16:49:32 +0200 Subject: [PATCH 38/44] parse_a_rule TDD scaffold --- .../css-api/class-wp-css-processor.php | 3 + .../phpunit/tests/css-api/wpCssProcessor.php | 159 +++++++++++++++++- 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/css-api/class-wp-css-processor.php b/src/wp-includes/css-api/class-wp-css-processor.php index d9fbcb91fd859..1da10700e6f81 100644 --- a/src/wp-includes/css-api/class-wp-css-processor.php +++ b/src/wp-includes/css-api/class-wp-css-processor.php @@ -1,4 +1,7 @@ assertTrue( + WP_CSS_Processor::parse_a_rule( $css ), + "Expected true for: {$css}" + ); + } + + /** + * @ticket TBD + * @dataProvider data_invalid_rules + * @covers ::parse_a_rule + */ + public function test_parse_a_rule_invalid( string $css ): void { + $this->assertFalse( + WP_CSS_Processor::parse_a_rule( $css ), + "Expected false for: {$css}" + ); + } + + public static function data_valid_rules(): Generator { + /* + * Qualified rules. 
+ */ + yield 'Class selector' => array( '.a { color: red }' ); + yield 'Type selector' => array( 'div { }' ); + yield 'Universal selector' => array( '* { }' ); + yield 'ID selector' => array( '#main { }' ); + yield 'Descendant combinator' => array( 'div .a { }' ); + yield 'Child combinator' => array( 'div > .a + .b { }' ); + yield 'Attribute selector' => array( '[href] { }' ); + yield 'Attribute selector with value' => array( '[type="text"] { }' ); + yield 'Pseudo-class' => array( ':hover { }' ); + yield 'Pseudo-element' => array( '::before { }' ); + yield 'Compound selector' => array( 'div.class#id { }' ); + yield 'Selector list' => array( 'h1, h2, h3 { }' ); + yield 'Multiple declarations' => array( '.a { color: red; font-size: 16px }' ); + yield 'Empty block no space' => array( '.a{}' ); + yield 'Empty prelude' => array( '{ color: red }' ); + yield 'Empty prelude empty block' => array( '{}' ); + yield 'Declaration with !important' => array( '.a { color: red !important }' ); + + /* + * CSS nesting. + */ + yield 'Nested rule' => array( '.parent { color: red; .child { color: blue } }' ); + yield 'Deep nesting' => array( '.a { .b { .c { } } }' ); + yield 'Nesting with declarations at multiple levels' => array( + '.parent { color: red; .child { color: blue; .grandchild { color: green } } }', + ); + + /* + * At-rules: semicolon-terminated. + */ + yield '@charset' => array( '@charset "utf-8";' ); + yield '@import url()' => array( '@import url("a");' ); + yield '@import string' => array( '@import "styles.css";' ); + yield '@namespace' => array( '@namespace svg "http://www.w3.org/2000/svg";' ); + yield '@layer name semicolon' => array( '@layer name;' ); + + /* + * At-rules: block-terminated. 
+ */ + yield '@media with block' => array( '@media screen { }' ); + yield '@font-face' => array( '@font-face { }' ); + yield '@keyframes' => array( '@keyframes name { }' ); + yield '@supports' => array( '@supports (display: grid) { }' ); + yield '@layer with block' => array( '@layer { }' ); + yield '@media with content' => array( '@media screen { body { color: red } }' ); + + /* + * At-rules: complex preludes. + */ + yield '@media complex prelude' => array( + '@media (min-width: 600px) and (max-width: 900px) { }', + ); + yield '@supports or' => array( '@supports (display: flex) or (display: grid) { }' ); + + /* + * At-rules: EOF during at-rule (spec returns the at-rule despite parse error). + */ + yield '@charset no semicolon' => array( '@charset "utf-8"' ); + yield '@import no semicolon' => array( '@import url("a")' ); + yield '@layer name no semicolon' => array( '@layer name' ); + yield 'At-keyword only' => array( '@media' ); + + /* + * Whitespace handling. + */ + yield 'Leading spaces' => array( ' .a { }' ); + yield 'Trailing spaces' => array( '.a { } ' ); + yield 'Leading and trailing spaces' => array( ' .a { } ' ); + yield 'Leading tab' => array( "\t.a { }" ); + yield 'Leading newline' => array( "\n.a { }" ); + yield 'Trailing newline' => array( ".a { }\n" ); + yield 'Leading CRLF' => array( "\r\n.a { }" ); + yield 'Mixed whitespace' => array( " \t\n .a { } \t\n " ); + + /* + * Comment handling (spec assumes comments stripped; skip them like whitespace). + */ + yield 'Leading comment' => array( '/* comment */ .a { }' ); + yield 'Trailing comment' => array( '.a { } /* comment */' ); + yield 'Comments around at-rule' => array( '/* c1 */ @media screen { } /* c2 */' ); + } + + public static function data_invalid_rules(): Generator { + /* + * Empty / whitespace-only (step 3: EOF after skipping whitespace). 
+ */ + yield 'Empty string' => array( '' ); + yield 'Whitespace only spaces' => array( ' ' ); + yield 'Whitespace only tab' => array( "\t" ); + yield 'Whitespace only newline' => array( "\n" ); + yield 'Whitespace only mixed' => array( " \t\n\r\n " ); + yield 'Only a comment' => array( '/* comment */' ); + yield 'Only comments' => array( '/* c1 */ /* c2 */' ); + + /* + * Multiple rules (step 7: non-EOF after consuming rule). + */ + yield 'Two qualified rules' => array( '.a {} .b {}' ); + yield 'Two at-rules' => array( '@charset "utf-8"; @import url("a");' ); + yield 'Qualified then at-rule' => array( '.a {} @media {}' ); + yield 'At-rule then qualified' => array( '@media {} .a {}' ); + yield 'At-rule semicolon then qualified' => array( '@charset "utf-8"; .a {}' ); + + /* + * Trailing non-whitespace after valid rule (step 7). + */ + yield 'Trailing ident after qualified rule' => array( '.a {} foo' ); + yield 'Trailing selector after at-rule' => array( '@media screen { } .extra' ); + yield 'Trailing semicolon after qualified rule' => array( '.a {} ;' ); + yield 'Trailing brace after qualified rule' => array( '.a {} }' ); + + /* + * Qualified rule with no block (EOF during consume qualified rule). + */ + yield 'Selector without block' => array( '.a' ); + yield 'Type selector without block' => array( 'div' ); + yield 'Complex selector without block' => array( 'div > .a + .b' ); + + /* + * Lone punctuation (consumed as prelude, no block found -> EOF -> nothing). 
+ */ + yield 'Just a semicolon' => array( ';' ); + yield 'Just a closing brace' => array( '}' ); + yield 'Just a colon' => array( ':' ); + yield 'Just a comma' => array( ',' ); + } } From 6e213632edc7c08e62ac5833dd70081704d2f314 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 6 Apr 2026 17:25:22 +0200 Subject: [PATCH 39/44] Implement parse_a_rule --- .../css-api/class-wp-css-processor.php | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/src/wp-includes/css-api/class-wp-css-processor.php b/src/wp-includes/css-api/class-wp-css-processor.php index 1da10700e6f81..83353157f58e2 100644 --- a/src/wp-includes/css-api/class-wp-css-processor.php +++ b/src/wp-includes/css-api/class-wp-css-processor.php @@ -1,7 +1,143 @@ get_token_type() ) { + // Step 4: Consume an at-rule. + self::consume_at_rule( $processor ); + } else { + // Step 5: Consume a qualified rule. + if ( ! self::consume_qualified_rule( $processor ) ) { + return false; + } + } + + // Steps 6–7: Discard whitespace/comments, then expect EOF. + if ( self::next_non_whitespace_comment_token( $processor ) ) { + // Non-EOF after the rule → syntax error. + return false; + } + + return true; + } + + /** + * Advances past whitespace and comment tokens. + * + * Returns true if a non-whitespace/non-comment token was found, + * false if EOF was reached. + */ + private static function next_non_whitespace_comment_token( WP_CSS_Token_Processor $processor ): bool { + while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + if ( + WP_CSS_Token_Processor::TOKEN_WHITESPACE !== $type && + WP_CSS_Token_Processor::TOKEN_COMMENT !== $type + ) { + return true; + } + } return false; } + + /** + * Consumes an at-rule from the token stream. + * + * The processor must be positioned on an at-keyword token. + * Per spec, at-rules are always returned even on EOF (parse error noted). 
+ * + * @see https://www.w3.org/TR/css-syntax-3/#consume-an-at-rule + */ + private static function consume_at_rule( WP_CSS_Token_Processor $processor ): void { + while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + + if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON === $type ) { + return; + } + + if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $type ) { + self::consume_simple_block( $processor ); + return; + } + } + // EOF: at-rule is still returned per spec. + } + + /** + * Consumes a qualified rule from the token stream. + * + * The processor must be positioned on the first prelude token. + * Returns true if a qualified rule was found (a block was consumed), + * false if EOF was reached without finding a block. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-a-qualified-rule + */ + private static function consume_qualified_rule( WP_CSS_Token_Processor $processor ): bool { + // The processor is already on the first token of the prelude. + // Check if it's already a left brace. + if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $processor->get_token_type() ) { + self::consume_simple_block( $processor ); + return true; + } + + while ( $processor->next_token() ) { + if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $processor->get_token_type() ) { + self::consume_simple_block( $processor ); + return true; + } + } + + // EOF without finding a block → return nothing (syntax error). + return false; + } + + /** + * Consumes a simple block from the token stream. + * + * The processor must be positioned on a left brace token. + * Consumes tokens until the matching right brace or EOF, + * tracking nested brace pairs. 
+ * + * @see https://www.w3.org/TR/css-syntax-3/#consume-a-simple-block + */ + private static function consume_simple_block( WP_CSS_Token_Processor $processor ): void { + $depth = 1; + + while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + + if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $type ) { + ++$depth; + } elseif ( WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE === $type ) { + --$depth; + if ( 0 === $depth ) { + return; + } + } + } + // EOF: block is still returned per spec. + } } From 5a7b36b606a03e40fd5771c6f44455568104e5df Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 9 Apr 2026 14:49:59 +0200 Subject: [PATCH 40/44] Implement parse_a_rule() --- .../css-api/class-wp-css-processor.php | 84 +++++++++++++------ .../phpunit/tests/css-api/wpCssProcessor.php | 78 +++++++++++++++++ 2 files changed, 136 insertions(+), 26 deletions(-) diff --git a/src/wp-includes/css-api/class-wp-css-processor.php b/src/wp-includes/css-api/class-wp-css-processor.php index 83353157f58e2..7f39f37bba9f9 100644 --- a/src/wp-includes/css-api/class-wp-css-processor.php +++ b/src/wp-includes/css-api/class-wp-css-processor.php @@ -7,6 +7,16 @@ class WP_CSS_Processor { * Returns true if the input contains exactly one CSS rule * (at-rule or qualified rule), false for syntax errors. * + * > 5.3.5. Parse a rule + * > To parse a rule from input: + * > 1. Normalize input, and set input to the result. + * > 2. While the next input token from input is a , consume the next input token from input. + * > 3. If the next input token from input is an , return a syntax error. + * > Otherwise, if the next input token from input is an , consume an at-rule from input, and let rule be the return value. + * > Otherwise, consume a qualified rule from input and let rule be the return value. If nothing was returned, return a syntax error. + * > 4. While the next input token from input is a , consume the next input token from input. + * > 5. 
If the next input token from input is an , return rule. Otherwise, return a syntax error. + * * @see https://www.w3.org/TR/css-syntax-3/#parse-a-rule * * @param string $css The CSS input. @@ -79,9 +89,13 @@ private static function consume_at_rule( WP_CSS_Token_Processor $processor ): vo } if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $type ) { - self::consume_simple_block( $processor ); + self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE ); return; } + + // Consume component values so that `;` and `{` inside + // paired tokens ((), [], functions) are not misidentified. + self::consume_component_value( $processor ); } // EOF: at-rule is still returned per spec. } @@ -96,47 +110,65 @@ private static function consume_at_rule( WP_CSS_Token_Processor $processor ): vo * @see https://www.w3.org/TR/css-syntax-3/#consume-a-qualified-rule */ private static function consume_qualified_rule( WP_CSS_Token_Processor $processor ): bool { - // The processor is already on the first token of the prelude. - // Check if it's already a left brace. - if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $processor->get_token_type() ) { - self::consume_simple_block( $processor ); - return true; - } - - while ( $processor->next_token() ) { + /* + * The processor is already positioned on the first token. + * Loop starting from the current token, then continue with next_token(). + */ + do { if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $processor->get_token_type() ) { - self::consume_simple_block( $processor ); + self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE ); return true; } - } + + // Consume component values so that `{` inside + // paired tokens ((), [], functions) is not misidentified. + self::consume_component_value( $processor ); + } while ( $processor->next_token() ); // EOF without finding a block → return nothing (syntax error). return false; } + /** + * Consumes a component value from the token stream. 
+ * + * The processor must be positioned on the current token. If the token + * opens a paired block — `(`, `[`, or a function token — the entire + * block is consumed up to the matching close token or EOF. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-a-component-value + */ + private static function consume_component_value( WP_CSS_Token_Processor $processor ): void { + $type = $processor->get_token_type(); + + if ( WP_CSS_Token_Processor::TOKEN_LEFT_PAREN === $type || WP_CSS_Token_Processor::TOKEN_FUNCTION === $type ) { + self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_PAREN ); + } elseif ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACKET === $type ) { + self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_BRACKET ); + } elseif ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $type ) { + self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE ); + } + } + /** * Consumes a simple block from the token stream. * - * The processor must be positioned on a left brace token. - * Consumes tokens until the matching right brace or EOF, - * tracking nested brace pairs. + * The processor must be positioned on the opening token. + * Consumes tokens until the matching ending token or EOF. + * Nested component values (paired tokens) are consumed recursively. * * @see https://www.w3.org/TR/css-syntax-3/#consume-a-simple-block + * + * @param WP_CSS_Token_Processor $processor The token processor. + * @param string $ending_token The token type that closes this block. 
*/ - private static function consume_simple_block( WP_CSS_Token_Processor $processor ): void { - $depth = 1; - + private static function consume_simple_block( WP_CSS_Token_Processor $processor, string $ending_token ): void { while ( $processor->next_token() ) { - $type = $processor->get_token_type(); - - if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $type ) { - ++$depth; - } elseif ( WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE === $type ) { - --$depth; - if ( 0 === $depth ) { - return; - } + if ( $ending_token === $processor->get_token_type() ) { + return; } + + self::consume_component_value( $processor ); } // EOF: block is still returned per spec. } diff --git a/tests/phpunit/tests/css-api/wpCssProcessor.php b/tests/phpunit/tests/css-api/wpCssProcessor.php index 7fe056e806ce4..9620ae8326293 100644 --- a/tests/phpunit/tests/css-api/wpCssProcessor.php +++ b/tests/phpunit/tests/css-api/wpCssProcessor.php @@ -117,6 +117,62 @@ public static function data_valid_rules(): Generator { yield 'Leading comment' => array( '/* comment */ .a { }' ); yield 'Trailing comment' => array( '.a { } /* comment */' ); yield 'Comments around at-rule' => array( '/* c1 */ @media screen { } /* c2 */' ); + + /* + * Strings and URLs containing braces (tokenizer treats these as + * single tokens, so braces inside must not affect block matching). + */ + yield 'String with closing brace in block' => array( '.a { content: "}" }' ); + yield 'String with opening brace in block' => array( '.a { content: "{" }' ); + yield 'String with braces in block' => array( '.a { content: "{ }" }' ); + yield 'URL with closing brace in block' => array( ".a { background: url(a}) }" ); + + /* + * Functional notation and paired tokens in preludes. 
+ */ + yield 'Functional pseudo-class in prelude' => array( '.a:has(.b) { }' ); + yield 'Nested parens and brackets in qualified prelude' => array( '.a:not([b]) { }' ); + yield 'Parens with brace in at-rule prelude (string)' => array( '@supports (content: "{") { }' ); + yield 'Brackets in at-rule prelude' => array( '@foo [screen] { }' ); + yield 'Semicolon inside brackets in at-rule prelude' => array( '@foo [ ; ] { }' ); + yield 'Brace inside brackets in at-rule prelude' => array( '@foo [{] { }' ); + yield 'Brace inside function in at-rule prelude' => array( '@media func({) { }' ); + + /* + * Escaped characters in preludes. + */ + yield 'Escaped open brace in prelude' => array( '.a\{ { }' ); + yield 'Escaped close brace in prelude' => array( '.a\} { }' ); + + /* + * Nested at-rules inside blocks. + */ + yield 'Nested at-rule inside media' => array( '@media screen { @font-face { } }' ); + yield 'Multiple nested rules inside media' => array( '@media screen { .a { } .b { } }' ); + + /* + * CDO/CDC tokens () in prelude. + */ + yield 'CDO and CDC in qualified prelude' => array( ' { }' ); + yield 'CDO and CDC in at-rule prelude' => array( '@foo { }' ); + + /* + * Unclosed blocks (spec returns rule on EOF inside block). + */ + yield 'Unclosed qualified rule block' => array( '.a { color: red' ); + yield 'Unclosed at-rule block' => array( '@media screen { .a { color: red' ); + yield 'Unclosed nested block' => array( '.a { .b {' ); + + /* + * Additional edge cases from CSS parsing test suites. 
+ */ + yield 'At-rule with prelude and trailing comment' => array( '@foo bar; /* comment */' ); + yield 'At-rule with bracket and paren nesting' => array( ' /**/ @foo bar{[(4' ); + yield 'At-rule unclosed block with content' => array( '@foo { bar' ); + yield 'At-rule with unclosed bracket in prelude' => array( '@foo [ bar' ); + yield 'Qualified rule with surrounding comments' => array( ' /**/ div > p { color: #aaa; } /**/ ' ); + yield 'Empty prelude unclosed block with comment' => array( ' /**/ { color: #aaa ' ); + yield 'CDO CDC not special in prelude' => array( ' /* CDO/CDC are not special */ {' ); } public static function data_invalid_rules(): Generator { @@ -162,5 +218,27 @@ public static function data_invalid_rules(): Generator { yield 'Just a closing brace' => array( '}' ); yield 'Just a colon' => array( ':' ); yield 'Just a comma' => array( ',' ); + + /* + * @ sign not followed by ident (tokenizer produces DELIM, not AT_KEYWORD). + * Treated as qualified rule prelude — no block → syntax error. + */ + yield 'Bare @ sign' => array( '@' ); + yield '@ then semicolon' => array( '@;' ); + + /* + * Braces inside () or [] in qualified rule prelude are consumed as + * component values, not as the rule's block. The qualified rule never + * finds its block → EOF → nothing. + */ + yield 'Brace inside parens in qualified prelude' => array( '.a:has({) { }' ); + yield 'Brace inside brackets in qualified prelude' => array( '[x={] { }' ); + + /* + * Additional edge cases from CSS parsing test suites. 
+ */ + yield 'Two qualified rules with declarations' => array( 'div { color: #aaa; } p{}' ); + yield 'Qualified rule then CDC' => array( 'div {} -->' ); + yield 'Empty block then ident' => array( '{}a' ); } } From ac0ad6395de60bd19b59ac37ffed93e11b45db79 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 9 Apr 2026 14:50:18 +0200 Subject: [PATCH 41/44] Implement normalization --- .../css-api/class-wp-css-builder.php | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/src/wp-includes/css-api/class-wp-css-builder.php b/src/wp-includes/css-api/class-wp-css-builder.php index aeae2be1a2145..abda30e609097 100644 --- a/src/wp-includes/css-api/class-wp-css-builder.php +++ b/src/wp-includes/css-api/class-wp-css-builder.php @@ -124,4 +124,116 @@ public static function string( string $value ): string { ); return "\"{$escaped}\""; } + + public static function normalize_and_escape_css( string $css ): string { + $css = wp_scrub_utf8( $css ); + $processor = WP_CSS_Token_Processor::create( $css ); + if ( null === $processor ) { + return ''; + } + + $normalized_css = ''; + + while ( $processor->next_token() ) { + switch ( $processor->get_token_type() ) { + + // Basic punctuation: + case WP_CSS_Token_Processor::TOKEN_SEMICOLON: $normalized_css .= ';'; break; + case WP_CSS_Token_Processor::TOKEN_COMMA: $normalized_css .= ','; break; + case WP_CSS_Token_Processor::TOKEN_WHITESPACE: $normalized_css .= ' '; break; + case WP_CSS_Token_Processor::TOKEN_COLON: $normalized_css .= ':'; break; + + // Paired punctuation: + case WP_CSS_Token_Processor::TOKEN_LEFT_BRACE: $normalized_css .= '{'; break; + case WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE: $normalized_css .= '}'; break; + case WP_CSS_Token_Processor::TOKEN_LEFT_PAREN: $normalized_css .= '('; break; + case WP_CSS_Token_Processor::TOKEN_RIGHT_PAREN: $normalized_css .= ')'; break; + case WP_CSS_Token_Processor::TOKEN_LEFT_BRACKET: $normalized_css .= '['; break; + case WP_CSS_Token_Processor::TOKEN_RIGHT_BRACKET: 
$normalized_css .= ']'; break;
+
+				// "@" + ident
+				case WP_CSS_Token_Processor::TOKEN_AT_KEYWORD:
+					$normalized_css .= '@' . self::ident( $processor->get_token_value() );
+					break;
+
+				// ident + "("
+				case WP_CSS_Token_Processor::TOKEN_FUNCTION:
+					$normalized_css .= self::ident( $processor->get_token_value() ) . '(';
+					break;
+
+				/*
+				 * Hash tokens are not idents but their value can be escaped as such.
+				 *
+				 *     ‖→ "#" →─┐   ┌──────────────────────────────┐   ┌─→‖
+				 *              ├─→─┤ a-z A-Z 0-9 _ - or non-ASCII ├─→─┤
+				 *              │   └──────────────────────────────┘   │
+				 *              │   ┌──────────────────────────────┐   │
+				 *              ├─→─┤            escape            ├─→─┤
+				 *              │   └──────────────────────────────┘   │
+				 *              └──────────────────←───────────────────┘
+				 */
+				case WP_CSS_Token_Processor::TOKEN_HASH:
+					$normalized_css .= '#' . self::ident( $processor->get_token_value() );
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_DIMENSION:
+					$normalized_css .= $processor->get_token_value() . $processor->get_token_unit();
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_PERCENTAGE:
+					// Number first, then "%" (assumes get_token_value() is the bare number, as the DIMENSION case implies — confirm).
+					$normalized_css .= "{$processor->get_token_value()}%";
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_NUMBER:
+					$normalized_css .= $processor->get_token_value();
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_DELIM:
+					$normalized_css .= $processor->get_token_value();
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_IDENT:
+					$normalized_css .= self::ident( $processor->get_token_value() );
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_STRING:
+					$normalized_css .= self::string( $processor->get_token_value() );
+					break;
+
+				// Keep or strip comments?
+				case WP_CSS_Token_Processor::TOKEN_COMMENT:
+					$normalized_css .= substr( $css, $processor->get_token_start(), $processor->get_token_length() );
+					break;
+
+				/**
+				 * A <bad-string-token> is an open string that reaches a newline.
+ * + * @see https://www.w3.org/TR/css-syntax-3/#consume-string-token + * + * @see https://www.w3.org/TR/css-syntax-3/#preserved-tokens + * > Note: The tokens <}-token>s, <)-token>s, <]-token>, <bad-string-token>, and <bad-url-token> are always parse errors, but they are preserved in the token stream by this specification to allow other specs, such as Media Queries, to define more fine-grained error-handling than just dropping an entire declaration or block. + */ + case WP_CSS_Token_Processor::TOKEN_BAD_STRING: + $normalized_css .= substr( $css, $processor->get_token_start(), $processor->get_token_length() ) . "\n"; + break; + + case WP_CSS_Token_Processor::TOKEN_URL: + case WP_CSS_Token_Processor::TOKEN_BAD_URL: + case WP_CSS_Token_Processor::TOKEN_CDC: + case WP_CSS_Token_Processor::TOKEN_CDO: + default: + throw new Error( 'unhandled token type ' . $processor->get_token_type() . ' with value ' . var_export( $processor->get_token_value(), true ) ); + } + } + + return strtr( + $normalized_css, + array( + ' ' => '␠', + "\t" => "␉\t", + "\n" => "␊\n", + ) + ); + } } From 9cc1030945560b805d3a03004cbf2f57de907ae8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 9 Apr 2026 17:28:37 +0200 Subject: [PATCH 42/44] Add parse_a_list_of_declarations() --- .../css-api/class-wp-css-processor.php | 119 ++++++++++++ .../phpunit/tests/css-api/wpCssProcessor.php | 173 ++++++++++++++++++ 2 files changed, 292 insertions(+) diff --git a/src/wp-includes/css-api/class-wp-css-processor.php b/src/wp-includes/css-api/class-wp-css-processor.php index 7f39f37bba9f9..f0791224429f4 100644 --- a/src/wp-includes/css-api/class-wp-css-processor.php +++ b/src/wp-includes/css-api/class-wp-css-processor.php @@ -53,6 +53,106 @@ public static function parse_a_rule( string $css ): bool { return true; } + /** + * Implements "parse a list of declarations" from CSS Syntax Level 3. + * + * Returns a Generator that yields for each valid CSS declaration found. 
+ * Each yield produces a property name (key) and the component values + * string (value). At-rules are consumed but not yielded. Invalid + * declarations trigger error recovery and are skipped. + * + * > 5.3.8. Parse a list of declarations + * > To parse a list of declarations from input: + * > 1. Normalize input, and set input to the result. + * > 2. Consume a list of declarations from input, and return the result. + * + * @see https://www.w3.org/TR/css-syntax-3/#parse-list-of-declarations + * + * @param string $css The CSS input (e.g. declaration block contents). + * @return Generator Yields property name => component values string. + */ + public static function parse_a_list_of_declarations( string $css ): Generator { + $processor = WP_CSS_Token_Processor::create( $css ); + if ( null === $processor ) { + return; + } + + while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + + // Whitespace, comments, and semicolons: do nothing. + if ( + WP_CSS_Token_Processor::TOKEN_WHITESPACE === $type || + WP_CSS_Token_Processor::TOKEN_COMMENT === $type || + WP_CSS_Token_Processor::TOKEN_SEMICOLON === $type + ) { + continue; + } + + // At-keyword: consume an at-rule, do not yield. + if ( WP_CSS_Token_Processor::TOKEN_AT_KEYWORD === $type ) { + self::consume_at_rule( $processor ); + continue; + } + + // Ident token: attempt to consume a declaration. + if ( WP_CSS_Token_Processor::TOKEN_IDENT === $type ) { + $name = $processor->get_token_value(); + + // Skip whitespace/comments to find the colon. + if ( ! self::next_non_whitespace_comment_token( $processor ) ) { + // EOF without colon. + return; + } + + if ( WP_CSS_Token_Processor::TOKEN_COLON !== $processor->get_token_type() ) { + // No colon: not a valid declaration. Consume remaining until ; or EOF. + if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON !== $processor->get_token_type() ) { + self::consume_the_remnants_of_a_bad_declaration( $processor ); + } + continue; + } + + // Colon found. 
Consume the value, tracking byte offsets. + $value_start = null; + $value_end = null; + + while ( $processor->next_token() ) { + $vtype = $processor->get_token_type(); + + if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON === $vtype ) { + break; + } + + if ( + WP_CSS_Token_Processor::TOKEN_WHITESPACE === $vtype || + WP_CSS_Token_Processor::TOKEN_COMMENT === $vtype + ) { + continue; + } + + if ( null === $value_start ) { + $value_start = $processor->get_token_start(); + } + + self::consume_component_value( $processor ); + + $value_end = $processor->get_token_start() + $processor->get_token_length(); + } + + if ( null !== $value_start ) { + yield $name => substr( $css, $value_start, $value_end - $value_start ); + } else { + yield $name => ''; + } + continue; + } + + // Anything else: parse error. Consume until ; or EOF. + self::consume_the_remnants_of_a_bad_declaration( $processor ); + } + } + /** * Advances past whitespace and comment tokens. * @@ -150,6 +250,25 @@ private static function consume_component_value( WP_CSS_Token_Processor $process } } + /** + * Consumes the remnants of a bad declaration from the token stream. + * + * The processor must be positioned on the current token. + * Consumes it as a component value, then advances consuming + * component values until a semicolon or EOF is reached. + * + * @param WP_CSS_Token_Processor $processor The token processor. + */ + private static function consume_the_remnants_of_a_bad_declaration( WP_CSS_Token_Processor $processor ): void { + self::consume_component_value( $processor ); + while ( $processor->next_token() ) { + if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON === $processor->get_token_type() ) { + return; + } + self::consume_component_value( $processor ); + } + } + /** * Consumes a simple block from the token stream. 
* diff --git a/tests/phpunit/tests/css-api/wpCssProcessor.php b/tests/phpunit/tests/css-api/wpCssProcessor.php index 9620ae8326293..444dde2e3d4fb 100644 --- a/tests/phpunit/tests/css-api/wpCssProcessor.php +++ b/tests/phpunit/tests/css-api/wpCssProcessor.php @@ -241,4 +241,177 @@ public static function data_invalid_rules(): Generator { yield 'Qualified rule then CDC' => array( 'div {} -->' ); yield 'Empty block then ident' => array( '{}a' ); } + + /** + * @ticket TBD + * @dataProvider data_parse_a_list_of_declarations + * @covers ::parse_a_list_of_declarations + */ + public function test_parse_a_list_of_declarations( string $css, array $expected ): void { + $actual = array(); + foreach ( WP_CSS_Processor::parse_a_list_of_declarations( $css ) as $name => $value ) { + $actual[] = array( $name, $value ); + } + $this->assertSame( $expected, $actual, "Declarations from: {$css}" ); + } + + public static function data_parse_a_list_of_declarations(): Generator { + // Basic declarations. + yield 'Single declaration' => array( + 'color: red', + array( array( 'color', 'red' ) ), + ); + yield 'Multiple declarations' => array( + 'color: red; font-size: 16px', + array( array( 'color', 'red' ), array( 'font-size', '16px' ) ), + ); + yield 'Declaration with semicolon terminator' => array( + 'color: red;', + array( array( 'color', 'red' ) ), + ); + yield 'No space after colon' => array( + 'color:red', + array( array( 'color', 'red' ) ), + ); + + // Whitespace trimming. + yield 'Leading whitespace in value' => array( + 'color: red', + array( array( 'color', 'red' ) ), + ); + yield 'Trailing whitespace in value' => array( + 'color: red ;', + array( array( 'color', 'red' ) ), + ); + yield 'Trailing whitespace at EOF' => array( + 'color: red ', + array( array( 'color', 'red' ) ), + ); + yield 'Whitespace around declaration' => array( + ' color: red ; font: bold ', + array( array( 'color', 'red' ), array( 'font', 'bold' ) ), + ); + + // Empty and whitespace-only values. 
+ yield 'Empty value with semicolon' => array( + 'color: ;', + array( array( 'color', '' ) ), + ); + yield 'Empty value at EOF' => array( + 'color:', + array( array( 'color', '' ) ), + ); + yield 'Whitespace-only value' => array( + 'color: ;', + array( array( 'color', '' ) ), + ); + + // Multi-token values. + yield 'Value with multiple tokens' => array( + 'font: bold 14px/1.5 sans-serif', + array( array( 'font', 'bold 14px/1.5 sans-serif' ) ), + ); + yield 'Value with !important' => array( + 'color: red !important', + array( array( 'color', 'red !important' ) ), + ); + + // Values with blocks. + yield 'Function value' => array( + 'color: var(--x)', + array( array( 'color', 'var(--x)' ) ), + ); + yield 'Function with fallback' => array( + 'color: var(--x, red)', + array( array( 'color', 'var(--x, red)' ) ), + ); + yield 'Semicolon inside function' => array( + '--x: var(--y, a;b); color: red', + array( array( '--x', 'var(--y, a;b)' ), array( 'color', 'red' ) ), + ); + yield 'Custom property with brace block' => array( + '--x: { a: b }', + array( array( '--x', '{ a: b }' ) ), + ); + yield 'Nested functions' => array( + 'background: linear-gradient(rgb(0, 0, 0), rgb(255, 255, 255))', + array( array( 'background', 'linear-gradient(rgb(0, 0, 0), rgb(255, 255, 255))' ) ), + ); + + // Comments. + yield 'Leading comment in value' => array( + 'color: /* comment */ red', + array( array( 'color', 'red' ) ), + ); + yield 'Comment between value tokens' => array( + 'font: bold /* comment */ 14px', + array( array( 'font', 'bold /* comment */ 14px' ) ), + ); + yield 'Trailing comment in value' => array( + 'color: red /* comment */', + array( array( 'color', 'red' ) ), + ); + yield 'Comment between declarations' => array( + 'color: red; /* comment */ font: bold', + array( array( 'color', 'red' ), array( 'font', 'bold' ) ), + ); + + // At-rules (consumed, not yielded). 
+ yield 'At-rule before declaration' => array( + '@foo; color: red', + array( array( 'color', 'red' ) ), + ); + yield 'At-rule with block' => array( + '@media screen { body { color: red } } color: blue', + array( array( 'color', 'blue' ) ), + ); + yield 'Only at-rules' => array( + '@foo; @bar {}', + array(), + ); + + // Error recovery. + yield 'No colon' => array( + 'foo bar; color: red', + array( array( 'color', 'red' ) ), + ); + yield 'Non-ident start' => array( + ': red; color: blue', + array( array( 'color', 'blue' ) ), + ); + yield 'Number start' => array( + '123 { }; color: red', + array( array( 'color', 'red' ) ), + ); + yield 'Just semicolons' => array( + ';;;', + array(), + ); + + // Empty and whitespace-only input. + yield 'Empty string' => array( + '', + array(), + ); + yield 'Whitespace only' => array( + ' ', + array(), + ); + yield 'Only comments' => array( + '/* comment */', + array(), + ); + + // Duplicate properties. + yield 'Duplicate properties' => array( + 'color: red; color: blue', + array( array( 'color', 'red' ), array( 'color', 'blue' ) ), + ); + + // Escaped property names. + yield 'Escaped property name' => array( + '\63 olor: red', + array( array( 'color', 'red' ) ), + ); + } } From 2829bb126e3519a7df56149fb953b7ccf552852f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 9 Apr 2026 21:33:03 +0200 Subject: [PATCH 43/44] Implement declaration list processing --- .../css-api/class-wp-css-processor.php | 509 +++++++++++++++--- .../phpunit/tests/css-api/wpCssProcessor.php | 386 +++++++++---- 2 files changed, 719 insertions(+), 176 deletions(-) diff --git a/src/wp-includes/css-api/class-wp-css-processor.php b/src/wp-includes/css-api/class-wp-css-processor.php index f0791224429f4..b8ab2c0adb99e 100644 --- a/src/wp-includes/css-api/class-wp-css-processor.php +++ b/src/wp-includes/css-api/class-wp-css-processor.php @@ -2,83 +2,133 @@ class WP_CSS_Processor { /** - * Implements "parse a rule" from CSS Syntax Level 3. 
+ * The original CSS input string. * - * Returns true if the input contains exactly one CSS rule - * (at-rule or qualified rule), false for syntax errors. + * @var string + */ + private string $css; + + /** + * The underlying token processor. * - * > 5.3.5. Parse a rule - * > To parse a rule from input: - * > 1. Normalize input, and set input to the result. - * > 2. While the next input token from input is a <whitespace-token>, consume the next input token from input. - * > 3. If the next input token from input is an <EOF-token>, return a syntax error. - * > Otherwise, if the next input token from input is an <at-keyword-token>, consume an at-rule from input, and let rule be the return value. - * > Otherwise, consume a qualified rule from input and let rule be the return value. If nothing was returned, return a syntax error. - * > 4. While the next input token from input is a <whitespace-token>, consume the next input token from input. - * > 5. If the next input token from input is an <EOF-token>, return rule. Otherwise, return a syntax error. + * @var WP_CSS_Token_Processor + */ + private WP_CSS_Token_Processor $processor; + + /** + * Queued byte-range replacements, applied by get_updated_css(). * - * @see https://www.w3.org/TR/css-syntax-3/#parse-a-rule + * Each entry is an array with keys: 'start', 'length', 'text'. * - * @param string $css The CSS input. - * @return bool Whether the input is a single valid CSS rule. + * @var array[] */ - public static function parse_a_rule( string $css ): bool { - $processor = WP_CSS_Token_Processor::create( $css ); - if ( null === $processor ) { - return false; - } + private array $lexical_updates = array(); - // Step 2: Discard whitespace and comments. - if ( ! self::next_non_whitespace_comment_token( $processor ) ) { - // Step 3: EOF → syntax error. - return false; - } + /** + * Declarations to append, each with 'name' and 'value' keys.
+ * + * @var array[] + */ + private array $appended_declarations = array(); - if ( WP_CSS_Token_Processor::TOKEN_AT_KEYWORD === $processor->get_token_type() ) { - // Step 4: Consume an at-rule. - self::consume_at_rule( $processor ); - } else { - // Step 5: Consume a qualified rule. - if ( ! self::consume_qualified_rule( $processor ) ) { - return false; - } - } + /** + * Decoded property name of the current declaration, or null. + * + * @var string|null + */ + private ?string $current_name = null; - // Steps 6–7: Discard whitespace/comments, then expect EOF. - if ( self::next_non_whitespace_comment_token( $processor ) ) { - // Non-EOF after the rule → syntax error. - return false; - } + /** + * Byte offset of the property name start. + * + * @var int|null + */ + private ?int $declaration_start = null; - return true; - } + /** + * Byte offset after the trailing `;` or after the value end. + * + * @var int|null + */ + private ?int $declaration_end = null; /** - * Implements "parse a list of declarations" from CSS Syntax Level 3. + * Byte offset immediately after the colon token. * - * Returns a Generator that yields for each valid CSS declaration found. - * Each yield produces a property name (key) and the component values - * string (value). At-rules are consumed but not yielded. Invalid - * declarations trigger error recovery and are skipped. + * @var int|null + */ + private ?int $after_colon = null; + + /** + * Byte offset of the first non-whitespace value token, or null for empty values. * - * > 5.3.8. Parse a list of declarations - * > To parse a list of declarations from input: - * > 1. Normalize input, and set input to the result. - * > 2. Consume a list of declarations from input, and return the result. + * @var int|null + */ + private ?int $value_start = null; + + /** + * Byte offset after the last non-whitespace value token, or null for empty values. 
* - * @see https://www.w3.org/TR/css-syntax-3/#parse-list-of-declarations + * @var int|null + */ + private ?int $value_end = null; + + /** + * Pending mutation for the current declaration, or null. * - * @param string $css The CSS input (e.g. declaration block contents). - * @return Generator Yields property name => component values string. + * @var array|null + */ + private ?array $pending_mutation = null; + + /** + * Private constructor. Use create_declaration_list() or parse_a_rule(). + * + * @param string $css The CSS input. + * @param WP_CSS_Token_Processor $processor The token processor. */ - public static function parse_a_list_of_declarations( string $css ): Generator { + private function __construct( string $css, WP_CSS_Token_Processor $processor ) { + $this->css = $css; + $this->processor = $processor; + } + + /** + * Creates a processor for iterating and mutating declarations. + * + * @param string $css The CSS declaration list (e.g. contents of a style block). + * @return self|null The processor, or null on invalid encoding. + */ + public static function create_declaration_list( string $css ): ?self { $processor = WP_CSS_Token_Processor::create( $css ); if ( null === $processor ) { - return; + return null; } + return new self( $css, $processor ); + } - while ( $processor->next_token() ) { - $type = $processor->get_token_type(); + /** + * Advances to the next declaration in the list. + * + * Implements "consume a list of declarations" from CSS Syntax Level 3, + * stopping at each valid declaration. At-rules are consumed but skipped. + * Invalid declarations trigger error recovery and are skipped. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-a-list-of-declarations + * + * @return bool True if a declaration was found, false at EOF. + */ + public function next_declaration(): bool { + $this->commit_pending_mutation(); + + // Reset declaration state. 
+ $this->current_name = null; + $this->declaration_start = null; + $this->declaration_end = null; + $this->after_colon = null; + $this->value_start = null; + $this->value_end = null; + + while ( $this->processor->next_token() ) { + $type = $this->processor->get_token_type(); // Whitespace, comments, and semicolons: do nothing. if ( @@ -91,36 +141,41 @@ public static function parse_a_list_of_declarations( string $css ): Generator { // At-keyword: consume an at-rule, do not yield. if ( WP_CSS_Token_Processor::TOKEN_AT_KEYWORD === $type ) { - self::consume_at_rule( $processor ); + self::consume_at_rule( $this->processor ); continue; } // Ident token: attempt to consume a declaration. if ( WP_CSS_Token_Processor::TOKEN_IDENT === $type ) { - $name = $processor->get_token_value(); + $name = $this->processor->get_token_value(); + $decl_start = $this->processor->get_token_start(); // Skip whitespace/comments to find the colon. - if ( ! self::next_non_whitespace_comment_token( $processor ) ) { + if ( ! self::next_non_whitespace_comment_token( $this->processor ) ) { // EOF without colon. - return; + return false; } - if ( WP_CSS_Token_Processor::TOKEN_COLON !== $processor->get_token_type() ) { - // No colon: not a valid declaration. Consume remaining until ; or EOF. - if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON !== $processor->get_token_type() ) { - self::consume_the_remnants_of_a_bad_declaration( $processor ); + if ( WP_CSS_Token_Processor::TOKEN_COLON !== $this->processor->get_token_type() ) { + // No colon: not a valid declaration. + if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON !== $this->processor->get_token_type() ) { + self::consume_the_remnants_of_a_bad_declaration( $this->processor ); } continue; } - // Colon found. Consume the value, tracking byte offsets. 
- $value_start = null; - $value_end = null; + $colon_end = $this->processor->get_token_start() + $this->processor->get_token_length(); - while ( $processor->next_token() ) { - $vtype = $processor->get_token_type(); + // Consume the value, tracking byte offsets. + $value_start = null; + $value_end = null; + $semicolon_end = null; + + while ( $this->processor->next_token() ) { + $vtype = $this->processor->get_token_type(); if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON === $vtype ) { + $semicolon_end = $this->processor->get_token_start() + $this->processor->get_token_length(); break; } @@ -132,25 +187,321 @@ public static function parse_a_list_of_declarations( string $css ): Generator { } if ( null === $value_start ) { - $value_start = $processor->get_token_start(); + $value_start = $this->processor->get_token_start(); } - self::consume_component_value( $processor ); + self::consume_component_value( $this->processor ); + + $value_end = $this->processor->get_token_start() + $this->processor->get_token_length(); + } + + // Store declaration state. + $this->current_name = $name; + $this->declaration_start = $decl_start; + $this->after_colon = $colon_end; + $this->value_start = $value_start; + $this->value_end = $value_end; + $this->declaration_end = $semicolon_end ?? $value_end ?? $colon_end; + + return true; + } + + // Anything else: parse error. Consume until ; or EOF. + self::consume_the_remnants_of_a_bad_declaration( $this->processor ); + } + + return false; + } + + /** + * Returns the property name of the current declaration. + * + * @return string|null The decoded property name, or null. + */ + public function get_name(): ?string { + return $this->current_name; + } + + /** + * Returns the component values string of the current declaration. + * + * Leading and trailing whitespace/comments are trimmed. Comments + * between value tokens are preserved. + * + * @return string|null The value string, empty string for empty values, or null. 
+ */ + public function get_value(): ?string { + if ( null === $this->current_name ) { + return null; + } + if ( null === $this->value_start ) { + return ''; + } + return substr( $this->css, $this->value_start, $this->value_end - $this->value_start ); + } + + /** + * Queues a replacement of the current declaration's value. + * + * The new value is validated for structural safety: bare semicolons, + * unmatched closing braces, and unbalanced blocks are rejected. + * + * If both set_value() and remove() are called on the same declaration, + * the last call wins. + * + * @param string $value The new CSS value text. + * @return bool True if accepted, false on validation failure or no current declaration. + */ + public function set_value( string $value ): bool { + if ( null === $this->current_name ) { + return false; + } + if ( ! self::is_valid_declaration_value( $value ) ) { + return false; + } + $this->pending_mutation = array( + 'type' => 'set_value', + 'value' => $value, + ); + return true; + } + + /** + * Queues removal of the current declaration. + * + * The declaration bytes including the trailing semicolon (if present) + * are removed. If both set_value() and remove() are called on the + * same declaration, the last call wins. + * + * @return bool True if accepted, false when not on a declaration. + */ + public function remove(): bool { + if ( null === $this->current_name ) { + return false; + } + $this->pending_mutation = array( 'type' => 'remove' ); + return true; + } + + /** + * Queues a new declaration to be appended after all existing content. + * + * Both the name and value are validated. The name must tokenize as a + * single CSS ident. The value must be structurally safe. + * + * @param string $name The property name. + * @param string $value The CSS value text. + * @return bool True if accepted, false on validation failure. + */ + public function append_declaration( string $name, string $value ): bool { + if ( ! 
self::is_valid_declaration_value( $value ) ) { + return false; + } - $value_end = $processor->get_token_start() + $processor->get_token_length(); + $name_processor = WP_CSS_Token_Processor::create( $name ); + if ( null === $name_processor || ! $name_processor->next_token() ) { + return false; + } + if ( WP_CSS_Token_Processor::TOKEN_IDENT !== $name_processor->get_token_type() ) { + return false; + } + $decoded_name = $name_processor->get_token_value(); + if ( $name_processor->next_token() ) { + return false; + } + + $this->appended_declarations[] = array( + 'name' => WP_CSS_Builder::ident( $decoded_name ), + 'value' => $value, + ); + return true; + } + + /** + * Returns the CSS with all queued mutations applied. + * + * @return string The updated CSS string. + */ + public function get_updated_css(): string { + $this->commit_pending_mutation(); + + if ( ! empty( $this->lexical_updates ) ) { + usort( + $this->lexical_updates, + static function ( $a, $b ) { + return $a['start'] - $b['start']; } + ); + + $output = ''; + $bytes_already_copied = 0; + + foreach ( $this->lexical_updates as $update ) { + $output .= substr( $this->css, $bytes_already_copied, $update['start'] - $bytes_already_copied ); + $output .= $update['text']; + $bytes_already_copied = $update['start'] + $update['length']; + } + + $output .= substr( $this->css, $bytes_already_copied ); + } else { + $output = $this->css; + } + + foreach ( $this->appended_declarations as $decl ) { + if ( '' !== $output && ';' !== substr( $output, -1 ) ) { + $output .= ';'; + } + $output .= ' ' . $decl['name'] . ': ' . $decl['value']; + } + + return $output; + } + + /** + * Commits the pending mutation for the current declaration to the lexical updates list. 
+ */ + private function commit_pending_mutation(): void { + if ( null === $this->pending_mutation ) { + return; + } + + $mutation = $this->pending_mutation; + $this->pending_mutation = null; + + if ( 'remove' === $mutation['type'] ) { + $this->lexical_updates[] = array( + 'start' => $this->declaration_start, + 'length' => $this->declaration_end - $this->declaration_start, + 'text' => '', + ); + return; + } + + if ( 'set_value' === $mutation['type'] ) { + if ( null !== $this->value_start ) { + $this->lexical_updates[] = array( + 'start' => $this->value_start, + 'length' => $this->value_end - $this->value_start, + 'text' => $mutation['value'], + ); + } else { + // Empty value: insert after the colon. + $this->lexical_updates[] = array( + 'start' => $this->after_colon, + 'length' => 0, + 'text' => ' ' . $mutation['value'], + ); + } + } + } + + /** + * Validates that a string is structurally safe as a declaration value. + * + * Rejects values containing bare semicolons, unmatched closing braces, + * or unbalanced blocks that could break out of the declaration or + * enclosing rule context. + * + * @param string $css The candidate value text. + * @return bool Whether the value is structurally safe. 
+ */ + private static function is_valid_declaration_value( string $css ): bool { + $processor = WP_CSS_Token_Processor::create( $css ); + if ( null === $processor ) { + return false; + } + + $depth = 0; + + while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + + if ( + WP_CSS_Token_Processor::TOKEN_LEFT_PAREN === $type || + WP_CSS_Token_Processor::TOKEN_LEFT_BRACKET === $type || + WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $type || + WP_CSS_Token_Processor::TOKEN_FUNCTION === $type + ) { + ++$depth; + continue; + } + + if ( + WP_CSS_Token_Processor::TOKEN_RIGHT_PAREN === $type || + WP_CSS_Token_Processor::TOKEN_RIGHT_BRACKET === $type + ) { + --$depth; + continue; + } - if ( null !== $value_start ) { - yield $name => substr( $css, $value_start, $value_end - $value_start ); - } else { - yield $name => ''; + if ( WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE === $type ) { + if ( $depth <= 0 ) { + // Unmatched } would escape an enclosing block. + return false; } + --$depth; continue; } - // Anything else: parse error. Consume until ; or EOF. - self::consume_the_remnants_of_a_bad_declaration( $processor ); + if ( 0 === $depth && WP_CSS_Token_Processor::TOKEN_SEMICOLON === $type ) { + // Bare semicolon would split the declaration. + return false; + } } + + return 0 === $depth; + } + + /** + * Implements "parse a rule" from CSS Syntax Level 3. + * + * Returns true if the input contains exactly one CSS rule + * (at-rule or qualified rule), false for syntax errors. + * + * > 5.3.5. Parse a rule + * > To parse a rule from input: + * > 1. Normalize input, and set input to the result. + * > 2. While the next input token from input is a <whitespace-token>, consume the next input token from input. + * > 3. If the next input token from input is an <EOF-token>, return a syntax error. + * > Otherwise, if the next input token from input is an <at-keyword-token>, consume an at-rule from input, and let rule be the return value.
+ * > Otherwise, consume a qualified rule from input and let rule be the return value. If nothing was returned, return a syntax error. + * > 4. While the next input token from input is a <whitespace-token>, consume the next input token from input. + * > 5. If the next input token from input is an <EOF-token>, return rule. Otherwise, return a syntax error. + * + * @see https://www.w3.org/TR/css-syntax-3/#parse-a-rule + * + * @param string $css The CSS input. + * @return bool Whether the input is a single valid CSS rule. + */ + public static function parse_a_rule( string $css ): bool { + $processor = WP_CSS_Token_Processor::create( $css ); + if ( null === $processor ) { + return false; + } + + // Step 2: Discard whitespace and comments. + if ( ! self::next_non_whitespace_comment_token( $processor ) ) { + // Step 3: EOF → syntax error. + return false; + } + + if ( WP_CSS_Token_Processor::TOKEN_AT_KEYWORD === $processor->get_token_type() ) { + // Step 4: Consume an at-rule. + self::consume_at_rule( $processor ); + } else { + // Step 5: Consume a qualified rule. + if ( ! self::consume_qualified_rule( $processor ) ) { + return false; + } + } + + // Steps 6–7: Discard whitespace/comments, then expect EOF. + if ( self::next_non_whitespace_comment_token( $processor ) ) { + // Non-EOF after the rule → syntax error. + return false; + } + + return true; } /** diff --git a/tests/phpunit/tests/css-api/wpCssProcessor.php b/tests/phpunit/tests/css-api/wpCssProcessor.php index 444dde2e3d4fb..5e1b4b5854107 100644 --- a/tests/phpunit/tests/css-api/wpCssProcessor.php +++ b/tests/phpunit/tests/css-api/wpCssProcessor.php @@ -242,21 +242,29 @@ public static function data_invalid_rules(): Generator { yield 'Empty block then ident' => array( '{}a' ); } + /* + * ----------------------------------------------------------------------- + * Declaration list cursor tests.
+ * ----------------------------------------------------------------------- + */ + /** * @ticket TBD - * @dataProvider data_parse_a_list_of_declarations - * @covers ::parse_a_list_of_declarations + * @dataProvider data_declaration_list_navigation + * @covers ::next_declaration + * @covers ::get_name + * @covers ::get_value */ - public function test_parse_a_list_of_declarations( string $css, array $expected ): void { + public function test_declaration_list_navigation( string $css, array $expected ): void { + $proc = WP_CSS_Processor::create_declaration_list( $css ); $actual = array(); - foreach ( WP_CSS_Processor::parse_a_list_of_declarations( $css ) as $name => $value ) { - $actual[] = array( $name, $value ); + while ( $proc->next_declaration() ) { + $actual[] = array( $proc->get_name(), $proc->get_value() ); } $this->assertSame( $expected, $actual, "Declarations from: {$css}" ); } - public static function data_parse_a_list_of_declarations(): Generator { - // Basic declarations. + public static function data_declaration_list_navigation(): Generator { yield 'Single declaration' => array( 'color: red', array( array( 'color', 'red' ) ), @@ -265,7 +273,7 @@ public static function data_parse_a_list_of_declarations(): Generator { 'color: red; font-size: 16px', array( array( 'color', 'red' ), array( 'font-size', '16px' ) ), ); - yield 'Declaration with semicolon terminator' => array( + yield 'Trailing semicolon' => array( 'color: red;', array( array( 'color', 'red' ) ), ); @@ -273,8 +281,6 @@ public static function data_parse_a_list_of_declarations(): Generator { 'color:red', array( array( 'color', 'red' ) ), ); - - // Whitespace trimming. 
yield 'Leading whitespace in value' => array( 'color: red', array( array( 'color', 'red' ) ), @@ -283,16 +289,6 @@ public static function data_parse_a_list_of_declarations(): Generator { 'color: red ;', array( array( 'color', 'red' ) ), ); - yield 'Trailing whitespace at EOF' => array( - 'color: red ', - array( array( 'color', 'red' ) ), - ); - yield 'Whitespace around declaration' => array( - ' color: red ; font: bold ', - array( array( 'color', 'red' ), array( 'font', 'bold' ) ), - ); - - // Empty and whitespace-only values. yield 'Empty value with semicolon' => array( 'color: ;', array( array( 'color', '' ) ), @@ -301,30 +297,18 @@ public static function data_parse_a_list_of_declarations(): Generator { 'color:', array( array( 'color', '' ) ), ); - yield 'Whitespace-only value' => array( - 'color: ;', - array( array( 'color', '' ) ), - ); - - // Multi-token values. - yield 'Value with multiple tokens' => array( + yield 'Multi-token value' => array( 'font: bold 14px/1.5 sans-serif', array( array( 'font', 'bold 14px/1.5 sans-serif' ) ), ); - yield 'Value with !important' => array( + yield '!important in value' => array( 'color: red !important', array( array( 'color', 'red !important' ) ), ); - - // Values with blocks. yield 'Function value' => array( 'color: var(--x)', array( array( 'color', 'var(--x)' ) ), ); - yield 'Function with fallback' => array( - 'color: var(--x, red)', - array( array( 'color', 'var(--x, red)' ) ), - ); yield 'Semicolon inside function' => array( '--x: var(--y, a;b); color: red', array( array( '--x', 'var(--y, a;b)' ), array( 'color', 'red' ) ), @@ -333,85 +317,293 @@ public static function data_parse_a_list_of_declarations(): Generator { '--x: { a: b }', array( array( '--x', '{ a: b }' ) ), ); - yield 'Nested functions' => array( - 'background: linear-gradient(rgb(0, 0, 0), rgb(255, 255, 255))', - array( array( 'background', 'linear-gradient(rgb(0, 0, 0), rgb(255, 255, 255))' ) ), - ); - - // Comments. 
- yield 'Leading comment in value' => array( - 'color: /* comment */ red', - array( array( 'color', 'red' ) ), - ); - yield 'Comment between value tokens' => array( - 'font: bold /* comment */ 14px', - array( array( 'font', 'bold /* comment */ 14px' ) ), - ); - yield 'Trailing comment in value' => array( - 'color: red /* comment */', - array( array( 'color', 'red' ) ), - ); - yield 'Comment between declarations' => array( - 'color: red; /* comment */ font: bold', - array( array( 'color', 'red' ), array( 'font', 'bold' ) ), - ); - - // At-rules (consumed, not yielded). - yield 'At-rule before declaration' => array( + yield 'At-rule consumed not yielded' => array( '@foo; color: red', array( array( 'color', 'red' ) ), ); - yield 'At-rule with block' => array( - '@media screen { body { color: red } } color: blue', - array( array( 'color', 'blue' ) ), - ); - yield 'Only at-rules' => array( - '@foo; @bar {}', - array(), - ); - - // Error recovery. - yield 'No colon' => array( + yield 'Error recovery no colon' => array( 'foo bar; color: red', array( array( 'color', 'red' ) ), ); - yield 'Non-ident start' => array( + yield 'Error recovery non-ident' => array( ': red; color: blue', array( array( 'color', 'blue' ) ), ); - yield 'Number start' => array( - '123 { }; color: red', - array( array( 'color', 'red' ) ), - ); - yield 'Just semicolons' => array( - ';;;', - array(), - ); - - // Empty and whitespace-only input. - yield 'Empty string' => array( - '', - array(), - ); - yield 'Whitespace only' => array( - ' ', - array(), - ); - yield 'Only comments' => array( - '/* comment */', - array(), - ); - - // Duplicate properties. + yield 'Empty string' => array( '', array() ); + yield 'Whitespace only' => array( ' ', array() ); yield 'Duplicate properties' => array( 'color: red; color: blue', array( array( 'color', 'red' ), array( 'color', 'blue' ) ), ); - - // Escaped property names. 
yield 'Escaped property name' => array( '\63 olor: red', array( array( 'color', 'red' ) ), ); + yield 'Comment between value tokens' => array( + 'font: bold /* comment */ 14px', + array( array( 'font', 'bold /* comment */ 14px' ) ), + ); + } + + /** + * @ticket TBD + * @covers ::set_value + * @covers ::get_updated_css + */ + public function test_set_value_basic(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red; font: bold' ); + while ( $proc->next_declaration() ) { + if ( 'color' === $proc->get_name() ) { + $this->assertTrue( $proc->set_value( 'blue' ) ); + } + } + $this->assertSame( 'color: blue; font: bold', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::set_value + * @covers ::get_updated_css + */ + public function test_set_value_preserves_whitespace(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red ;' ); + $proc->next_declaration(); + $proc->set_value( 'blue' ); + $this->assertSame( 'color: blue ;', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::set_value + * @covers ::get_updated_css + */ + public function test_set_value_on_empty_value(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: ;' ); + $proc->next_declaration(); + $proc->set_value( 'blue' ); + $this->assertSame( 'color: blue ;', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::set_value + */ + public function test_set_value_rejects_bare_semicolon(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red' ); + $proc->next_declaration(); + $this->assertFalse( $proc->set_value( 'red; font: evil' ) ); + } + + /** + * @ticket TBD + * @covers ::set_value + */ + public function test_set_value_rejects_unmatched_brace(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red' ); + $proc->next_declaration(); + $this->assertFalse( $proc->set_value( '0;} body { display: none }' ) ); + } + + /** + * @ticket TBD + * @covers ::set_value + */ + 
public function test_set_value_rejects_unbalanced_blocks(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red' ); + $proc->next_declaration(); + $this->assertFalse( $proc->set_value( 'calc(1 + 2' ) ); + } + + /** + * @ticket TBD + * @covers ::set_value + */ + public function test_set_value_accepts_matched_blocks(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red' ); + $proc->next_declaration(); + $this->assertTrue( $proc->set_value( 'var(--x, fallback)' ) ); + } + + /** + * @ticket TBD + * @covers ::set_value + */ + public function test_set_value_accepts_semicolon_inside_block(): void { + $proc = WP_CSS_Processor::create_declaration_list( '--x: a' ); + $proc->next_declaration(); + $this->assertTrue( $proc->set_value( 'var(--y, a;b)' ) ); + } + + /** + * @ticket TBD + * @covers ::remove + * @covers ::get_updated_css + */ + public function test_remove_first_declaration(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red; font: bold' ); + $proc->next_declaration(); + $proc->remove(); + $proc->next_declaration(); + $this->assertSame( ' font: bold', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::remove + * @covers ::get_updated_css + */ + public function test_remove_last_declaration(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red; font: bold' ); + $proc->next_declaration(); + $proc->next_declaration(); + $proc->remove(); + $this->assertSame( 'color: red; ', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::remove + * @covers ::get_updated_css + */ + public function test_remove_all_declarations(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red; font: bold' ); + while ( $proc->next_declaration() ) { + $proc->remove(); + } + $this->assertSame( ' ', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::remove + * @covers ::get_updated_css + */ + public function 
test_remove_single_declaration_no_semicolon(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red' ); + $proc->next_declaration(); + $proc->remove(); + $this->assertSame( '', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::set_value + * @covers ::remove + * @covers ::get_updated_css + */ + public function test_last_mutation_wins(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red' ); + $proc->next_declaration(); + $proc->set_value( 'blue' ); + $proc->remove(); + $this->assertSame( '', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::set_value + * @covers ::remove + * @covers ::get_updated_css + */ + public function test_last_mutation_wins_set_after_remove(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red' ); + $proc->next_declaration(); + $proc->remove(); + $proc->set_value( 'blue' ); + $this->assertSame( 'color: blue', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::append_declaration + * @covers ::get_updated_css + */ + public function test_append_declaration(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red' ); + $proc->append_declaration( 'font', 'bold' ); + $this->assertSame( 'color: red; font: bold', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::append_declaration + * @covers ::get_updated_css + */ + public function test_append_declaration_to_empty(): void { + $proc = WP_CSS_Processor::create_declaration_list( '' ); + $proc->append_declaration( 'color', 'red' ); + $this->assertSame( ' color: red', $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::append_declaration + * @covers ::get_updated_css + */ + public function test_append_declaration_with_trailing_semicolon(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red;' ); + $proc->append_declaration( 'font', 'bold' ); + $this->assertSame( 'color: red; font: bold', $proc->get_updated_css() 
); + } + + /** + * @ticket TBD + * @covers ::append_declaration + */ + public function test_append_declaration_rejects_invalid_name(): void { + $proc = WP_CSS_Processor::create_declaration_list( '' ); + $this->assertFalse( $proc->append_declaration( '123', 'red' ) ); + } + + /** + * @ticket TBD + * @covers ::append_declaration + */ + public function test_append_declaration_rejects_multi_token_name(): void { + $proc = WP_CSS_Processor::create_declaration_list( '' ); + $this->assertFalse( $proc->append_declaration( 'color font', 'red' ) ); + } + + /** + * @ticket TBD + * @covers ::append_declaration + */ + public function test_append_declaration_rejects_injection_in_value(): void { + $proc = WP_CSS_Processor::create_declaration_list( '' ); + $this->assertFalse( $proc->append_declaration( 'color', 'red; } body { display: none' ) ); + } + + /** + * @ticket TBD + * @covers ::get_updated_css + */ + public function test_no_changes_round_trip(): void { + $css = 'color: red; font-size: 16px'; + $proc = WP_CSS_Processor::create_declaration_list( $css ); + while ( $proc->next_declaration() ) { + // Read only, no mutations. 
+ } + $this->assertSame( $css, $proc->get_updated_css() ); + } + + /** + * @ticket TBD + * @covers ::set_value + * @covers ::remove + * @covers ::append_declaration + * @covers ::get_updated_css + */ + public function test_combined_set_remove_append(): void { + $proc = WP_CSS_Processor::create_declaration_list( 'color: red; display: none; font: bold' ); + while ( $proc->next_declaration() ) { + if ( 'color' === $proc->get_name() ) { + $proc->set_value( 'blue' ); + } + if ( 'display' === $proc->get_name() ) { + $proc->remove(); + } + } + $proc->append_declaration( 'margin', '0' ); + $this->assertSame( 'color: blue; font: bold; margin: 0', $proc->get_updated_css() ); } } From c56e56de479c6a771666b6275a6f6e4be6872908 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 28 Apr 2026 09:47:04 +0000 Subject: [PATCH 44/44] Add wp_scrub_utf8() to inputs of string and ident methods Agent-Logs-Url: https://github.com/sirreal/wordpress-develop/sessions/5418c5d6-d4e2-4e30-8060-8ad768d4b1c1 Co-authored-by: sirreal <841763+sirreal@users.noreply.github.com> --- src/wp-includes/css-api/class-wp-css-builder.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wp-includes/css-api/class-wp-css-builder.php b/src/wp-includes/css-api/class-wp-css-builder.php index abda30e609097..56ae7a5ba009a 100644 --- a/src/wp-includes/css-api/class-wp-css-builder.php +++ b/src/wp-includes/css-api/class-wp-css-builder.php @@ -16,6 +16,7 @@ abstract class WP_CSS_Builder { * @return string CSS ident token text. */ public static function ident( string $value ): string { + $value = wp_scrub_utf8( $value ); $result = ''; $length = strlen( $value ); @@ -89,6 +90,7 @@ public static function ident( string $value ): string { * @see https://www.w3.org/TR/css-syntax-3/#escaping */ public static function string( string $value ): string { + $value = wp_scrub_utf8( $value ); $escaped = strtr( $value, array(