diff --git a/src/wp-admin/includes/class-wp-posts-list-table.php b/src/wp-admin/includes/class-wp-posts-list-table.php index fc039a7573f19..ce3484ff364e6 100644 --- a/src/wp-admin/includes/class-wp-posts-list-table.php +++ b/src/wp-admin/includes/class-wp-posts-list-table.php @@ -1122,7 +1122,7 @@ public function column_title( $post ) { if ( get_option( 'wp_collaboration_enabled' ) ) { $locked_avatar = ''; /* translators: Collaboration status message for a singular post in the post list. Can be any type of post. */ - $locked_text = esc_html_x( 'Currently being edited', 'post list' ); + $locked_text = esc_html_x( 'Currently being edited', 'post list' ); } else { $lock_holder = get_userdata( $lock_holder ); $locked_avatar = get_avatar( $lock_holder->ID, 18 ); diff --git a/src/wp-includes/css-api/class-wp-css-builder.php b/src/wp-includes/css-api/class-wp-css-builder.php new file mode 100644 index 0000000000000..56ae7a5ba009a --- /dev/null +++ b/src/wp-includes/css-api/class-wp-css-builder.php @@ -0,0 +1,241 @@ += 0x80 ) { + $result .= $value[ $i ]; + continue; + } + + // ASCII letters and underscore: always valid in idents. + if ( + ( $byte >= 0x41 && $byte <= 0x5A ) || // A-Z + ( $byte >= 0x61 && $byte <= 0x7A ) || // a-z + 0x5F === $byte // _ + ) { + $result .= $value[ $i ]; + continue; + } + + // Hyphen: valid in idents, but check for hyphen-digit at start. + if ( 0x2D === $byte ) { + // Hyphen at position 0 followed by a digit at position 1: escape the digit. + if ( 0 === $i && $i + 1 < $length && ord( $value[ $i + 1 ] ) >= 0x30 && ord( $value[ $i + 1 ] ) <= 0x39 ) { + $result .= '-'; + ++$i; + $result .= sprintf( '\\%X ', ord( $value[ $i ] ) ); + continue; + } + $result .= '-'; + continue; + } + + // Digits: valid except at position 0. + if ( $byte >= 0x30 && $byte <= 0x39 ) { + if ( 0 === $i ) { + $result .= sprintf( '\\%X ', $byte ); + } else { + $result .= $value[ $i ]; + } + continue; + } + + // Everything else: hex-escape. 
+ $result .= sprintf( '\\%X ', $byte ); + } + + return $result; + } + + /** + * Create a quoted CSS string from a plain PHP string value. + * + * Example: + * $value = 'CSS & a ""; + * + * CSS strings are quoted many characters that are problematic in HTML + * or may be complicated for rudimentary CSS or HTML processors to handle + * are encoded using Unicode escape sequences. + * + * @see https://www.w3.org/TR/css-syntax-3/#escaping + */ + public static function string( string $value ): string { + $value = wp_scrub_utf8( $value ); + $escaped = strtr( + $value, + array( + // Escape existing backslashes to prevent unintentional escapes in result. + '\\' => '\\5C ', + + // Pre-processing replaces NULLs and some newlines. Replace and escape as necessary. + "\0" => "\u{FFFD}", + + // Normalize and replace newlines. https://www.w3.org/TR/css-syntax-3/#input-preprocessing + "\r\n" => '\\A ', + "\r" => '\\A ', + "\f" => '\\A ', + + // Newlines must be escaped in CSS strings. + "\n" => '\\A ', + + // Arbitrary characters for Unicode escaping: + + // HTML syntax may be problematic. + '<' => '\\3C ', + '>' => '\\3E ', + '&' => '\\26 ', + + // CSS syntax may be problematic. 
+ ',' => '\\2C ', + ';' => '\\3B ', + '{' => '\\7B ', + '}' => '\\7D ', + '"' => '\\22 ', + "'" => '\\27 ', + ) + ); + return "\"{$escaped}\""; + } + + public static function normalize_and_escape_css( string $css ): string { + $css = wp_scrub_utf8( $css ); + $processor = WP_CSS_Token_Processor::create( $css ); + if ( null === $processor ) { + return ''; + } + + $normalized_css = ''; + + while ( $processor->next_token() ) { + switch ( $processor->get_token_type() ) { + + // Basic punctuation: + case WP_CSS_Token_Processor::TOKEN_SEMICOLON: $normalized_css .= ';'; break; + case WP_CSS_Token_Processor::TOKEN_COMMA: $normalized_css .= ','; break; + case WP_CSS_Token_Processor::TOKEN_WHITESPACE: $normalized_css .= ' '; break; + case WP_CSS_Token_Processor::TOKEN_COLON: $normalized_css .= ':'; break; + + // Paired punctuation: + case WP_CSS_Token_Processor::TOKEN_LEFT_BRACE: $normalized_css .= '{'; break; + case WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE: $normalized_css .= '}'; break; + case WP_CSS_Token_Processor::TOKEN_LEFT_PAREN: $normalized_css .= '('; break; + case WP_CSS_Token_Processor::TOKEN_RIGHT_PAREN: $normalized_css .= ')'; break; + case WP_CSS_Token_Processor::TOKEN_LEFT_BRACKET: $normalized_css .= '['; break; + case WP_CSS_Token_Processor::TOKEN_RIGHT_BRACKET: $normalized_css .= ']'; break; + + // "@" + ident + case WP_CSS_Token_Processor::TOKEN_AT_KEYWORD: + $normalized_css .= '@' . self::ident( $processor->get_token_value() ); + break; + + // ident + "(" + case WP_CSS_Token_Processor::TOKEN_FUNCTION: + $normalized_css .= self::ident( $processor->get_token_value() ) . '('; + break; + + /* + * Hash tokens are not idents but their value can be escaped as such. 
+				 *
+				 * ‖→ "#" →─┐   ┌──────────────────────────────┐   ┌─→‖
+				 *          ├─→─┤ a-z A-Z 0-9 _ - or non-ASCII ├─→─┤
+				 *          │   └──────────────────────────────┘   │
+				 *          │   ┌──────────────────────────────┐   │
+				 *          ├─→─┤            escape            ├─→─┤
+				 *          │   └──────────────────────────────┘   │
+				 *          └──────────────────←───────────────────┘
+				 */
+				case WP_CSS_Token_Processor::TOKEN_HASH:
+					$normalized_css .= '#' . self::ident( $processor->get_token_value() );
+					break;
+
+				// Number immediately followed by its unit, e.g. `10px`.
+				case WP_CSS_Token_Processor::TOKEN_DIMENSION:
+					$normalized_css .= $processor->get_token_value() . $processor->get_token_unit();
+					break;
+
+				/*
+				 * A percentage is written as the number followed by "%" (`50%`),
+				 * never as "%" followed by the number.
+				 *
+				 * NOTE(review): assumes get_token_value() yields only the numeric
+				 * part of the token, without the percent sign. Confirm.
+				 */
+				case WP_CSS_Token_Processor::TOKEN_PERCENTAGE:
+					$normalized_css .= $processor->get_token_value() . '%';
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_NUMBER:
+					$normalized_css .= $processor->get_token_value();
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_DELIM:
+					$normalized_css .= $processor->get_token_value();
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_IDENT:
+					$normalized_css .= self::ident( $processor->get_token_value() );
+					break;
+
+				// Re-quote and re-escape the decoded string value; nothing is printed as a side effect.
+				case WP_CSS_Token_Processor::TOKEN_STRING:
+					$normalized_css .= self::string( $processor->get_token_value() );
+					break;
+
+				// Keep or strip comments? For now the raw comment bytes are copied through unchanged.
+				case WP_CSS_Token_Processor::TOKEN_COMMENT:
+					$normalized_css .= substr( $css, $processor->get_token_start(), $processor->get_token_length() );
+					break;
+
+				/**
+				 * A <bad-string-token> is an open string that reaches a newline.
+				 *
+				 * @see https://www.w3.org/TR/css-syntax-3/#consume-string-token
+				 *
+				 * @see https://www.w3.org/TR/css-syntax-3/#preserved-tokens
+				 * > Note: The tokens <}-token>s, <)-token>s, <]-token>, <bad-string-token>, and <bad-url-token> are always parse errors, but they are preserved in the token stream by this specification to allow other specs, such as Media Queries, to define more fine-grained error-handling than just dropping an entire declaration or block.
+ */ + case WP_CSS_Token_Processor::TOKEN_BAD_STRING: + $normalized_css .= substr( $css, $processor->get_token_start(), $processor->get_token_length() ) . "\n"; + break; + + case WP_CSS_Token_Processor::TOKEN_URL: + case WP_CSS_Token_Processor::TOKEN_BAD_URL: + case WP_CSS_Token_Processor::TOKEN_CDC: + case WP_CSS_Token_Processor::TOKEN_CDO: + default: + throw new Error( 'unhandled token type ' . $processor->get_token_type() . ' with value ' . var_export( $processor->get_token_value(), true ) ); + } + } + + return strtr( + $normalized_css, + array( + ' ' => '␠', + "\t" => "␉\t", + "\n" => "␊\n", + ) + ); + } +} diff --git a/src/wp-includes/css-api/class-wp-css-processor.php b/src/wp-includes/css-api/class-wp-css-processor.php new file mode 100644 index 0000000000000..b8ab2c0adb99e --- /dev/null +++ b/src/wp-includes/css-api/class-wp-css-processor.php @@ -0,0 +1,645 @@ +css = $css; + $this->processor = $processor; + } + + /** + * Creates a processor for iterating and mutating declarations. + * + * @param string $css The CSS declaration list (e.g. contents of a style block). + * @return self|null The processor, or null on invalid encoding. + */ + public static function create_declaration_list( string $css ): ?self { + $processor = WP_CSS_Token_Processor::create( $css ); + if ( null === $processor ) { + return null; + } + return new self( $css, $processor ); + } + + /** + * Advances to the next declaration in the list. + * + * Implements "consume a list of declarations" from CSS Syntax Level 3, + * stopping at each valid declaration. At-rules are consumed but skipped. + * Invalid declarations trigger error recovery and are skipped. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-a-list-of-declarations + * + * @return bool True if a declaration was found, false at EOF. + */ + public function next_declaration(): bool { + $this->commit_pending_mutation(); + + // Reset declaration state. 
+ $this->current_name = null; + $this->declaration_start = null; + $this->declaration_end = null; + $this->after_colon = null; + $this->value_start = null; + $this->value_end = null; + + while ( $this->processor->next_token() ) { + $type = $this->processor->get_token_type(); + + // Whitespace, comments, and semicolons: do nothing. + if ( + WP_CSS_Token_Processor::TOKEN_WHITESPACE === $type || + WP_CSS_Token_Processor::TOKEN_COMMENT === $type || + WP_CSS_Token_Processor::TOKEN_SEMICOLON === $type + ) { + continue; + } + + // At-keyword: consume an at-rule, do not yield. + if ( WP_CSS_Token_Processor::TOKEN_AT_KEYWORD === $type ) { + self::consume_at_rule( $this->processor ); + continue; + } + + // Ident token: attempt to consume a declaration. + if ( WP_CSS_Token_Processor::TOKEN_IDENT === $type ) { + $name = $this->processor->get_token_value(); + $decl_start = $this->processor->get_token_start(); + + // Skip whitespace/comments to find the colon. + if ( ! self::next_non_whitespace_comment_token( $this->processor ) ) { + // EOF without colon. + return false; + } + + if ( WP_CSS_Token_Processor::TOKEN_COLON !== $this->processor->get_token_type() ) { + // No colon: not a valid declaration. + if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON !== $this->processor->get_token_type() ) { + self::consume_the_remnants_of_a_bad_declaration( $this->processor ); + } + continue; + } + + $colon_end = $this->processor->get_token_start() + $this->processor->get_token_length(); + + // Consume the value, tracking byte offsets. 
+ $value_start = null; + $value_end = null; + $semicolon_end = null; + + while ( $this->processor->next_token() ) { + $vtype = $this->processor->get_token_type(); + + if ( WP_CSS_Token_Processor::TOKEN_SEMICOLON === $vtype ) { + $semicolon_end = $this->processor->get_token_start() + $this->processor->get_token_length(); + break; + } + + if ( + WP_CSS_Token_Processor::TOKEN_WHITESPACE === $vtype || + WP_CSS_Token_Processor::TOKEN_COMMENT === $vtype + ) { + continue; + } + + if ( null === $value_start ) { + $value_start = $this->processor->get_token_start(); + } + + self::consume_component_value( $this->processor ); + + $value_end = $this->processor->get_token_start() + $this->processor->get_token_length(); + } + + // Store declaration state. + $this->current_name = $name; + $this->declaration_start = $decl_start; + $this->after_colon = $colon_end; + $this->value_start = $value_start; + $this->value_end = $value_end; + $this->declaration_end = $semicolon_end ?? $value_end ?? $colon_end; + + return true; + } + + // Anything else: parse error. Consume until ; or EOF. + self::consume_the_remnants_of_a_bad_declaration( $this->processor ); + } + + return false; + } + + /** + * Returns the property name of the current declaration. + * + * @return string|null The decoded property name, or null. + */ + public function get_name(): ?string { + return $this->current_name; + } + + /** + * Returns the component values string of the current declaration. + * + * Leading and trailing whitespace/comments are trimmed. Comments + * between value tokens are preserved. + * + * @return string|null The value string, empty string for empty values, or null. + */ + public function get_value(): ?string { + if ( null === $this->current_name ) { + return null; + } + if ( null === $this->value_start ) { + return ''; + } + return substr( $this->css, $this->value_start, $this->value_end - $this->value_start ); + } + + /** + * Queues a replacement of the current declaration's value. 
+	 *
+	 * The new value is validated for structural safety: bare semicolons,
+	 * unmatched closing braces, and unbalanced blocks are rejected.
+	 *
+	 * If both set_value() and remove() are called on the same declaration,
+	 * the last call wins.
+	 *
+	 * @param string $value The new CSS value text.
+	 * @return bool True if accepted, false on validation failure or no current declaration.
+	 */
+	public function set_value( string $value ): bool {
+		if ( null === $this->current_name ) {
+			return false;
+		}
+		if ( ! self::is_valid_declaration_value( $value ) ) {
+			return false;
+		}
+		$this->pending_mutation = array(
+			'type'  => 'set_value',
+			'value' => $value,
+		);
+		return true;
+	}
+
+	/**
+	 * Queues removal of the current declaration.
+	 *
+	 * The declaration bytes including the trailing semicolon (if present)
+	 * are removed. If both set_value() and remove() are called on the
+	 * same declaration, the last call wins.
+	 *
+	 * @return bool True if accepted, false when not on a declaration.
+	 */
+	public function remove(): bool {
+		if ( null === $this->current_name ) {
+			return false;
+		}
+		$this->pending_mutation = array( 'type' => 'remove' );
+		return true;
+	}
+
+	/**
+	 * Queues a new declaration to be appended after all existing content.
+	 *
+	 * Both the name and value are validated. The name must tokenize as a
+	 * single CSS ident. The value must be structurally safe.
+	 *
+	 * @param string $name  The property name.
+	 * @param string $value The CSS value text.
+	 * @return bool True if accepted, false on validation failure.
+	 */
+	public function append_declaration( string $name, string $value ): bool {
+		if ( ! self::is_valid_declaration_value( $value ) ) {
+			return false;
+		}
+
+		$name_processor = WP_CSS_Token_Processor::create( $name );
+		if ( null === $name_processor || ! $name_processor->next_token() ) {
+			return false;
+		}
+		if ( WP_CSS_Token_Processor::TOKEN_IDENT !== $name_processor->get_token_type() ) {
+			return false;
+		}
+		$decoded_name = $name_processor->get_token_value();
+		// Any further token means the name was more than a single ident.
+		if ( $name_processor->next_token() ) {
+			return false;
+		}
+
+		$this->appended_declarations[] = array(
+			'name'  => WP_CSS_Builder::ident( $decoded_name ),
+			'value' => $value,
+		);
+		return true;
+	}
+
+	/**
+	 * Returns the CSS with all queued mutations applied.
+	 *
+	 * Any mutation still pending on the current declaration is committed
+	 * first. Queued replacements are then applied in source order, and
+	 * appended declarations are added after the existing content. A `;`
+	 * separator is inserted only when the last non-whitespace byte of the
+	 * output so far is not already a semicolon.
+	 *
+	 * @return string The updated CSS string.
+	 */
+	public function get_updated_css(): string {
+		$this->commit_pending_mutation();
+
+		if ( ! empty( $this->lexical_updates ) ) {
+			// Apply replacements front to back so every untouched gap is copied exactly once.
+			usort(
+				$this->lexical_updates,
+				static function ( $a, $b ) {
+					return $a['start'] <=> $b['start'];
+				}
+			);
+
+			$output               = '';
+			$bytes_already_copied = 0;
+
+			foreach ( $this->lexical_updates as $update ) {
+				$output              .= substr( $this->css, $bytes_already_copied, $update['start'] - $bytes_already_copied );
+				$output              .= $update['text'];
+				$bytes_already_copied = $update['start'] + $update['length'];
+			}
+
+			$output .= substr( $this->css, $bytes_already_copied );
+		} else {
+			$output = $this->css;
+		}
+
+		foreach ( $this->appended_declarations as $decl ) {
+			/*
+			 * Decide on the separator from the last significant byte, not the last
+			 * byte: CSS ending in "; " or ";\n" is already terminated and must not
+			 * receive a second, empty `;`.
+			 */
+			$significant = rtrim( $output, " \t\n\r\f" );
+			if ( '' !== $significant && ';' !== substr( $significant, -1 ) ) {
+				$output .= ';';
+			}
+			$output .= ' ' . $decl['name'] . ': ' . $decl['value'];
+		}
+
+		return $output;
+	}
+
+	/**
+	 * Commits the pending mutation for the current declaration to the lexical updates list.
+	 */
+	private function commit_pending_mutation(): void {
+		$queued = $this->pending_mutation;
+		if ( null === $queued ) {
+			return;
+		}
+
+		// The mutation is consumed whether or not it produces an update below.
+		$this->pending_mutation = null;
+
+		if ( 'remove' === $queued['type'] ) {
+			// Drop the whole declaration span.
+			$update = array(
+				'start'  => $this->declaration_start,
+				'length' => $this->declaration_end - $this->declaration_start,
+				'text'   => '',
+			);
+		} elseif ( 'set_value' === $queued['type'] ) {
+			if ( null === $this->value_start ) {
+				// Empty value: insert after the colon.
+				$update = array(
+					'start'  => $this->after_colon,
+					'length' => 0,
+					'text'   => ' ' . $queued['value'],
+				);
+			} else {
+				// Swap the existing value bytes for the new text.
+				$update = array(
+					'start'  => $this->value_start,
+					'length' => $this->value_end - $this->value_start,
+					'text'   => $queued['value'],
+				);
+			}
+		} else {
+			// Unknown mutation type: nothing to record.
+			return;
+		}
+
+		$this->lexical_updates[] = $update;
+	}
+
+	/**
+	 * Validates that a string is structurally safe as a declaration value.
+	 *
+	 * Rejects values containing bare semicolons, unmatched closing braces,
+	 * or unbalanced blocks that could break out of the declaration or
+	 * enclosing rule context.
+	 *
+	 * @param string $css The candidate value text.
+	 * @return bool Whether the value is structurally safe.
+	 */
+	private static function is_valid_declaration_value( string $css ): bool {
+		$processor = WP_CSS_Token_Processor::create( $css );
+		if ( null === $processor ) {
+			// No tokenizer could be created for the input, so it cannot be vetted.
+			return false;
+		}
+
+		/*
+		 * Stack of the closing token types expected for the currently open
+		 * blocks, innermost last. A plain depth counter is not enough: it can
+		 * go negative on a stray `)` or `]`, which hides a following top-level
+		 * semicolon (e.g. `) ; x (`), and it treats mismatched pairs such as
+		 * `( ]` as balanced.
+		 */
+		$expected_closers = array();
+
+		while ( $processor->next_token() ) {
+			$type = $processor->get_token_type();
+
+			switch ( $type ) {
+				case WP_CSS_Token_Processor::TOKEN_LEFT_PAREN:
+				case WP_CSS_Token_Processor::TOKEN_FUNCTION:
+					// A function token includes its opening parenthesis.
+					$expected_closers[] = WP_CSS_Token_Processor::TOKEN_RIGHT_PAREN;
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_LEFT_BRACKET:
+					$expected_closers[] = WP_CSS_Token_Processor::TOKEN_RIGHT_BRACKET;
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_LEFT_BRACE:
+					$expected_closers[] = WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE;
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_RIGHT_PAREN:
+				case WP_CSS_Token_Processor::TOKEN_RIGHT_BRACKET:
+				case WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE:
+					// array_pop() yields null on an empty stack, which never equals a token type.
+					if ( array_pop( $expected_closers ) !== $type ) {
+						// Unmatched or mismatched closer, e.g. a `}` that would escape an enclosing block.
+						return false;
+					}
+					break;
+
+				case WP_CSS_Token_Processor::TOKEN_SEMICOLON:
+					if ( empty( $expected_closers ) ) {
+						// Bare semicolon would split the declaration.
+						return false;
+					}
+					break;
+			}
+		}
+
+		// Every opened block must have been closed.
+		return empty( $expected_closers );
+	}
+
+	/**
+	 * Implements "parse a rule" from CSS Syntax Level 3.
+	 *
+	 * Returns true if the input contains exactly one CSS rule
+	 * (at-rule or qualified rule), false for syntax errors.
+	 *
+	 * > 5.3.5. Parse a rule
+	 * > To parse a rule from input:
+	 * > 1. Normalize input, and set input to the result.
+	 * > 2. While the next input token from input is a <whitespace-token>, consume the next input token from input.
+	 * > 3. If the next input token from input is an <EOF-token>, return a syntax error.
+	 * >    Otherwise, if the next input token from input is an <at-keyword-token>, consume an at-rule from input, and let rule be the return value.
+	 * >    Otherwise, consume a qualified rule from input and let rule be the return value. If nothing was returned, return a syntax error.
+	 * > 4. While the next input token from input is a <whitespace-token>, consume the next input token from input.
+	 * > 5. If the next input token from input is an <EOF-token>, return rule. Otherwise, return a syntax error.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#parse-a-rule
+	 *
+	 * @param string $css The CSS input.
+	 * @return bool Whether the input is a single valid CSS rule.
+	 */
+	public static function parse_a_rule( string $css ): bool {
+		$processor = WP_CSS_Token_Processor::create( $css );
+
+		// Step 2 skips leading whitespace and comments; hitting EOF there is the step 3 syntax error.
+		if ( null === $processor || ! self::next_non_whitespace_comment_token( $processor ) ) {
+			return false;
+		}
+
+		// Step 3: an at-keyword starts an at-rule, anything else must be a qualified rule.
+		$starts_at_rule = WP_CSS_Token_Processor::TOKEN_AT_KEYWORD === $processor->get_token_type();
+		if ( $starts_at_rule ) {
+			self::consume_at_rule( $processor );
+		} elseif ( ! self::consume_qualified_rule( $processor ) ) {
+			// Nothing was returned for the qualified rule: syntax error.
+			return false;
+		}
+
+		// Steps 4–5: only whitespace and comments may follow the rule before EOF.
+		return ! self::next_non_whitespace_comment_token( $processor );
+	}
+
+	/**
+	 * Moves the processor forward until it rests on a token that is
+	 * neither whitespace nor a comment.
+	 *
+	 * @param WP_CSS_Token_Processor $processor The token processor to advance.
+	 * @return bool True when such a token was reached, false when the
+	 *              tokens ran out first.
+	 */
+	private static function next_non_whitespace_comment_token( WP_CSS_Token_Processor $processor ): bool {
+		$skippable = array(
+			WP_CSS_Token_Processor::TOKEN_WHITESPACE,
+			WP_CSS_Token_Processor::TOKEN_COMMENT,
+		);
+
+		while ( $processor->next_token() ) {
+			if ( ! in_array( $processor->get_token_type(), $skippable, true ) ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Consumes an at-rule from the token stream.
+	 *
+	 * The processor must be positioned on an at-keyword token.
+	 * Per spec, at-rules are always returned even on EOF (parse error noted).
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-an-at-rule
+	 */
+	private static function consume_at_rule( WP_CSS_Token_Processor $processor ): void {
+		while ( $processor->next_token() ) {
+			switch ( $processor->get_token_type() ) {
+				// A top-level semicolon terminates a block-less at-rule.
+				case WP_CSS_Token_Processor::TOKEN_SEMICOLON:
+					return;
+
+				// A top-level `{` opens the at-rule's block, which ends the rule.
+				case WP_CSS_Token_Processor::TOKEN_LEFT_BRACE:
+					self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE );
+					return;
+
+				// Prelude token: swallow it as a component value so that a `;`
+				// or `{` nested inside (), [] or a function is not taken for
+				// the end of the rule.
+				default:
+					self::consume_component_value( $processor );
+			}
+		}
+		// Tokens ran out: per spec the at-rule is still returned.
+	}
+
+	/**
+	 * Consumes a qualified rule from the token stream.
+	 *
+	 * Expects the processor to rest on the first token of the prelude.
+	 * The prelude is skipped one component value at a time until a
+	 * top-level `{` is met, whose block is then consumed.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-a-qualified-rule
+	 *
+	 * @param WP_CSS_Token_Processor $processor The token processor.
+	 * @return bool True if a block was found and consumed, false if the
+	 *              tokens ran out before any block.
+	 */
+	private static function consume_qualified_rule( WP_CSS_Token_Processor $processor ): bool {
+		// The current token is examined first; advancing happens at the bottom of the loop.
+		while ( true ) {
+			if ( WP_CSS_Token_Processor::TOKEN_LEFT_BRACE === $processor->get_token_type() ) {
+				self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE );
+				return true;
+			}
+
+			// Skip nested (), [] and functions whole so an inner `{` is not mistaken for the rule's block.
+			self::consume_component_value( $processor );
+
+			if ( ! $processor->next_token() ) {
+				// EOF without finding a block → return nothing (syntax error).
+				return false;
+			}
+		}
+	}
+
+	/**
+	 * Consumes a component value from the token stream.
+	 *
+	 * The processor must be positioned on the current token.
+	 * If the token opens a paired block — `(`, `[`, `{`, or a function token —
+	 * the entire block is consumed up to the matching close token or EOF.
+	 * Any other token is left as it is.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-a-component-value
+	 */
+	private static function consume_component_value( WP_CSS_Token_Processor $processor ): void {
+		switch ( $processor->get_token_type() ) {
+			// A function token carries its own opening parenthesis.
+			case WP_CSS_Token_Processor::TOKEN_LEFT_PAREN:
+			case WP_CSS_Token_Processor::TOKEN_FUNCTION:
+				self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_PAREN );
+				break;
+
+			case WP_CSS_Token_Processor::TOKEN_LEFT_BRACKET:
+				self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_BRACKET );
+				break;
+
+			case WP_CSS_Token_Processor::TOKEN_LEFT_BRACE:
+				self::consume_simple_block( $processor, WP_CSS_Token_Processor::TOKEN_RIGHT_BRACE );
+				break;
+		}
+	}
+
+	/**
+	 * Consumes the remnants of a bad declaration from the token stream.
+	 *
+	 * Starting with the token the processor currently rests on, component
+	 * values are consumed one after another. Consumption stops once a
+	 * top-level semicolon has been reached or the tokens run out.
+	 *
+	 * @param WP_CSS_Token_Processor $processor The token processor.
+	 */
+	private static function consume_the_remnants_of_a_bad_declaration( WP_CSS_Token_Processor $processor ): void {
+		do {
+			self::consume_component_value( $processor );
+
+			if ( ! $processor->next_token() ) {
+				// EOF ends the bad declaration as well.
+				return;
+			}
+		} while ( WP_CSS_Token_Processor::TOKEN_SEMICOLON !== $processor->get_token_type() );
+	}
+
+	/**
+	 * Consumes a simple block from the token stream.
+	 *
+	 * The processor must be positioned on the opening token.
+	 * Consumes tokens until the matching ending token or EOF.
+	 * Nested component values (paired tokens) are consumed recursively.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-a-simple-block
+	 *
+	 * @param WP_CSS_Token_Processor $processor The token processor.
+ * @param string $ending_token The token type that closes this block. + */ + private static function consume_simple_block( WP_CSS_Token_Processor $processor, string $ending_token ): void { + while ( $processor->next_token() ) { + if ( $ending_token === $processor->get_token_type() ) { + return; + } + + self::consume_component_value( $processor ); + } + // EOF: block is still returned per spec. + } +} diff --git a/src/wp-includes/css-api/class-wp-css-token-processor.php b/src/wp-includes/css-api/class-wp-css-token-processor.php new file mode 100644 index 0000000000000..1599a8a584882 --- /dev/null +++ b/src/wp-includes/css-api/class-wp-css-token-processor.php @@ -0,0 +1,1785 @@ + Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) + * > code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE + * > FEED (LF) in input by a single U+000A LINE FEED (LF) code point. + * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT + * > CHARACTER (�). + * + * This processor delays normalization as much as possible. That keeps the raw byte + * positions intact for accurate rewrites while still letting consumers ask for a + * normalized token when they need one. + * + * ### No EOF token + * + * The EOF token is a CSS parsing concept, not CSS tokenization concept. Therefore, + * this processor does not produce it. + * + * ### UTF-8 handling + * + * Only UTF-8 strings are supported. Invalid sequences are replaced with U+FFFD (�) + * using the maximal subpart approach described in + * https://www.unicode.org/versions/Unicode9.0.0/ch03.pdf, section 3.9 Best Practices + * for Using U+FFFD. 
+ * + * ## Usage + * + * Basic iteration: + * + * $css = 'width: 10px;'; + * $processor = WP_CSS_Token_Processor::create( $css ); + * while ( $processor->next_token() ) { + * echo $processor->get_normalized_token(); + * } + * // Outputs: + * // width: 10px; + * + * Rewriting a URL while keeping the rest of the stylesheet intact: + * + * $css = 'background: url(old.jpg) center / cover;'; + * $processor = WP_CSS_Token_Processor::create( $css ); + * while ( $processor->next_token() ) { + * if ( WP_CSS_Token_Processor::TOKEN_URL === $processor->get_token_type() ) { + * $processor->set_value( 'uploads/new.jpg' ); + * } + * } + * $result = $processor->get_updated_css(); + * // background: url(uploads/new.jpg) center / cover; + * + * Gathering diagnostics with byte offsets: + * + * $css = "color: red;\ncolor: re\nd;"; + * $processor = WP_CSS_Token_Processor::create( $css ); + * $bad_strings = array(); + * while ( $processor->next_token() ) { + * if ( WP_CSS_Token_Processor::TOKEN_BAD_STRING === $processor->get_token_type() ) { + * $bad_strings[] = array( + * 'start' => $processor->get_token_start(), + * 'length' => $processor->get_token_length(), + * 'value' => $processor->get_unnormalized_token(), + * ); + * } + * } + * + * @see https://www.w3.org/TR/css-syntax-3/#tokenization + */ +class WP_CSS_Token_Processor { + /** + * Token type constants matching the CSS Syntax Level 3 specification. + * + * @see https://www.w3.org/TR/css-syntax-3/#tokenization + */ + public const TOKEN_WHITESPACE = 'whitespace-token'; + public const TOKEN_COMMENT = 'comment'; + public const TOKEN_STRING = 'string-token'; + + /** + * BAD-STRING tokens occur when a string contains an unescaped newline. + * + * Valid strings: "hello", 'world', "line1\Aline2" (escaped newline) + * Invalid (produces bad-string): "hello + * world" (literal newline breaks the string) + * + * The processor stops at the newline and produces a bad-string token for error recovery. 
+ * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-bad-string-token + */ + public const TOKEN_BAD_STRING = 'bad-string-token'; + public const TOKEN_HASH = 'hash-token'; + public const TOKEN_DELIM = 'delim-token'; + public const TOKEN_NUMBER = 'number-token'; + public const TOKEN_PERCENTAGE = 'percentage-token'; + public const TOKEN_DIMENSION = 'dimension-token'; + public const TOKEN_AT_KEYWORD = 'at-keyword-token'; + public const TOKEN_COLON = 'colon-token'; + public const TOKEN_SEMICOLON = 'semicolon-token'; + public const TOKEN_COMMA = 'comma-token'; + public const TOKEN_LEFT_PAREN = '(-token'; + public const TOKEN_RIGHT_PAREN = ')-token'; + public const TOKEN_LEFT_BRACKET = '[-token'; + public const TOKEN_RIGHT_BRACKET = ']-token'; + public const TOKEN_LEFT_BRACE = '{-token'; + public const TOKEN_RIGHT_BRACE = '}-token'; + public const TOKEN_FUNCTION = 'function-token'; + + /** + * URL tokens represent unquoted URLs in url() notation. + * + * For example, `url(image.jpg)` is a URL token. + * + * Quoted URLs like `url( "https://example.com" )` are handled as a function + * token, _not_ a URL token. + * + * Bad URL tokens are created when invalid characters are encountered in + * a URL token. + * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-url-token + */ + public const TOKEN_URL = 'url-token'; + + /** + * BAD-URL tokens occur when a URL contains invalid characters. + * + * Invalid characters: quotes ("), apostrophes ('), parentheses (() + * Example invalid: url(image(.jpg) or url(image".jpg) + * + * When detected, the processor consumes everything up to ) or EOF. + * This prevents the bad URL from breaking subsequent tokens. + * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-bad-url-token + */ + public const TOKEN_BAD_URL = 'bad-url-token'; + + /** + * Identifier tokens, such as `color`, `margin-top`, `red`, + * `inherit`, `--my-var`, `\x-escaped`, `über` (Unicode), etc. 
+ * + * There are restrictions on the codepoints that start or are contained in + * an identifier, and identifiers may contain escape sequences. + * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-ident-token + */ + public const TOKEN_IDENT = 'ident-token'; + + /** + * CDC (Comment Delimiter Close) token: --> + * + * Legacy token from when CSS was embedded in HTML + * + * Modern CSS no longer needs these, but they're preserved for compatibility. + * In stylesheets, they're typically treated like whitespace. + * + * @see https://www.w3.org/TR/css-syntax-3/#typedef-CDC-token + */ + public const TOKEN_CDC = 'CDC-token'; + + /** + * CDO (Comment Delimiter Open) token: ) + * + * Comment Delimiter Close - legacy HTML comment syntax in CSS. + * + * @see https://www.w3.org/TR/css-syntax-3/#CDC-token-diagram + */ + if ( + $this->at + 2 < $this->length && + '-' === $this->css[ $this->at + 1 ] && + '>' === $this->css[ $this->at + 2 ] + ) { + // Consume them and return a . + $this->at += 3; + $this->token_type = self::TOKEN_CDC; + $this->token_length = 3; + return true; + } + + // Otherwise, if the input stream starts with an ident sequence, + // reconsume the current input code point, consume an ident-like + // token, and return it. + if ( $this->check_if_3_code_points_start_an_ident_sequence( $this->at ) ) { + return $this->consume_ident_like(); + } + + // Otherwise, return a with its value set to the current input code point. 
+ ++$this->at; + $this->token_type = self::TOKEN_DELIM; + $this->token_length = 1; + return true; + } + + /* + * U+003C LESS-THAN SIGN (<) + * If followed by !--, this is a CDO token (\n", + "tokens": [ + { + "type": "CDC-token", + "raw": "-->", + "startIndex": 0, + "endIndex": 3, + "normalized": "-->", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0001": { + "css": "foo\n", + "tokens": [ + { + "type": "ident-token", + "raw": "foo", + "startIndex": 0, + "endIndex": 3, + "normalized": "foo", + "value": "foo" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0002": { + "css": "--\n", + "tokens": [ + { + "type": "ident-token", + "raw": "--", + "startIndex": 0, + "endIndex": 2, + "normalized": "--", + "value": "--" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 2, + "endIndex": 3, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0003": { + "css": "--0\n", + "tokens": [ + { + "type": "ident-token", + "raw": "--0", + "startIndex": 0, + "endIndex": 3, + "normalized": "--0", + "value": "--0" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0004": { + "css": "-\\\n", + "tokens": [ + { + "type": "delim-token", + "raw": "-", + "startIndex": 0, + "endIndex": 1, + "normalized": "-", + "value": "-" + }, + { + "type": "delim-token", + "raw": "\\", + "startIndex": 1, + "endIndex": 2, + "normalized": "\\", + "value": "\\" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 2, + "endIndex": 3, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0005": { + "css": "-\\ \n", + "tokens": [ + { + "type": "ident-token", + "raw": "-\\ ", + "startIndex": 0, + "endIndex": 3, + 
"normalized": "- ", + "value": "- " + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0006": { + "css": "--💅\n", + "tokens": [ + { + "type": "ident-token", + "raw": "--💅", + "startIndex": 0, + "endIndex": 6, + "normalized": "--💅", + "value": "--💅" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 6, + "endIndex": 7, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0007": { + "css": "-§\n", + "tokens": [ + { + "type": "ident-token", + "raw": "-§", + "startIndex": 0, + "endIndex": 3, + "normalized": "-§", + "value": "-§" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0008": { + "css": "-×\n", + "tokens": [ + { + "type": "ident-token", + "raw": "-×", + "startIndex": 0, + "endIndex": 3, + "normalized": "-×", + "value": "-×" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 3, + "endIndex": 4, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident/0009": { + "css": "--a𐀀\n", + "tokens": [ + { + "type": "ident-token", + "raw": "--a𐀀", + "startIndex": 0, + "endIndex": 7, + "normalized": "--a𐀀", + "value": "--a𐀀" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 7, + "endIndex": 8, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0001": { + "css": "url(foo)\n", + "tokens": [ + { + "type": "url-token", + "raw": "url(foo)", + "startIndex": 0, + "endIndex": 8, + "normalized": "url(foo)", + "value": "foo" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 8, + "endIndex": 9, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0002": { + "css": "\\75 Rl(foo)\n", + "tokens": [ + { + "type": "url-token", + "raw": "\\75 Rl(foo)", + "startIndex": 0, + "endIndex": 11, + "normalized": "uRl(foo)", + "value": "foo" + }, + { + 
"type": "whitespace-token", + "raw": "\n", + "startIndex": 11, + "endIndex": 12, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0003": { + "css": "uR\\6c (foo)\n", + "tokens": [ + { + "type": "url-token", + "raw": "uR\\6c (foo)", + "startIndex": 0, + "endIndex": 11, + "normalized": "uRl(foo)", + "value": "foo" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 11, + "endIndex": 12, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0004": { + "css": "url('foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "url(", + "startIndex": 0, + "endIndex": 4, + "normalized": "url(", + "value": "url" + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 4, + "endIndex": 9, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 9, + "endIndex": 10, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 10, + "endIndex": 11, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0005": { + "css": "url( 'foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "url(", + "startIndex": 0, + "endIndex": 4, + "normalized": "url(", + "value": "url" + }, + { + "type": "whitespace-token", + "raw": " ", + "startIndex": 4, + "endIndex": 5, + "normalized": " ", + "value": null + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 5, + "endIndex": 10, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 10, + "endIndex": 11, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 11, + "endIndex": 12, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0006": { + "css": "url( 'foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "url(", + "startIndex": 0, + "endIndex": 4, + "normalized": "url(", + "value": "url" + }, + { + "type": 
"whitespace-token", + "raw": " ", + "startIndex": 4, + "endIndex": 6, + "normalized": " ", + "value": null + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 6, + "endIndex": 11, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 11, + "endIndex": 12, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 12, + "endIndex": 13, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0007": { + "css": "url( 'foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "url(", + "startIndex": 0, + "endIndex": 4, + "normalized": "url(", + "value": "url" + }, + { + "type": "whitespace-token", + "raw": " ", + "startIndex": 4, + "endIndex": 7, + "normalized": " ", + "value": null + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 7, + "endIndex": 12, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 12, + "endIndex": 13, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 13, + "endIndex": 14, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0008": { + "css": "not-url( 'foo')\n", + "tokens": [ + { + "type": "function-token", + "raw": "not-url(", + "startIndex": 0, + "endIndex": 8, + "normalized": "not-url(", + "value": "not-url" + }, + { + "type": "whitespace-token", + "raw": " ", + "startIndex": 8, + "endIndex": 11, + "normalized": " ", + "value": null + }, + { + "type": "string-token", + "raw": "'foo'", + "startIndex": 11, + "endIndex": 16, + "normalized": "'foo'", + "value": "foo" + }, + { + "type": ")-token", + "raw": ")", + "startIndex": 16, + "endIndex": 17, + "normalized": ")", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 17, + "endIndex": 18, + "normalized": "\n", + "value": null + } + ] + }, + "tests/ident-like/0009": { + "css": "url( 
foo)\n", + "tokens": [ + { + "type": "url-token", + "raw": "url( foo)", + "startIndex": 0, + "endIndex": 11, + "normalized": "url( foo)", + "value": "foo" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 11, + "endIndex": 12, + "normalized": "\n", + "value": null + } + ] + }, + "tests/left-curly-bracket/0001": { + "css": "{\n", + "tokens": [ + { + "type": "{-token", + "raw": "{", + "startIndex": 0, + "endIndex": 1, + "normalized": "{", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 1, + "endIndex": 2, + "normalized": "\n", + "value": null + } + ] + }, + "tests/left-parenthesis/0001": { + "css": "(\n", + "tokens": [ + { + "type": "(-token", + "raw": "(", + "startIndex": 0, + "endIndex": 1, + "normalized": "(", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 1, + "endIndex": 2, + "normalized": "\n", + "value": null + } + ] + }, + "tests/left-square-bracket/0001": { + "css": "[\n", + "tokens": [ + { + "type": "[-token", + "raw": "[", + "startIndex": 0, + "endIndex": 1, + "normalized": "[", + "value": null + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 1, + "endIndex": 2, + "normalized": "\n", + "value": null + } + ] + }, + "tests/less-than/0001": { + "css": "<\n", + "tokens": [ + { + "type": "delim-token", + "raw": "<", + "startIndex": 0, + "endIndex": 1, + "normalized": "<", + "value": "<" + }, + { + "type": "whitespace-token", + "raw": "\n", + "startIndex": 1, + "endIndex": 2, + "normalized": "\n", + "value": null + } + ] + }, + "tests/less-than/0002": { + "css": " + + + + +wpCssProcessor.php +wpCssTokenProcessor.php +wpCssBuilder.php + + + + + diff --git a/tests/phpunit/tests/css-api/wpCssBuilder.php b/tests/phpunit/tests/css-api/wpCssBuilder.php new file mode 100644 index 0000000000000..91bae79b0bec7 --- /dev/null +++ b/tests/phpunit/tests/css-api/wpCssBuilder.php @@ -0,0 +1,150 @@ +assertSame( $expected, WP_CSS_Builder::string( $input ) ); + + 
/** + * Ensure that a single, equivalent CSS string is produced. + * + * CSS pre-processing normalization is applied to the input to ensure + * a match can be found. + * + * @see https://www.w3.org/TR/css-syntax-3/#input-preprocessing + */ + $processor = WP_CSS_Token_Processor::create( WP_CSS_Builder::string( $input ) ); + $processor->next_token(); + $this->assertSame( WP_CSS_Token_Processor::TOKEN_STRING, $processor->get_token_type() ); + $expected_decoded_value = strtr( + $input, + array( + "\r\n" => "\n", + "\r" => "\n", + "\f" => "\n", + "\0" => '�', + ) + ); + $this->assertSame( $expected_decoded_value, $processor->get_token_value() ); + $this->assertFalse( $processor->next_token() ); + } + + /** + * Data provider for string escaping cases. + * + * @return array + */ + public static function data_string_escaping(): array { + return array( + // Passthrough — no escaping needed, only quoting. + 'empty string' => array( '', '""' ), + 'simple ASCII' => array( 'Arial', '"Arial"' ), + 'spaces preserved' => array( 'Exo 2', '"Exo 2"' ), + 'leading/trailing spaces' => array( ' Arial ', '" Arial "' ), + 'whitespace-only' => array( ' ', '" "' ), + 'numbers pass through' => array( '12345', '"12345"' ), + 'non-ASCII passthrough' => array( 'café', '"café"' ), + + // Backslash escaping — must happen first to prevent double-escaping. + 'backslash' => array( 'Back\\Slash', '"Back\5C Slash"' ), + 'double backslash' => array( '\\\\', '"\5C \5C "' ), + 'backslash before quote' => array( "a\\'b", '"a\5C \27 b"' ), + + // NULL byte → U+FFFD replacement character. + 'null byte' => array( "a\0b", '"a�b"' ), + + // Newline normalization — all variants become \A escape. + 'LF' => array( "a\nb", '"a\A b"' ), + 'CR' => array( "a\rb", '"a\A b"' ), + 'CRLF as single escape' => array( "a\r\nb", '"a\A b"' ), + 'form feed' => array( "a\fb", '"a\A b"' ), + + // HTML-problematic characters. + 'HTML characters < > &' => array( 'ac&d', '"a\3C b\3E c\26 d"' ), + + // CSS-problematic characters. 
+ 'CSS syntax , ; { }' => array( 'a,b;c{d}', '"a\2C b\3B c\7B d\7D "' ), + 'single quote' => array( "CSS's strings", '"CSS\27 s strings"' ), + 'double quote' => array( 'Say "Hi"', '"Say \22 Hi\22 "' ), + ); + } + + /** + * Tests the example from the class docblock. + * + * @covers ::string + */ + public function test_docblock_example(): void { + $value = 'CSS & a " ", ), 'expected' => array( - 'fontFamily' => '"Open Sans"', + 'fontFamily' => '"Open Sans\\3C /style\\3E \\3C script\\3E alert(\\27 XSS\\27 )\\3B \\3C /script\\3E "', 'fontStyle' => 'oblique 20deg 50deg', 'fontWeight' => '200', 'src' => 'https://example.com//stylescriptalert(\'XSS\');/script%20%20%20%20%20%20',