From 258a8efe737e14181ea5ecf0eb87f4a1ace602aa Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Sat, 13 Dec 2025 09:49:13 +0100 Subject: [PATCH 1/5] rustdoc: Add tree-sitter syntax highlighting for non-Rust code blocks Integrate arborium (tree-sitter based highlighting) to provide syntax highlighting for non-Rust code blocks in documentation. Previously, code blocks like ```python or ```javascript were rendered as plain text. Supported languages: bash, c, cpp, css, go, html, java, javascript, json, python, ruby, sql, toml, typescript, yaml. The highlighting uses custom HTML elements (a-k for keywords, a-s for strings, etc.) which are styled via CSS to match rustdoc's existing color scheme across all themes (light, dark, ayu). --- Cargo.lock | 258 +++++++++++++++++++++ src/librustdoc/Cargo.toml | 17 ++ src/librustdoc/html/highlight.rs | 26 +++ src/librustdoc/html/markdown.rs | 20 +- src/librustdoc/html/static/css/rustdoc.css | 29 +++ 5 files changed, 343 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d189eddc3b6bb..3fdf24751f164 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -170,6 +170,240 @@ dependencies = [ "object 0.37.3", ] +[[package]] +name = "arborium" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7ae3fd5b382cf9f09e1613da6214d8020c4868eb1358627eb3628bf87bb30fb" +dependencies = [ + "arborium-bash", + "arborium-c", + "arborium-cpp", + "arborium-css", + "arborium-go", + "arborium-highlight", + "arborium-html", + "arborium-java", + "arborium-javascript", + "arborium-json", + "arborium-python", + "arborium-ruby", + "arborium-sql", + "arborium-theme", + "arborium-toml", + "arborium-tree-sitter", + "arborium-typescript", + "arborium-yaml", + "dlmalloc", +] + +[[package]] +name = "arborium-bash" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4be4da1f6f020cbed670305d2942818323064f1e0667fc5a780cfe48638ee5a" +dependencies = [ + 
"arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-c" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4931371886b262191c6679945103b3679d358a70697e6c7d59cdf3cbe8b6e4e9" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-cpp" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01a0e5539a2cc0e8ed13b20bd52fa7511e2843baac1a775a577013e6e645c644" +dependencies = [ + "arborium-c", + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-css" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5f81a6fa983b8f12118ec33cda27da8e06e63fdeaeb1c71532590c399513102" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-go" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12618f8df880cfbf268e2d35f392302ec8b26397addff3fb154db79e981ef391" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-highlight" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e185c18bb04a51252e2c60d0d19324c438a6d3a71468ffa63674a116685d45de" +dependencies = [ + "arborium-theme", + "arborium-tree-sitter", + "streaming-iterator", +] + +[[package]] +name = "arborium-html" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17704d9e8ebc4a338e810cb1cc2aaedd770041773762902a397f17ac245f371e" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-java" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ca33a5c1bbe5b3e38f47cf82461b04bdfad8b4fe0362824805f7447ea96a0f8" +dependencies = [ + 
"arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-javascript" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2069654043459d341aded42e5061242ab1298806a1603617ad5286d4cf2af73" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-json" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c85dacbcb3dc01dd26ba6004307873d4d3a2376adc218793d28e55f14758a5e9" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-python" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "411a3f3a95b92f41b41c07b8b62cd0abbe42f5dcf7178223928b9f2e793b1f65" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-ruby" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45c9a92f48efc376408177358507e22406ad8338cb371d79b696fba94dd9ab20" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-sql" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d479480c7beac1e789be25235f1dea72e27ddb11919789befb84c51bb1139ba5" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-sysroot" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5942714de3eb1c3d0c5607e30284e314ddea3698a76a0cf3e7686274ac57802f" + +[[package]] +name = "arborium-theme" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac1ce1ffd21913c61768880c08567e3fbe3a966e7501ce1751f1238316b6fb45" +dependencies = [ + "toml 0.8.23", +] + +[[package]] +name = "arborium-toml" +version = "1.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8671c543a4c77cf32cd39e446c7d25e8cfd74cf6032873a25b35b76b663c9fe5" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-tree-sitter" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e056bac05e8c35fdc66754d7028e32f14bbf190e30ff6b2e5a67162749ccfe26" +dependencies = [ + "arborium-sysroot", + "cc", + "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "arborium-typescript" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "243273d737c8c0d26b5e7ed2a09c3eb772b95df508313678494058551e3ad53d" +dependencies = [ + "arborium-javascript", + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + +[[package]] +name = "arborium-yaml" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "748abb1f9d238ae3f3be5b9adb9416edf68a2b331fe7c2b5ef566c72fc20391f" +dependencies = [ + "arborium-sysroot", + "cc", + "tree-sitter-language", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -1265,6 +1499,17 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" +[[package]] +name = "dlmalloc" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6738d2e996274e499bc7b0d693c858b7720b9cd2543a0643a3087e6cb0a4fa16" +dependencies = [ + "cfg-if", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "dyn-clone" version = "1.0.20" @@ -4861,6 +5106,7 @@ dependencies = [ name = "rustdoc" version = "0.0.0" dependencies = [ + "arborium", "arrayvec", "askama", "base64", @@ -5329,6 +5575,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "string_cache" version = "0.8.9" @@ -5851,6 +6103,12 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tree-sitter-language" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" + [[package]] name = "twox-hash" version = "1.6.3" diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index dcfc1ffc251ec..fec4baa3955ab 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -9,6 +9,23 @@ path = "lib.rs" [dependencies] # tidy-alphabetical-start +arborium = { version = "1.2.3", default-features = false, features = [ + "lang-bash", + "lang-c", + "lang-cpp", + "lang-css", + "lang-go", + "lang-html", + "lang-java", + "lang-javascript", + "lang-json", + "lang-python", + "lang-ruby", + "lang-sql", + "lang-toml", + "lang-typescript", + "lang-yaml", +] } arrayvec = { version = "0.7", default-features = false } askama = { version = "0.14", default-features = false, features = ["alloc", "config", "derive"] } base64 = "0.21.7" diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 6f6345cd86664..e6e4fff133e18 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -1475,5 +1475,31 @@ fn string_without_closing_tag( } } +/// Highlights non-Rust code using arborium (tree-sitter based). +/// Returns `None` if the language is not supported, in which case +/// the caller should fall back to plain escaped text. +pub(crate) fn highlight_foreign_code(lang: &str, code: &str) -> Option { + use std::cell::RefCell; + + thread_local! 
{ + static HIGHLIGHTER: RefCell = + RefCell::new(arborium::Highlighter::new()); + } + + // Map common language aliases to arborium grammar names + let lang = match lang { + "js" => "javascript", + "ts" => "typescript", + "py" => "python", + "rb" => "ruby", + "sh" | "shell" | "zsh" => "bash", + "yml" => "yaml", + "c++" | "cxx" => "cpp", + other => other, + }; + + HIGHLIGHTER.with_borrow_mut(|h| h.highlight_to_html(lang, code).ok()) +} + #[cfg(test)] mod tests; diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index a4d377432c914..2fe576c97a672 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -250,21 +250,27 @@ impl<'a, I: Iterator>> Iterator for CodeBlocks<'_, 'a, I> { LangString::parse_without_check(lang, self.check_error_codes); if !parse_result.rust { let added_classes = parse_result.added_classes; - let lang_string = if let Some(lang) = parse_result.unknown.first() { - format!("language-{lang}") - } else { - String::new() - }; + let lang = parse_result.unknown.first().map(|s| s.as_str()); + let lang_string = lang.map(|l| format!("language-{l}")).unwrap_or_default(); let whitespace = if added_classes.is_empty() { "" } else { " " }; + + // Try to highlight with arborium if we have a language + let code_html = lang + .and_then(|l| { + highlight::highlight_foreign_code(l, original_text.trim_suffix('\n')) + }) + .unwrap_or_else(|| { + Escape(original_text.trim_suffix('\n')).to_string() + }); + return Some(Event::Html( format!( "
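<div class=\"example-wrap\">\
                                  <pre class=\"{lang_string}{whitespace}{added_classes}\">\
-                                     <code>{text}</code>\
+                                     <code>{code_html}</code>\
                                  </pre>\
                                  </div>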
", added_classes = added_classes.join(" "), - text = Escape(original_text.trim_suffix('\n')), ) .into(), )); diff --git a/src/librustdoc/html/static/css/rustdoc.css b/src/librustdoc/html/static/css/rustdoc.css index 69a79f2736e77..8326f14589c26 100644 --- a/src/librustdoc/html/static/css/rustdoc.css +++ b/src/librustdoc/html/static/css/rustdoc.css @@ -1685,6 +1685,35 @@ pre.rust .doccomment { color: var(--code-highlight-doc-comment-color); } +/* Arborium (tree-sitter) syntax highlighting for non-Rust code blocks */ +a-k { color: var(--code-highlight-kw-color); } +a-f { color: var(--code-highlight-kw-2-color); } +a-s { color: var(--code-highlight-string-color); } +a-c { color: var(--code-highlight-comment-color); } +a-t { color: var(--code-highlight-prelude-color); } +a-v { color: var(--code-highlight-self-color); } +a-co { color: var(--code-highlight-literal-color); } +a-n { color: var(--code-highlight-number-color); } +a-o { color: var(--code-highlight-kw-2-color); } +a-p { /* punctuation: inherit */ } +a-pr { color: var(--code-highlight-prelude-val-color); } +a-at { color: var(--code-highlight-attribute-color); } +a-tg { color: var(--code-highlight-attribute-color); } +a-m { color: var(--code-highlight-macro-color); } +a-l { color: var(--code-highlight-lifetime-color); } +a-ns { color: var(--code-highlight-prelude-color); } +a-cr { color: var(--code-highlight-prelude-val-color); } +a-tt { color: var(--code-highlight-doc-comment-color); font-weight: bold; } +a-st { font-weight: bold; } +a-em { font-style: italic; } +a-tu { color: var(--code-highlight-string-color); text-decoration: underline; } +a-tl { color: var(--code-highlight-literal-color); } +a-tx { text-decoration: line-through; } +a-da { color: #83a300; } +a-dd { color: #ee6868; } +a-eb { color: var(--code-highlight-macro-color); } +a-er { color: #ee6868; text-decoration: wavy underline; } + .rustdoc.src .example-wrap pre.rust a:not([data-nosnippet]) { background: var(--codeblock-link-background); } From 
89da8706ba282e5c9c499ec8bf6c3c2385597ed2 Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Sat, 13 Dec 2025 09:56:19 +0100 Subject: [PATCH 2/5] rustdoc: Add --highlight-foreign-code unstable flag Gate the arborium-based syntax highlighting behind an unstable flag (-Z unstable-options --highlight-foreign-code) so it can be tested before becoming the default behavior. The flag is threaded through: - RenderOptions in config.rs - SharedContext in context.rs - Markdown/MarkdownWithToc structs in markdown.rs - CodeBlocks iterator for actual highlighting --- src/librustdoc/config.rs | 4 +++ src/librustdoc/externalfiles.rs | 2 ++ src/librustdoc/html/markdown.rs | 42 +++++++++++++++++++++------ src/librustdoc/html/markdown/tests.rs | 3 ++ src/librustdoc/html/render/context.rs | 4 +++ src/librustdoc/html/render/mod.rs | 3 ++ src/librustdoc/html/render/sidebar.rs | 1 + src/librustdoc/lib.rs | 8 +++++ src/librustdoc/markdown.rs | 2 ++ 9 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/librustdoc/config.rs b/src/librustdoc/config.rs index e5a4593260a42..6ba2b9ffae878 100644 --- a/src/librustdoc/config.rs +++ b/src/librustdoc/config.rs @@ -312,6 +312,8 @@ pub(crate) struct RenderOptions { pub(crate) disable_minification: bool, /// If `true`, HTML source pages will generate the possibility to expand macros. pub(crate) generate_macro_expansion: bool, + /// If `true`, non-Rust code blocks will be syntax-highlighted using tree-sitter. 
+ pub(crate) highlight_foreign_code: bool, } #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -805,6 +807,7 @@ impl Options { let no_capture = matches.opt_present("no-capture"); let generate_link_to_definition = matches.opt_present("generate-link-to-definition"); let generate_macro_expansion = matches.opt_present("generate-macro-expansion"); + let highlight_foreign_code = matches.opt_present("highlight-foreign-code"); let extern_html_root_takes_precedence = matches.opt_present("extern-html-root-takes-precedence"); let html_no_source = matches.opt_present("html-no-source"); @@ -916,6 +919,7 @@ impl Options { include_parts_dir, parts_out_dir, disable_minification, + highlight_foreign_code, }; Some((input, options, render_options, loaded_paths)) } diff --git a/src/librustdoc/externalfiles.rs b/src/librustdoc/externalfiles.rs index 42ade5b90048b..04a136c3e0cc7 100644 --- a/src/librustdoc/externalfiles.rs +++ b/src/librustdoc/externalfiles.rs @@ -47,6 +47,7 @@ impl ExternalHtml { edition, playground, heading_offset: HeadingOffset::H2, + highlight_foreign_code: false, } .write_into(&mut bc) .unwrap(); @@ -63,6 +64,7 @@ impl ExternalHtml { edition, playground, heading_offset: HeadingOffset::H2, + highlight_foreign_code: false, } .write_into(&mut ac) .unwrap(); diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 2fe576c97a672..0b66218d1aaec 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -20,6 +20,7 @@ //! edition: Edition::Edition2015, //! playground: &None, //! heading_offset: HeadingOffset::H2, +//! highlight_foreign_code: false, //! }; //! let mut html = String::new(); //! md.write_into(&mut html).unwrap(); @@ -99,6 +100,8 @@ pub struct Markdown<'a> { /// Offset at which we render headings. /// E.g. if `heading_offset: HeadingOffset::H2`, then `# something` renders an `

`. pub heading_offset: HeadingOffset, + /// Whether to syntax-highlight non-Rust code blocks using tree-sitter. + pub highlight_foreign_code: bool, } /// A struct like `Markdown` that renders the markdown with a table of contents. pub(crate) struct MarkdownWithToc<'a> { @@ -108,6 +111,7 @@ pub(crate) struct MarkdownWithToc<'a> { pub(crate) error_codes: ErrorCodes, pub(crate) edition: Edition, pub(crate) playground: &'a Option, + pub(crate) highlight_foreign_code: bool, } /// A tuple struct like `Markdown` that renders the markdown escaping HTML tags /// and includes no paragraph tags. @@ -210,6 +214,8 @@ struct CodeBlocks<'p, 'a, I: Iterator>> { // Information about the playground if a URL has been specified, containing an // optional crate name and the URL. playground: &'p Option, + // Whether to use tree-sitter highlighting for non-Rust code blocks. + highlight_foreign_code: bool, } impl<'p, 'a, I: Iterator>> CodeBlocks<'p, 'a, I> { @@ -218,8 +224,15 @@ impl<'p, 'a, I: Iterator>> CodeBlocks<'p, 'a, I> { error_codes: ErrorCodes, edition: Edition, playground: &'p Option, + highlight_foreign_code: bool, ) -> Self { - CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground } + CodeBlocks { + inner: iter, + check_error_codes: error_codes, + edition, + playground, + highlight_foreign_code, + } } } @@ -254,14 +267,17 @@ impl<'a, I: Iterator>> Iterator for CodeBlocks<'_, 'a, I> { let lang_string = lang.map(|l| format!("language-{l}")).unwrap_or_default(); let whitespace = if added_classes.is_empty() { "" } else { " " }; - // Try to highlight with arborium if we have a language - let code_html = lang - .and_then(|l| { + // Try to highlight with arborium if enabled and we have a language + let code_html = if self.highlight_foreign_code { + lang.and_then(|l| { highlight::highlight_foreign_code(l, original_text.trim_suffix('\n')) }) .unwrap_or_else(|| { Escape(original_text.trim_suffix('\n')).to_string() - }); + }) + } else { + 
Escape(original_text.trim_suffix('\n')).to_string() + }; return Some(Event::Html( format!( @@ -1354,6 +1370,7 @@ impl<'a> Markdown<'a> { edition, playground, heading_offset, + highlight_foreign_code, } = self; let replacer = move |broken_link: BrokenLink<'_>| { @@ -1371,7 +1388,7 @@ impl<'a> Markdown<'a> { let p = SpannedLinkReplacer::new(p, links); let p = footnotes::Footnotes::new(p, existing_footnotes); let p = TableWrapper::new(p.map(|(ev, _)| ev)); - CodeBlocks::new(p, codes, edition, playground) + CodeBlocks::new(p, codes, edition, playground, highlight_foreign_code) }) } @@ -1427,8 +1444,15 @@ impl<'a> Markdown<'a> { impl MarkdownWithToc<'_> { pub(crate) fn into_parts(self) -> (Toc, String) { - let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } = - self; + let MarkdownWithToc { + content: md, + links, + ids, + error_codes: codes, + edition, + playground, + highlight_foreign_code, + } = self; // This is actually common enough to special-case if md.is_empty() { @@ -1452,7 +1476,7 @@ impl MarkdownWithToc<'_> { let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1); let p = footnotes::Footnotes::new(p, existing_footnotes); let p = TableWrapper::new(p.map(|(ev, _)| ev)); - let p = CodeBlocks::new(p, codes, edition, playground); + let p = CodeBlocks::new(p, codes, edition, playground, highlight_foreign_code); html::push_html(&mut s, p); }); diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs index 61fd428746332..ad91d3477c349 100644 --- a/src/librustdoc/html/markdown/tests.rs +++ b/src/librustdoc/html/markdown/tests.rs @@ -306,6 +306,7 @@ fn test_header() { edition: DEFAULT_EDITION, playground: &None, heading_offset: HeadingOffset::H2, + highlight_foreign_code: false, } .write_into(&mut output) .unwrap(); @@ -359,6 +360,7 @@ fn test_header_ids_multiple_blocks() { edition: DEFAULT_EDITION, playground: &None, heading_offset: HeadingOffset::H2, + highlight_foreign_code: false, 
} .write_into(&mut output) .unwrap(); @@ -510,6 +512,7 @@ fn test_ascii_with_prepending_hashtag() { edition: DEFAULT_EDITION, playground: &None, heading_offset: HeadingOffset::H2, + highlight_foreign_code: false, } .write_into(&mut output) .unwrap(); diff --git a/src/librustdoc/html/render/context.rs b/src/librustdoc/html/render/context.rs index 3d4dff4a17d22..417d80a42e229 100644 --- a/src/librustdoc/html/render/context.rs +++ b/src/librustdoc/html/render/context.rs @@ -150,6 +150,8 @@ pub(crate) struct SharedContext<'tcx> { /// Controls whether we read / write to cci files in the doc root. Defaults read=true, /// write=true should_merge: ShouldMerge, + /// Whether to syntax-highlight non-Rust code blocks using tree-sitter. + pub(super) highlight_foreign_code: bool, } impl SharedContext<'_> { @@ -495,6 +497,7 @@ impl<'tcx> Context<'tcx> { call_locations, no_emit_shared, html_no_source, + highlight_foreign_code, .. } = options; @@ -580,6 +583,7 @@ impl<'tcx> Context<'tcx> { call_locations, should_merge: options.should_merge, expanded_codes, + highlight_foreign_code, }; let dst = output; diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 8740b5935973c..5c4a1f92e4dff 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -651,6 +651,7 @@ fn scrape_examples_help(shared: &SharedContext<'_>) -> String { edition: shared.edition(), playground: &shared.playground, heading_offset: HeadingOffset::H1, + highlight_foreign_code: shared.highlight_foreign_code, } .write_into(f)) ) @@ -693,6 +694,7 @@ fn render_markdown( edition: cx.shared.edition(), playground: &cx.shared.playground, heading_offset, + highlight_foreign_code: cx.shared.highlight_foreign_code, } .write_into(&mut *f)?; f.write_str("") @@ -2164,6 +2166,7 @@ fn render_impl( edition: cx.shared.edition(), playground: &cx.shared.playground, heading_offset: HeadingOffset::H4, + highlight_foreign_code: cx.shared.highlight_foreign_code, } 
.split_summary_and_content() }) diff --git a/src/librustdoc/html/render/sidebar.rs b/src/librustdoc/html/render/sidebar.rs index df9e8631bbdd6..021ef71c5f3f0 100644 --- a/src/librustdoc/html/render/sidebar.rs +++ b/src/librustdoc/html/render/sidebar.rs @@ -229,6 +229,7 @@ fn docblock_toc<'a>( error_codes: cx.shared.codes, edition: cx.shared.edition(), playground: &cx.shared.playground, + highlight_foreign_code: cx.shared.highlight_foreign_code, } .into_parts(); let links: Vec> = toc diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index c74c556b6210a..2785775ca393c 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -654,6 +654,14 @@ fn opts() -> Vec { "Add possibility to expand macros in the HTML source code pages", "", ), + opt( + Unstable, + Flag, + "", + "highlight-foreign-code", + "Syntax-highlight non-Rust code blocks using tree-sitter (arborium)", + "", + ), // deprecated / removed options opt( Stable, diff --git a/src/librustdoc/markdown.rs b/src/librustdoc/markdown.rs index 4ca2c104888bb..883cb5f64f04c 100644 --- a/src/librustdoc/markdown.rs +++ b/src/librustdoc/markdown.rs @@ -87,6 +87,7 @@ pub(crate) fn render_and_write>( error_codes, edition, playground: &playground, + highlight_foreign_code: options.highlight_foreign_code, } .write_into(f) } else { @@ -98,6 +99,7 @@ pub(crate) fn render_and_write>( edition, playground: &playground, heading_offset: HeadingOffset::H1, + highlight_foreign_code: options.highlight_foreign_code, } .write_into(f) } From bedc79f7413c4cbf3bd08c66c27360e90fb6c85c Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Sat, 13 Dec 2025 10:17:28 +0100 Subject: [PATCH 3/5] rustdoc: Add tests for foreign code highlighting Test cases: - Highlighting enabled: Python, JavaScript, JSON produce arborium tags - Language aliases work (py -> python, js -> javascript) - Highlighting disabled: no arborium tags produced - Unsupported languages fall back to plain escaped text --- src/librustdoc/html/markdown.rs | 9 +- 
src/librustdoc/html/markdown/tests.rs | 135 ++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 4 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 0b66218d1aaec..500867110da14 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -270,11 +270,12 @@ impl<'a, I: Iterator>> Iterator for CodeBlocks<'_, 'a, I> { // Try to highlight with arborium if enabled and we have a language let code_html = if self.highlight_foreign_code { lang.and_then(|l| { - highlight::highlight_foreign_code(l, original_text.trim_suffix('\n')) - }) - .unwrap_or_else(|| { - Escape(original_text.trim_suffix('\n')).to_string() + highlight::highlight_foreign_code( + l, + original_text.trim_suffix('\n'), + ) }) + .unwrap_or_else(|| Escape(original_text.trim_suffix('\n')).to_string()) } else { Escape(original_text.trim_suffix('\n')).to_string() }; diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs index ad91d3477c349..e12494bcfbe87 100644 --- a/src/librustdoc/html/markdown/tests.rs +++ b/src/librustdoc/html/markdown/tests.rs @@ -544,3 +544,138 @@ fn test_ascii_with_prepending_hashtag() { # hello", ); } + +#[test] +fn test_foreign_code_highlighting_enabled() { + fn t(input: &str, expected_contains: &str) { + let mut map = IdMap::new(); + let mut output = String::new(); + Markdown { + content: input, + links: &[], + ids: &mut map, + error_codes: ErrorCodes::Yes, + edition: DEFAULT_EDITION, + playground: &None, + heading_offset: HeadingOffset::H2, + highlight_foreign_code: true, + } + .write_into(&mut output) + .unwrap(); + assert!( + output.contains(expected_contains), + "expected output to contain {:?}, got: {}", + expected_contains, + output + ); + } + + // Python: keywords should be wrapped in + t( + r#"```python +def hello(): + pass +```"#, + "def", + ); + + // JavaScript: keywords and numbers + t( + r#"```javascript +let x = 42; +```"#, + "let", + ); + + // JSON: 
strings should be highlighted + t( + r#"```json +{"key": "value"} +```"#, + "", + ); + + // Language aliases should work + t( + r#"```py +def foo(): + pass +```"#, + "def", + ); + + t( + r#"```js +const x = 1; +```"#, + "const", + ); +} + +#[test] +fn test_foreign_code_highlighting_disabled() { + fn t(input: &str, should_not_contain: &str) { + let mut map = IdMap::new(); + let mut output = String::new(); + Markdown { + content: input, + links: &[], + ids: &mut map, + error_codes: ErrorCodes::Yes, + edition: DEFAULT_EDITION, + playground: &None, + heading_offset: HeadingOffset::H2, + highlight_foreign_code: false, + } + .write_into(&mut output) + .unwrap(); + assert!( + !output.contains(should_not_contain), + "expected output NOT to contain {:?}, got: {}", + should_not_contain, + output + ); + } + + // With highlighting disabled, no arborium tags should appear + t( + r#"```python +def hello(): + pass +```"#, + "", + ); + + t( + r#"```javascript +let x = 42; +```"#, + "", + ); +} + +#[test] +fn test_foreign_code_unsupported_language_fallback() { + // Unsupported languages should fall back to plain text even with highlighting enabled + let mut map = IdMap::new(); + let mut output = String::new(); + Markdown { + content: r#"```someunknownlang +hello world +```"#, + links: &[], + ids: &mut map, + error_codes: ErrorCodes::Yes, + edition: DEFAULT_EDITION, + playground: &None, + heading_offset: HeadingOffset::H2, + highlight_foreign_code: true, + } + .write_into(&mut output) + .unwrap(); + + // Should have the language class but no arborium tags + assert!(output.contains("language-someunknownlang")); + assert!(!output.contains(" Date: Sat, 13 Dec 2025 11:19:15 +0100 Subject: [PATCH 4/5] Update rustdoc output-default snapshot --- tests/run-make/rustdoc-default-output/output-default.stdout | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/run-make/rustdoc-default-output/output-default.stdout b/tests/run-make/rustdoc-default-output/output-default.stdout index 
4e28be347cbb1..d078427175e24 100644 --- a/tests/run-make/rustdoc-default-output/output-default.stdout +++ b/tests/run-make/rustdoc-default-output/output-default.stdout @@ -201,6 +201,9 @@ Options: --generate-macro-expansion Add possibility to expand macros in the HTML source code pages + --highlight-foreign-code + Syntax-highlight non-Rust code blocks using + tree-sitter (arborium) --plugin-path DIR removed, see issue #44136 for From 58e1fec8c07c28d52a191ad6cb1313ecbbde65ca Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Sun, 14 Dec 2025 01:37:05 +0100 Subject: [PATCH 5/5] Upgrade to arborium 2.0.0, enable all grammars --- Cargo.lock | 89 +++++++++++++++++--------------- src/librustdoc/Cargo.toml | 2 +- src/librustdoc/html/highlight.rs | 2 +- 3 files changed, 48 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3fdf24751f164..16d79964238ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -172,9 +172,9 @@ dependencies = [ [[package]] name = "arborium" -version = "1.2.3" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7ae3fd5b382cf9f09e1613da6214d8020c4868eb1358627eb3628bf87bb30fb" +checksum = "f00a72aec131b90c1c6495c565c4e4842fdbaf137ea4cacec110d353937ab99d" dependencies = [ "arborium-bash", "arborium-c", @@ -199,9 +199,9 @@ dependencies = [ [[package]] name = "arborium-bash" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4be4da1f6f020cbed670305d2942818323064f1e0667fc5a780cfe48638ee5a" +checksum = "34c6fd5230512b1191480100dd7876e972d1d6fd8e04fc62950a46b2a185405a" dependencies = [ "arborium-sysroot", "cc", @@ -210,9 +210,9 @@ dependencies = [ [[package]] name = "arborium-c" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4931371886b262191c6679945103b3679d358a70697e6c7d59cdf3cbe8b6e4e9" +checksum = "6eb49d9d4d314d39e3ad41f891f3c46a417e21127c572621d3bb2b8acb0f67d2" 
dependencies = [ "arborium-sysroot", "cc", @@ -221,9 +221,9 @@ dependencies = [ [[package]] name = "arborium-cpp" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a0e5539a2cc0e8ed13b20bd52fa7511e2843baac1a775a577013e6e645c644" +checksum = "b3813b0fd9e61425fe387333eb77ff5a3ea890f66d89e1262a66372ecad1274f" dependencies = [ "arborium-c", "arborium-sysroot", @@ -233,9 +233,9 @@ dependencies = [ [[package]] name = "arborium-css" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5f81a6fa983b8f12118ec33cda27da8e06e63fdeaeb1c71532590c399513102" +checksum = "5d95540ee6ea2c33f40b45d9c40283a5c396e0ceb8529c4f2151932e43858a3b" dependencies = [ "arborium-sysroot", "cc", @@ -244,9 +244,9 @@ dependencies = [ [[package]] name = "arborium-go" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12618f8df880cfbf268e2d35f392302ec8b26397addff3fb154db79e981ef391" +checksum = "1d7f8df9adca3da8c9e36889e0f52ab359dd36d168bc677e65fce5f43ca66b0d" dependencies = [ "arborium-sysroot", "cc", @@ -255,9 +255,9 @@ dependencies = [ [[package]] name = "arborium-highlight" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e185c18bb04a51252e2c60d0d19324c438a6d3a71468ffa63674a116685d45de" +checksum = "2f2df668f0c80bfa2e437f74d45a4922a0e9256c2476560200774be4b60686f3" dependencies = [ "arborium-theme", "arborium-tree-sitter", @@ -266,10 +266,12 @@ dependencies = [ [[package]] name = "arborium-html" -version = "1.2.3" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17704d9e8ebc4a338e810cb1cc2aaedd770041773762902a397f17ac245f371e" +checksum = "a8d898910e534cddb0dca62ff3789b94637979b16d5354f153235438f58d29d6" dependencies = [ + "arborium-css", + "arborium-javascript", "arborium-sysroot", "cc", 
"tree-sitter-language", @@ -277,9 +279,9 @@ dependencies = [ [[package]] name = "arborium-java" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ca33a5c1bbe5b3e38f47cf82461b04bdfad8b4fe0362824805f7447ea96a0f8" +checksum = "b91053783a4c3806cdcf92a7009bfe765ebcf0a13cd49ee361751a68f1f2c10f" dependencies = [ "arborium-sysroot", "cc", @@ -288,9 +290,9 @@ dependencies = [ [[package]] name = "arborium-javascript" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2069654043459d341aded42e5061242ab1298806a1603617ad5286d4cf2af73" +checksum = "7220c71b1056508a028a4acfa4a10d8ca1713420ac3a36853dfd4a55bb335a4b" dependencies = [ "arborium-sysroot", "cc", @@ -299,9 +301,9 @@ dependencies = [ [[package]] name = "arborium-json" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85dacbcb3dc01dd26ba6004307873d4d3a2376adc218793d28e55f14758a5e9" +checksum = "775321ffff8b71819c6dfead9e62bcd1efc1616306e59f2e1317ba7bef282e1f" dependencies = [ "arborium-sysroot", "cc", @@ -310,9 +312,9 @@ dependencies = [ [[package]] name = "arborium-python" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "411a3f3a95b92f41b41c07b8b62cd0abbe42f5dcf7178223928b9f2e793b1f65" +checksum = "45c5db8f67870cc64b67933a644fb296b3c367e9bfd85aa222a1ff6d49883e25" dependencies = [ "arborium-sysroot", "cc", @@ -321,9 +323,9 @@ dependencies = [ [[package]] name = "arborium-ruby" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c9a92f48efc376408177358507e22406ad8338cb371d79b696fba94dd9ab20" +checksum = "7391c3561e43b892241ef216aafa974e1c3a59cd4e29d9cb9a7febc41645c0a8" dependencies = [ "arborium-sysroot", "cc", @@ -332,9 +334,9 @@ dependencies = [ [[package]] name = "arborium-sql" -version = 
"1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d479480c7beac1e789be25235f1dea72e27ddb11919789befb84c51bb1139ba5" +checksum = "dc2c5bd1ed383d8ed8f0269d0cfddb475af9c98580d43f76cc4bb85a6f052633" dependencies = [ "arborium-sysroot", "cc", @@ -343,24 +345,25 @@ dependencies = [ [[package]] name = "arborium-sysroot" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5942714de3eb1c3d0c5607e30284e314ddea3698a76a0cf3e7686274ac57802f" +checksum = "6d25c6fe8f35b7803048ca9f0846432011510d5196eb1089cf3a4bb37c35d094" +dependencies = [ + "cc", + "dlmalloc", +] [[package]] name = "arborium-theme" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac1ce1ffd21913c61768880c08567e3fbe3a966e7501ce1751f1238316b6fb45" -dependencies = [ - "toml 0.8.23", -] +checksum = "9da38b2109b8af45b7e0bce0c96f7db1c17831a62a23ae586c5705efac635758" [[package]] name = "arborium-toml" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8671c543a4c77cf32cd39e446c7d25e8cfd74cf6032873a25b35b76b663c9fe5" +checksum = "b391b5bf276950b457d0b05efc0089c74b94e2f0939f8a63d98b4b84da5ebf12" dependencies = [ "arborium-sysroot", "cc", @@ -369,9 +372,9 @@ dependencies = [ [[package]] name = "arborium-tree-sitter" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e056bac05e8c35fdc66754d7028e32f14bbf190e30ff6b2e5a67162749ccfe26" +checksum = "936e30ab0ba24131c37823d6d087b4ab6b7d59c6dd26fd1f1470e50582dc07ba" dependencies = [ "arborium-sysroot", "cc", @@ -383,9 +386,9 @@ dependencies = [ [[package]] name = "arborium-typescript" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243273d737c8c0d26b5e7ed2a09c3eb772b95df508313678494058551e3ad53d" 
+checksum = "9cd96fae6737d469b2042e2f7c37e37e6276465ba1c15162182e4106c189fc41" dependencies = [ "arborium-javascript", "arborium-sysroot", @@ -395,9 +398,9 @@ dependencies = [ [[package]] name = "arborium-yaml" -version = "1.2.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "748abb1f9d238ae3f3be5b9adb9416edf68a2b331fe7c2b5ef566c72fc20391f" +checksum = "ef47437d2fe489ba8aa718dd1be3152b80b988b976183e84cfb1865a17cae1e8" dependencies = [ "arborium-sysroot", "cc", diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index fec4baa3955ab..069bd5a8b3610 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -9,7 +9,7 @@ path = "lib.rs" [dependencies] # tidy-alphabetical-start -arborium = { version = "1.2.3", default-features = false, features = [ +arborium = { version = "2.0.0", default-features = false, features = [ "lang-bash", "lang-c", "lang-cpp", diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index e6e4fff133e18..1a96b37ca4ea0 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -1498,7 +1498,7 @@ pub(crate) fn highlight_foreign_code(lang: &str, code: &str) -> Option { other => other, }; - HIGHLIGHTER.with_borrow_mut(|h| h.highlight_to_html(lang, code).ok()) + HIGHLIGHTER.with_borrow_mut(|h| h.highlight(lang, code).ok()) } #[cfg(test)]