[librustdoc] Reform lang string token splitting

Only split doctest lang strings on `,`, ` `, and `\t`. Additionally, to preserve backwards compatibility with pandoc-style langstrings, strip a surrounding `{}`, and remove leading `.`s from each token. Prior to this change, doctest lang strings were split on all non-alphanumeric characters except `-` or `_`, which limited future extensions to doctest lang string tokens, for example using `=` for key-value tokens. This is a breaking change, although it is not expected to be disruptive, because lang strings using separators other than `,` and ` ` are not very common
rust-lang · Feb 12, 2021 · 66f4883 · 66f4883
1 parent e9920ef
commit 66f4883
Show file tree

Hide file tree

Showing 6 changed files with 84 additions and 20 deletions.
diff --git a/compiler/rustc_error_codes/src/error_codes/E0761.md b/compiler/rustc_error_codes/src/error_codes/E0761.md
@@ -2,7 +2,7 @@ Multiple candidate files were found for an out-of-line module.
 
 Erroneous code example:
 
-```ignore (multiple source files required for compile_fail)
+```ignore (Multiple source files are required for compile_fail.)
 // file: ambiguous_module/mod.rs
 
 fn foo() {}

diff --git a/compiler/rustc_mir/src/dataflow/framework/mod.rs b/compiler/rustc_mir/src/dataflow/framework/mod.rs
@@ -10,7 +10,7 @@
 //! fixpoint solution to your dataflow problem, or implement the `ResultsVisitor` interface and use
 //! `visit_results`. The following example uses the `ResultsCursor` approach.
 //!
-//! ```ignore(cross-crate-imports)
+//! ```ignore (cross-crate-imports)
 //! use rustc_mir::dataflow::Analysis; // Makes `into_engine` available.
 //!
 //! fn do_my_analysis(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) {
@@ -211,7 +211,7 @@ pub trait Analysis<'tcx>: AnalysisDomain<'tcx> {
     /// default impl and the one for all `A: GenKillAnalysis` will do the right thing.
     /// Its purpose is to enable method chaining like so:
     ///
-    /// ```ignore(cross-crate-imports)
+    /// ```ignore (cross-crate-imports)
     /// let results = MyAnalysis::new(tcx, body)
     ///     .into_engine(tcx, body, def_id)
     ///     .iterate_to_fixpoint()

diff --git a/library/core/src/option.rs b/library/core/src/option.rs
@@ -336,7 +336,7 @@ impl<T> Option<T> {
     /// assert_eq!(x.expect("fruits are healthy"), "value");
     /// ```
     ///
-    /// ```{.should_panic}
+    /// ```should_panic
     /// let x: Option<&str> = None;
     /// x.expect("fruits are healthy"); // panics with `fruits are healthy`
     /// ```
@@ -372,7 +372,7 @@ impl<T> Option<T> {
     /// assert_eq!(x.unwrap(), "air");
     /// ```
     ///
-    /// ```{.should_panic}
+    /// ```should_panic
     /// let x: Option<&str> = None;
     /// assert_eq!(x.unwrap(), "air"); // fails
     /// ```
@@ -1114,7 +1114,7 @@ impl<T: fmt::Debug> Option<T> {
     /// }
     /// ```
     ///
-    /// ```{.should_panic}
+    /// ```should_panic
     /// #![feature(option_expect_none)]
     ///
     /// use std::collections::HashMap;
@@ -1156,7 +1156,7 @@ impl<T: fmt::Debug> Option<T> {
     /// }
     /// ```
     ///
-    /// ```{.should_panic}
+    /// ```should_panic
     /// #![feature(option_unwrap_none)]
     ///
     /// use std::collections::HashMap;

diff --git a/library/core/src/result.rs b/library/core/src/result.rs
@@ -112,7 +112,7 @@
 //! assert success with [`expect`]. This will panic if the
 //! write fails, providing a marginally useful message indicating why:
 //!
-//! ```{.no_run}
+//! ```no_run
 //! use std::fs::File;
 //! use std::io::prelude::*;
 //!
@@ -122,7 +122,7 @@
 //!
 //! You might also simply assert success:
 //!
-//! ```{.no_run}
+//! ```no_run
 //! # use std::fs::File;
 //! # use std::io::prelude::*;
 //! # let mut file = File::create("valuable_data.txt").unwrap();
@@ -984,7 +984,7 @@ impl<T, E: fmt::Debug> Result<T, E> {
     ///
     /// Basic usage:
     ///
-    /// ```{.should_panic}
+    /// ```should_panic
     /// let x: Result<u32, &str> = Err("emergency failure");
     /// x.expect("Testing expect"); // panics with `Testing expect: emergency failure`
     /// ```
@@ -1024,7 +1024,7 @@ impl<T, E: fmt::Debug> Result<T, E> {
     /// assert_eq!(x.unwrap(), 2);
     /// ```
     ///
-    /// ```{.should_panic}
+    /// ```should_panic
     /// let x: Result<u32, &str> = Err("emergency failure");
     /// x.unwrap(); // panics with `emergency failure`
     /// ```
@@ -1052,7 +1052,7 @@ impl<T: fmt::Debug, E> Result<T, E> {
     ///
     /// Basic usage:
     ///
-    /// ```{.should_panic}
+    /// ```should_panic
     /// let x: Result<u32, &str> = Ok(10);
     /// x.expect_err("Testing expect_err"); // panics with `Testing expect_err: 10`
     /// ```
@@ -1075,7 +1075,7 @@ impl<T: fmt::Debug, E> Result<T, E> {
     ///
     /// # Examples
     ///
-    /// ```{.should_panic}
+    /// ```should_panic
     /// let x: Result<u32, &str> = Ok(2);
     /// x.unwrap_err(); // panics with `2`
     /// ```

diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
@@ -779,6 +779,31 @@ impl LangString {
         Self::parse(string, allow_error_code_check, enable_per_target_ignores, None)
     }
 
+    fn tokens(string: &str) -> impl Iterator<Item = &str> {
+        // Pandoc, which Rust once used for generating documentation,
+        // expects lang strings to be surrounded by `{}` and for each token
+        // to be proceeded by a `.`. Since some of these lang strings are still
+        // loose in the wild, we strip a pair of surrounding `{}` from the lang
+        // string and a leading `.` from each token.
+
+        let string = string.trim();
+
+        let first = string.chars().next();
+        let last = string.chars().last();
+
+        let string = if first == Some('{') && last == Some('}') {
+            &string[1..string.len() - 1]
+        } else {
+            string
+        };
+
+        string
+            .split(|c| c == ',' || c == ' ' || c == '\t')
+            .map(str::trim)
+            .map(|token| if token.chars().next() == Some('.') { &token[1..] } else { token })
+            .filter(|token| !token.is_empty())
+    }
+
     fn parse(
         string: &str,
         allow_error_code_check: ErrorCodes,
@@ -792,11 +817,11 @@ impl LangString {
         let mut ignores = vec![];
 
         data.original = string.to_owned();
-        let tokens = string.split(|c: char| !(c == '_' || c == '-' || c.is_alphanumeric()));
+
+        let tokens = Self::tokens(string).collect::<Vec<&str>>();
 
         for token in tokens {
-            match token.trim() {
-                "" => {}
+            match token {
                 "should_panic" => {
                     data.should_panic = true;
                     seen_rust_tags = !seen_other_tags;
@@ -893,6 +918,7 @@ impl LangString {
                 _ => seen_other_tags = true,
             }
         }
+
         // ignore-foo overrides ignore
         if !ignores.is_empty() {
             data.ignore = Ignore::Some(ignores);

diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs
@@ -58,6 +58,9 @@ fn test_lang_string_parse() {
 
     t(Default::default());
     t(LangString { original: "rust".into(), ..Default::default() });
+    t(LangString { original: ".rust".into(), ..Default::default() });
+    t(LangString { original: "{rust}".into(), ..Default::default() });
+    t(LangString { original: "{.rust}".into(), ..Default::default() });
     t(LangString { original: "sh".into(), rust: false, ..Default::default() });
     t(LangString { original: "ignore".into(), ignore: Ignore::All, ..Default::default() });
     t(LangString {
@@ -75,16 +78,16 @@ fn test_lang_string_parse() {
         ..Default::default()
     });
     t(LangString { original: "allow_fail".into(), allow_fail: true, ..Default::default() });
-    t(LangString { original: "{.no_run .example}".into(), no_run: true, ..Default::default() });
+    t(LangString { original: "no_run,example".into(), no_run: true, ..Default::default() });
     t(LangString {
-        original: "{.sh .should_panic}".into(),
+        original: "sh,should_panic".into(),
         should_panic: true,
         rust: false,
         ..Default::default()
     });
-    t(LangString { original: "{.example .rust}".into(), ..Default::default() });
+    t(LangString { original: "example,rust".into(), ..Default::default() });
     t(LangString {
-        original: "{.test_harness .rust}".into(),
+        original: "test_harness,.rust".into(),
         test_harness: true,
         ..Default::default()
     });
@@ -100,6 +103,18 @@ fn test_lang_string_parse() {
         rust: false,
         ..Default::default()
     });
+    t(LangString {
+        original: "text,no_run, ".into(),
+        no_run: true,
+        rust: false,
+        ..Default::default()
+    });
+    t(LangString {
+        original: "text,no_run,".into(),
+        no_run: true,
+        rust: false,
+        ..Default::default()
+    });
     t(LangString {
         original: "edition2015".into(),
         edition: Some(Edition::Edition2015),
@@ -112,6 +127,29 @@ fn test_lang_string_parse() {
     });
 }
 
+#[test]
+fn test_lang_string_tokenizer() {
+    fn case(lang_string: &str, want: &[&str]) {
+        let have = LangString::tokens(lang_string).collect::<Vec<&str>>();
+        assert_eq!(have, want, "Unexpected lang string split for `{}`", lang_string);
+    }
+
+    case("", &[]);
+    case("foo", &["foo"]);
+    case("foo,bar", &["foo", "bar"]);
+    case(".foo,.bar", &["foo", "bar"]);
+    case("{.foo,.bar}", &["foo", "bar"]);
+    case("  {.foo,.bar}  ", &["foo", "bar"]);
+    case("foo bar", &["foo", "bar"]);
+    case("foo\tbar", &["foo", "bar"]);
+    case("foo\t, bar", &["foo", "bar"]);
+    case(" foo , bar ", &["foo", "bar"]);
+    case(",,foo,,bar,,", &["foo", "bar"]);
+    case("foo=bar", &["foo=bar"]);
+    case("a-b-c", &["a-b-c"]);
+    case("a_b_c", &["a_b_c"]);
+}
+
 #[test]
 fn test_header() {
     fn t(input: &str, expect: &str) {