From ca3e640f991081834db613ae4154daed4d734e81 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Fri, 24 Oct 2025 12:03:45 +0200
Subject: [PATCH 01/19] add tool to check sembr

---
 src/doc/rustc-dev-guide/.gitignore           |   1 +
 src/doc/rustc-dev-guide/ci/sembr/Cargo.lock  | 466 +++++++++++++++++++
 src/doc/rustc-dev-guide/ci/sembr/Cargo.toml  |  16 +
 src/doc/rustc-dev-guide/ci/sembr/src/main.rs | 254 ++++++++++
 4 files changed, 737 insertions(+)
 create mode 100644 src/doc/rustc-dev-guide/ci/sembr/Cargo.lock
 create mode 100644 src/doc/rustc-dev-guide/ci/sembr/Cargo.toml
 create mode 100644 src/doc/rustc-dev-guide/ci/sembr/src/main.rs

diff --git a/src/doc/rustc-dev-guide/.gitignore b/src/doc/rustc-dev-guide/.gitignore
index f03fcae753f41..f2e57fc68ffbf 100644
--- a/src/doc/rustc-dev-guide/.gitignore
+++ b/src/doc/rustc-dev-guide/.gitignore
@@ -1,6 +1,7 @@
 book
 
 ci/date-check/target/
+ci/sembr/target/
 
 # Generated by check-in.sh
 pulls.json
diff --git a/src/doc/rustc-dev-guide/ci/sembr/Cargo.lock b/src/doc/rustc-dev-guide/ci/sembr/Cargo.lock
new file mode 100644
index 0000000000000..1e690cc7f1e7c
--- /dev/null
+++ b/src/doc/rustc-dev-guide/ci/sembr/Cargo.lock
@@ -0,0 +1,466 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anstream"
+version = "0.6.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2"
+dependencies = [
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a"
+dependencies = [
+ "anstyle",
+ "once_cell_polyfill",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
+
+[[package]]
+name = "bstr"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
+[[package]]
+name = "clap"
+version = "4.5.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
+ "strsim",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.5.49"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "foldhash"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+
+[[package]]
+name = "globset"
+version = "0.4.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3"
+dependencies = [
+ "aho-corasick",
+ "bstr",
+ "log",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.15.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "foldhash",
+]
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "ignore"
+version = "0.4.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81776e6f9464432afcc28d03e52eb101c93b6f0566f52aef2427663e700f0403"
+dependencies = [
+ "crossbeam-deque",
+ "globset",
+ "log",
+ "memchr",
+ "regex-automata",
+ "same-file",
+ "walkdir",
+ "winapi-util",
+]
+
+[[package]]
+name = "imara-diff"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c"
+dependencies = [
+ "hashbrown",
+ "memchr",
+]
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+
+[[package]]
+name = "log"
+version = "0.4.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
+
+[[package]]
+name = "memchr"
+version = "2.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
+
+[[package]]
+name = "once_cell_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.103"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.41"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "sembr"
+version = "0.0.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "ignore",
+ "imara-diff",
+ "regex",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "strsim"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+
+[[package]]
+name = "syn"
+version = "2.0.108"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
+
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
+dependencies = [
+ "windows-link",
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
diff --git a/src/doc/rustc-dev-guide/ci/sembr/Cargo.toml b/src/doc/rustc-dev-guide/ci/sembr/Cargo.toml
new file mode 100644
index 0000000000000..00818e2b3c2df
--- /dev/null
+++ b/src/doc/rustc-dev-guide/ci/sembr/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "sembr"
+edition = "2024"
+
+[dependencies]
+anyhow = "1"
+ignore = "0.4"
+imara-diff = "0.2"
+
+[dependencies.regex]
+version = "1"
+features = ["pattern"]
+
+[dependencies.clap]
+version = "4"
+features = ["derive"]
diff --git a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
new file mode 100644
index 0000000000000..6b3753d8b82fd
--- /dev/null
+++ b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
@@ -0,0 +1,254 @@
+use std::path::PathBuf;
+use std::sync::LazyLock;
+use std::{fs, process};
+
+use anyhow::Result;
+use clap::Parser;
+use ignore::Walk;
+use imara_diff::{Algorithm, BasicLineDiffPrinter, Diff, InternedInput, UnifiedDiffConfig};
+use regex::Regex;
+
+#[derive(Parser)]
+struct Cli {
+    root_dir: PathBuf,
+    #[arg(long)]
+    overwrite: bool,
+    #[arg(long, default_value_t = 100)]
+    line_length_limit: usize,
+    #[arg(long)]
+    show_diff: bool,
+}
+
+static REGEX_IGNORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\d\.|\-|\*)\s+").unwrap());
+static REGEX_IGNORE_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)$").unwrap());
+static REGEX_SPLIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)\s+").unwrap());
+
+fn main() -> Result<()> {
+    let cli = Cli::parse();
+    let mut compliant = Vec::new();
+    let mut not_compliant = Vec::new();
+    let mut made_compliant = Vec::new();
+    for result in Walk::new(cli.root_dir) {
+        let entry = result?;
+        if entry.file_type().expect("no stdin").is_dir() {
+            continue;
+        }
+        let path = entry.into_path();
+        if let Some(extension) = path.extension() {
+            if extension != "md" {
+                continue;
+            }
+            let old = fs::read_to_string(&path)?;
+            let new = lengthen_lines(&comply(&old), cli.line_length_limit);
+            if new == old {
+                compliant.push(path.clone());
+            } else if cli.overwrite {
+                fs::write(&path, new)?;
+                made_compliant.push(path.clone());
+            } else if cli.show_diff {
+                println!("{}:", path.display());
+                show_diff(&old, &new);
+                println!("---");
+            } else {
+                not_compliant.push(path.clone());
+            }
+        }
+    }
+    if !compliant.is_empty() {
+        display("compliant", &compliant);
+    }
+    if !made_compliant.is_empty() {
+        display("made compliant", &made_compliant);
+    }
+    if !not_compliant.is_empty() {
+        display("not compliant", &not_compliant);
+        process::exit(1);
+    }
+    Ok(())
+}
+
+fn show_diff(old: &str, new: &str) {
+    let input = InternedInput::new(old, new);
+    let mut diff = Diff::compute(Algorithm::Histogram, &input);
+    diff.postprocess_lines(&input);
+    let diff = diff
+        .unified_diff(&BasicLineDiffPrinter(&input.interner), UnifiedDiffConfig::default(), &input)
+        .to_string();
+    print!("{diff}");
+}
+
+fn display(header: &str, paths: &[PathBuf]) {
+    println!("{header}:");
+    for element in paths {
+        println!("- {}", element.display());
+    }
+}
+
+fn ignore(line: &str, in_code_block: bool) -> bool {
+    in_code_block
+        || line.contains("e.g.")
+        || line.contains("i.e.")
+        || line.contains('|')
+        || line.trim_start().starts_with('>')
+        || line.starts_with('#')
+        || line.trim().is_empty()
+        || REGEX_IGNORE.is_match(line)
+}
+
+fn comply(content: &str) -> String {
+    let content: Vec<_> = content.lines().map(std::borrow::ToOwned::to_owned).collect();
+    let mut new_content = content.clone();
+    let mut new_n = 0;
+    let mut in_code_block = false;
+    for (n, line) in content.into_iter().enumerate() {
+        if n != 0 {
+            new_n += 1;
+        }
+        if line.trim_start().starts_with("```") {
+            in_code_block = !in_code_block;
+            continue;
+        }
+        if ignore(&line, in_code_block) {
+            continue;
+        }
+        if REGEX_SPLIT.is_match(&line) {
+            let indent = line.find(|ch: char| !ch.is_whitespace()).unwrap();
+            let new_lines: Vec<_> = line
+                .split_inclusive(&*REGEX_SPLIT)
+                .map(|portion| format!("{:indent$}{}", "", portion.trim()))
+                .collect();
+            new_content.splice(new_n..=new_n, new_lines.clone());
+            new_n += new_lines.len() - 1;
+        }
+    }
+    new_content.join("\n") + "\n"
+}
+
+fn lengthen_lines(content: &str, limit: usize) -> String {
+    let content: Vec<_> = content.lines().map(std::borrow::ToOwned::to_owned).collect();
+    let mut new_content = content.clone();
+    let mut new_n = 0;
+    let mut in_code_block = false;
+    let mut skip_next = false;
+    for (n, line) in content.iter().enumerate() {
+        if skip_next {
+            skip_next = false;
+            continue;
+        }
+        if n != 0 {
+            new_n += 1;
+        }
+        if line.trim_start().starts_with("```") {
+            in_code_block = !in_code_block;
+            continue;
+        }
+        if ignore(line, in_code_block) || REGEX_SPLIT.is_match(line) {
+            continue;
+        }
+        let Some(next_line) = content.get(n + 1) else {
+            continue;
+        };
+        if ignore(next_line, in_code_block) || REGEX_IGNORE_END.is_match(line) {
+            continue;
+        }
+        if line.len() + next_line.len() < limit {
+            new_content[new_n] = format!("{line} {}", next_line.trim_start());
+            new_content.remove(new_n + 1);
+            skip_next = true;
+        }
+    }
+    new_content.join("\n") + "\n"
+}
+
+#[test]
+fn test_sembr() {
+    let original = "\
+# some. heading
+must! be; split?  and.   normalizes space
+1. ignore numbered
+ignore | tables
+ignore e.g. and i.e.
+- ignore. list
+* ignore. list
+```
+some code. block
+```
+some more text.
+";
+    let expected = "\
+# some. heading
+must!
+be;
+split?
+and.
+normalizes space
+1. ignore numbered
+ignore | tables
+ignore e.g. and i.e.
+- ignore. list
+* ignore. list
+```
+some code. block
+```
+some more text.
+";
+    assert_eq!(expected, comply(original));
+}
+
+#[test]
+fn test_prettify() {
+    let original = "\
+do not split
+short sentences
+";
+    let expected = "\
+do not split short sentences
+";
+    assert_eq!(expected, lengthen_lines(original, 50));
+}
+
+#[test]
+fn test_prettify_prefix_spaces() {
+    let original = "\
+ do not split
+ short sentences
+";
+    let expected = "\
+ do not split short sentences
+";
+    assert_eq!(expected, lengthen_lines(original, 50));
+}
+
+#[test]
+fn test_sembr_then_prettify() {
+    let original = "\
+hi there. do
+not split
+short sentences.
+hi again.
+";
+    let expected = "\
+hi there.
+do
+not split
+short sentences.
+hi again.
+";
+    let processed = comply(original);
+    assert_eq!(expected, processed);
+    let expected = "\
+hi there.
+do not split
+short sentences.
+hi again.
+";
+    let processed = lengthen_lines(&processed, 50);
+    assert_eq!(expected, processed);
+    let expected = "\
+hi there.
+do not split short sentences.
+hi again.
+";
+    let processed = lengthen_lines(&processed, 50);
+    assert_eq!(expected, processed);
+}

From 7dd4509c11e659c7986fe4fc66f7418dcc28ccac Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Mon, 27 Oct 2025 16:17:32 +0200
Subject: [PATCH 02/19] sample output

---
 src/doc/rustc-dev-guide/src/walkthrough.md | 207 +++++++++++----------
 1 file changed, 111 insertions(+), 96 deletions(-)

diff --git a/src/doc/rustc-dev-guide/src/walkthrough.md b/src/doc/rustc-dev-guide/src/walkthrough.md
index b4c3379347ed2..57d59d4d166a6 100644
--- a/src/doc/rustc-dev-guide/src/walkthrough.md
+++ b/src/doc/rustc-dev-guide/src/walkthrough.md
@@ -1,10 +1,10 @@
 # Walkthrough: a typical contribution
 
 There are _a lot_ of ways to contribute to the Rust compiler, including fixing
-bugs, improving performance, helping design features, providing feedback on
-existing features, etc. This chapter does not claim to scratch the surface.
-Instead, it walks through the design and implementation of a new feature. Not
-all of the steps and processes described here are needed for every
+bugs, improving performance, helping design features, providing feedback on existing features, etc.
+This chapter does not claim to scratch the surface.
+Instead, it walks through the design and implementation of a new feature.
+Not all of the steps and processes described here are needed for every
 contribution, and I will try to point those out as they arise.
 
 In general, if you are interested in making a contribution and aren't sure
@@ -12,8 +12,8 @@ where to start, please feel free to ask!
 
 ## Overview
 
-The feature I will discuss in this chapter is the `?` Kleene operator for
-macros. Basically, we want to be able to write something like this:
+The feature I will discuss in this chapter is the `?` Kleene operator for macros.
+Basically, we want to be able to write something like this:
 
 ```rust,ignore
 macro_rules! foo {
@@ -36,25 +36,30 @@ fn main() {
 So basically, the `$(pat)?` matcher in the macro means "this pattern can occur
 0 or 1 times", similar to other regex syntaxes.
 
-There were a number of steps to go from an idea to stable Rust feature. Here is
-a quick list.  We will go through each of these in order below. As I mentioned
-before, not all of these are needed for every type of contribution.
+There were a number of steps to go from an idea to stable Rust feature.
+Here is a quick list.
+We will go through each of these in order below.
+As I mentioned before, not all of these are needed for every type of contribution.
 
 - **Idea discussion/Pre-RFC**  A Pre-RFC is an early draft or design discussion
-  of a feature. This stage is intended to flesh out the design space a bit and
-  get a grasp on the different merits and problems with an idea. It's a great
-  way to get early feedback on your idea before presenting it to the wider
-  audience. You can find the original discussion [here][prerfc].
+  of a feature.
+  This stage is intended to flesh out the design space a bit and
+  get a grasp on the different merits and problems with an idea.
+  It's a great way to get early feedback on your idea before presenting it to the wider
+  audience.
+  You can find the original discussion [here][prerfc].
 - **RFC**  This is when you formally present your idea to the community for
-  consideration. You can find the RFC [here][rfc].
+  consideration.
+  You can find the RFC [here][rfc].
 - **Implementation** Implement your idea unstably in the compiler. You can
   find the original implementation [here][impl1].
 - **Possibly iterate/refine** As the community gets experience with your
   feature on the nightly compiler and in `std`, there may be additional
-  feedback about design choice that might be adjusted. This particular feature
-  went [through][impl2] a [number][impl3] of [iterations][impl4].
+  feedback about design choice that might be adjusted.
+  This particular feature went [through][impl2] a [number][impl3] of [iterations][impl4].
 - **Stabilization** When your feature has baked enough, a Rust team member may
-  [propose to stabilize it][merge]. If there is consensus, this is done.
+  [propose to stabilize it][merge].
+  If there is consensus, this is done.
 - **Relax** Your feature is now a stable Rust feature!
 
 [prerfc]: https://internals.rust-lang.org/t/pre-rfc-at-most-one-repetition-macro-patterns/6557
@@ -75,58 +80,63 @@ before, not all of these are needed for every type of contribution.
 
 [rfcwhen]: https://github.com/rust-lang/rfcs#when-you-need-to-follow-this-process
 
-An RFC is a document that describes the feature or change you are proposing in
-detail. Anyone can write an RFC; the process is the same for everyone,
-including Rust team members.
+An RFC is a document that describes the feature or change you are proposing in detail.
+Anyone can write an RFC;
+the process is the same for everyone, including Rust team members.
 
-To open an RFC, open a PR on the
-[rust-lang/rfcs](https://github.com/rust-lang/rfcs) repo on GitHub. You can
-find detailed instructions in the
+To open an RFC, open a PR on the [rust-lang/rfcs](https://github.com/rust-lang/rfcs) repo on GitHub.
+You can find detailed instructions in the
 [README](https://github.com/rust-lang/rfcs#what-the-process-is).
 
 Before opening an RFC, you should do the research to "flesh out" your idea.
-Hastily-proposed RFCs tend not to be accepted. You should generally have a good
-description of the motivation, impact, disadvantages, and potential
+Hastily-proposed RFCs tend not to be accepted.
+You should generally have a good description of the motivation, impact, disadvantages, and potential
 interactions with other features.
 
-If that sounds like a lot of work, it's because it is. But no fear! Even if
-you're not a compiler hacker, you can get great feedback by doing a _pre-RFC_.
-This is an _informal_ discussion of the idea. The best place to do this is
-internals.rust-lang.org. Your post doesn't have to follow any particular
-structure.  It doesn't even need to be a cohesive idea. Generally, you will get
-tons of feedback that you can integrate back to produce a good RFC.
+If that sounds like a lot of work, it's because it is.
+But no fear!
+Even if you're not a compiler hacker, you can get great feedback by doing a _pre-RFC_.
+This is an _informal_ discussion of the idea.
+The best place to do this is internals.rust-lang.org.
+Your post doesn't have to follow any particular structure.
+It doesn't even need to be a cohesive idea.
+Generally, you will get tons of feedback that you can integrate back to produce a good RFC.
 
-(Another pro-tip: try searching the RFCs repo and internals for prior related
-ideas. A lot of times an idea has already been considered and was either
-rejected or postponed to be tried again later. This can save you and everybody
-else some time)
+(Another pro-tip: try searching the RFCs repo and internals for prior related ideas.
+A lot of times an idea has already been considered and was either
+rejected or postponed to be tried again later.
+This can save you and everybody else some time)
 
 In the case of our example, a participant in the pre-RFC thread pointed out a
-syntax ambiguity and a potential resolution. Also, the overall feedback seemed
-positive. In this case, the discussion converged pretty quickly, but for some
+syntax ambiguity and a potential resolution.
+Also, the overall feedback seemed positive.
+In this case, the discussion converged pretty quickly, but for some
 ideas, a lot more discussion can happen (e.g. see [this RFC][nonascii] which
-received a whopping 684 comments!). If that happens, don't be discouraged; it
-means the community is interested in your idea, but it perhaps needs some
+received a whopping 684 comments!).
+If that happens, don't be discouraged;
+it means the community is interested in your idea, but it perhaps needs some
 adjustments.
 
 [nonascii]: https://github.com/rust-lang/rfcs/pull/2457
 
-The RFC for our `?` macro feature did receive some discussion on the RFC thread
-too.  As with most RFCs, there were a few questions that we couldn't answer by
-discussion: we needed experience using the feature to decide. Such questions
-are listed in the "Unresolved Questions" section of the RFC. Also, over the
-course of the RFC discussion, you will probably want to update the RFC document
+The RFC for our `?` macro feature did receive some discussion on the RFC thread too.
+As with most RFCs, there were a few questions that we couldn't answer by
+discussion: we needed experience using the feature to decide.
+Such questions are listed in the "Unresolved Questions" section of the RFC.
+Also, over the course of the RFC discussion, you will probably want to update the RFC document
 itself to reflect the course of the discussion (e.g. new alternatives or prior
 work may be added or you may decide to change parts of the proposal itself).
 
 In the end, when the discussion seems to reach a consensus and die down a bit,
 a Rust team member may propose to move to "final comment period" (FCP) with one
-of three possible dispositions. This means that they want the other members of
-the appropriate teams to review and comment on the RFC. More discussion may
-ensue, which may result in more changes or unresolved questions being added. At
-some point, when everyone is satisfied, the RFC enters the FCP, which is the
-last chance for people to bring up objections. When the FCP is over, the
-disposition is adopted. Here are the three possible dispositions:
+of three possible dispositions.
+This means that they want the other members of
+the appropriate teams to review and comment on the RFC.
+More discussion may ensue, which may result in more changes or unresolved questions being added.
+At some point, when everyone is satisfied, the RFC enters the FCP, which is the
+last chance for people to bring up objections.
+When the FCP is over, the disposition is adopted.
+Here are the three possible dispositions:
 
 - _Merge_: accept the feature. Here is the proposal to merge for our [`?` macro
   feature][rfcmerge].
@@ -136,14 +146,14 @@ disposition is adopted. Here are the three possible dispositions:
   will go a different direction.
 - _Postpone_: there is interest in going this direction but not at the moment.
   This happens most often because the appropriate Rust team doesn't have the
-  bandwidth to shepherd the feature through the process to stabilization. Often
-  this is the case when the feature doesn't fit into the team's roadmap.
+  bandwidth to shepherd the feature through the process to stabilization.
+  Often this is the case when the feature doesn't fit into the team's roadmap.
   Postponed ideas may be revisited later.
 
 [rfcmerge]: https://github.com/rust-lang/rfcs/pull/2298#issuecomment-360582667
 
-When an RFC is merged, the PR is merged into the RFCs repo. A new _tracking
-issue_ is created in the [rust-lang/rust] repo to track progress on the feature
+When an RFC is merged, the PR is merged into the RFCs repo.
+A new _tracking issue_ is created in the [rust-lang/rust] repo to track progress on the feature
 and discuss unresolved questions, implementation progress and blockers, etc.
 Here is the tracking issue on for our [`?` macro feature][tracking].
 
@@ -158,93 +168,98 @@ To make a change to the compiler, open a PR against the [rust-lang/rust] repo.
 [rust-lang/rust]: https://github.com/rust-lang/rust
 
 Depending on the feature/change/bug fix/improvement, implementation may be
-relatively-straightforward or it may be a major undertaking. You can always ask
-for help or mentorship from more experienced compiler devs.  Also, you don't
-have to be the one to implement your feature; but keep in mind that if you
-don't, it might be a while before someone else does.
+relatively-straightforward or it may be a major undertaking.
+You can always ask for help or mentorship from more experienced compiler devs.
+Also, you don't have to be the one to implement your feature;
+but keep in mind that if you don't, it might be a while before someone else does.
 
 For the `?` macro feature, I needed to go understand the relevant parts of
-macro expansion in the compiler. Personally, I find that [improving the
+macro expansion in the compiler.
+Personally, I find that [improving the
 comments][comments] in the code is a helpful way of making sure I understand
 it, but you don't have to do that if you don't want to.
 
 [comments]: https://github.com/rust-lang/rust/pull/47732
 
-I then [implemented][impl1] the original feature, as described in the RFC. When
-a new feature is implemented, it goes behind a _feature gate_, which means that
-you have to use `#![feature(my_feature_name)]` to use the feature. The feature
-gate is removed when the feature is stabilized.
+I then [implemented][impl1] the original feature, as described in the RFC.
+When a new feature is implemented, it goes behind a _feature gate_, which means that
+you have to use `#![feature(my_feature_name)]` to use the feature.
+The feature gate is removed when the feature is stabilized.
 
 **Most bug fixes and improvements** don't require a feature gate. You can just
 make your changes/improvements.
 
-When you open a PR on the [rust-lang/rust], a bot will assign your PR to a
-reviewer. If there is a particular Rust team member you are working with, you can
+When you open a PR on the [rust-lang/rust] repo, a bot will assign your PR to a reviewer.
+If there is a particular Rust team member you are working with, you can
 request that reviewer by leaving a comment on the thread with `r?
 @reviewer-github-id` (e.g. `r? @eddyb`). If you don't know who to request,
-don't request anyone; the bot will assign someone automatically based on which files you changed.
+don't request anyone;
+the bot will assign someone automatically based on which files you changed.
 
 The reviewer may request changes before they approve your PR, they may mark the PR with label 
 "S-waiting-on-author" after leaving comments, this means that the PR is blocked on you to make 
-some requested changes. When you finished iterating on the changes, you can mark the PR as 
+some requested changes.
+When you have finished iterating on the changes, you can mark the PR as
 `S-waiting-on-review` again by leaving a comment with `@rustbot ready`, this will remove the 
 `S-waiting-on-author` label and add the `S-waiting-on-review` label.
 
-Feel free to ask questions or discuss things you don't understand or disagree with. However,
-recognize that the PR won't be merged unless someone on the Rust team approves
-it. If a reviewer leave a comment like `r=me after fixing ...`, that means they approve the PR and 
+Feel free to ask questions or discuss things you don't understand or disagree with.
+However, recognize that the PR won't be merged unless someone on the Rust team approves
+it.
+If a reviewer leaves a comment like `r=me after fixing ...`, that means they approve the PR and
 you can merge it with comment with `@bors r=reviewer-github-id`(e.g. `@bors r=eddyb`) to merge it 
-after fixing trivial issues. Note that `r=someone` requires permission and bors could say 
-something like "🔑 Insufficient privileges..." when commenting `r=someone`. In that case, 
-you have to ask the reviewer to revisit your PR.
-
-When your reviewer approves the PR, it will go into a queue for yet another bot
-called `@bors`. `@bors` manages the CI build/merge queue. When your PR reaches
-the head of the `@bors` queue, `@bors` will test out the merge by running all
-tests against your PR on GitHub Actions. This takes a lot of time to
-finish. If all tests pass, the PR is merged and becomes part of the next
-nightly compiler!
+after fixing trivial issues.
+Note that `r=someone` requires permission and bors could say
+something like "🔑 Insufficient privileges..." when commenting `r=someone`.
+In that case, you have to ask the reviewer to revisit your PR.
+
+When your reviewer approves the PR, it will go into a queue for yet another bot called `@bors`.
+`@bors` manages the CI build/merge queue.
+When your PR reaches the head of the `@bors` queue, `@bors` will test out the merge by running all
+tests against your PR on GitHub Actions.
+This takes a lot of time to finish.
+If all tests pass, the PR is merged and becomes part of the next nightly compiler!
 
 There are a couple of things that may happen for some PRs during the review process
 
 - If the change is substantial enough, the reviewer may request an FCP on
-  the PR. This gives all members of the appropriate team a chance to review the
-  changes.
+  the PR.
+  This gives all members of the appropriate team a chance to review the changes.
 - If the change may cause breakage, the reviewer may request a [crater] run.
   This compiles the compiler with your changes and then attempts to compile all
-  crates on crates.io with your modified compiler. This is a great smoke test
+  crates on crates.io with your modified compiler.
+  This is a great smoke test
   to check if you introduced a change to compiler behavior that affects a large
   portion of the ecosystem.
 - If the diff of your PR is large or the reviewer is busy, your PR may have
-  some merge conflicts with other PRs that happen to get merged first. You
-  should fix these merge conflicts using the normal git procedures.
+  some merge conflicts with other PRs that happen to get merged first.
+  You should fix these merge conflicts using the normal git procedures.
 
 [crater]: ./tests/crater.html
 
 If you are not doing a new feature or something like that (e.g. if you are
-fixing a bug), then that's it! Thanks for your contribution :)
+fixing a bug), then that's it!
+Thanks for your contribution :)
 
 ## Refining your implementation
 
 As people get experience with your new feature on nightly, slight changes may
-be proposed and unresolved questions may become resolved. Updates/changes go
-through the same process for implementing any other changes, as described
+be proposed and unresolved questions may become resolved.
+Updates/changes go through the same process for implementing any other changes, as described
 above (i.e. submit a PR, go through review, wait for `@bors`, etc).
 
-Some changes may be major enough to require an FCP and some review by Rust team
-members.
+Some changes may be major enough to require an FCP and some review by Rust team members.
 
 For the `?` macro feature, we went through a few different iterations after the
 original implementation: [1][impl2], [2][impl3], [3][impl4].
 
 Along the way, we decided that `?` should not take a separator, which was
-previously an unresolved question listed in the RFC. We also changed the
-disambiguation strategy: we decided to remove the ability to use `?` as a
+previously an unresolved question listed in the RFC.
+We also changed the disambiguation strategy: we decided to remove the ability to use `?` as a
 separator token for other repetition operators (e.g. `+` or `*`). However,
 since this was a breaking change, we decided to do it over an edition boundary.
 Thus, the new feature can be enabled only in edition 2018. These deviations
-from the original RFC required [another
-FCP](https://github.com/rust-lang/rust/issues/51934).
+from the original RFC required [another FCP](https://github.com/rust-lang/rust/issues/51934).
 
 ## Stabilization
 
@@ -264,8 +279,8 @@ The stabilization report for our feature is [here][stabrep].
 [stabrep]: https://github.com/rust-lang/rust/issues/48075#issuecomment-433243048
 
 After this, [a PR is made][stab] to remove the feature gate, enabling the feature by
-default (on the 2018 edition). A note is added to the [Release notes][relnotes]
-about the feature.
+default (on the 2018 edition).
+A note is added to the [Release notes][relnotes] about the feature.
 
 [stab]: https://github.com/rust-lang/rust/pull/56245
 

From cf99a9ec4f59b389a9d7acf9d556bd675055bd6c Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Fri, 24 Oct 2025 10:53:49 +0200
Subject: [PATCH 03/19] check on ci

---
 src/doc/rustc-dev-guide/.github/workflows/ci.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/doc/rustc-dev-guide/.github/workflows/ci.yml b/src/doc/rustc-dev-guide/.github/workflows/ci.yml
index 79d03080dce38..30e93c42a0ab3 100644
--- a/src/doc/rustc-dev-guide/.github/workflows/ci.yml
+++ b/src/doc/rustc-dev-guide/.github/workflows/ci.yml
@@ -82,3 +82,9 @@ jobs:
           git add .
           git commit -m "Deploy ${GITHUB_SHA} to gh-pages"
           git push --quiet -f "https://x-token:${{ secrets.GITHUB_TOKEN }}@github.com/${GITHUB_REPOSITORY}" HEAD:gh-pages
+
+      - name: Check if files comply with semantic line breaks
+        continue-on-error: true
+        run: |
+          # using split_inclusive that uses regex feature that uses an unstable feature
+          RUSTC_BOOTSTRAP=true cargo run --manifest-path ci/sembr/Cargo.toml src

From 057f15e928b911667d1049adcf2abfaaff6c3e06 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Mon, 27 Oct 2025 16:33:58 +0200
Subject: [PATCH 04/19] sample output

---
 .../src/tests/codegen-backend-tests/cg_gcc.md    | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/doc/rustc-dev-guide/src/tests/codegen-backend-tests/cg_gcc.md b/src/doc/rustc-dev-guide/src/tests/codegen-backend-tests/cg_gcc.md
index 6bd6007c8969e..69db2094838d9 100644
--- a/src/doc/rustc-dev-guide/src/tests/codegen-backend-tests/cg_gcc.md
+++ b/src/doc/rustc-dev-guide/src/tests/codegen-backend-tests/cg_gcc.md
@@ -21,7 +21,8 @@ you can use the following command to run UI tests locally using the GCC backend,
 
 If a different test suite has failed on CI, you will have to modify the `tests/ui` part.
 
-To reproduce the whole CI job locally, you can run `cargo run --manifest-path src/ci/citool/Cargo.toml run-local x86_64-gnu-gcc`. See [Testing with Docker](../docker.md) for more information.
+To reproduce the whole CI job locally, you can run `cargo run --manifest-path src/ci/citool/Cargo.toml run-local x86_64-gnu-gcc`.
+See [Testing with Docker](../docker.md) for more information.
 
 ### What to do in case of a GCC job failure?
 
@@ -32,15 +33,16 @@ If fixing a compiler test that fails with the GCC backend is non-trivial, you ca
 ## Choosing which codegen backends are built
 
 The `rust.codegen-backends = [...]` bootstrap option affects which codegen backends will be built and
-included in the sysroot of the produced `rustc`. To use the GCC codegen backend, `"gcc"` has to
-be included in this array in `bootstrap.toml`:
+included in the sysroot of the produced `rustc`.
+To use the GCC codegen backend, `"gcc"` has to be included in this array in `bootstrap.toml`:
 
 ```toml
 rust.codegen-backends = ["llvm", "gcc"]
 ```
 
 If you don't want to change your `bootstrap.toml` file, you can alternatively run your `x`
-commands with `--set 'rust.codegen-backends=["llvm", "gcc"]'`. For example:
+commands with `--set 'rust.codegen-backends=["llvm", "gcc"]'`.
+For example:
 
 ```bash
 ./x build --set 'rust.codegen-backends=["llvm", "gcc"]'
@@ -48,7 +50,8 @@ commands with `--set 'rust.codegen-backends=["llvm", "gcc"]'`. For example:
 
 The first backend in the `codegen-backends` array will determine which backend will be used as the
 *default backend* of the built `rustc`. This also determines which backend will be used to compile the
-stage 1 standard library (or anything built in stage 2+). To produce `rustc` that uses the GCC backend
+stage 1 standard library (or anything built in stage 2+).
+To produce `rustc` that uses the GCC backend
 by default, you can thus put `"gcc"` as the first element of this array:
 
 ```bash
@@ -69,7 +72,8 @@ Note that in order for this to work, the tested compiler must have the GCC codeg
 ## Downloading GCC from CI
 
 The `gcc.download-ci-gcc` bootstrap option controls if GCC (which is a dependency of the GCC codegen backend)
-will be downloaded from CI or built locally. The default value is `true`, which will download GCC from CI
+will be downloaded from CI or built locally.
+The default value is `true`, which will download GCC from CI
 if there are no local changes to the GCC sources and the given host target is available on CI.
 
 ## Running tests of the backend itself

From 4c0120e53256e21150fcdcd3cae93b987685c74d Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Mon, 27 Oct 2025 17:21:26 +0200
Subject: [PATCH 05/19] handle a corner case

---
 src/doc/rustc-dev-guide/ci/sembr/src/main.rs              | 8 +++++---
 .../src/tests/codegen-backend-tests/cg_gcc.md             | 3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
index 6b3753d8b82fd..7f07d6e81420a 100644
--- a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
+++ b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
@@ -19,7 +19,8 @@ struct Cli {
     show_diff: bool,
 }
 
-static REGEX_IGNORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\d\.|\-|\*)\s+").unwrap());
+static REGEX_IGNORE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^\s*(\d\.|\-|\*)\s+").unwrap());
 static REGEX_IGNORE_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)$").unwrap());
 static REGEX_SPLIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)\s+").unwrap());
 
@@ -173,7 +174,7 @@ ignore e.g. and i.e.
 ```
 some code. block
 ```
-some more text.
+sentence with *italics* should not be ignored. truly.
 ";
     let expected = "\
 # some. heading
@@ -190,7 +191,8 @@ ignore e.g. and i.e.
 ```
 some code. block
 ```
-some more text.
+sentence with *italics* should not be ignored.
+truly.
 ";
     assert_eq!(expected, comply(original));
 }
diff --git a/src/doc/rustc-dev-guide/src/tests/codegen-backend-tests/cg_gcc.md b/src/doc/rustc-dev-guide/src/tests/codegen-backend-tests/cg_gcc.md
index 69db2094838d9..4325cc58797f8 100644
--- a/src/doc/rustc-dev-guide/src/tests/codegen-backend-tests/cg_gcc.md
+++ b/src/doc/rustc-dev-guide/src/tests/codegen-backend-tests/cg_gcc.md
@@ -49,7 +49,8 @@ For example:
 ```
 
 The first backend in the `codegen-backends` array will determine which backend will be used as the
-*default backend* of the built `rustc`. This also determines which backend will be used to compile the
+*default backend* of the built `rustc`.
+This also determines which backend will be used to compile the
 stage 1 standard library (or anything built in stage 2+).
 To produce `rustc` that uses the GCC backend
 by default, you can thus put `"gcc"` as the first element of this array:

From 14a99007fbf1a1e45a13cb04ee2855150e01e6bf Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Mon, 27 Oct 2025 17:33:51 +0200
Subject: [PATCH 06/19] fix env use

---
 src/doc/rustc-dev-guide/.github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/doc/rustc-dev-guide/.github/workflows/ci.yml b/src/doc/rustc-dev-guide/.github/workflows/ci.yml
index 30e93c42a0ab3..c9c23bf9935a9 100644
--- a/src/doc/rustc-dev-guide/.github/workflows/ci.yml
+++ b/src/doc/rustc-dev-guide/.github/workflows/ci.yml
@@ -87,4 +87,4 @@ jobs:
         continue-on-error: true
         run: |
           # using split_inclusive that uses regex feature that uses an unstable feature
-          RUSTC_BOOTSTRAP=true cargo run --manifest-path ci/sembr/Cargo.toml src
+          RUSTC_BOOTSTRAP=1 cargo run --manifest-path ci/sembr/Cargo.toml src

From cc1ab9325cdcfc7543d79391db2d003af33c75ef Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Mon, 27 Oct 2025 17:49:09 +0200
Subject: [PATCH 07/19] sample output

---
 src/doc/rustc-dev-guide/src/tests/ci.md | 230 +++++++++++++-----------
 1 file changed, 121 insertions(+), 109 deletions(-)

diff --git a/src/doc/rustc-dev-guide/src/tests/ci.md b/src/doc/rustc-dev-guide/src/tests/ci.md
index 6c0b5c2e84554..0be87c0645375 100644
--- a/src/doc/rustc-dev-guide/src/tests/ci.md
+++ b/src/doc/rustc-dev-guide/src/tests/ci.md
@@ -7,18 +7,18 @@ From a high-level point of view, when you open a pull request at
 `rust-lang/rust`, the following will happen:
 
 - A small [subset](#pull-request-builds) of tests and checks are run after each
-  push to the PR. This should help catch common errors.
+  push to the PR.
+  This should help catch common errors.
 - When the PR is approved, the [bors] bot enqueues the PR into a [merge queue].
 - Once the PR gets to the front of the queue, bors will create a merge commit
-  and run the [full test suite](#auto-builds) on it. The merge commit either
-  contains only one specific PR or it can be a ["rollup"](#rollups) which
+  and run the [full test suite](#auto-builds) on it.
+  The merge commit either contains only one specific PR or it can be a ["rollup"](#rollups) which
   combines multiple PRs together, to reduce CI costs and merge delays.
 - Once the whole test suite finishes, two things can happen. Either CI fails
   with an error that needs to be addressed by the developer, or CI succeeds and
   the merge commit is then pushed to the `master` branch.
 
-If you want to modify what gets executed on CI, see [Modifying CI
-jobs](#modifying-ci-jobs).
+If you want to modify what gets executed on CI, see [Modifying CI jobs](#modifying-ci-jobs).
 
 ## CI workflow
 
@@ -26,10 +26,10 @@ jobs](#modifying-ci-jobs).
 
 Our CI is primarily executed on [GitHub Actions], with a single workflow defined
 in [`.github/workflows/ci.yml`], which contains a bunch of steps that are
-unified for all CI jobs that we execute. When a commit is pushed to a
-corresponding branch or a PR, the workflow executes the
-[`src/ci/citool`] crate, which dynamically generates the specific CI
-jobs that should be executed. This script uses the [`jobs.yml`] file as an
+unified for all CI jobs that we execute.
+When a commit is pushed to a corresponding branch or a PR, the workflow executes the
+[`src/ci/citool`] crate, which dynamically generates the specific CI jobs that should be executed.
+This script uses the [`jobs.yml`] file as an
 input, which contains a declarative configuration of all our CI jobs.
 
 > Almost all build steps shell out to separate scripts. This keeps the CI fairly
@@ -38,21 +38,22 @@ input, which contains a declarative configuration of all our CI jobs.
 > orchestrating the scripts that drive the process.
 
 In essence, all CI jobs run `./x test`, `./x dist` or some other command with
-different configurations, across various operating systems, targets, and
-platforms. There are two broad categories of jobs that are executed, `dist` and
-non-`dist` jobs.
+different configurations, across various operating systems, targets, and platforms.
+There are two broad categories of jobs that are executed, `dist` and non-`dist` jobs.
 
 - Dist jobs build a full release of the compiler for a specific platform,
-  including all the tools we ship through rustup. Those builds are then uploaded
+  including all the tools we ship through rustup.
+  Those builds are then uploaded
   to the `rust-lang-ci2` S3 bucket and are available to be locally installed
-  with the [rustup-toolchain-install-master] tool. The same builds are also used
+  with the [rustup-toolchain-install-master] tool.
+  The same builds are also used
   for actual releases: our release process basically consists of copying those
   artifacts from `rust-lang-ci2` to the production endpoint and signing them.
 - Non-dist jobs run our full test suite on the platform, and the test suite of
-  all the tools we ship through rustup; The amount of stuff we test depends on
+  all the tools we ship through rustup.
+  The amount of stuff we test depends on
   the platform (for example some tests are run only on Tier 1 platforms), and
-  some quicker platforms are grouped together on the same builder to avoid
-  wasting CI resources.
+  some quicker platforms are grouped together on the same builder to avoid wasting CI resources.
 
 Based on an input event (usually a push to a branch), we execute one of three
 kinds of builds (sets of jobs).
@@ -65,13 +66,15 @@ kinds of builds (sets of jobs).
 
 ### Pull Request builds
 
-After each push to a pull request, a set of `pr` jobs are executed. Currently,
-these execute the `x86_64-gnu-llvm-X`, `x86_64-gnu-tools`, `pr-check-1`, `pr-check-2`
-and `tidy` jobs, all running on Linux. These execute a relatively short
-(~40 minutes) and lightweight test suite that should catch common issues. More
-specifically, they run a set of lints, they try to perform a cross-compile check
+After each push to a pull request, a set of `pr` jobs are executed.
+Currently, these execute the `x86_64-gnu-llvm-X`, `x86_64-gnu-tools`, `pr-check-1`, `pr-check-2`
+and `tidy` jobs, all running on Linux.
+These execute a relatively short
+(~40 minutes) and lightweight test suite that should catch common issues.
+More specifically, they run a set of lints, they try to perform a cross-compile check
 build to Windows mingw (without producing any artifacts), and they test the
-compiler using a *system* version of LLVM. Unfortunately, it would take too many
+compiler using a *system* version of LLVM.
+Unfortunately, it would take too many
 resources to run the full test suite for each commit on every PR.
 
 > **Note on doc comments**
@@ -84,27 +87,28 @@ resources to run the full test suite for each commit on every PR.
 > Thus, it is a good idea to run `./x doc xxx` locally for any doc comment
 > changes to help catch these early.
 
-PR jobs are defined in the `pr` section of [`jobs.yml`]. Their results can be observed
+PR jobs are defined in the `pr` section of [`jobs.yml`].
+Their results can be observed
 directly on the PR, in the "CI checks" section at the bottom of the PR page.
 
 ### Auto builds
 
-Before a commit can be merged into the `master` branch, it needs to pass our
-complete test suite. We call this an `auto` build. This build runs tens of CI
-jobs that exercise various tests across operating systems and targets. The full
-test suite is quite slow; it can take several hours until all the `auto` CI
-jobs finish.
+Before a commit can be merged into the `master` branch, it needs to pass our complete test suite.
+We call this an `auto` build.
+This build runs tens of CI jobs that exercise various tests across operating systems and targets.
+The full test suite is quite slow;
+it can take several hours until all the `auto` CI jobs finish.
 
 Most platforms only run the build steps, some run a restricted set of tests;
 only a subset run the full suite of tests (see Rust's [platform tiers]).
 
-Auto jobs are defined in the `auto` section of [`jobs.yml`]. They are executed
-on the `auto` branch under the `rust-lang/rust` repository,
+Auto jobs are defined in the `auto` section of [`jobs.yml`].
+They are executed on the `auto` branch under the `rust-lang/rust` repository,
 and the final result will be reported via a comment made by bors on the corresponding PR.
 The live results can be seen on [the GitHub Actions workflows page].
 
-At any given time, at most a single `auto` build is being executed. Find out
-more in [Merging PRs serially with bors](#merging-prs-serially-with-bors).
+At any given time, at most a single `auto` build is being executed.
+Find out more in [Merging PRs serially with bors](#merging-prs-serially-with-bors).
 
 [platform tiers]: https://forge.rust-lang.org/release/platform-support.html#rust-platform-support
 
@@ -112,7 +116,8 @@ more in [Merging PRs serially with bors](#merging-prs-serially-with-bors).
 
 Sometimes we want to run a subset of the test suite on CI for a given PR, or
 build a set of compiler artifacts from that PR, without attempting to merge it.
-We call this a "try build". A try build is started after a user with the proper
+We call this a "try build".
+A try build is started after a user with the proper
 permissions posts a PR comment with the `@bors try` command.
 
 There are several use-cases for try builds:
@@ -121,9 +126,9 @@ There are several use-cases for try builds:
   For this, a working compiler build is needed, which can be generated with a
   try build that runs the [dist-x86_64-linux] CI job, which builds an optimized
   version of the compiler on Linux (this job is currently executed by default
-  when you start a try build). To create a try build and schedule it for a
-  performance benchmark, you can use the `@bors try @rust-timer queue` command
-  combination.
+  when you start a try build).
+  To create a try build and schedule it for a
+  performance benchmark, you can use the `@bors try @rust-timer queue` command combination.
 - Check the impact of the PR across the Rust ecosystem, using a [Crater](crater.md) run.
   Again, a working compiler build is needed for this, which can be produced by
   the [dist-x86_64-linux] CI job.
@@ -131,25 +136,32 @@ There are several use-cases for try builds:
   passes the test suite executed by that job.
 
 By default, if you send a comment with `@bors try`, the jobs defined in the `try` section of
-[`jobs.yml`] will be executed. We call this mode a "fast try build". Such a try build
-will not execute any tests, and it will allow compilation warnings. It is useful when you want to
+[`jobs.yml`] will be executed.
+We call this mode a "fast try build".
+Such a try build will not execute any tests, and it will allow compilation warnings.
+It is useful when you want to
 get an optimized toolchain as fast as possible, for a Crater run or performance benchmarks,
-even if it might not be working fully correctly. If you want to do a full build for the default try job,
+even if it might not be working fully correctly.
+If you want to do a full build for the default try job,
 specify its job name in a job pattern (explained below).
 
 If you want to run custom CI jobs in a try build and make sure that they pass all tests and do
 not produce any compilation warnings, you can select CI jobs to be executed by specifying a *job pattern*,
 which can be used in one of two ways:
 - You can add a set of `try-job: <job pattern>` directives to the PR description (described below) and then
-  simply run `@bors try`. CI will read these directives and run the jobs that you have specified. This is
+  simply run `@bors try`.
+  CI will read these directives and run the jobs that you have specified.
+  This is
   useful if you want to rerun the same set of try jobs multiple times, after incrementally modifying a PR.
 - You can specify the job pattern using the `jobs` parameter of the try command: `@bors try jobs=<job pattern>`.
-  This is useful for one-off try builds with specific jobs. Note that the `jobs` parameter has a higher priority
-  than the PR description directives.
+  This is useful for one-off try builds with specific jobs.
+  Note that the `jobs` parameter has a higher priority than the PR description directives.
   - There can also be multiple patterns specified, e.g. `@bors try jobs=job1,job2,job3`.
 
 Each job pattern can either be an exact name of a job or a glob pattern that matches multiple jobs,
-for example `*msvc*` or `*-alt`. You can start at most 20 jobs in a single try build. When using
+for example `*msvc*` or `*-alt`.
+You can start at most 20 jobs in a single try build.
+When using
 glob patterns in the PR description, you can optionally wrap them in backticks (`` ` ``) to avoid GitHub rendering
 the pattern as Markdown if it contains e.g. an asterisk. Note that this escaping will not work when using
 the `@bors jobs=` parameter.
@@ -198,8 +210,7 @@ a single try build running on a single PR at any given time.
 
 Note that try builds are handled using the [new bors] implementation.
 
-[rustc-perf]: https://github.com/rust-lang/rustc-perf
-[new bors]: https://github.com/rust-lang/bors
+[rustc-perf]: https://github.com/rust-lang/rustc-perf [new bors]: https://github.com/rust-lang/bors
 
 ### Modifying CI jobs
 
@@ -208,20 +219,20 @@ If you want to modify what gets executed on our CI, you can simply modify the
 
 You can also modify what gets executed temporarily, for example to test a
 particular platform or configuration that is challenging to test locally (for
-example, if a Windows build fails, but you don't have access to a Windows
-machine). Don't hesitate to use CI resources in such situations.
+example, if a Windows build fails, but you don't have access to a Windows machine).
+Don't hesitate to use CI resources in such situations.
 
 You can perform an arbitrary CI job in two ways:
 - Use the [try build](#try-builds) functionality, and specify the CI jobs that
   you want to be executed in try builds in your PR description.
 - Modify the [`pr`](#pull-request-builds) section of `jobs.yml` to specify which
-  CI jobs should be executed after each push to your PR. This might be faster
-  than repeatedly starting try builds.
+  CI jobs should be executed after each push to your PR.
+  This might be faster than repeatedly starting try builds.
 
 To modify the jobs executed after each push to a PR, you can simply copy one of
-the job definitions from the `auto` section to the `pr` section. For example,
-the `x86_64-msvc` job is responsible for running the 64-bit MSVC tests. You can
-copy it to the `pr` section to cause it to be executed after a commit is pushed
+the job definitions from the `auto` section to the `pr` section.
+For example, the `x86_64-msvc` job is responsible for running the 64-bit MSVC tests.
+You can copy it to the `pr` section to cause it to be executed after a commit is pushed
 to your PR, like this:
 
 ```yaml
@@ -238,8 +249,8 @@ pr:
     <<: *job-windows-8c
 ```
 
-Then you can commit the file and push it to your PR branch on GitHub. GitHub
-Actions should then execute this CI job after each push to your PR.
+Then you can commit the file and push it to your PR branch on GitHub.
+GitHub Actions should then execute this CI job after each push to your PR.
 
 <div class="warning">
 
@@ -247,12 +258,12 @@ Actions should then execute this CI job after each push to your PR.
 you have made to `jobs.yml`, if they were supposed to be temporary!**
 
 A good practice is to prefix `[WIP]` in PR title while still running try jobs
-and `[DO NOT MERGE]` in the commit that modifies the CI jobs for testing
-purposes.
+and `[DO NOT MERGE]` in the commit that modifies the CI jobs for testing purposes.
 </div>
 
 Although you are welcome to use CI, just be conscious that this is a shared
-resource with limited concurrency. Try not to enable too many jobs at once;
+resource with limited concurrency.
+Try not to enable too many jobs at once;
 one or two should be sufficient in most cases.
 
 ## Merging PRs serially with bors
@@ -265,26 +276,28 @@ after the build happened.
 
 To ensure a `master` branch that works all the time, we forbid manual merges.
 Instead, all PRs have to be approved through our bot, [bors] (the software
-behind it is called [homu]). All the approved PRs are put in a [merge queue]
-(sorted by priority and creation date) and are automatically tested one at the
-time. If all the builders are green, the PR is merged, otherwise the failure is
+behind it is called [homu]).
+All the approved PRs are put in a [merge queue]
+(sorted by priority and creation date) and are automatically tested one at a time.
+If all the builders are green, the PR is merged, otherwise the failure is
 recorded and the PR will have to be re-approved again.
 
 Bors doesn’t interact with CI services directly, but it works by pushing the
 merge commit it wants to test to specific branches (like `auto` or `try`), which
-are configured to execute CI checks. Bors then detects the outcome of the build
-by listening for either Commit Statuses or Check Runs. Since the merge commit is
+are configured to execute CI checks.
+Bors then detects the outcome of the build by listening for either Commit Statuses or Check Runs.
+Since the merge commit is
 based on the latest `master` and only one can be tested at the same time, when
 the results are green, `master` is fast-forwarded to that merge commit.
 
 Unfortunately, testing a single PR at a time, combined with our long CI (~2
 hours for a full run), means we can’t merge a lot of PRs in a single day, and a
-single failure greatly impacts our throughput. The maximum number of
-PRs we can merge in a day is around ~10.
+single failure greatly impacts our throughput.
+The maximum number of PRs we can merge in a day is around ~10.
 
 The long CI run times, and requirement for a large builder pool, is largely due
-to the fact that full release artifacts are built in the `dist-` builders. This
-is worth it because these release artifacts:
+to the fact that full release artifacts are built in the `dist-` builders.
+This is worth it because these release artifacts:
 
 - Allow perf testing even at a later date.
 - Allow bisection when bugs are discovered later.
@@ -295,23 +308,23 @@ is worth it because these release artifacts:
 
 Some PRs don’t need the full test suite to be executed: trivial changes like
 typo fixes or README improvements *shouldn’t* break the build, and testing every
-single one of them for 2+ hours would be wasteful. To solve this, we
-regularly create a "rollup", a PR where we merge several pending trivial PRs so
-they can be tested together. Rollups are created manually by a team member using
-the "create a rollup" button on the [merge queue]. The team member uses their
-judgment to decide if a PR is risky or not.
+single one of them for 2+ hours would be wasteful.
+To solve this, we regularly create a "rollup", a PR where we merge several pending trivial PRs so
+they can be tested together.
+Rollups are created manually by a team member using
+the "create a rollup" button on the [merge queue].
+The team member uses their judgment to decide if a PR is risky or not.
 
 ## Docker
 
 All CI jobs, except those on macOS and Windows, are executed inside that
-platform’s custom [Docker container]. This has a lot of advantages for us:
+platform’s custom [Docker container].
+This has a lot of advantages for us:
 
 - The build environment is consistent regardless of the changes of the
-  underlying image (switching from the trusty image to xenial was painless for
-  us).
+  underlying image (switching from the trusty image to xenial was painless for us).
 - We can use ancient build environments to ensure maximum binary compatibility,
-  for example [using older CentOS releases][dist-x86_64-linux] on our Linux
-  builders.
+  for example [using older CentOS releases][dist-x86_64-linux] on our Linux builders.
 - We can avoid reinstalling tools (like QEMU or the Android emulator) every time,
   thanks to Docker image caching.
 - Users can run the same tests in the same environment locally by just running this command:
@@ -325,13 +338,11 @@ platform’s custom [Docker container]. This has a lot of advantages for us:
 The Docker images prefixed with `dist-` are used for building artifacts while
 those without that prefix run tests and checks.
 
-We also run tests for less common architectures (mainly Tier 2 and Tier 3
-platforms) in CI. Since those platforms are not x86, we either run everything
-inside QEMU, or we just cross-compile if we don’t want to run the tests for that
-platform.
+We also run tests for less common architectures (mainly Tier 2 and Tier 3 platforms) in CI.
+Since those platforms are not x86, we either run everything
+inside QEMU, or we just cross-compile if we don’t want to run the tests for that platform.
 
-These builders are running on a special pool of builders set up and maintained
-for us by GitHub.
+These builders are running on a special pool of builders set up and maintained for us by GitHub.
 
 [Docker container]: https://github.com/rust-lang/rust/tree/master/src/ci/docker
 
@@ -341,16 +352,16 @@ Our CI workflow uses various caching mechanisms, mainly for two things:
 
 ### Docker images caching
 
-The Docker images we use to run most of the Linux-based builders take a *long*
-time to fully build. To speed up the build, we cache them using [Docker registry
-caching], with the intermediate artifacts being stored on [ghcr.io]. We also
-push the built Docker images to ghcr, so that they can be reused by other tools
-(rustup) or by developers running the Docker build locally (to speed up their
-build).
+The Docker images we use to run most of the Linux-based builders take a *long* time to fully build.
+To speed up the build, we cache them using [Docker registry
+caching], with the intermediate artifacts being stored on [ghcr.io].
+We also push the built Docker images to ghcr, so that they can be reused by other tools
+(rustup) or by developers running the Docker build locally (to speed up their build).
 
 Since we test multiple, diverged branches (`master`, `beta` and `stable`), we
 can’t rely on a single cache for the images, otherwise builds on a branch would
-override the cache for the others. Instead, we store the images under different
+override the cache for the others.
+Instead, we store the images under different
 tags, identifying them with a custom hash made from the contents of all the
 Dockerfiles and related scripts.
 
@@ -367,17 +378,17 @@ invalidated if one of the following changes:
 ### LLVM caching with Sccache
 
 We build some C/C++ stuff in various CI jobs, and we rely on [Sccache] to cache
-the intermediate LLVM artifacts. Sccache is a distributed ccache developed by
+the intermediate LLVM artifacts.
+Sccache is a distributed ccache developed by
 Mozilla, which can use an object storage bucket as the storage backend.
 
-With Sccache there's no need to calculate the hash key ourselves. Sccache
-invalidates the cache automatically when it detects changes to relevant inputs,
-such as the source code, the version of the compiler, and important environment
-variables.
+With Sccache there's no need to calculate the hash key ourselves.
+Sccache invalidates the cache automatically when it detects changes to relevant inputs,
+such as the source code, the version of the compiler, and important environment variables.
 So we just pass the Sccache wrapper on top of Cargo and Sccache does the rest.
 
-We store the persistent artifacts on the S3 bucket, `rust-lang-ci-sccache2`. So
-when the CI runs, if Sccache sees that LLVM is being compiled with the same C/C++
+We store the persistent artifacts on the S3 bucket, `rust-lang-ci-sccache2`.
+So when the CI runs, if Sccache sees that LLVM is being compiled with the same C/C++
 compiler and the LLVM source code is the same, Sccache retrieves the individual
 compiled translation units from S3.
 
@@ -396,26 +407,28 @@ receives the build logs on failure, and extracts the error message automatically
 posting it on the PR thread.
 
 The bot is not hardcoded to look for error strings, but was trained with a bunch
-of build failures to recognize which lines are common between builds and which
-are not. While the generated snippets can be weird sometimes, the bot is pretty
-good at identifying the relevant lines, even if it’s an error we've never seen
-before.
+of build failures to recognize which lines are common between builds and which are not.
+While the generated snippets can be weird sometimes, the bot is pretty
+good at identifying the relevant lines, even if it’s an error we've never seen before.
 
 [rla]: https://github.com/rust-lang/rust-log-analyzer
 
 ### Toolstate to support allowed failures
 
 The `rust-lang/rust` repo doesn’t only test the compiler on its CI, but also a
-variety of tools and documentation. Some documentation is pulled in via git
-submodules. If we blocked merging rustc PRs on the documentation being fixed, we
+variety of tools and documentation.
+Some documentation is pulled in via git submodules.
+If we blocked merging rustc PRs on the documentation being fixed, we
 would be stuck in a chicken-and-egg problem, because the documentation's CI
 would not pass since updating it would need the not-yet-merged version of rustc
 to test against (and we usually require CI to be passing).
 
 To avoid the problem, submodules are allowed to fail, and their status is
-recorded in [rust-toolstate]. When a submodule breaks, a bot automatically pings
+recorded in [rust-toolstate].
+When a submodule breaks, a bot automatically pings
 the maintainers so they know about the breakage, and it records the failure on
-the toolstate repository. The release process will then ignore broken tools on
+the toolstate repository.
+The release process will then ignore broken tools on
 nightly, removing them from the shipped nightlies.
 
 While tool failures are allowed most of the time, they’re automatically
@@ -448,8 +461,8 @@ To learn more about the dashboard, see the [Datadog CI docs].
 ## Determining the CI configuration
 
 If you want to determine which `bootstrap.toml` settings are used in CI for a
-particular job, it is probably easiest to just look at the build log. To do
-this:
+particular job, it is probably easiest to just look at the build log.
+To do this:
 
 1. Go to
    <https://github.com/rust-lang/rust/actions?query=branch%3Aauto+is%3Asuccess>
@@ -463,8 +476,7 @@ this:
 [`jobs.yml`]: https://github.com/rust-lang/rust/blob/master/src/ci/github-actions/jobs.yml
 [`.github/workflows/ci.yml`]: https://github.com/rust-lang/rust/blob/master/.github/workflows/ci.yml
 [`src/ci/citool`]: https://github.com/rust-lang/rust/blob/master/src/ci/citool
-[bors]: https://github.com/bors
-[homu]: https://github.com/rust-lang/homu
+[bors]: https://github.com/bors [homu]: https://github.com/rust-lang/homu
 [merge queue]: https://bors.rust-lang.org/queue/rust
 [dist-x86_64-linux]: https://github.com/rust-lang/rust/blob/master/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile
 [the GitHub Actions workflows page]: https://github.com/rust-lang/rust/actions

From 0c218410b46728d425981c82727e10100aa0aefc Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Mon, 27 Oct 2025 18:08:34 +0200
Subject: [PATCH 08/19] fix corner case

---
 src/doc/rustc-dev-guide/ci/sembr/src/main.rs | 12 ++++++++++++
 src/doc/rustc-dev-guide/src/tests/ci.md      |  6 ++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
index 7f07d6e81420a..accdcb68c3199 100644
--- a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
+++ b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
@@ -22,6 +22,8 @@ struct Cli {
 static REGEX_IGNORE: LazyLock<Regex> =
     LazyLock::new(|| Regex::new(r"^\s*(\d\.|\-|\*)\s+").unwrap());
 static REGEX_IGNORE_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)$").unwrap());
+static REGEX_IGNORE_LINK_TARGETS: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^\[.+\]: ").unwrap());
 static REGEX_SPLIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)\s+").unwrap());
 
 fn main() -> Result<()> {
@@ -94,6 +96,7 @@ fn ignore(line: &str, in_code_block: bool) -> bool {
         || line.starts_with('#')
         || line.trim().is_empty()
         || REGEX_IGNORE.is_match(line)
+        || REGEX_IGNORE_LINK_TARGETS.is_match(line)
 }
 
 fn comply(content: &str) -> String {
@@ -221,6 +224,15 @@ fn test_prettify_prefix_spaces() {
     assert_eq!(expected, lengthen_lines(original, 50));
 }
 
+#[test]
+fn test_prettify_ignore_link_targets() {
+    let original = "\
+[a target]: https://example.com
+[another target]: https://example.com
+";
+    assert_eq!(original, lengthen_lines(original, 100));
+}
+
 #[test]
 fn test_sembr_then_prettify() {
     let original = "\
diff --git a/src/doc/rustc-dev-guide/src/tests/ci.md b/src/doc/rustc-dev-guide/src/tests/ci.md
index 0be87c0645375..c0e48b314403a 100644
--- a/src/doc/rustc-dev-guide/src/tests/ci.md
+++ b/src/doc/rustc-dev-guide/src/tests/ci.md
@@ -210,7 +210,8 @@ a single try build running on a single PR at any given time.
 
 Note that try builds are handled using the [new bors] implementation.
 
-[rustc-perf]: https://github.com/rust-lang/rustc-perf [new bors]: https://github.com/rust-lang/bors
+[rustc-perf]: https://github.com/rust-lang/rustc-perf
+[new bors]: https://github.com/rust-lang/bors
 
 ### Modifying CI jobs
 
@@ -476,7 +477,8 @@ To do this:
 [`jobs.yml`]: https://github.com/rust-lang/rust/blob/master/src/ci/github-actions/jobs.yml
 [`.github/workflows/ci.yml`]: https://github.com/rust-lang/rust/blob/master/.github/workflows/ci.yml
 [`src/ci/citool`]: https://github.com/rust-lang/rust/blob/master/src/ci/citool
-[bors]: https://github.com/bors [homu]: https://github.com/rust-lang/homu
+[bors]: https://github.com/bors
+[homu]: https://github.com/rust-lang/homu
 [merge queue]: https://bors.rust-lang.org/queue/rust
 [dist-x86_64-linux]: https://github.com/rust-lang/rust/blob/master/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile
 [the GitHub Actions workflows page]: https://github.com/rust-lang/rust/actions

From 08235dd08ad891f5c1a70cdcd92f5d903bdbcb46 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Mon, 27 Oct 2025 18:33:21 +0200
Subject: [PATCH 09/19] about sembr tool

---
 src/doc/rustc-dev-guide/src/contributing.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/doc/rustc-dev-guide/src/contributing.md b/src/doc/rustc-dev-guide/src/contributing.md
index 44d630080bbfa..56583525e7390 100644
--- a/src/doc/rustc-dev-guide/src/contributing.md
+++ b/src/doc/rustc-dev-guide/src/contributing.md
@@ -391,6 +391,13 @@ Just a few things to keep in mind:
 - Please try to avoid overly long lines and use semantic line breaks (where you break the line after each sentence).
   There is no strict limit on line lengths; let the sentence or part of the sentence flow to its proper end on the same line.
 
+  You can use a tool in ci/sembr to help with this.
+  Its help output can be seen with this command:
+
+  ```console
+  cargo run --manifest-path ci/sembr/Cargo.toml -- --help
+  ```
+
 - When contributing text to the guide, please contextualize the information with some time period
   and/or a reason so that the reader knows how much to trust the information.
   Aim to provide a reasonable amount of context, possibly including but not limited to:

From 8654d0e6784c8fb1e6545c3664c0d2cb28aa0861 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Mon, 27 Oct 2025 18:34:29 +0200
Subject: [PATCH 10/19] sembr tool: add some hints on usage

---
 src/doc/rustc-dev-guide/ci/sembr/src/main.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
index accdcb68c3199..edf5b6401cd9b 100644
--- a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
+++ b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
@@ -10,9 +10,12 @@ use regex::Regex;
 
 #[derive(Parser)]
 struct Cli {
-    root_dir: PathBuf,
+    /// File or directory to check
+    path: PathBuf,
     #[arg(long)]
+    /// Modify files that do not comply
     overwrite: bool,
+    /// Applies to lines that are to be split
     #[arg(long, default_value_t = 100)]
     line_length_limit: usize,
     #[arg(long)]
@@ -31,7 +34,7 @@ fn main() -> Result<()> {
     let mut compliant = Vec::new();
     let mut not_compliant = Vec::new();
     let mut made_compliant = Vec::new();
-    for result in Walk::new(cli.root_dir) {
+    for result in Walk::new(cli.path) {
         let entry = result?;
         if entry.file_type().expect("no stdin").is_dir() {
             continue;

From a9c7730fe0084f795acc5e8078b83eceef6774b9 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Tue, 28 Oct 2025 06:10:06 +0200
Subject: [PATCH 11/19] contributing.md: ease copy-paste

Also, tidy does a lot more than check compliance with the Rust Style Guide.
---
 src/doc/rustc-dev-guide/src/contributing.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/doc/rustc-dev-guide/src/contributing.md b/src/doc/rustc-dev-guide/src/contributing.md
index 1ade4953d2e65..43e3fb5b0a164 100644
--- a/src/doc/rustc-dev-guide/src/contributing.md
+++ b/src/doc/rustc-dev-guide/src/contributing.md
@@ -269,15 +269,15 @@ Be patient; this can take a while and the queue can sometimes be long. PRs are n
 
 ### Opening a PR
 
-You are now ready to file a pull request? Great! Here are a few points you
-should be aware of.
+You are now ready to file a pull request (PR)?
+Great! Here are a few points you should be aware of.
 
 All pull requests should be filed against the `master` branch,
 unless you know for sure that you should target a different branch.
 
-Make sure your pull request is in compliance with Rust's style guidelines by running
+Run some style checks before you submit the PR:
 
-    $ ./x test tidy --bless
+    ./x test tidy --bless
 
 We recommend to make this check before every pull request (and every new commit
 in a pull request); you can add [git hooks]

From 1fbaa24d64960410a0f58ce33a7da6d85094b607 Mon Sep 17 00:00:00 2001
From: Manuel Drehwald <git@manuel.drehwald.info>
Date: Wed, 29 Oct 2025 00:31:09 -0700
Subject: [PATCH 12/19] fix typo in autodiff docs

---
 src/doc/rustc-dev-guide/src/autodiff/internals.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/doc/rustc-dev-guide/src/autodiff/internals.md b/src/doc/rustc-dev-guide/src/autodiff/internals.md
index c8e304f814ba1..e381b091e898a 100644
--- a/src/doc/rustc-dev-guide/src/autodiff/internals.md
+++ b/src/doc/rustc-dev-guide/src/autodiff/internals.md
@@ -20,7 +20,7 @@ The detailed documentation for the `std::autodiff` module is available at [std::
 Differentiable programming is used in various fields like numerical computing, [solid mechanics][ratel], [computational chemistry][molpipx], [fluid dynamics][waterlily] or for Neural Network training via Backpropagation, [ODE solver][diffsol], [differentiable rendering][libigl], [quantum computing][catalyst], and climate simulations.
 
 [ratel]: https://gitlab.com/micromorph/ratel
-[molpipx]: https://arxiv.org/abs/2411.17011v
+[molpipx]: https://arxiv.org/abs/2411.17011
 [waterlily]: https://github.com/WaterLily-jl/WaterLily.jl
 [diffsol]: https://github.com/martinjrobins/diffsol
 [libigl]: https://github.com/alecjacobson/libigl-enzyme-example?tab=readme-ov-file#run

From c850c272cf91bd898eb06488792741876525cc2d Mon Sep 17 00:00:00 2001
From: The rustc-josh-sync Cronjob Bot <github-actions@github.com>
Date: Mon, 3 Nov 2025 04:17:30 +0000
Subject: [PATCH 13/19] Prepare for merging from rust-lang/rust

This updates the rust-version file to c5dabe8cf798123087d094f06417f5a767ca73e8.
---
 src/doc/rustc-dev-guide/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/doc/rustc-dev-guide/rust-version b/src/doc/rustc-dev-guide/rust-version
index f100e41166860..0e89b4ab6ac75 100644
--- a/src/doc/rustc-dev-guide/rust-version
+++ b/src/doc/rustc-dev-guide/rust-version
@@ -1 +1 @@
-b1b464d6f61ec8c4e609c1328106378c066a9729
+c5dabe8cf798123087d094f06417f5a767ca73e8

From 0588be09973fc8b6545c8799932d9f5ec8f11eb7 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Tue, 4 Nov 2025 18:47:40 +0200
Subject: [PATCH 14/19] sembr src/tests/minicore.md

---
 src/doc/rustc-dev-guide/src/tests/minicore.md | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/doc/rustc-dev-guide/src/tests/minicore.md b/src/doc/rustc-dev-guide/src/tests/minicore.md
index 28570376f7c6b..3789b04e7c241 100644
--- a/src/doc/rustc-dev-guide/src/tests/minicore.md
+++ b/src/doc/rustc-dev-guide/src/tests/minicore.md
@@ -2,15 +2,14 @@
 
 <!-- date-check Oct 2024 -->
 
-[`tests/auxiliary/minicore.rs`][`minicore`] is a test auxiliary for
-ui/codegen/assembly test suites. It provides `core` stubs for tests that need to
+[`tests/auxiliary/minicore.rs`][`minicore`] is a test auxiliary for ui/codegen/assembly test suites.
+It provides `core` stubs for tests that need to
 build for cross-compiled targets but do not need/want to run.
 
 <div class="warning">
 
 Please note that [`minicore`] is only intended for `core` items, and explicitly
-**not** `std` or `alloc` items because `core` items are applicable to a wider
-range of tests.
+**not** `std` or `alloc` items because `core` items are applicable to a wider range of tests.
 
 </div>
 
@@ -41,8 +40,8 @@ by more than one test.
 
 ## Staying in sync with `core`
 
-The `minicore` items must be kept up to date with `core`. For consistent
-diagnostic output between using `core` and `minicore`, any `diagnostic`
+The `minicore` items must be kept up to date with `core`.
+For consistent diagnostic output between using `core` and `minicore`, any `diagnostic`
 attributes (e.g. `on_unimplemented`) should be replicated exactly in `minicore`.
 
 ## Example codegen test that uses `minicore`

From df4d8830bb1c7f63f2afc5a428560c148f718b5e Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Tue, 4 Nov 2025 18:56:17 +0200
Subject: [PATCH 15/19] date-check src/tests/minicore.md

This page was updated recently.

Also, the annotation was malformed (it lacked the colon after `date-check`),
so the page would not appear in the date-check issue.
---
 src/doc/rustc-dev-guide/src/tests/minicore.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/doc/rustc-dev-guide/src/tests/minicore.md b/src/doc/rustc-dev-guide/src/tests/minicore.md
index 3789b04e7c241..5d05c2c1e0a9a 100644
--- a/src/doc/rustc-dev-guide/src/tests/minicore.md
+++ b/src/doc/rustc-dev-guide/src/tests/minicore.md
@@ -1,6 +1,6 @@
 # `minicore` test auxiliary: using `core` stubs
 
-<!-- date-check Oct 2024 -->
+<!-- date-check: Oct 2025 -->
 
 [`tests/auxiliary/minicore.rs`][`minicore`] is a test auxiliary for ui/codegen/assembly test suites.
 It provides `core` stubs for tests that need to

From b150b0eddbc35027d9a2a24bb790473ad5ca6797 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Tue, 4 Nov 2025 19:27:48 +0200
Subject: [PATCH 16/19] sembr src/contributing.md

---
 src/doc/rustc-dev-guide/src/contributing.md | 199 +++++++++++---------
 1 file changed, 109 insertions(+), 90 deletions(-)

diff --git a/src/doc/rustc-dev-guide/src/contributing.md b/src/doc/rustc-dev-guide/src/contributing.md
index 512c6790165c0..f74b344385640 100644
--- a/src/doc/rustc-dev-guide/src/contributing.md
+++ b/src/doc/rustc-dev-guide/src/contributing.md
@@ -2,9 +2,9 @@
 
 ## Bug reports
 
-While bugs are unfortunate, they're a reality in software. We can't fix what we
-don't know about, so please report liberally. If you're not sure if something
-is a bug or not, feel free to file a bug anyway.
+While bugs are unfortunate, they're a reality in software.
+We can't fix what we don't know about, so please report liberally.
+If you're not sure if something is a bug or not, feel free to file a bug anyway.
 
 **If you believe reporting your bug publicly represents a security risk to Rust users,
 please follow our [instructions for reporting security vulnerabilities][vuln]**.
@@ -12,16 +12,18 @@ please follow our [instructions for reporting security vulnerabilities][vuln]**.
 [vuln]: https://www.rust-lang.org/policies/security
 
 If you're using the nightly channel, please check if the bug exists in the
-latest toolchain before filing your bug. It might be fixed already.
+latest toolchain before filing your bug.
+It might be fixed already.
 
 If you have the chance, before reporting a bug, please [search existing issues],
-as it's possible that someone else has already reported your error. This doesn't
-always work, and sometimes it's hard to know what to search for, so consider this
-extra credit. We won't mind if you accidentally file a duplicate report.
+as it's possible that someone else has already reported your error.
+This doesn't always work, and sometimes it's hard to know what to search for, so consider this
+extra credit.
+We won't mind if you accidentally file a duplicate report.
 
 Similarly, to help others who encountered the bug find your issue, consider
-filing an issue with a descriptive title, which contains information that might
-be unique to it.  This can be the language or compiler feature used, the
+filing an issue with a descriptive title, which contains information that might be unique to it.
+This can be the language or compiler feature used, the
 conditions that trigger the bug, or part of the error message if there is any.
 An example could be: **"impossible case reached" on lifetime inference for impl
 Trait in return position**.
@@ -31,14 +33,15 @@ in the appropriate provided template.
 
 ## Bug fixes or "normal" code changes
 
-For most PRs, no special procedures are needed. You can just [open a PR], and it
-will be reviewed, approved, and merged. This includes most bug fixes,
-refactorings, and other user-invisible changes. The next few sections talk
-about exceptions to this rule.
+For most PRs, no special procedures are needed.
+You can just [open a PR], and it will be reviewed, approved, and merged.
+This includes most bug fixes, refactorings, and other user-invisible changes.
+The next few sections talk about exceptions to this rule.
 
 Also, note that it is perfectly acceptable to open WIP PRs or GitHub [Draft PRs].
 Some people prefer to do this so they can get feedback along the
-way or share their code with a collaborator. Others do this so they can utilize
+way or share their code with a collaborator.
+Others do this so they can utilize
 the CI to build and test their PR (e.g. when developing on a slow machine).
 
 [open a PR]: #pull-requests
@@ -46,9 +49,9 @@ the CI to build and test their PR (e.g. when developing on a slow machine).
 
 ## New features
 
-Rust has strong backwards-compatibility guarantees. Thus, new features can't
-just be implemented directly in stable Rust. Instead, we have 3 release
-channels: stable, beta, and nightly.
+Rust has strong backwards-compatibility guarantees.
+Thus, new features can't just be implemented directly in stable Rust.
+Instead, we have 3 release channels: stable, beta, and nightly.
 
 - **Stable**: this is the latest stable release for general usage.
 - **Beta**: this is the next release (will be stable within 6 weeks).
@@ -65,35 +68,36 @@ Breaking changes have a [dedicated section][Breaking Changes] in the dev-guide.
 
 ### Major changes
 
-The compiler team has a special process for large changes, whether or not they
-cause breakage. This process is called a Major Change Proposal (MCP). MCP is a
-relatively lightweight mechanism for getting feedback on large changes to the
+The compiler team has a special process for large changes, whether or not they cause breakage.
+This process is called a Major Change Proposal (MCP).
+MCP is a relatively lightweight mechanism for getting feedback on large changes to the
 compiler (as opposed to a full RFC or a design meeting with the team).
 
 Example of things that might require MCPs include major refactorings, changes
 to important types, or important changes to how the compiler does something, or
 smaller user-facing changes.
 
-**When in doubt, ask on [Zulip]. It would be a shame to put a lot of work
-into a PR that ends up not getting merged!** [See this document][mcpinfo] for
-more info on MCPs.
+**When in doubt, ask on [Zulip].
+It would be a shame to put a lot of work
+into a PR that ends up not getting merged!** [See this document][mcpinfo] for more info on MCPs.
 
 [mcpinfo]: https://forge.rust-lang.org/compiler/proposals-and-stabilization.html#how-do-i-submit-an-mcp
 [zulip]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler
 
 ### Performance
 
-Compiler performance is important. We have put a lot of effort over the last
-few years into [gradually improving it][perfdash].
+Compiler performance is important.
+We have put a lot of effort over the last few years into [gradually improving it][perfdash].
 
 [perfdash]: https://perf.rust-lang.org/dashboard.html
 
 If you suspect that your change may cause a performance regression (or
 improvement), you can request a "perf run" (and your reviewer may also request one
-before approving). This is yet another bot that will compile a collection of
-benchmarks on a compiler with your changes. The numbers are reported
-[here][perf], and you can see a comparison of your changes against the latest
-master.
+before approving).
+This is yet another bot that will compile a collection of
+benchmarks on a compiler with your changes.
+The numbers are reported
+[here][perf], and you can see a comparison of your changes against the latest master.
 
 > For an introduction to the performance of Rust code in general
 > which would also be useful in rustc development, see [The Rust Performance Book].
@@ -104,11 +108,11 @@ master.
 ## Pull requests
 
 Pull requests (or PRs for short) are the primary mechanism we use to change Rust.
-GitHub itself has some [great documentation][about-pull-requests] on using the
-Pull Request feature. We use the "fork and pull" model [described here][development-models],
+GitHub itself has some [great documentation][about-pull-requests] on using the Pull Request feature.
+We use the "fork and pull" model [described here][development-models],
 where contributors push changes to their personal fork and create pull requests to
-bring those changes into the source repository. We have more info about how to use git
-when contributing to Rust under [the git section](./git.md).
+bring those changes into the source repository.
+We have more info about how to use git when contributing to Rust under [the git section](./git.md).
 
 > **Advice for potentially large, complex, cross-cutting and/or very domain-specific changes**
 >
@@ -150,7 +154,8 @@ when contributing to Rust under [the git section](./git.md).
 ### Keeping your branch up-to-date
 
 The CI in rust-lang/rust applies your patches directly against the current master,
-not against the commit your branch is based on. This can lead to unexpected failures
+not against the commit your branch is based on.
+This can lead to unexpected failures
 if your branch is outdated, even when there are no explicit merge conflicts.
 
 Update your branch only when needed: when you have merge conflicts, upstream CI is broken and blocking your green PR, or a maintainer requests it.
@@ -159,32 +164,36 @@ During review, make incremental commits to address feedback.
 Prefer to squash or rebase only at the end, or when a reviewer requests it.
 
 When updating, use `git push --force-with-lease` and leave a brief comment explaining what changed.
-Some repos prefer merging from `upstream/master` instead of rebasing; follow the project's conventions.
+Some repos prefer merging from `upstream/master` instead of rebasing;
+follow the project's conventions.
 See [keeping things up to date](git.md#keeping-things-up-to-date) for detailed instructions.
 
 After rebasing, it's recommended to [run the relevant tests locally](tests/intro.md) to catch any issues before CI runs.
 
 ### r?
 
-All pull requests are reviewed by another person. We have a bot,
-[@rustbot], that will automatically assign a random person
+All pull requests are reviewed by another person.
+We have a bot, [@rustbot], that will automatically assign a random person
 to review your request based on which files you changed.
 
 If you want to request that a specific person reviews your pull request, you
-can add an `r?` to the pull request description or in a comment. For example,
-if you want to ask a review to @awesome-reviewer, add
+can add an `r?` to the pull request description or in a comment.
+For example, if you want to ask a review to @awesome-reviewer, add
 
-    r? @awesome-reviewer
+    r?
+    @awesome-reviewer
 
 to the end of the pull request description, and [@rustbot] will assign
-them instead of a random person. This is entirely optional.
+them instead of a random person.
+This is entirely optional.
 
-You can also assign a random reviewer from a specific team by writing `r? rust-lang/groupname`.
-As an example,
-if you were making a diagnostics change,
+You can also assign a random reviewer from a specific team by writing `r?
+rust-lang/groupname`.
+As an example, if you were making a diagnostics change,
 then you could get a reviewer from the diagnostics team by adding:
 
-    r? rust-lang/diagnostics
+    r?
+    rust-lang/diagnostics
 
 For a full list of possible `groupname`s,
 check the `adhoc_groups` section at the [triagebot.toml config file],
@@ -209,15 +218,15 @@ or the list of teams in the [rust-lang teams database].
 >   the author is ready for a review,
 >   and this PR will be queued again in the reviewer's queue.
 
-Please note that the reviewers are humans, who for the most part work on `rustc`
-in their free time. This means that they can take some time to respond and review
-your PR. It also means that reviewers can miss some PRs that are assigned to them.
+Please note that the reviewers are humans, who for the most part work on `rustc` in their free time.
+This means that they can take some time to respond and review your PR.
+It also means that reviewers can miss some PRs that are assigned to them.
 
 To try to move PRs forward, the Triage WG regularly goes through all PRs that
-are waiting for review and haven't been discussed for at least 2 weeks. If you
-don't get a review within 2 weeks, feel free to ask the Triage WG on
-Zulip ([#t-release/triage]). They have knowledge of when to ping, who might be
-on vacation, etc.
+are waiting for review and haven't been discussed for at least 2 weeks.
+If you don't get a review within 2 weeks, feel free to ask the Triage WG on
+Zulip ([#t-release/triage]).
+They have knowledge of when to ping, who might be on vacation, etc.
 
 The reviewer may request some changes using the GitHub code review interface.
 They may also request special procedures for some PRs.
@@ -230,7 +239,8 @@ See [Crater] and [Breaking Changes] chapters for some examples of such procedure
 ### CI
 
 In addition to being reviewed by a human, pull requests are automatically tested,
-thanks to continuous integration (CI). Basically, every time you open and update
+thanks to continuous integration (CI).
+Basically, every time you open and update
 a pull request, CI builds the compiler and tests it against the
 [compiler test suite], and also performs other tests such as checking that
 your pull request is in compliance with Rust's style guidelines.
@@ -240,33 +250,37 @@ without going through a first review cycle, and also helps reviewers stay aware
 of the status of a particular pull request.
 
 Rust has plenty of CI capacity, and you should never have to worry about wasting
-computational resources each time you push a change. It is also perfectly fine
-(and even encouraged!) to use the CI to test your changes if it can help your
-productivity. In particular, we don't recommend running the full `./x test` suite locally,
+computational resources each time you push a change.
+It is also perfectly fine
+(and even encouraged!) to use the CI to test your changes if it can help your productivity.
+In particular, we don't recommend running the full `./x test` suite locally,
 since it takes a very long time to execute.
 
 ### r+
 
 After someone has reviewed your pull request, they will leave an annotation
-on the pull request with an `r+`. It will look something like this:
+on the pull request with an `r+`.
+It will look something like this:
 
     @bors r+
 
-This tells [@bors], our lovable integration bot, that your pull request has
-been approved. The PR then enters the [merge queue], where [@bors]
-will run *all* the tests on *every* platform we support. If it all works out,
-[@bors] will merge your code into `master` and close the pull request.
+This tells [@bors], our lovable integration bot, that your pull request has been approved.
+The PR then enters the [merge queue], where [@bors]
+will run *all* the tests on *every* platform we support.
+If it all works out, [@bors] will merge your code into `master` and close the pull request.
 
 Depending on the scale of the change, you may see a slightly different form of `r+`:
 
     @bors r+ rollup
 
 The additional `rollup` tells [@bors] that this change should always be "rolled up".
-Changes that are rolled up are tested and merged alongside other PRs, to
-speed the process up. Typically only small changes that are expected not to conflict
+Changes that are rolled up are tested and merged alongside other PRs, to speed the process up.
+Typically only small changes that are expected not to conflict
 with one another are marked as "always roll up".
 
-Be patient; this can take a while and the queue can sometimes be long. PRs are never merged by hand.
+Be patient;
+this can take a while and the queue can sometimes be long.
+PRs are never merged by hand.
 
 [@rustbot]: https://github.com/rustbot
 [@bors]: https://github.com/bors
@@ -274,7 +288,8 @@ Be patient; this can take a while and the queue can sometimes be long. PRs are n
 ### Opening a PR
 
 You are now ready to file a pull request (PR)?
-Great! Here are a few points you should be aware of.
+Great!
+Here are a few points you should be aware of.
 
 All pull requests should be filed against the `master` branch,
 unless you know for sure that you should target a different branch.
@@ -283,15 +298,16 @@ Run some style checks before you submit the PR:
 
     ./x test tidy --bless
 
-We recommend to make this check before every pull request (and every new commit
-in a pull request); you can add [git hooks]
-before every push to make sure you never forget to make this check.
+We recommend to make this check before every pull request (and every new commit in a pull request);
+you can add [git hooks] before every push to make sure you never forget to make this check.
 The CI will also run tidy and will fail if tidy fails.
 
 Rust follows a _no merge-commit policy_, meaning, when you encounter merge
-conflicts you are expected to always rebase instead of merging.  E.g. always use
-rebase when bringing the latest changes from the master branch to your feature
-branch. If your PR contains merge commits, it will get marked as `has-merge-commits`.
+conflicts you are expected to always rebase instead of merging.
+E.g.
+always use rebase when bringing the latest changes from the master branch to your feature
+branch.
+If your PR contains merge commits, it will get marked as `has-merge-commits`.
 Once you have removed the merge commits, e.g., through an interactive rebase, you
 should remove the label again:
 
@@ -300,13 +316,14 @@ should remove the label again:
 See [this chapter][labeling] for more details.
 
 If you encounter merge conflicts or when a reviewer asks you to perform some
-changes, your PR will get marked as `S-waiting-on-author`. When you resolve
-them, you should use `@rustbot` to mark it as `S-waiting-on-review`:
+changes, your PR will get marked as `S-waiting-on-author`.
+When you resolve them, you should use `@rustbot` to mark it as `S-waiting-on-review`:
 
     @rustbot ready
 
-GitHub allows [closing issues using keywords][closing-keywords]. This feature
-should be used to keep the issue tracker tidy. However, it is generally preferred
+GitHub allows [closing issues using keywords][closing-keywords].
+This feature should be used to keep the issue tracker tidy.
+However, it is generally preferred
 to put the "closes #123" text in the PR description rather than the issue commit;
 particularly during rebasing, citing the issue number in the commit can "spam"
 the issue in question.
@@ -319,9 +336,10 @@ Please update the PR description while still mentioning the issue somewhere.
 For example, you could write `Fixes (after beta backport) #NNN.`.
 
 As for further actions, please keep a sharp look-out for a PR whose title begins with
-`[beta]` or `[stable]` and which backports the PR in question. When that one gets
-merged, the relevant issue can be closed. The closing comment should mention all
-PRs that were involved. If you don't have the permissions to close the issue, please
+`[beta]` or `[stable]` and which backports the PR in question.
+When that one gets merged, the relevant issue can be closed.
+The closing comment should mention all PRs that were involved.
+If you don't have the permissions to close the issue, please
 leave a comment on the original PR asking the reviewer to close it for you.
 
 [labeling]: ./rustbot.md#issue-relabeling
@@ -330,11 +348,13 @@ leave a comment on the original PR asking the reviewer to close it for you.
 ### Reverting a PR
 
 When a PR leads to miscompile, significant performance regressions, or other critical issues, we may
-want to revert that PR with a regression test case. You can also check out the [revert policy] on
+want to revert that PR with a regression test case.
+You can also check out the [revert policy] on
 Forge docs (which is mainly targeted for reviewers, but contains useful info for PR authors too).
 
 If the PR contains huge changes, it can be challenging to revert, making it harder to review
-incremental fixes in subsequent updates. Or if certain code in that PR is heavily depended upon by
+incremental fixes in subsequent updates.
+Or if certain code in that PR is heavily depended upon by
 subsequent PRs, reverting it can become difficult.
 
 In such cases, we can identify the problematic code and disable it for some input, as shown in [#128271][#128271].
@@ -352,7 +372,8 @@ This section has moved to ["Using External Repositories"](./external-repos.md).
 
 ## Writing documentation
 
-Documentation improvements are very welcome. The source of `doc.rust-lang.org`
+Documentation improvements are very welcome.
+The source of `doc.rust-lang.org`
 is located in [`src/doc`] in the tree, and standard API documentation is generated
 from the source code itself (e.g. [`library/std/src/lib.rs`][std-root]). Documentation pull requests
 function in the same way as other pull requests.
@@ -370,8 +391,8 @@ Results should appear in `build/host/doc`, as well as automatically open in your
 See [Building Documentation](./building/compiler-documenting.md#building-documentation) for more
 information.
 
-You can also use `rustdoc` directly to check small fixes. For example,
-`rustdoc src/doc/reference.md` will render reference to `doc/reference.html`.
+You can also use `rustdoc` directly to check small fixes.
+For example, `rustdoc src/doc/reference.md` will render reference to `doc/reference.html`.
 The CSS might be messed up, but you can verify that the HTML is right.
 
 Please notice that we don't accept typography/spellcheck fixes to **internal documentation**
@@ -389,7 +410,8 @@ There are issues for beginners and advanced compiler devs alike!
 Just a few things to keep in mind:
 
 - Please try to avoid overly long lines and use semantic line breaks (where you break the line after each sentence).
-  There is no strict limit on line lengths; let the sentence or part of the sentence flow to its proper end on the same line.
+  There is no strict limit on line lengths;
+  let the sentence or part of the sentence flow to its proper end on the same line.
 
   You can use a tool in ci/sembr to help with this.
   Its help output can be seen with this command:
@@ -406,8 +428,7 @@ Just a few things to keep in mind:
     as change is a constant across the project.
 
   - The date the comment was added, e.g. instead of writing _"Currently, ..."_
-    or _"As of now, ..."_,
-    consider adding the date, in one of the following formats:
+    or _"As of now, ..."_, consider adding the date, in one of the following formats:
     - Jan 2021
     - January 2021
     - jan 2021
@@ -417,8 +438,7 @@ Just a few things to keep in mind:
     that generates a monthly report showing those that are over 6 months old
     ([example](https://github.com/rust-lang/rustc-dev-guide/issues/2052)).
 
-    For the action to pick the date,
-    add a special annotation before specifying the date:
+    For the action to pick the date, add a special annotation before specifying the date:
 
     ```md
     <!-- date-check --> Apr 2025
@@ -442,8 +462,7 @@ Just a few things to keep in mind:
     outdated.
 
 - If a text grows rather long (more than a few page scrolls) or complicated (more than four
-  subsections),
-  it might benefit from having a Table of Contents at the beginning,
+  subsections), it might benefit from having a Table of Contents at the beginning,
   which you can auto-generate by including the `<!-- toc -->` marker at the top.
 
 #### ⚠️ Note: Where to contribute `rustc-dev-guide` changes

From 2dee8525ac070eb1136d6df149ede247515416c7 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Tue, 4 Nov 2025 21:15:49 +0200
Subject: [PATCH 17/19] misc improvements to src/contributing.md

---
 src/doc/rustc-dev-guide/src/contributing.md | 43 ++++++++++-----------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/src/doc/rustc-dev-guide/src/contributing.md b/src/doc/rustc-dev-guide/src/contributing.md
index f74b344385640..cd6ebc90b7cb8 100644
--- a/src/doc/rustc-dev-guide/src/contributing.md
+++ b/src/doc/rustc-dev-guide/src/contributing.md
@@ -4,7 +4,7 @@
 
 While bugs are unfortunate, they're a reality in software.
 We can't fix what we don't know about, so please report liberally.
-If you're not sure if something is a bug or not, feel free to file a bug anyway.
+If you're not sure if something is a bug, feel free to open an issue anyway.
 
 **If you believe reporting your bug publicly represents a security risk to Rust users,
 please follow our [instructions for reporting security vulnerabilities][vuln]**.
@@ -77,12 +77,12 @@ Example of things that might require MCPs include major refactorings, changes
 to important types, or important changes to how the compiler does something, or
 smaller user-facing changes.
 
-**When in doubt, ask on [Zulip].
+**When in doubt, ask [on Zulip].
 It would be a shame to put a lot of work
 into a PR that ends up not getting merged!** [See this document][mcpinfo] for more info on MCPs.
 
 [mcpinfo]: https://forge.rust-lang.org/compiler/proposals-and-stabilization.html#how-do-i-submit-an-mcp
-[zulip]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler
+[on Zulip]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler
 
 ### Performance
 
@@ -109,10 +109,10 @@ The numbers are reported
 
 Pull requests (or PRs for short) are the primary mechanism we use to change Rust.
 GitHub itself has some [great documentation][about-pull-requests] on using the Pull Request feature.
-We use the "fork and pull" model [described here][development-models],
+We use the ["fork and pull" model][development-models],
 where contributors push changes to their personal fork and create pull requests to
 bring those changes into the source repository.
-We have more info about how to use git when contributing to Rust under [the git section](./git.md).
+We have [a chapter](git.md) on how to use Git when contributing to Rust.
 
 > **Advice for potentially large, complex, cross-cutting and/or very domain-specific changes**
 >
@@ -178,22 +178,19 @@ to review your request based on which files you changed.
 
 If you want to request that a specific person reviews your pull request, you
 can add an `r?` to the pull request description or in a comment.
-For example, if you want to ask a review to @awesome-reviewer, add
+For example, if you want to request a review from @awesome-reviewer,
+add the following to the end of the pull request description:
 
-    r?
-    @awesome-reviewer
+    r? @awesome-reviewer
 
-to the end of the pull request description, and [@rustbot] will assign
-them instead of a random person.
+[@rustbot] will then assign the PR to that reviewer instead of a random person.
 This is entirely optional.
 
-You can also assign a random reviewer from a specific team by writing `r?
-rust-lang/groupname`.
+You can also assign a random reviewer from a specific team by writing `r? rust-lang/groupname`.
 As an example, if you were making a diagnostics change,
-then you could get a reviewer from the diagnostics team by adding:
+you could get a reviewer from the diagnostics team by adding:
 
-    r?
-    rust-lang/diagnostics
+    r? rust-lang/diagnostics
 
 For a full list of possible `groupname`s,
 check the `adhoc_groups` section at the [triagebot.toml config file],
@@ -275,12 +272,12 @@ Depending on the scale of the change, you may see a slightly different form of `
 
 The additional `rollup` tells [@bors] that this change should always be "rolled up".
 Changes that are rolled up are tested and merged alongside other PRs, to speed the process up.
-Typically only small changes that are expected not to conflict
+Typically, only small changes that are expected not to conflict
 with one another are marked as "always roll up".
 
 Be patient;
 this can take a while and the queue can sometimes be long.
-PRs are never merged by hand.
+Also, note that PRs are never merged by hand.
 
 [@rustbot]: https://github.com/rustbot
 [@bors]: https://github.com/bors
@@ -302,11 +299,11 @@ We recommend to make this check before every pull request (and every new commit
 you can add [git hooks] before every push to make sure you never forget to make this check.
 The CI will also run tidy and will fail if tidy fails.
 
-Rust follows a _no merge-commit policy_, meaning, when you encounter merge
-conflicts you are expected to always rebase instead of merging.
-E.g.
-always use rebase when bringing the latest changes from the master branch to your feature
-branch.
+Rust follows a _no merge-commit policy_,
+meaning that when you encounter merge conflicts,
+you are expected to always rebase instead of merging.
+For example,
+always use rebase when bringing the latest changes from the master branch to your feature branch.
 If your PR contains merge commits, it will get marked as `has-merge-commits`.
 Once you have removed the merge commits, e.g., through an interactive rebase, you
 should remove the label again:
@@ -424,7 +421,7 @@ Just a few things to keep in mind:
   and/or a reason so that the reader knows how much to trust the information.
   Aim to provide a reasonable amount of context, possibly including but not limited to:
 
-  - A reason for why the data may be out of date other than "change",
+  - A reason for why the text may be out of date other than "change",
     as change is a constant across the project.
 
   - The date the comment was added, e.g. instead of writing _"Currently, ..."_

From 56757ab89768801c6d7de559b8886f7b9b357613 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Tue, 4 Nov 2025 23:27:09 +0200
Subject: [PATCH 18/19] sembr: adjust after using src/contributing.md

---
 src/doc/rustc-dev-guide/ci/sembr/src/main.rs | 28 +++++++++++++++++---
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
index edf5b6401cd9b..c056f68c31d78 100644
--- a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
+++ b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs
@@ -27,7 +27,7 @@ static REGEX_IGNORE: LazyLock<Regex> =
 static REGEX_IGNORE_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)$").unwrap());
 static REGEX_IGNORE_LINK_TARGETS: LazyLock<Regex> =
     LazyLock::new(|| Regex::new(r"^\[.+\]: ").unwrap());
-static REGEX_SPLIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)\s+").unwrap());
+static REGEX_SPLIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\.|[^r]\?|;|!)\s+").unwrap());
 
 fn main() -> Result<()> {
     let cli = Cli::parse();
@@ -92,7 +92,7 @@ fn display(header: &str, paths: &[PathBuf]) {
 
 fn ignore(line: &str, in_code_block: bool) -> bool {
     in_code_block
-        || line.contains("e.g.")
+        || line.to_lowercase().contains("e.g.")
         || line.contains("i.e.")
         || line.contains('|')
         || line.trim_start().starts_with('>')
@@ -174,7 +174,9 @@ fn test_sembr() {
 must! be; split?  and.   normalizes space
 1. ignore numbered
 ignore | tables
-ignore e.g. and i.e.
+ignore e.g. and
+ignore i.e. and
+ignore E.g. too
 - ignore. list
 * ignore. list
 ```
@@ -191,7 +193,9 @@ and.
 normalizes space
 1. ignore numbered
 ignore | tables
-ignore e.g. and i.e.
+ignore e.g. and
+ignore i.e. and
+ignore E.g. too
 - ignore. list
 * ignore. list
 ```
@@ -269,3 +273,19 @@ hi again.
     let processed = lengthen_lines(&processed, 50);
     assert_eq!(expected, processed);
 }
+
+#[test]
+fn test_sembr_question_mark() {
+    let original = "\
+o? whatever
+r? @reviewer
+ r? @reviewer
+";
+    let expected = "\
+o?
+whatever
+r? @reviewer
+ r? @reviewer
+";
+    assert_eq!(expected, comply(original));
+}

From 743b803fae62fbc7399d0e91a035758f4ee1dd92 Mon Sep 17 00:00:00 2001
From: Tshepang Mbambo <hopsi@tuta.io>
Date: Tue, 4 Nov 2025 23:30:58 +0200
Subject: [PATCH 19/19] date-check src/contributing.md

Had a look due to reviewing sembr changes
---
 src/doc/rustc-dev-guide/src/contributing.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/doc/rustc-dev-guide/src/contributing.md b/src/doc/rustc-dev-guide/src/contributing.md
index cd6ebc90b7cb8..b8614e5a90f0c 100644
--- a/src/doc/rustc-dev-guide/src/contributing.md
+++ b/src/doc/rustc-dev-guide/src/contributing.md
@@ -438,20 +438,20 @@ Just a few things to keep in mind:
     For the action to pick the date, add a special annotation before specifying the date:
 
     ```md
-    <!-- date-check --> Apr 2025
+    <!-- date-check --> Nov 2025
     ```
 
     Example:
 
     ```md
-    As of <!-- date-check --> Apr 2025, the foo did the bar.
+    As of <!-- date-check --> Nov 2025, the foo did the bar.
     ```
 
     For cases where the date should not be part of the visible rendered output,
     use the following instead:
 
     ```md
-    <!-- date-check: Apr 2025 -->
+    <!-- date-check: Nov 2025 -->
     ```
 
   - A link to a relevant WG, tracking issue, `rustc` rustdoc page, or similar, that may provide