Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions compiler/rustc_ast/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,27 +77,35 @@ pub struct FormatArguments {
arguments: Vec<FormatArgument>,
num_unnamed_args: usize,
num_explicit_args: usize,
names: FxHashMap<Symbol, usize>,
explicit_names: FxHashMap<Symbol, usize>,
}

impl FormatArguments {
pub fn new() -> Self {
Self {
arguments: Vec::new(),
names: FxHashMap::default(),
explicit_names: FxHashMap::default(),
num_unnamed_args: 0,
num_explicit_args: 0,
}
}

pub fn add(&mut self, arg: FormatArgument) -> usize {
let index = self.arguments.len();
if let Some(name) = arg.kind.ident() {
self.names.insert(name.name, index);
} else if self.names.is_empty() {
// Only count the unnamed args before the first named arg.
// (Any later ones are errors.)
self.num_unnamed_args += 1;
match arg.kind {
FormatArgumentKind::Normal => {
// Only count the unnamed args before the first named arg.
// (Any later ones are errors.)
if self.explicit_names.is_empty() {
self.num_unnamed_args += 1;
}
}
FormatArgumentKind::Named(ident) => {
self.explicit_names.insert(ident.name, index);
}
FormatArgumentKind::Captured(_) => {
// Don't record the name yet, to keep duplicate captures until AST->HIR lowering.
}
}
if !matches!(arg.kind, FormatArgumentKind::Captured(..)) {
// This is an explicit argument.
Expand All @@ -113,8 +121,8 @@ impl FormatArguments {
index
}

pub fn by_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> {
let i = *self.names.get(&name)?;
pub fn by_explicit_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> {
let i = *self.explicit_names.get(&name)?;
Some((i, &self.arguments[i]))
}

Expand Down
80 changes: 78 additions & 2 deletions compiler/rustc_ast_lowering/src/format.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use std::borrow::Cow;

use rustc_ast::*;
use rustc_data_structures::fx::FxIndexMap;
use rustc_hir as hir;
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
use rustc_hir::def::{DefKind, Res};
use rustc_hir::{self as hir};
use rustc_session::config::FmtDebug;
use rustc_span::{ByteSymbol, DesugaringKind, Ident, Span, Symbol, sym};

Expand All @@ -25,6 +26,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
fmt = flatten_format_args(fmt);
fmt = self.inline_literals(fmt);
}
fmt = self.dedup_captured_places(fmt);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The de-duplication happens here as a separate pass, heavily based on the unstable pass to flatten nested format_args! and inline literals.

expand_format_args(self, sp, &fmt, allow_const)
}

Expand Down Expand Up @@ -137,6 +139,80 @@ impl<'hir> LoweringContext<'_, 'hir> {

fmt
}

/// De-duplicate implicit captures of identifiers that refer to places.
///
/// Turns
///
/// `format_args!("Hello, {hello}, {hello}!")`
///
/// into
///
/// `format_args!("Hello, {hello}, {hello}!", hello=hello)`.
fn dedup_captured_places<'fmt>(&self, mut fmt: Cow<'fmt, FormatArgs>) -> Cow<'fmt, FormatArgs> {
use std::collections::hash_map::Entry;

let mut deduped_arg_indices: FxHashMap<Symbol, usize> = FxHashMap::default();
let mut remove = vec![false; fmt.arguments.all_args().len()];
let mut deduped_anything = false;

// Re-use arguments for placeholders capturing the same local/static identifier.
for i in 0..fmt.template.len() {
if let FormatArgsPiece::Placeholder(placeholder) = &fmt.template[i]
&& let Ok(arg_index) = placeholder.argument.index
&& let arg = &fmt.arguments.all_args()[arg_index]
&& let FormatArgumentKind::Captured(ident) = arg.kind
{
match deduped_arg_indices.entry(ident.name) {
Entry::Occupied(occupied_entry) => {
// We've seen this identifier before, and it's dedupable. Point the
// placeholder at the recorded arg index, cloning `fmt` if necessary.
let piece = &mut fmt.to_mut().template[i];
let FormatArgsPiece::Placeholder(placeholder) = piece else {
unreachable!();
};
placeholder.argument.index = Ok(*occupied_entry.get());
remove[arg_index] = true;
deduped_anything = true;
}
Entry::Vacant(vacant_entry) => {
// This is the first time we've seen a captured identifier. If it's a local
// or static, note the argument index so other occurrences can be deduped.
if let Some(partial_res) = self.resolver.partial_res_map.get(&arg.expr.id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be the first use of self.resolver in format_args. I understand that's the entire point of this PR, but that's exactly the thing that I think is problematic. This is what would make format_args too magical for me.

Copy link
Member

@joshtriplett joshtriplett Feb 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a separate optimization pass, which is what makes it seem reasonable to me. That said, it would be helpful to understand better what aspect of this optimization pass you see as a maintenance issue.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In particular, fomat_args already has magic optimization passes, doesn't it? If format arguments are constants, they can get "flattened".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ouch, that's messy... though at least it seems to only affect whether a program compiles, not how it behaves?

&& let Some(res) = partial_res.full_res()
&& matches!(res, Res::Local(_) | Res::Def(DefKind::Static { .. }, _))
{
vacant_entry.insert(arg_index);
}
}
}
}
}

// Remove the arguments that were de-duplicated.
if deduped_anything {
let fmt = fmt.to_mut();

// Drop all the arguments that are marked for removal.
let mut remove_it = remove.iter();
fmt.arguments.all_args_mut().retain(|_| remove_it.next() != Some(&true));

// Calculate the mapping of old to new indexes for the remaining arguments.
let index_map: Vec<usize> = remove
.into_iter()
.scan(0, |i, remove| {
let mapped = *i;
*i += !remove as usize;
Some(mapped)
})
.collect();

// Correct the indexes that refer to arguments that have shifted position.
for_all_argument_indexes(&mut fmt.template, |index| *index = index_map[*index]);
}
Comment on lines +192 to +212
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was all copy-pasted from inline_literals. It should either be factored out or made more specialized. E.g. here it doesn't have to iterate through all arguments, since captured arguments should always be last, I think? Probably there's plenty of other things it could do to be more clever.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dianne Captured args aren't always last:

let x = 42;
println!("{x} {} {x}", "==");

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By last, I mean in fmt.arguments's Vec. To my understanding, positional arguments come first, then named arguments, and then captured arguments are last. Since only captured arguments would be removed by the de-duplication pass, we could do something like (ignoring privacy, readability, and other practical concerns)

fmt
    .arguments
    .all_args_mut()
    .extract_if(fmt.arguments.num_explicit_args.., |_| /* ... */)
    .for_each(drop)

It feels excessive, and it would probably need a helper on rustc_ast::FormatArguments to avoid exposing to the rest of rustc that arguments are sorted in that way, but in theory it would avoid some iteration.


fmt
}
}

/// Flattens nested `format_args!()` into one.
Expand Down
11 changes: 5 additions & 6 deletions compiler/rustc_builtin_macros/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ fn parse_args<'a>(ecx: &ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a,
p.bump();
p.expect(exp!(Eq))?;
let expr = p.parse_expr()?;
if let Some((_, prev)) = args.by_name(ident.name) {
if let Some((_, prev)) = args.by_explicit_name(ident.name) {
ecx.dcx().emit_err(errors::FormatDuplicateArg {
span: ident.span,
prev: prev.kind.ident().unwrap().span,
Expand Down Expand Up @@ -396,12 +396,11 @@ fn make_format_args(
}
Name(name, span) => {
let name = Symbol::intern(name);
if let Some((index, _)) = args.by_name(name) {
if let Some((index, _)) = args.by_explicit_name(name) {
// Name found in `args`, so we resolve it to its index.
if index < args.explicit_args().len() {
// Mark it as used, if it was an explicit argument.
used[index] = true;
}
assert!(index < args.explicit_args().len());
// Mark it as used, as this is an explicit argument.
used[index] = true;
Ok(index)
} else {
// Name not found in `args`, so we add it as an implicitly captured argument.
Expand Down
81 changes: 81 additions & 0 deletions tests/pretty/hir-format-args-duplicated-captures.pp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
//! Test for <https://github.com/rust-lang/rust/issues/145739>: identifiers referring to places
//! should have their captures de-duplicated by `format_args!`, but identifiers referring to values
//! should not be de-duplicated.
extern crate std;
#[prelude_import]
use ::std::prelude::rust_2015::*;
//@ pretty-compare-only
//@ pretty-mode:hir
//@ pp-exact:hir-format-args-duplicated-captures.pp

const X: i32 = 42;

struct Struct;

static STATIC: i32 = 0;

fn main() {
// Consts and constructors refer to values. Whether we construct them multiple times or just
// once can be observed in some cases, so we don't de-duplicate them.
let _ =
{
super let args = (&X, &X);
super let args =
[format_argument::new_display(args.0),
format_argument::new_display(args.1)];
unsafe { format_arguments::new(b"\xc0\x01 \xc0\x00", &args) }
};
let _ =
{
super let args = (&Struct, &Struct);
super let args =
[format_argument::new_display(args.0),
format_argument::new_display(args.1)];
unsafe { format_arguments::new(b"\xc0\x01 \xc0\x00", &args) }
};

// Variables and statics refer to places. We can de-duplicate without an observable difference.
let x = 3;
let _ =
{
super let args = (&STATIC,);
super let args = [format_argument::new_display(args.0)];
unsafe {
format_arguments::new(b"\xc0\x01 \xc8\x00\x00\x00", &args)
}
};
let _ =
{
super let args = (&x,);
super let args = [format_argument::new_display(args.0)];
unsafe {
format_arguments::new(b"\xc0\x01 \xc8\x00\x00\x00", &args)
}
};

// We don't de-duplicate widths or precisions since de-duplication can be observed.
let _ =
{
super let args = (&x, &x, &x);
super let args =
[format_argument::new_display(args.0),
format_argument::from_usize(args.1),
format_argument::from_usize(args.2)];
Comment on lines +56 to +63
Copy link
Contributor Author

@dianne dianne Feb 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could get this down to only capturing &x once, but it'd still need two separate from_usize calls to always be the same as providing the width two separate times. This is similar to my understanding of why {a.b} is tricky to fully de-duplicate: if x has an impure/pathological impl Deref<Target = usize>, the deref coercion of &x to a &usize could return different values each time (or modify global state, etc.).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't try to do that optimization in this PR since I think de-duplicating the from_usize calls would mean adding hacks to more central parts of the format_args! expansion.

unsafe {
format_arguments::new(b"\xd3 \x00\x00h\x01\x00\x01 \xdb \x00\x00h\x02\x00\x00\x00\x00",
&args)
}
};
let _ =
{
super let args = (&0.0, &x, &x);
super let args =
[format_argument::new_display(args.0),
format_argument::from_usize(args.1),
format_argument::from_usize(args.2)];
unsafe {
format_arguments::new(b"\xe5 \x00\x00p\x01\x00\x01 \xed \x00\x00p\x02\x00\x00\x00\x00",
&args)
}
};
}
28 changes: 28 additions & 0 deletions tests/pretty/hir-format-args-duplicated-captures.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//! Test for <https://github.com/rust-lang/rust/issues/145739>: identifiers referring to places
//! should have their captures de-duplicated by `format_args!`, but identifiers referring to values
//! should not be de-duplicated.
//@ pretty-compare-only
//@ pretty-mode:hir
//@ pp-exact:hir-format-args-duplicated-captures.pp

const X: i32 = 42;

struct Struct;

static STATIC: i32 = 0;

fn main() {
// Consts and constructors refer to values. Whether we construct them multiple times or just
// once can be observed in some cases, so we don't de-duplicate them.
let _ = format_args!("{X} {X}");
let _ = format_args!("{Struct} {Struct}");

// Variables and statics refer to places. We can de-duplicate without an observable difference.
let x = 3;
let _ = format_args!("{STATIC} {STATIC}");
let _ = format_args!("{x} {x}");

// We don't de-duplicate widths or precisions since de-duplication can be observed.
let _ = format_args!("{x:x$} {x:x$}");
let _ = format_args!("{0:.x$} {0:.x$}", 0.0);
}
37 changes: 37 additions & 0 deletions tests/ui/deref/format-args-capture-deref-coercion.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//! Test for <https://github.com/rust-lang/rust/issues/145739>: width and precision arguments
//! implicitly captured by `format_args!` should behave as if they were written individually.
//@ run-pass
use std::ops::Deref;
use std::sync::atomic::{AtomicUsize, Ordering};

static DEREF_COUNTER: AtomicUsize = AtomicUsize::new(0);

struct LogDeref;
impl Deref for LogDeref {
type Target = usize;
fn deref(&self) -> &usize {
if DEREF_COUNTER.fetch_add(1, Ordering::Relaxed).is_multiple_of(2) {
&2
} else {
&3
}
}
}

fn main() {
assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 0);

let x = 0.0;

let _ = format_args!("{x:LogDeref$} {x:LogDeref$}");
// Increased by 2, as `&LogDeref` is coerced to a `&usize` twice.
assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 2);

let _ = format_args!("{x:.LogDeref$} {x:.LogDeref$}");
// Increased by 2, as `&LogDeref` is coerced to a `&usize` twice.
assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 4);

let _ = format_args!("{x:LogDeref$} {x:.LogDeref$}");
// Increased by 2, as `&LogDeref` is coerced to a `&usize` twice.
assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 6);
}
Loading
Loading