-
-
Notifications
You must be signed in to change notification settings - Fork 14.6k
format_args!: only de-duplicate captured identifiers that refer to places
#152480
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,9 @@ | ||
| use std::borrow::Cow; | ||
|
|
||
| use rustc_ast::*; | ||
| use rustc_data_structures::fx::FxIndexMap; | ||
| use rustc_hir as hir; | ||
| use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; | ||
| use rustc_hir::def::{DefKind, Res}; | ||
| use rustc_hir::{self as hir}; | ||
| use rustc_session::config::FmtDebug; | ||
| use rustc_span::{ByteSymbol, DesugaringKind, Ident, Span, Symbol, sym}; | ||
|
|
||
|
|
@@ -25,6 +26,7 @@ impl<'hir> LoweringContext<'_, 'hir> { | |
| fmt = flatten_format_args(fmt); | ||
| fmt = self.inline_literals(fmt); | ||
| } | ||
| fmt = self.dedup_captured_places(fmt); | ||
| expand_format_args(self, sp, &fmt, allow_const) | ||
| } | ||
|
|
||
|
|
@@ -137,6 +139,80 @@ impl<'hir> LoweringContext<'_, 'hir> { | |
|
|
||
| fmt | ||
| } | ||
|
|
||
| /// De-duplicate implicit captures of identifiers that refer to places. | ||
| /// | ||
| /// Turns | ||
| /// | ||
| /// `format_args!("Hello, {hello}, {hello}!")` | ||
| /// | ||
| /// into | ||
| /// | ||
| /// `format_args!("Hello, {hello}, {hello}!", hello=hello)`. | ||
| fn dedup_captured_places<'fmt>(&self, mut fmt: Cow<'fmt, FormatArgs>) -> Cow<'fmt, FormatArgs> { | ||
| use std::collections::hash_map::Entry; | ||
|
|
||
| let mut deduped_arg_indices: FxHashMap<Symbol, usize> = FxHashMap::default(); | ||
| let mut remove = vec![false; fmt.arguments.all_args().len()]; | ||
| let mut deduped_anything = false; | ||
|
|
||
| // Re-use arguments for placeholders capturing the same local/static identifier. | ||
| for i in 0..fmt.template.len() { | ||
| if let FormatArgsPiece::Placeholder(placeholder) = &fmt.template[i] | ||
| && let Ok(arg_index) = placeholder.argument.index | ||
| && let arg = &fmt.arguments.all_args()[arg_index] | ||
| && let FormatArgumentKind::Captured(ident) = arg.kind | ||
| { | ||
| match deduped_arg_indices.entry(ident.name) { | ||
| Entry::Occupied(occupied_entry) => { | ||
| // We've seen this identifier before, and it's dedupable. Point the | ||
| // placeholder at the recorded arg index, cloning `fmt` if necessary. | ||
| let piece = &mut fmt.to_mut().template[i]; | ||
| let FormatArgsPiece::Placeholder(placeholder) = piece else { | ||
| unreachable!(); | ||
| }; | ||
| placeholder.argument.index = Ok(*occupied_entry.get()); | ||
| remove[arg_index] = true; | ||
| deduped_anything = true; | ||
| } | ||
| Entry::Vacant(vacant_entry) => { | ||
| // This is the first time we've seen a captured identifier. If it's a local | ||
| // or static, note the argument index so other occurrences can be deduped. | ||
| if let Some(partial_res) = self.resolver.partial_res_map.get(&arg.expr.id) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would be the first use of
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a separate optimization pass, which is what makes it seem reasonable to me. That said, it would be helpful to understand better what aspect of this optimization pass you see as a maintenance issue.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In particular, fomat_args already has magic optimization passes, doesn't it? If format arguments are constants, they can get "flattened".
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, and these optimization passes already has user-visible effects: https://doc.rust-lang.org/1.93.1/reference/expressions.html#r-expr.super-macros.format_args.super-temporaries
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ouch, that's messy... though at least it seems to only affect whether a program compiles, not how it behaves? |
||
| && let Some(res) = partial_res.full_res() | ||
| && matches!(res, Res::Local(_) | Res::Def(DefKind::Static { .. }, _)) | ||
| { | ||
| vacant_entry.insert(arg_index); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Remove the arguments that were de-duplicated. | ||
| if deduped_anything { | ||
| let fmt = fmt.to_mut(); | ||
|
|
||
| // Drop all the arguments that are marked for removal. | ||
| let mut remove_it = remove.iter(); | ||
| fmt.arguments.all_args_mut().retain(|_| remove_it.next() != Some(&true)); | ||
|
|
||
| // Calculate the mapping of old to new indexes for the remaining arguments. | ||
| let index_map: Vec<usize> = remove | ||
| .into_iter() | ||
| .scan(0, |i, remove| { | ||
| let mapped = *i; | ||
| *i += !remove as usize; | ||
| Some(mapped) | ||
| }) | ||
| .collect(); | ||
|
|
||
| // Correct the indexes that refer to arguments that have shifted position. | ||
| for_all_argument_indexes(&mut fmt.template, |index| *index = index_map[*index]); | ||
| } | ||
|
Comment on lines
+192
to
+212
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was all copy-pasted from
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dianne Captured args aren't always last: let x = 42;
println!("{x} {} {x}", "==");
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By last, I mean in fmt
.arguments
.all_args_mut()
.extract_if(fmt.arguments.num_explicit_args.., |_| /* ... */)
.for_each(drop)It feels excessive, and it would probably need a helper on |
||
|
|
||
| fmt | ||
| } | ||
| } | ||
|
|
||
| /// Flattens nested `format_args!()` into one. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| //! Test for <https://github.com/rust-lang/rust/issues/145739>: identifiers referring to places | ||
| //! should have their captures de-duplicated by `format_args!`, but identifiers referring to values | ||
| //! should not be de-duplicated. | ||
| extern crate std; | ||
| #[prelude_import] | ||
| use ::std::prelude::rust_2015::*; | ||
| //@ pretty-compare-only | ||
| //@ pretty-mode:hir | ||
| //@ pp-exact:hir-format-args-duplicated-captures.pp | ||
|
|
||
| const X: i32 = 42; | ||
|
|
||
| struct Struct; | ||
|
|
||
| static STATIC: i32 = 0; | ||
|
|
||
| fn main() { | ||
| // Consts and constructors refer to values. Whether we construct them multiple times or just | ||
| // once can be observed in some cases, so we don't de-duplicate them. | ||
| let _ = | ||
| { | ||
| super let args = (&X, &X); | ||
| super let args = | ||
| [format_argument::new_display(args.0), | ||
| format_argument::new_display(args.1)]; | ||
| unsafe { format_arguments::new(b"\xc0\x01 \xc0\x00", &args) } | ||
| }; | ||
| let _ = | ||
| { | ||
| super let args = (&Struct, &Struct); | ||
| super let args = | ||
| [format_argument::new_display(args.0), | ||
| format_argument::new_display(args.1)]; | ||
| unsafe { format_arguments::new(b"\xc0\x01 \xc0\x00", &args) } | ||
| }; | ||
|
|
||
| // Variables and statics refer to places. We can de-duplicate without an observable difference. | ||
| let x = 3; | ||
| let _ = | ||
| { | ||
| super let args = (&STATIC,); | ||
| super let args = [format_argument::new_display(args.0)]; | ||
| unsafe { | ||
| format_arguments::new(b"\xc0\x01 \xc8\x00\x00\x00", &args) | ||
| } | ||
| }; | ||
| let _ = | ||
| { | ||
| super let args = (&x,); | ||
| super let args = [format_argument::new_display(args.0)]; | ||
| unsafe { | ||
| format_arguments::new(b"\xc0\x01 \xc8\x00\x00\x00", &args) | ||
| } | ||
| }; | ||
|
|
||
| // We don't de-duplicate widths or precisions since de-duplication can be observed. | ||
| let _ = | ||
| { | ||
| super let args = (&x, &x, &x); | ||
| super let args = | ||
| [format_argument::new_display(args.0), | ||
| format_argument::from_usize(args.1), | ||
| format_argument::from_usize(args.2)]; | ||
|
Comment on lines
+56
to
+63
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we could get this down to only capturing
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't try to do that optimization in this PR since I think de-duplicating the |
||
| unsafe { | ||
| format_arguments::new(b"\xd3 \x00\x00h\x01\x00\x01 \xdb \x00\x00h\x02\x00\x00\x00\x00", | ||
| &args) | ||
| } | ||
| }; | ||
| let _ = | ||
| { | ||
| super let args = (&0.0, &x, &x); | ||
| super let args = | ||
| [format_argument::new_display(args.0), | ||
| format_argument::from_usize(args.1), | ||
| format_argument::from_usize(args.2)]; | ||
| unsafe { | ||
| format_arguments::new(b"\xe5 \x00\x00p\x01\x00\x01 \xed \x00\x00p\x02\x00\x00\x00\x00", | ||
| &args) | ||
| } | ||
| }; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| //! Test for <https://github.com/rust-lang/rust/issues/145739>: identifiers referring to places | ||
| //! should have their captures de-duplicated by `format_args!`, but identifiers referring to values | ||
| //! should not be de-duplicated. | ||
| //@ pretty-compare-only | ||
| //@ pretty-mode:hir | ||
| //@ pp-exact:hir-format-args-duplicated-captures.pp | ||
|
|
||
| const X: i32 = 42; | ||
|
|
||
| struct Struct; | ||
|
|
||
| static STATIC: i32 = 0; | ||
|
|
||
| fn main() { | ||
| // Consts and constructors refer to values. Whether we construct them multiple times or just | ||
| // once can be observed in some cases, so we don't de-duplicate them. | ||
| let _ = format_args!("{X} {X}"); | ||
| let _ = format_args!("{Struct} {Struct}"); | ||
|
|
||
| // Variables and statics refer to places. We can de-duplicate without an observable difference. | ||
| let x = 3; | ||
| let _ = format_args!("{STATIC} {STATIC}"); | ||
| let _ = format_args!("{x} {x}"); | ||
|
|
||
| // We don't de-duplicate widths or precisions since de-duplication can be observed. | ||
| let _ = format_args!("{x:x$} {x:x$}"); | ||
| let _ = format_args!("{0:.x$} {0:.x$}", 0.0); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| //! Test for <https://github.com/rust-lang/rust/issues/145739>: width and precision arguments | ||
| //! implicitly captured by `format_args!` should behave as if they were written individually. | ||
| //@ run-pass | ||
| use std::ops::Deref; | ||
| use std::sync::atomic::{AtomicUsize, Ordering}; | ||
|
|
||
| static DEREF_COUNTER: AtomicUsize = AtomicUsize::new(0); | ||
|
|
||
| struct LogDeref; | ||
| impl Deref for LogDeref { | ||
| type Target = usize; | ||
| fn deref(&self) -> &usize { | ||
| if DEREF_COUNTER.fetch_add(1, Ordering::Relaxed).is_multiple_of(2) { | ||
| &2 | ||
| } else { | ||
| &3 | ||
| } | ||
| } | ||
| } | ||
|
|
||
| fn main() { | ||
| assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 0); | ||
|
|
||
| let x = 0.0; | ||
|
|
||
| let _ = format_args!("{x:LogDeref$} {x:LogDeref$}"); | ||
| // Increased by 2, as `&LogDeref` is coerced to a `&usize` twice. | ||
| assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 2); | ||
|
|
||
| let _ = format_args!("{x:.LogDeref$} {x:.LogDeref$}"); | ||
| // Increased by 2, as `&LogDeref` is coerced to a `&usize` twice. | ||
| assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 4); | ||
|
|
||
| let _ = format_args!("{x:LogDeref$} {x:.LogDeref$}"); | ||
| // Increased by 2, as `&LogDeref` is coerced to a `&usize` twice. | ||
| assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 6); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The de-duplication happens here as a separate pass, heavily based on the unstable pass to flatten nested
format_args!and inline literals.