Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions compiler/rustc_ast/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,27 +77,35 @@ pub struct FormatArguments {
arguments: Vec<FormatArgument>,
num_unnamed_args: usize,
num_explicit_args: usize,
names: FxHashMap<Symbol, usize>,
explicit_names: FxHashMap<Symbol, usize>,
}

impl FormatArguments {
pub fn new() -> Self {
Self {
arguments: Vec::new(),
names: FxHashMap::default(),
explicit_names: FxHashMap::default(),
num_unnamed_args: 0,
num_explicit_args: 0,
}
}

pub fn add(&mut self, arg: FormatArgument) -> usize {
let index = self.arguments.len();
if let Some(name) = arg.kind.ident() {
self.names.insert(name.name, index);
} else if self.names.is_empty() {
// Only count the unnamed args before the first named arg.
// (Any later ones are errors.)
self.num_unnamed_args += 1;
match arg.kind {
FormatArgumentKind::Normal => {
// Only count the unnamed args before the first named arg.
// (Any later ones are errors.)
if self.explicit_names.is_empty() {
self.num_unnamed_args += 1;
}
}
FormatArgumentKind::Named(ident) => {
self.explicit_names.insert(ident.name, index);
}
FormatArgumentKind::Captured(_) => {
// Don't record the name yet, to keep duplicate captures until AST->HIR lowering.
}
}
if !matches!(arg.kind, FormatArgumentKind::Captured(..)) {
// This is an explicit argument.
Expand All @@ -113,8 +121,8 @@ impl FormatArguments {
index
}

pub fn by_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> {
let i = *self.names.get(&name)?;
pub fn by_explicit_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> {
let i = *self.explicit_names.get(&name)?;
Some((i, &self.arguments[i]))
}

Expand Down
277 changes: 275 additions & 2 deletions compiler/rustc_ast_lowering/src/format.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,103 @@
use std::borrow::Cow;

use rustc_ast::*;
use rustc_data_structures::fx::FxIndexMap;
use rustc_hir as hir;
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
use rustc_hir::def::{DefKind, Res};
use rustc_hir::{self as hir};
use rustc_session::config::FmtDebug;
use rustc_span::{ByteSymbol, DesugaringKind, Ident, Span, Symbol, sym};

use super::LoweringContext;
use crate::ResolverAstLoweringExt;

/// Gathers counts describing the shape of a `FormatArgs`, for crater analysis.
///
/// Two things are tallied:
/// - the template: literal pieces, placeholders, and placeholders whose width or
///   precision is supplied by an argument;
/// - the argument list: positional, named and captured arguments, with captured
///   identifiers grouped by name to measure how often one name is captured repeatedly.
fn collect_format_args_stats(fmt: &FormatArgs) -> FormatArgsStats {
    // --- Template side ---
    let mut literal_count = 0usize;
    let mut placeholder_count = 0usize;
    let mut width_from_arg = 0usize;
    let mut precision_from_arg = 0usize;

    for piece in &fmt.template {
        let ph = match piece {
            FormatArgsPiece::Literal(_) => {
                literal_count += 1;
                continue;
            }
            FormatArgsPiece::Placeholder(ph) => ph,
        };
        placeholder_count += 1;
        if matches!(&ph.format_options.width, Some(FormatCount::Argument(_))) {
            width_from_arg += 1;
        }
        if matches!(&ph.format_options.precision, Some(FormatCount::Argument(_))) {
            precision_from_arg += 1;
        }
    }

    // --- Argument side ---
    let mut positional = 0usize;
    let mut named = 0usize;
    let mut captured = 0usize;
    // Number of captured arguments per identifier name.
    let mut per_name: FxHashMap<Symbol, usize> = FxHashMap::default();

    for kind in fmt.arguments.all_args().iter().map(|arg| &arg.kind) {
        match kind {
            FormatArgumentKind::Normal => positional += 1,
            FormatArgumentKind::Named(_) => named += 1,
            FormatArgumentKind::Captured(ident) => {
                captured += 1;
                *per_name.entry(ident.name).or_default() += 1;
            }
        }
    }

    let unique_captures = per_name.len();
    // Every capture beyond the first of its name counts as a duplicate.
    let duplicate_captures = captured - unique_captures;

    // Histogram of names captured exactly twice, exactly three times, and four or more
    // times (slots 0, 1 and 2 respectively).
    let mut dup_histogram = [0usize; 3];
    // Iteration order is irrelevant here: the loop only accumulates commutative sums.
    #[allow(rustc::potential_query_instability)]
    for &times in per_name.values() {
        if times >= 2 {
            dup_histogram[(times - 2).min(2)] += 1;
        }
    }
    let [dup_2x, dup_3x, dup_4plus] = dup_histogram;

    FormatArgsStats {
        pieces: literal_count,
        placeholders: placeholder_count,
        width_args: width_from_arg,
        precision_args: precision_from_arg,
        total_args: positional + named + captured,
        positional,
        named,
        captured,
        unique_captures,
        duplicate_captures,
        dup_2x,
        dup_3x,
        dup_4plus,
    }
}

/// Counts describing one `FormatArgs`, as produced by `collect_format_args_stats`.
struct FormatArgsStats {
/// Number of literal pieces in the template.
pieces: usize,
/// Number of placeholder pieces in the template.
placeholders: usize,
/// Number of placeholders whose width is supplied by an argument.
width_args: usize,
/// Number of placeholders whose precision is supplied by an argument.
precision_args: usize,
/// Total number of arguments: `positional + named + captured`.
total_args: usize,
/// Number of `FormatArgumentKind::Normal` arguments.
positional: usize,
/// Number of `FormatArgumentKind::Named` arguments.
named: usize,
/// Number of `FormatArgumentKind::Captured` arguments, counting repeats of the same name.
captured: usize,
/// Number of distinct names among the captured arguments.
unique_captures: usize,
/// `captured - unique_captures`: captures beyond the first of each name.
duplicate_captures: usize,
/// Number of names captured exactly twice.
dup_2x: usize,
/// Number of names captured exactly three times.
dup_3x: usize,
/// Number of names captured four or more times.
dup_4plus: usize,
}

impl<'hir, R: ResolverAstLoweringExt<'hir>> LoweringContext<'_, 'hir, R> {
pub(crate) fn lower_format_args(&mut self, sp: Span, fmt: &FormatArgs) -> hir::ExprKind<'hir> {
// Never call the const constructor of `fmt::Arguments` if the
Expand All @@ -26,9 +115,119 @@ impl<'hir, R: ResolverAstLoweringExt<'hir>> LoweringContext<'_, 'hir, R> {
fmt = flatten_format_args(fmt);
fmt = self.inline_literals(fmt);
}

// --- Crater instrumentation: collect stats before dedup ---
let before = collect_format_args_stats(&fmt);
let args_before_dedup = before.total_args;

fmt = self.dedup_captured_places(fmt);

// --- Crater instrumentation: collect stats after dedup ---
let args_after_dedup = fmt.arguments.all_args().len();
let deduped_by_opt = args_before_dedup.saturating_sub(args_after_dedup);
let not_deduped = before.duplicate_captures.saturating_sub(deduped_by_opt);

// Classify duplicated captures by resolution.
let (const_dups, constparam_dups, other_dups) =
self.classify_dup_captures(&fmt);

// The "old world" (current stable) arg count: all captures with the
// same name were deduplicated, so we count unique captures only.
let args_old_world = before.positional + before.named + before.unique_captures;
// Size estimates (bytes, assuming 64-bit: rt::Argument = 16 bytes).
let size_no_dedup = before.total_args * 16;
let size_old_world = args_old_world * 16;
let size_with_opt = args_after_dedup * 16;

eprintln!(
"[FMTARGS] {{\
\"p\":{},\"ph\":{},\"wa\":{},\"pa\":{},\
\"a\":{},\"pos\":{},\"named\":{},\"cap\":{},\
\"ucap\":{},\"dup\":{},\"const\":{},\"constparam\":{},\"other_dup\":{},\
\"d2\":{},\"d3\":{},\"d4p\":{},\
\"deduped\":{},\"remaining\":{},\
\"a_old\":{},\"a_opt\":{},\
\"sz_no_dedup\":{},\"sz_old\":{},\"sz_opt\":{}\
}}",
before.pieces,
before.placeholders,
before.width_args,
before.precision_args,
before.total_args,
before.positional,
before.named,
before.captured,
before.unique_captures,
before.duplicate_captures,
const_dups,
constparam_dups,
other_dups,
before.dup_2x,
before.dup_3x,
before.dup_4plus,
deduped_by_opt,
not_deduped,
args_old_world,
args_after_dedup,
size_no_dedup,
size_old_world,
size_with_opt,
);

expand_format_args(self, sp, &fmt, allow_const)
}

/// Classify repeatedly-captured names by what they resolve to.
///
/// Returns `(const_count, constparam_count, other_count)`. Each captured name
/// contributes at most once, at the moment its second capture is encountered; third
/// and later captures of the same name do not add to any counter (their multiplicity
/// is reported separately through the `dup_2x`/`dup_3x`/`dup_4plus` statistics).
///
/// Names resolving to a local or a static are places and are not counted in any of
/// the three categories. Names without a full resolution are counted as "other".
fn classify_dup_captures(&self, fmt: &FormatArgs) -> (usize, usize, usize) {
    let mut occurrences: FxHashMap<Symbol, usize> = FxHashMap::default();
    let (mut const_dups, mut constparam_dups, mut other_dups) = (0usize, 0usize, 0usize);

    for arg in fmt.arguments.all_args() {
        let FormatArgumentKind::Captured(ident) = &arg.kind else {
            continue;
        };

        let seen_so_far = occurrences.entry(ident.name).or_default();
        *seen_so_far += 1;
        // Only the second occurrence of a name triggers classification.
        if *seen_so_far != 2 {
            continue;
        }

        let resolution =
            self.resolver.get_partial_res(arg.expr.id).and_then(|partial| partial.full_res());
        match resolution {
            // Places -- handled by dedup_captured_places.
            Some(Res::Local(_) | Res::Def(DefKind::Static { .. }, _)) => {}
            Some(Res::Def(DefKind::Const { .. } | DefKind::AssocConst { .. }, _)) => {
                const_dups += 1;
            }
            Some(Res::Def(DefKind::ConstParam, _)) => constparam_dups += 1,
            // Anything else, including an unresolved or only partially resolved name.
            Some(_) | None => other_dups += 1,
        }
    }

    (const_dups, constparam_dups, other_dups)
}

/// Try to convert a literal into an interned string
fn try_inline_lit(&self, lit: token::Lit) -> Option<Symbol> {
match LitKind::from_token_lit(lit) {
Expand Down Expand Up @@ -138,6 +337,80 @@ impl<'hir, R: ResolverAstLoweringExt<'hir>> LoweringContext<'_, 'hir, R> {

fmt
}

/// De-duplicate implicit captures of identifiers that refer to places.
///
/// Turns
///
/// `format_args!("Hello, {hello}, {hello}!")`
///
/// into
///
/// `format_args!("Hello, {hello}, {hello}!", hello=hello)`.
///
/// Only captures whose name fully resolves to a local or a static are merged: every
/// later placeholder capturing the same name is pointed at the first capture's
/// argument, and the arguments those placeholders used to refer to are removed.
/// Captures that resolve to anything else are left untouched.
///
/// `fmt` is only cloned (through `Cow::to_mut`) when at least one placeholder is
/// actually re-pointed; otherwise the input is returned as-is.
fn dedup_captured_places<'fmt>(&self, mut fmt: Cow<'fmt, FormatArgs>) -> Cow<'fmt, FormatArgs> {
use std::collections::hash_map::Entry;

// Maps a captured name to the index of the argument all its placeholders should share.
let mut deduped_arg_indices: FxHashMap<Symbol, usize> = FxHashMap::default();
// `remove[i]` is set when argument `i` has been replaced by an earlier capture.
let mut remove = vec![false; fmt.arguments.all_args().len()];
let mut deduped_anything = false;

// Re-use arguments for placeholders capturing the same local/static identifier.
// Indexing by `i` (rather than iterating) lets the body re-borrow `fmt` mutably.
for i in 0..fmt.template.len() {
if let FormatArgsPiece::Placeholder(placeholder) = &fmt.template[i]
&& let Ok(arg_index) = placeholder.argument.index
&& let arg = &fmt.arguments.all_args()[arg_index]
&& let FormatArgumentKind::Captured(ident) = arg.kind
{
match deduped_arg_indices.entry(ident.name) {
Entry::Occupied(occupied_entry) => {
// We've seen this identifier before, and it's dedupable. Point the
// placeholder at the recorded arg index, cloning `fmt` if necessary.
// NOTE(review): the placeholder's previous argument is marked for removal
// without checking whether anything else still refers to it -- presumably each
// captured argument has exactly one user at this point; confirm against the
// code that creates captured arguments.
let piece = &mut fmt.to_mut().template[i];
let FormatArgsPiece::Placeholder(placeholder) = piece else {
unreachable!();
};
placeholder.argument.index = Ok(*occupied_entry.get());
remove[arg_index] = true;
deduped_anything = true;
}
Entry::Vacant(vacant_entry) => {
// This is the first time we've seen a captured identifier. If it's a local
// or static, note the argument index so other occurrences can be deduped.
// Otherwise nothing is recorded, so later captures of this name take this
// branch again and are likewise left alone.
if let Some(partial_res) = self.resolver.get_partial_res(arg.expr.id)
&& let Some(res) = partial_res.full_res()
&& matches!(res, Res::Local(_) | Res::Def(DefKind::Static { .. }, _))
{
vacant_entry.insert(arg_index);
}
}
}
}
}

// Remove the arguments that were de-duplicated.
if deduped_anything {
let fmt = fmt.to_mut();

// Drop all the arguments that are marked for removal.
// `remove_it` advances in lockstep with `retain`'s walk over the arguments.
let mut remove_it = remove.iter();
fmt.arguments.all_args_mut().retain(|_| remove_it.next() != Some(&true));

// Calculate the mapping of old to new indexes for the remaining arguments.
// For each old index, the mapped value is the number of kept arguments before it.
let index_map: Vec<usize> = remove
.into_iter()
.scan(0, |i, remove| {
let mapped = *i;
*i += !remove as usize;
Some(mapped)
})
.collect();

// Correct the indexes that refer to arguments that have shifted position.
// (`for_all_argument_indexes` is defined outside this view; presumably it visits
// every argument index stored in the template -- verify.)
for_all_argument_indexes(&mut fmt.template, |index| *index = index_map[*index]);
}

fmt
}
}

/// Flattens nested `format_args!()` into one.
Expand Down
11 changes: 5 additions & 6 deletions compiler/rustc_builtin_macros/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ fn parse_args<'a>(ecx: &ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a,
p.bump();
p.expect(exp!(Eq))?;
let expr = p.parse_expr()?;
if let Some((_, prev)) = args.by_name(ident.name) {
if let Some((_, prev)) = args.by_explicit_name(ident.name) {
ecx.dcx().emit_err(errors::FormatDuplicateArg {
span: ident.span,
prev: prev.kind.ident().unwrap().span,
Expand Down Expand Up @@ -396,12 +396,11 @@ fn make_format_args(
}
Name(name, span) => {
let name = Symbol::intern(name);
if let Some((index, _)) = args.by_name(name) {
if let Some((index, _)) = args.by_explicit_name(name) {
// Name found in `args`, so we resolve it to its index.
if index < args.explicit_args().len() {
// Mark it as used, if it was an explicit argument.
used[index] = true;
}
assert!(index < args.explicit_args().len());
// Mark it as used, as this is an explicit argument.
used[index] = true;
Ok(index)
} else {
// Name not found in `args`, so we add it as an implicitly captured argument.
Expand Down
Loading
Loading