From 8c62dc1f7846cd15990f1fe87a1c4e893b321d2b Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 3 Mar 2024 15:34:22 +0300 Subject: [PATCH 1/2] rustc_span: Add conveniences for working with span formats Centralize span ctxt updates in Span::update_ctxt. Stop requiring inline and interned ctxts in partially interned format to be synchronized. --- compiler/rustc_expand/src/mbe/transcribe.rs | 10 +- compiler/rustc_span/src/lib.rs | 26 ++- compiler/rustc_span/src/span_encoding.rs | 234 +++++++++++++------- 3 files changed, 173 insertions(+), 97 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index e1f50876b0588..7e6508c488a0d 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -30,11 +30,11 @@ impl MutVisitor for Marker { // it's some advanced case with macro-generated macros. So if we cache the marked version // of that context once, we'll typically have a 100% cache hit rate after that. let Marker(expn_id, transparency, ref mut cache) = *self; - let data = span.data(); - let marked_ctxt = *cache - .entry(data.ctxt) - .or_insert_with(|| data.ctxt.apply_mark(expn_id.to_expn_id(), transparency)); - *span = data.with_ctxt(marked_ctxt); + span.update_ctxt(|ctxt| { + *cache + .entry(ctxt) + .or_insert_with(|| ctxt.apply_mark(expn_id.to_expn_id(), transparency)) + }); } } diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index b2ca01fe3b94c..f1d7fea92a56c 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -520,8 +520,9 @@ impl SpanData { pub fn with_hi(&self, hi: BytePos) -> Span { Span::new(self.lo, hi, self.ctxt, self.parent) } + /// Avoid if possible, `Span::update_ctxt` should be preferred. #[inline] - pub fn with_ctxt(&self, ctxt: SyntaxContext) -> Span { + fn with_ctxt(&self, ctxt: SyntaxContext) -> Span { Span::new(self.lo, self.hi, ctxt, self.parent) } #[inline] @@ -576,8 +577,9 @@ impl Span { self.data().with_hi(hi) } #[inline] - pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span { - self.data_untracked().with_ctxt(ctxt) + pub fn with_ctxt(mut self, ctxt: SyntaxContext) -> Span { + self.update_ctxt(|_| ctxt); + self } #[inline] pub fn parent(self) -> Option { @@ -1051,9 +1053,9 @@ impl Span { } #[inline] - pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span { - let span = self.data(); - span.with_ctxt(span.ctxt.apply_mark(expn_id, transparency)) + pub fn apply_mark(mut self, expn_id: ExpnId, transparency: Transparency) -> Span { + self.update_ctxt(|ctxt| ctxt.apply_mark(expn_id, transparency)); + self } #[inline] @@ -1101,15 +1103,15 @@ impl Span { } #[inline] - pub fn normalize_to_macros_2_0(self) -> Span { - let span = self.data(); - span.with_ctxt(span.ctxt.normalize_to_macros_2_0()) + pub fn normalize_to_macros_2_0(mut self) -> Span { + self.update_ctxt(|ctxt| ctxt.normalize_to_macros_2_0()); + self } #[inline] - pub fn normalize_to_macro_rules(self) -> Span { - let span = self.data(); - span.with_ctxt(span.ctxt.normalize_to_macro_rules()) + pub fn normalize_to_macro_rules(mut self) -> Span { + self.update_ctxt(|ctxt| ctxt.normalize_to_macro_rules()); + self } } diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index 788a52faf5688..be4a72896d761 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -4,10 +4,10 @@ use crate::SPAN_TRACK; use crate::{BytePos, SpanData}; use rustc_data_structures::fx::FxIndexSet; - // This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. // See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 use rustc_serialize::int_overflow::DebugStrictAdd; +use std::mem::transmute; /// A compressed span. /// @@ -87,6 +87,106 @@ pub struct Span { ctxt_or_parent_or_marker: u16, } +// Convenience structures for all span formats. +#[derive(Clone, Copy)] +struct InlineCtxt { + lo: u32, + len: u16, + ctxt: u16, +} + +#[derive(Clone, Copy)] +struct InlineParent { + lo: u32, + len_with_tag: u16, + parent: u16, +} + +#[derive(Clone, Copy)] +struct PartiallyInterned { + index: u32, + _marker1: u16, + ctxt: u16, +} + +#[derive(Clone, Copy)] +struct Interned { + index: u32, + _marker1: u16, + _marker2: u16, +} + +enum Fmt<'a> { + InlineCtxt(&'a mut InlineCtxt), + InlineParent(&'a mut InlineParent), + PartiallyInterned(&'a mut PartiallyInterned), + Interned(&'a mut Interned), +} + +impl InlineCtxt { + fn data(self) -> SpanData { + let len = self.len as u32; + debug_assert!(len <= MAX_LEN); + SpanData { + lo: BytePos(self.lo), + hi: BytePos(self.lo.debug_strict_add(len)), + ctxt: SyntaxContext::from_u32(self.ctxt as u32), + parent: None, + } + } + fn to_span(self) -> Span { + unsafe { transmute(self) } + } +} + +impl InlineParent { + fn data(self) -> SpanData { + let len = (self.len_with_tag & !PARENT_TAG) as u32; + debug_assert!(len <= MAX_LEN); + SpanData { + lo: BytePos(self.lo), + hi: BytePos(self.lo.debug_strict_add(len)), + ctxt: SyntaxContext::root(), + parent: Some(LocalDefId { local_def_index: DefIndex::from_u32(self.parent as u32) }), + } + } + fn to_span(self) -> Span { + unsafe { transmute(self) } + } +} + +impl PartiallyInterned { + fn data(self) -> SpanData { + SpanData { + ctxt: SyntaxContext::from_u32(self.ctxt as u32), + ..with_span_interner(|interner| interner.spans[self.index as usize]) + } + } + fn to_span(self) -> Span { + unsafe { transmute(self) } + } +} + +impl Interned { + fn data(self) -> SpanData { + with_span_interner(|interner| interner.spans[self.index as usize]) + } + fn to_span(self) -> Span { + unsafe { transmute(self) } + } +} + +impl Fmt<'_> { + fn data(self) -> SpanData { + match self { + Fmt::InlineCtxt(span) => span.data(), + Fmt::InlineParent(span) => span.data(), + Fmt::PartiallyInterned(span) => span.data(), + Fmt::Interned(span) => span.data(), + } + } +} + // `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from // `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.) const MAX_LEN: u32 = 0b0111_1111_1111_1110; @@ -111,42 +211,47 @@ impl Span { std::mem::swap(&mut lo, &mut hi); } - let (lo2, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32()); - + // Small len may enable one of fully inline formats (or may not). + let (len, ctxt32) = (hi.0 - lo.0, ctxt.as_u32()); if len <= MAX_LEN { - if ctxt2 <= MAX_CTXT && parent.is_none() { - // Inline-context format. - return Span { - lo_or_index: lo2, - len_with_tag_or_marker: len as u16, - ctxt_or_parent_or_marker: ctxt2 as u16, - }; - } else if ctxt2 == SyntaxContext::root().as_u32() + if ctxt32 <= MAX_CTXT && parent.is_none() { + return InlineCtxt { lo: lo.0, len: len as u16, ctxt: ctxt32 as u16 }.to_span(); + } else if ctxt32 == 0 && let Some(parent) = parent - && let parent2 = parent.local_def_index.as_u32() - && parent2 <= MAX_CTXT + && let parent32 = parent.local_def_index.as_u32() + && parent32 <= MAX_CTXT { - // Inline-parent format. - return Span { - lo_or_index: lo2, - len_with_tag_or_marker: PARENT_TAG | len as u16, - ctxt_or_parent_or_marker: parent2 as u16, - }; + let len_with_tag = PARENT_TAG | len as u16; + return InlineParent { lo: lo.0, len_with_tag, parent: parent32 as u16 }.to_span(); } } - // Partially-interned or fully-interned format. - let index = - with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent })); - let ctxt_or_parent_or_marker = if ctxt2 <= MAX_CTXT { - ctxt2 as u16 // partially-interned - } else { - CTXT_INTERNED_MARKER // fully-interned + // Otherwise small ctxt may enable the partially inline format. + let index = |ctxt| { + with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent })) }; - Span { - lo_or_index: index, - len_with_tag_or_marker: BASE_LEN_INTERNED_MARKER, - ctxt_or_parent_or_marker, + if ctxt32 <= MAX_CTXT { + let index = index(SyntaxContext::from_u32(u32::MAX)); // any value, should never be read + PartiallyInterned { index, _marker1: BASE_LEN_INTERNED_MARKER, ctxt: ctxt32 as u16 } + .to_span() + } else { + let index = index(ctxt); + Interned { index, _marker1: BASE_LEN_INTERNED_MARKER, _marker2: CTXT_INTERNED_MARKER } + .to_span() + } + } + + fn fmt(&mut self) -> Fmt<'_> { + if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER { + if self.len_with_tag_or_marker & PARENT_TAG == 0 { + Fmt::InlineCtxt(unsafe { transmute(self) }) + } else { + Fmt::InlineParent(unsafe { transmute(self) }) + } + } else if self.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER { + Fmt::PartiallyInterned(unsafe { transmute(self) }) + } else { + Fmt::Interned(unsafe { transmute(self) }) } } @@ -162,39 +267,8 @@ impl Span { /// Internal function to translate between an encoded span and the expanded representation. /// This function must not be used outside the incremental engine. #[inline] - pub fn data_untracked(self) -> SpanData { - if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER { - if self.len_with_tag_or_marker & PARENT_TAG == 0 { - // Inline-context format. - let len = self.len_with_tag_or_marker as u32; - debug_assert!(len <= MAX_LEN); - SpanData { - lo: BytePos(self.lo_or_index), - hi: BytePos(self.lo_or_index.debug_strict_add(len)), - ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32), - parent: None, - } - } else { - // Inline-parent format. - let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32; - debug_assert!(len <= MAX_LEN); - let parent = LocalDefId { - local_def_index: DefIndex::from_u32(self.ctxt_or_parent_or_marker as u32), - }; - SpanData { - lo: BytePos(self.lo_or_index), - hi: BytePos(self.lo_or_index.debug_strict_add(len)), - ctxt: SyntaxContext::root(), - parent: Some(parent), - } - } - } else { - // Fully-interned or partially-interned format. In either case, - // the interned value contains all the data, so we don't need to - // distinguish them. - let index = self.lo_or_index; - with_span_interner(|interner| interner.spans[index as usize]) - } + pub fn data_untracked(mut self) -> SpanData { + self.fmt().data() } /// Returns `true` if this is a dummy span with any hygienic context. @@ -214,26 +288,26 @@ impl Span { } } + // For optimization we are interested in cases in which the context is inline and the context + // update doesn't change format. All non-inline or format changing scenarios require accessing + // interner and can fall back to `Span::new`. + pub fn update_ctxt(&mut self, update: impl FnOnce(SyntaxContext) -> SyntaxContext) { + // FIXME(#125017): Update ctxt inline without touching interner when possible. + let data = self.data(); + *self = data.with_ctxt(update(data.ctxt)); + } + // Returns either syntactic context, if it can be retrieved without taking the interner lock, // or an index into the interner if it cannot. - fn inline_ctxt(self) -> Result { - Ok(if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER { - if self.len_with_tag_or_marker & PARENT_TAG == 0 { - // Inline-context format. - SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32) - } else { - // Inline-parent format. We know that the SyntaxContext is root. - SyntaxContext::root() + fn inline_ctxt(mut self) -> Result { + match self.fmt() { + Fmt::InlineCtxt(InlineCtxt { ctxt, .. }) + | Fmt::PartiallyInterned(PartiallyInterned { ctxt, .. }) => { + Ok(SyntaxContext::from_u32(*ctxt as u32)) } - } else if self.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER { - // Partially-interned format. This path avoids looking up the - // interned value, and is the whole point of the - // partially-interned format. - SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32) - } else { - // Fully-interned format. - return Err(self.lo_or_index as usize); - }) + Fmt::InlineParent(_) => Ok(SyntaxContext::root()), + Fmt::Interned(span) => Err(span.index as usize), + } } /// This function is used as a fast path when decoding the full `SpanData` is not necessary. From 00c399ecb93fa5c632cedd3d86901f11ef5fc3f1 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 2 Jun 2024 01:37:11 +0300 Subject: [PATCH 2/2] rustc_span: Inline new hot functions --- compiler/rustc_span/src/span_encoding.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index be4a72896d761..f467a7bcc18fe 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -124,6 +124,7 @@ enum Fmt<'a> { } impl InlineCtxt { + #[inline] fn data(self) -> SpanData { let len = self.len as u32; debug_assert!(len <= MAX_LEN); @@ -134,12 +135,14 @@ impl InlineCtxt { parent: None, } } + #[inline] fn to_span(self) -> Span { unsafe { transmute(self) } } } impl InlineParent { + #[inline] fn data(self) -> SpanData { let len = (self.len_with_tag & !PARENT_TAG) as u32; debug_assert!(len <= MAX_LEN); @@ -150,33 +153,39 @@ impl InlineParent { parent: Some(LocalDefId { local_def_index: DefIndex::from_u32(self.parent as u32) }), } } + #[inline] fn to_span(self) -> Span { unsafe { transmute(self) } } } impl PartiallyInterned { + #[inline] fn data(self) -> SpanData { SpanData { ctxt: SyntaxContext::from_u32(self.ctxt as u32), ..with_span_interner(|interner| interner.spans[self.index as usize]) } } + #[inline] fn to_span(self) -> Span { unsafe { transmute(self) } } } impl Interned { + #[inline] fn data(self) -> SpanData { with_span_interner(|interner| interner.spans[self.index as usize]) } + #[inline] fn to_span(self) -> Span { unsafe { transmute(self) } } } impl Fmt<'_> { + #[inline] fn data(self) -> SpanData { match self { Fmt::InlineCtxt(span) => span.data(), @@ -241,6 +250,7 @@ impl Span { } } + #[inline] fn fmt(&mut self) -> Fmt<'_> { if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER { if self.len_with_tag_or_marker & PARENT_TAG == 0 { @@ -291,6 +301,7 @@ impl Span { // For optimization we are interested in cases in which the context is inline and the context // update doesn't change format. All non-inline or format changing scenarios require accessing // interner and can fall back to `Span::new`. + #[inline] pub fn update_ctxt(&mut self, update: impl FnOnce(SyntaxContext) -> SyntaxContext) { // FIXME(#125017): Update ctxt inline without touching interner when possible. let data = self.data(); @@ -299,6 +310,7 @@ impl Span { // Returns either syntactic context, if it can be retrieved without taking the interner lock, // or an index into the interner if it cannot. + #[inline] fn inline_ctxt(mut self) -> Result { match self.fmt() { Fmt::InlineCtxt(InlineCtxt { ctxt, .. })