Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shrink span encoding further #119367

Merged
merged 1 commit into from
Dec 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
40 changes: 19 additions & 21 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -508,39 +508,37 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnId {
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
let start = decoder.position();
let mode = SpanEncodingMode::decode(decoder);
Mark-Simulacrum marked this conversation as resolved.
Show resolved Hide resolved
let data = match mode {
SpanEncodingMode::Direct => SpanData::decode(decoder),
SpanEncodingMode::RelativeOffset(offset) => {
decoder.with_position(start - offset, |decoder| {
let mode = SpanEncodingMode::decode(decoder);
debug_assert!(matches!(mode, SpanEncodingMode::Direct));
SpanData::decode(decoder)
})
}
SpanEncodingMode::AbsoluteOffset(addr) => decoder.with_position(addr, |decoder| {
let mode = SpanEncodingMode::decode(decoder);
debug_assert!(matches!(mode, SpanEncodingMode::Direct));
SpanData::decode(decoder)
}),
let tag = SpanTag(decoder.peek_byte());
let data = if tag.kind() == SpanKind::Indirect {
// Skip past the tag we just peek'd.
decoder.read_u8();
let offset_or_position = decoder.read_usize();
let position = if tag.is_relative_offset() {
start - offset_or_position
} else {
offset_or_position
};
decoder.with_position(position, SpanData::decode)
} else {
SpanData::decode(decoder)
};
Span::new(data.lo, data.hi, data.ctxt, data.parent)
}
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SpanData {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> SpanData {
let ctxt = SyntaxContext::decode(decoder);
let tag = u8::decode(decoder);
let tag = SpanTag::decode(decoder);
let ctxt = tag.context().unwrap_or_else(|| SyntaxContext::decode(decoder));

if tag == TAG_PARTIAL_SPAN {
if tag.kind() == SpanKind::Partial {
return DUMMY_SP.with_ctxt(ctxt).data();
}

debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
debug_assert!(tag.kind() == SpanKind::Local || tag.kind() == SpanKind::Foreign);

let lo = BytePos::decode(decoder);
let len = BytePos::decode(decoder);
let len = tag.length().unwrap_or_else(|| BytePos::decode(decoder));
let hi = lo + len;

let Some(sess) = decoder.sess else {
Expand Down Expand Up @@ -581,7 +579,7 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SpanData {
// treat the 'local' and 'foreign' cases almost identically during deserialization:
// we can call `imported_source_file` for the proper crate, and binary search
// through the returned slice using our span.
let source_file = if tag == TAG_VALID_SPAN_LOCAL {
let source_file = if tag.kind() == SpanKind::Local {
decoder.cdata().imported_source_file(metadata_index, sess)
} else {
// When we encode a proc-macro crate, all `Span`s should be encoded
Expand Down
44 changes: 29 additions & 15 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,17 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
// previously saved offset must be smaller than the current position.
let offset = s.opaque.position() - last_location;
if offset < last_location {
SpanEncodingMode::RelativeOffset(offset).encode(s)
SpanTag::indirect(true).encode(s);
offset.encode(s);
} else {
SpanEncodingMode::AbsoluteOffset(last_location).encode(s)
SpanTag::indirect(false).encode(s);
last_location.encode(s);
}
}
Entry::Vacant(v) => {
let position = s.opaque.position();
v.insert(position);
SpanEncodingMode::Direct.encode(s);
// Data is encoded with a SpanTag prefix (see below).
self.data().encode(s);
}
}
Expand Down Expand Up @@ -225,14 +227,15 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
// IMPORTANT: If this is ever changed, be sure to update
// `rustc_span::hygiene::raw_encode_expn_id` to handle
// encoding `ExpnData` for proc-macro crates.
if s.is_proc_macro {
SyntaxContext::root().encode(s);
} else {
self.ctxt.encode(s);
}
let ctxt = if s.is_proc_macro { SyntaxContext::root() } else { self.ctxt };

if self.is_dummy() {
return TAG_PARTIAL_SPAN.encode(s);
let tag = SpanTag::new(SpanKind::Partial, ctxt, 0);
tag.encode(s);
if tag.context().is_none() {
ctxt.encode(s);
}
return;
}

// The Span infrastructure should make sure that this invariant holds:
Expand All @@ -250,7 +253,12 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
if !source_file.contains(self.hi) {
// Unfortunately, macro expansion still sometimes generates Spans
// that are malformed in this way.
return TAG_PARTIAL_SPAN.encode(s);
let tag = SpanTag::new(SpanKind::Partial, ctxt, 0);
tag.encode(s);
if tag.context().is_none() {
ctxt.encode(s);
}
return;
}

// There are two possible cases here:
Expand All @@ -269,7 +277,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
// if we're a proc-macro crate.
// This allows us to avoid loading the dependencies of proc-macro crates: all of
// the information we need to decode `Span`s is stored in the proc-macro crate.
let (tag, metadata_index) = if source_file.is_imported() && !s.is_proc_macro {
let (kind, metadata_index) = if source_file.is_imported() && !s.is_proc_macro {
// To simplify deserialization, we 'rebase' this span onto the crate it originally came
// from (the crate that 'owns' the file it references). These rebased 'lo' and 'hi'
// values are relative to the source map information for the 'foreign' crate whose
Expand All @@ -287,7 +295,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
}
};

(TAG_VALID_SPAN_FOREIGN, metadata_index)
(SpanKind::Foreign, metadata_index)
} else {
// Record the fact that we need to encode the data for this `SourceFile`
let source_files =
Expand All @@ -296,7 +304,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
let metadata_index: u32 =
metadata_index.try_into().expect("cannot export more than U32_MAX files");

(TAG_VALID_SPAN_LOCAL, metadata_index)
(SpanKind::Local, metadata_index)
};

// Encode the start position relative to the file start, so we profit more from the
Expand All @@ -307,14 +315,20 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
// from the variable-length integer encoding that we use.
let len = self.hi - self.lo;

let tag = SpanTag::new(kind, ctxt, len.0 as usize);
tag.encode(s);
if tag.context().is_none() {
ctxt.encode(s);
}
lo.encode(s);
len.encode(s);
if tag.length().is_none() {
len.encode(s);
}

// Encode the index of the `SourceFile` for the span, in order to make decoding faster.
metadata_index.encode(s);

if tag == TAG_VALID_SPAN_FOREIGN {
if kind == SpanKind::Foreign {
// This needs to be two lines to avoid holding the `s.source_file_cache`
// while calling `cnum.encode(s)`
let cnum = s.source_file_cache.0.cnum;
Expand Down
93 changes: 82 additions & 11 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,6 @@ const METADATA_VERSION: u8 = 9;
/// unsigned integer, and further followed by the rustc version string.
pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION];

#[derive(Encodable, Decodable)]
enum SpanEncodingMode {
RelativeOffset(usize),
AbsoluteOffset(usize),
Direct,
}

/// A value of type T referred to by its absolute position
/// in the metadata, and which can be decoded lazily.
///
Expand Down Expand Up @@ -488,10 +481,88 @@ bitflags::bitflags! {
}
}

// Tags used for encoding Spans:
const TAG_VALID_SPAN_LOCAL: u8 = 0;
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
const TAG_PARTIAL_SPAN: u8 = 2;
/// A span tag byte encodes a bunch of data, so that we can cut out a few extra bytes from span
/// encodings (which are very common, for example, libcore has ~650,000 unique spans and over 1.1
/// million references to prior-written spans).
///
/// The byte format is split into several parts, shown here from the most significant bit to the
/// least significant bit:
///
/// ```text
/// [ a a a a a c d d ]
/// ```
///
/// `a` bits represent the span length. We have 5 bits, so we can store lengths from 0 up to 30
/// inline; the all-1s pattern (31) is a sentinel meaning that the length is stored separately,
/// after the tag.
///
/// `c` has two meanings depending on the kind of span:
///
/// * for direct span encodings, it is set when the span's syntax context is the root context, in
///   which case the context is not stored as a separate varint;
/// * for indirect encodings, it is set when the stored offset is relative and clear when it is
///   absolute.
///
/// `d` bits represent the kind of span we are storing (local, foreign, partial, indirect); see
/// `SpanKind`.
#[derive(Encodable, Decodable, Copy, Clone)]
struct SpanTag(u8);

/// The kind of span encoding that follows a `SpanTag`.
///
/// The discriminant of each variant is the value stored in the two lowest bits of the tag byte,
/// so these values are part of the metadata format and must stay in sync with `SpanTag::kind`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum SpanKind {
// A fully described span whose source file is looked up in the crate being decoded.
Local = 0b00,
// A fully described span in a source file imported from another crate; the encoder only
// emits this when the crate being encoded is not a proc-macro crate.
Foreign = 0b01,
// A span for which only the syntax context is kept; it decodes to a dummy span carrying
// that context.
Partial = 0b10,
// Indicates the actual span contents are elsewhere.
// If this is the kind, then the span context bit represents whether it is a relative or
// absolute offset.
Indirect = 0b11,
}

impl SpanTag {
    /// Largest value representable in the 5-bit length field (all bits set). It never denotes
    /// a real length: it marks the length as being stored out of band, after the tag.
    const LEN_OUT_OF_BAND: u8 = 0b1_1111;

    /// Builds the tag for a directly encoded span of the given `kind`.
    ///
    /// The context bit is set when `context` is the root context. `length` is stored inline
    /// when it is strictly smaller than the sentinel; otherwise the sentinel is stored and the
    /// caller is expected to encode the length separately (see `length`).
    fn new(kind: SpanKind, context: rustc_span::SyntaxContext, length: usize) -> SpanTag {
        let root_bit: u8 = if context.is_root() { 0b100 } else { 0 };
        let len_field = if length < usize::from(Self::LEN_OUT_OF_BAND) {
            // Lossless: `length` was just checked to be below 31.
            length as u8
        } else {
            Self::LEN_OUT_OF_BAND
        };

        SpanTag(kind as u8 | root_bit | (len_field << 3))
    }

    /// Builds the tag for an indirect span, i.e. one whose data lives at another position.
    ///
    /// `relative` selects whether the value following the tag is a relative offset (bit set)
    /// or an absolute position (bit clear). The length bits are left as zero.
    fn indirect(relative: bool) -> SpanTag {
        let relative_bit: u8 = if relative { 0b100 } else { 0 };
        SpanTag(SpanKind::Indirect as u8 | relative_bit)
    }

    /// Returns the span kind stored in the two lowest bits of the tag.
    fn kind(self) -> SpanKind {
        // Indexed by discriminant; the two-bit mask keeps the index within 0..=3.
        const KINDS: [SpanKind; 4] =
            [SpanKind::Local, SpanKind::Foreign, SpanKind::Partial, SpanKind::Indirect];
        KINDS[usize::from(self.0 & 0b11)]
    }

    /// For an indirect tag, tells whether the value after the tag is a relative offset.
    ///
    /// Only meaningful for `SpanKind::Indirect`, which is checked in debug builds.
    fn is_relative_offset(self) -> bool {
        debug_assert_eq!(self.kind(), SpanKind::Indirect);
        (self.0 >> 2) & 1 == 1
    }

    /// Returns the root syntax context when the context bit is set, and `None` when the
    /// context bit is clear (the context is then encoded separately).
    fn context(self) -> Option<rustc_span::SyntaxContext> {
        let root_bit_set = self.0 & 0b100 != 0;
        root_bit_set.then(|| rustc_span::SyntaxContext::root())
    }

    /// Returns the inline span length, or `None` when the length field holds the sentinel
    /// (the length is then encoded separately).
    fn length(self) -> Option<rustc_span::BytePos> {
        match self.0 >> 3 {
            Self::LEN_OUT_OF_BAND => None,
            inline => Some(rustc_span::BytePos(u32::from(inline))),
        }
    }
}

// Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0;
Expand Down