Skip to content

Commit

Permalink
Rollup merge of #117042 - Zalathar:file-table, r=cjgillot
Browse files Browse the repository at this point in the history
coverage: Emit the filenames section before encoding per-function mappings

When embedding coverage information in LLVM IR (and ultimately in the resulting binary), there are two main things that each CGU needs to emit:

- A single `__llvm_covmap` record containing a coverage header, which mostly consists of a list of filenames used by the CGU's coverage mappings.
- Several `__llvm_covfun` records, one for each instrumented function, each of which contains the hash of the list of filenames in the header.

There is a kind of loose cyclic dependency between the two: we need the hash of the file table before we can emit the covfun records, but we need to traverse all of the instrumented functions in order to build the file table.

The existing code works by processing the individual functions first. It lazily adds filenames to the file table, and stores the mostly-complete function records in a temporary list. After this it hashes the file table, emits the header (containing the file table), and then uses the hash to emit all of the function records.

This PR reverses that order: first we traverse all of the functions (without trying to prepare their function records) to build a *complete* file table, and then emit it immediately. At this point we have the file table hash, so we can then proceed to build and emit all of the function records, without needing to store them in an intermediate list.

---

Along the way, this PR makes some necessary changes that are also worthwhile in their own right:
- We split `FunctionCoverage` into distinct collector/finished phases, which neatly avoids some borrow-checker hassles when extracting a function's final expression/mapping data.
- We avoid having to re-sort a function's mappings when preparing the list of filenames that it uses.
  • Loading branch information
matthiaskrgr committed Oct 23, 2023
2 parents e86f9b6 + 6af9fef commit dde77f7
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 106 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3601,6 +3601,7 @@ version = "0.0.0"
dependencies = [
"bitflags 1.3.2",
"cstr",
"itertools",
"libc",
"measureme",
"object",
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_llvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ test = false
[dependencies]
bitflags = "1.0"
cstr = "0.2"
itertools = "0.10.5"
libc = "0.2"
measureme = "10.0.0"
object = { version = "0.32.0", default-features = false, features = [
Expand Down
76 changes: 40 additions & 36 deletions compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};

use rustc_data_structures::captures::Captures;
use rustc_data_structures::fx::FxIndexSet;
use rustc_index::bit_set::BitSet;
use rustc_middle::mir::coverage::{
CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op,
};
use rustc_middle::ty::Instance;
use rustc_span::Symbol;

/// Holds all of the coverage mapping data associated with a function instance,
/// collected during traversal of `Coverage` statements in the function's MIR.
#[derive(Debug)]
pub struct FunctionCoverage<'tcx> {
pub struct FunctionCoverageCollector<'tcx> {
/// Coverage info that was attached to this function by the instrumentor.
function_coverage_info: &'tcx FunctionCoverageInfo,
is_used: bool,
Expand All @@ -26,7 +28,7 @@ pub struct FunctionCoverage<'tcx> {
expressions_seen: BitSet<ExpressionId>,
}

impl<'tcx> FunctionCoverage<'tcx> {
impl<'tcx> FunctionCoverageCollector<'tcx> {
/// Creates a new set of coverage data for a used (called) function.
pub fn new(
instance: Instance<'tcx>,
Expand Down Expand Up @@ -76,11 +78,6 @@ impl<'tcx> FunctionCoverage<'tcx> {
}
}

/// Returns true for a used (called) function, and false for an unused function.
pub fn is_used(&self) -> bool {
self.is_used
}

/// Marks a counter ID as having been seen in a counter-increment statement.
#[instrument(level = "debug", skip(self))]
pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) {
Expand Down Expand Up @@ -165,72 +162,79 @@ impl<'tcx> FunctionCoverage<'tcx> {
ZeroExpressions(zero_expressions)
}

pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> {
let zero_expressions = self.identify_zero_expressions();
let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = self;

FunctionCoverage { function_coverage_info, is_used, counters_seen, zero_expressions }
}
}

pub(crate) struct FunctionCoverage<'tcx> {
function_coverage_info: &'tcx FunctionCoverageInfo,
is_used: bool,

counters_seen: BitSet<CounterId>,
zero_expressions: ZeroExpressions,
}

impl<'tcx> FunctionCoverage<'tcx> {
/// Returns true for a used (called) function, and false for an unused function.
pub(crate) fn is_used(&self) -> bool {
self.is_used
}

/// Return the source hash, generated from the HIR node structure, and used to indicate whether
/// or not the source code structure changed between different compilations.
pub fn source_hash(&self) -> u64 {
if self.is_used { self.function_coverage_info.function_source_hash } else { 0 }
}

/// Generate an array of CounterExpressions, and an iterator over all `Counter`s and their
/// associated `Regions` (from which the LLVM-specific `CoverageMapGenerator` will create
/// `CounterMappingRegion`s.
pub fn get_expressions_and_counter_regions(
&self,
) -> (Vec<CounterExpression>, impl Iterator<Item = (Counter, &CodeRegion)>) {
let zero_expressions = self.identify_zero_expressions();

let counter_expressions = self.counter_expressions(&zero_expressions);
// Expression IDs are indices into `self.expressions`, and on the LLVM
// side they will be treated as indices into `counter_expressions`, so
// the two vectors should correspond 1:1.
assert_eq!(self.function_coverage_info.expressions.len(), counter_expressions.len());

let counter_regions = self.counter_regions(zero_expressions);

(counter_expressions, counter_regions)
/// Returns an iterator over all filenames used by this function's mappings.
pub(crate) fn all_file_names(&self) -> impl Iterator<Item = Symbol> + Captures<'_> {
self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name)
}

/// Convert this function's coverage expression data into a form that can be
/// passed through FFI to LLVM.
fn counter_expressions(&self, zero_expressions: &ZeroExpressions) -> Vec<CounterExpression> {
pub(crate) fn counter_expressions(
&self,
) -> impl Iterator<Item = CounterExpression> + ExactSizeIterator + Captures<'_> {
// We know that LLVM will optimize out any unused expressions before
// producing the final coverage map, so there's no need to do the same
// thing on the Rust side unless we're confident we can do much better.
// (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)

let counter_from_operand = |operand: CovTerm| match operand {
CovTerm::Expression(id) if zero_expressions.contains(id) => Counter::ZERO,
CovTerm::Expression(id) if self.zero_expressions.contains(id) => Counter::ZERO,
_ => Counter::from_term(operand),
};

self.function_coverage_info
.expressions
.iter()
.map(|&Expression { lhs, op, rhs }| CounterExpression {
self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| {
CounterExpression {
lhs: counter_from_operand(lhs),
kind: match op {
Op::Add => ExprKind::Add,
Op::Subtract => ExprKind::Subtract,
},
rhs: counter_from_operand(rhs),
})
.collect::<Vec<_>>()
}
})
}

/// Converts this function's coverage mappings into an intermediate form
/// that will be used by `mapgen` when preparing for FFI.
fn counter_regions(
pub(crate) fn counter_regions(
&self,
zero_expressions: ZeroExpressions,
) -> impl Iterator<Item = (Counter, &CodeRegion)> {
) -> impl Iterator<Item = (Counter, &CodeRegion)> + ExactSizeIterator {
// Historically, mappings were stored directly in counter/expression
// statements in MIR, and MIR optimizations would sometimes remove them.
// That's mostly no longer true, so now we detect cases where that would
// have happened, and zero out the corresponding mappings here instead.
let counter_for_term = move |term: CovTerm| {
let force_to_zero = match term {
CovTerm::Counter(id) => !self.counters_seen.contains(id),
CovTerm::Expression(id) => zero_expressions.contains(id),
CovTerm::Expression(id) => self.zero_expressions.contains(id),
CovTerm::Zero => false,
};
if force_to_zero { Counter::ZERO } else { Counter::from_term(term) }
Expand Down
Loading

0 comments on commit dde77f7

Please sign in to comment.