Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coverage: llvm-cov expects column numbers to be bytes, not code points #119033

Merged
Merged 3 commits on Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
90 changes: 70 additions & 20 deletions compiler/rustc_mir_transform/src/coverage/mod.rs
Expand Up @@ -23,7 +23,7 @@ use rustc_middle::mir::{
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::LocalDefId;
use rustc_span::source_map::SourceMap;
use rustc_span::{Span, Symbol};
use rustc_span::{BytePos, Pos, RelativeBytePos, Span, Symbol};

/// Inserts `StatementKind::Coverage` statements that either instrument the binary with injected
/// counters, via intrinsic `llvm.instrprof.increment`, and/or inject metadata used during codegen
Expand Down Expand Up @@ -107,6 +107,12 @@ impl<'a, 'tcx> Instrumentor<'a, 'tcx> {
);

let mappings = self.create_mappings(&coverage_spans, &coverage_counters);
if mappings.is_empty() {
// No spans could be converted into valid mappings, so skip this function.
debug!("no spans could be converted into valid mappings; skipping");
return;
}

self.inject_coverage_statements(bcb_has_coverage_spans, &coverage_counters);

self.mir_body.function_coverage_info = Some(Box::new(FunctionCoverageInfo {
Expand Down Expand Up @@ -148,9 +154,9 @@ impl<'a, 'tcx> Instrumentor<'a, 'tcx> {
// Flatten the spans into individual term/span pairs.
.flat_map(|(term, spans)| spans.iter().map(move |&span| (term, span)))
// Convert each span to a code region, and create the final mapping.
.map(|(term, span)| {
let code_region = make_code_region(source_map, file_name, span, body_span);
Mapping { term, code_region }
.filter_map(|(term, span)| {
let code_region = make_code_region(source_map, file_name, span, body_span)?;
Some(Mapping { term, code_region })
})
.collect::<Vec<_>>()
}
Expand Down Expand Up @@ -252,41 +258,85 @@ fn inject_statement(mir_body: &mut mir::Body<'_>, counter_kind: CoverageKind, bb
data.statements.insert(0, statement);
}

/// Convert the Span into its file name, start line and column, and end line and column
/// Convert the Span into its file name, start line and column, and end line and column.
///
/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
/// the compiler, these column numbers are denoted in **bytes**, because that's what
/// LLVM's `llvm-cov` tool expects to see in coverage maps.
///
/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
/// but it's hard to rule out entirely (especially in the presence of complex macros
/// or other expansions), and if it does happen then skipping a span or function is
/// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
fn make_code_region(
source_map: &SourceMap,
file_name: Symbol,
span: Span,
body_span: Span,
) -> CodeRegion {
) -> Option<CodeRegion> {
debug!(
"Called make_code_region(file_name={}, span={}, body_span={})",
file_name,
source_map.span_to_diagnostic_string(span),
source_map.span_to_diagnostic_string(body_span)
);

let (file, mut start_line, mut start_col, mut end_line, mut end_col) =
source_map.span_to_location_info(span);
if span.hi() == span.lo() {
// Extend an empty span by one character so the region will be counted.
if span.hi() == body_span.hi() {
start_col = start_col.saturating_sub(1);
} else {
end_col = start_col + 1;
}
let lo = span.lo();
let hi = span.hi();

// Look up the file from the span's start position, and require the end
// position to lie in that same file; otherwise give up on this span.
let file = source_map.lookup_source_file(lo);
if !file.contains(hi) {
debug!(?span, ?file, ?lo, ?hi, "span crosses multiple files; skipping");
return None;
}

// Column numbers need to be in bytes, so we can't use the more convenient
// `SourceMap` methods for looking up file coordinates.
let rpos_and_line_and_byte_column = |pos: BytePos| -> Option<(RelativeBytePos, usize, usize)> {
let rpos = file.relative_position(pos);
// `?` makes the closure yield `None` when `lookup_line` finds no line for `rpos`.
let line_index = file.lookup_line(rpos)?;
let line_start = file.lines()[line_index];
// Line numbers and column numbers are 1-based, so add 1 to each.
// The column is the byte distance from the start of the line (`rpos - line_start`).
Some((rpos, line_index + 1, (rpos - line_start).to_usize() + 1))
};
if let Some(file) = file {
start_line = source_map.doctest_offset_line(&file.name, start_line);
end_line = source_map.doctest_offset_line(&file.name, end_line);

// Resolve both endpoints; if either lookup fails, the whole conversion returns `None`.
let (lo_rpos, mut start_line, mut start_col) = rpos_and_line_and_byte_column(lo)?;
let (hi_rpos, mut end_line, mut end_col) = rpos_and_line_and_byte_column(hi)?;

// If the span is empty, try to expand it horizontally by one character's
// worth of bytes, so that it is more visible in `llvm-cov` reports.
// We do this after resolving line/column numbers, so that empty spans at the
// end of a line get an extra column instead of wrapping to the next line.
// The nudge needs the source text to find char boundaries, so it is skipped
// when `file.src` is `None`.
if span.is_empty()
&& body_span.contains(span)
&& let Some(src) = &file.src
{
// Prefer to expand the end position, if it won't go outside the body span.
if hi < body_span.hi() {
let hi_rpos = hi_rpos.to_usize();
// Byte distance from `hi` to the first char boundary at or after `hi + 1`.
let nudge_bytes = src.ceil_char_boundary(hi_rpos + 1) - hi_rpos;
end_col += nudge_bytes;
} else if lo > body_span.lo() {
let lo_rpos = lo_rpos.to_usize();
// Byte distance from the last char boundary at or before `lo - 1` up to `lo`.
let nudge_bytes = lo_rpos - src.floor_char_boundary(lo_rpos - 1);
// Subtract the nudge, but don't go below column 1.
start_col = start_col.saturating_sub(nudge_bytes).max(1);
}
// If neither nudge could be applied, stick with the empty span coordinates.
}
CodeRegion {

// Apply an offset so that code in doctests has correct line numbers.
// FIXME(#79417): Currently we have no way to offset doctest _columns_.
start_line = source_map.doctest_offset_line(&file.name, start_line);
end_line = source_map.doctest_offset_line(&file.name, end_line);

// NOTE(review): the `as u32` casts below would truncate silently if a line or
// column number ever exceeded `u32::MAX` — confirm that is acceptable here.
Some(CodeRegion {
file_name,
start_line: start_line as u32,
start_col: start_col as u32,
end_line: end_line as u32,
end_col: end_col as u32,
}
})
}

fn is_eligible_for_coverage(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_mir_transform/src/lib.rs
Expand Up @@ -9,6 +9,7 @@
#![feature(min_specialization)]
#![feature(never_type)]
#![feature(option_get_or_insert_default)]
#![feature(round_char_boundary)]
#![feature(trusted_step)]
#![feature(try_blocks)]
#![feature(yeet_expr)]
Expand Down
53 changes: 53 additions & 0 deletions tests/coverage/unicode.cov-map
@@ -0,0 +1,53 @@
Function name: unicode::main
Raw bytes (67): 0x[01, 01, 09, 01, 05, 03, 05, 1e, 0d, 22, 09, 03, 05, 11, 1b, 1e, 0d, 22, 09, 03, 05, 09, 01, 0e, 01, 00, 0b, 05, 01, 09, 00, 0c, 03, 00, 10, 00, 1b, 05, 00, 1c, 00, 28, 22, 02, 08, 00, 25, 09, 00, 29, 00, 46, 11, 00, 47, 02, 06, 1b, 02, 06, 00, 07, 17, 02, 05, 01, 02]
Number of files: 1
- file 0 => global file 1
Number of expressions: 9
- expression 0 operands: lhs = Counter(0), rhs = Counter(1)
- expression 1 operands: lhs = Expression(0, Add), rhs = Counter(1)
- expression 2 operands: lhs = Expression(7, Sub), rhs = Counter(3)
- expression 3 operands: lhs = Expression(8, Sub), rhs = Counter(2)
- expression 4 operands: lhs = Expression(0, Add), rhs = Counter(1)
- expression 5 operands: lhs = Counter(4), rhs = Expression(6, Add)
- expression 6 operands: lhs = Expression(7, Sub), rhs = Counter(3)
- expression 7 operands: lhs = Expression(8, Sub), rhs = Counter(2)
- expression 8 operands: lhs = Expression(0, Add), rhs = Counter(1)
Number of file 0 mappings: 9
- Code(Counter(0)) at (prev + 14, 1) to (start + 0, 11)
- Code(Counter(1)) at (prev + 1, 9) to (start + 0, 12)
- Code(Expression(0, Add)) at (prev + 0, 16) to (start + 0, 27)
= (c0 + c1)
- Code(Counter(1)) at (prev + 0, 28) to (start + 0, 40)
- Code(Expression(8, Sub)) at (prev + 2, 8) to (start + 0, 37)
= ((c0 + c1) - c1)
- Code(Counter(2)) at (prev + 0, 41) to (start + 0, 70)
- Code(Counter(4)) at (prev + 0, 71) to (start + 2, 6)
- Code(Expression(6, Add)) at (prev + 2, 6) to (start + 0, 7)
= ((((c0 + c1) - c1) - c2) + c3)
- Code(Expression(5, Add)) at (prev + 2, 5) to (start + 1, 2)
= (c4 + ((((c0 + c1) - c1) - c2) + c3))

Function name: unicode::サビ
Raw bytes (9): 0x[01, 01, 00, 01, 01, 1e, 14, 00, 18]
Number of files: 1
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 30, 20) to (start + 0, 24)

Function name: unicode::他 (unused)
Raw bytes (9): 0x[01, 01, 00, 01, 00, 1e, 19, 00, 25]
Number of files: 1
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Zero) at (prev + 30, 25) to (start + 0, 37)

Function name: unicode::申し訳ございません
Raw bytes (9): 0x[01, 01, 00, 01, 01, 18, 01, 02, 02]
Number of files: 1
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 24, 1) to (start + 2, 2)

40 changes: 40 additions & 0 deletions tests/coverage/unicode.coverage
@@ -0,0 +1,40 @@
LL| |// edition: 2021
LL| |// ignore-windows - we can't force `llvm-cov` to use ANSI escapes on Windows
LL| |// llvm-cov-flags: --use-color
LL| |
LL| |// Check that column numbers are denoted in bytes, so that they don't cause
LL| |// `llvm-cov` to fail or emit malformed output.
LL| |//
LL| |// Note that when `llvm-cov` prints ^ arrows on a subsequent line, it simply
LL| |// inserts one space character for each "column", with no understanding of
LL| |// Unicode or character widths. So those arrows will tend to be misaligned
LL| |// for non-ASCII source code, regardless of whether column numbers are code
LL| |// points or bytes.
LL| |
LL| 1|fn main() {
LL| 33| for _İ in 'А'..='Я' { /* Я */ }
^32 ^32
LL| |
LL| 1| if 申し訳ございません() && 申し訳ございません() {
^0
LL| 0| println!("true");
LL| 1| }
LL| |
LL| 1| サビ();
LL| 1|}
LL| |
LL| 1|fn 申し訳ございません() -> bool {
LL| 1| std::hint::black_box(false)
LL| 1|}
LL| |
LL| |macro_rules! macro_that_defines_a_function {
LL| | (fn $名:ident () $体:tt) => {
LL| 1| fn $名 () $体 fn 他 () {}
^0
LL| | }
LL| |}
LL| |
LL| |macro_that_defines_a_function! {
LL| | fn サビ() {}
LL| |}

36 changes: 36 additions & 0 deletions tests/coverage/unicode.rs
@@ -0,0 +1,36 @@
// edition: 2021
// ignore-windows - we can't force `llvm-cov` to use ANSI escapes on Windows
// llvm-cov-flags: --use-color

// Check that column numbers are denoted in bytes, so that they don't cause
// `llvm-cov` to fail or emit malformed output.
//
// Note that when `llvm-cov` prints ^ arrows on a subsequent line, it simply
// inserts one space character for each "column", with no understanding of
// Unicode or character widths. So those arrows will tend to be misaligned
// for non-ASCII source code, regardless of whether column numbers are code
// points or bytes.

fn main() {
for _İ in 'А'..='Я' { /* Я */ }

if 申し訳ございません() && 申し訳ございません() {
println!("true");
}

サビ();
}

fn 申し訳ございません() -> bool {
std::hint::black_box(false)
}

macro_rules! macro_that_defines_a_function {
(fn $名:ident () $体:tt) => {
fn $名 () $体 fn 他 () {}
}
}

macro_that_defines_a_function! {
fn サビ() {}
}