Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve invalid UTF-8 lint by finding the expression initializer #115257

Merged
merged 2 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 48 additions & 0 deletions compiler/rustc_lint/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,54 @@ impl<'tcx> LateContext<'tcx> {
tcx.try_normalize_erasing_regions(self.param_env, proj).ok()
})
}

/// Resolves an expression to the expression that initialized it, looking
/// through local bindings as well as bindings defined **outside** the
/// current body (`const` and `static` items).
///
/// When the initializer found this way is itself a path to another such
/// binding, the lookup is applied again, for as many steps as can be
/// followed (in practice rarely more than one). `peel_blocks` is applied to
/// the input and to every initializer that is followed.
/// Type-check adjustments are not taken into account by this function.
///
/// Examples:
/// ```
/// const ABC: i32 = 1;
/// //               ^ output
/// let def = ABC;
/// dbg!(def);
/// //   ^^^ input
///
/// // or...
/// let abc = 1;
/// let def = abc + 2;
/// //        ^^^^^^^ output
/// dbg!(def);
/// //   ^^^ input
/// ```
pub fn expr_or_init<'a>(&self, mut expr: &'a hir::Expr<'tcx>) -> &'a hir::Expr<'tcx> {
    expr = expr.peel_blocks();

    loop {
        // Only a path expression can name a binding; anything else is the result.
        let hir::ExprKind::Path(ref qpath) = expr.kind else { break };

        // Locate the HIR node that introduces whatever the path resolves to.
        let defining_node = match self.qpath_res(qpath, expr.hir_id) {
            Res::Local(hir_id) => self.tcx.hir().find_parent(hir_id),
            Res::Def(_, def_id) => self.tcx.hir().get_if_local(def_id),
            _ => None,
        };

        // Pull the initializer out of that node; stop when there is none.
        let initializer = match defining_node {
            Some(hir::Node::Expr(parent_expr)) => parent_expr,
            Some(hir::Node::Local(hir::Local { init: Some(local_init), .. })) => *local_init,
            Some(hir::Node::Item(item)) => match item.kind {
                hir::ItemKind::Const(.., body_id) | hir::ItemKind::Static(.., body_id) => {
                    self.tcx.hir().body(body_id).value
                }
                _ => break,
            },
            _ => break,
        };

        expr = initializer.peel_blocks();
    }

    expr
}
}

impl<'tcx> abi::HasDataLayout for LateContext<'tcx> {
Expand Down
25 changes: 14 additions & 11 deletions compiler/rustc_lint/src/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::str::Utf8Error;

use rustc_ast::{BorrowKind, LitKind};
use rustc_ast::LitKind;
use rustc_hir::{Expr, ExprKind};
use rustc_span::source_map::Spanned;
use rustc_span::sym;
Expand All @@ -11,7 +11,7 @@ use crate::{LateContext, LateLintPass, LintContext};
declare_lint! {
/// The `invalid_from_utf8_unchecked` lint checks for calls to
/// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
/// with an invalid UTF-8 literal.
/// with a known invalid UTF-8 value.
///
/// ### Example
///
Expand All @@ -36,7 +36,7 @@ declare_lint! {
declare_lint! {
/// The `invalid_from_utf8` lint checks for calls to
/// `std::str::from_utf8` and `std::str::from_utf8_mut`
/// with an invalid UTF-8 literal.
/// with a known invalid UTF-8 value.
///
/// ### Example
///
Expand Down Expand Up @@ -67,8 +67,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
{
let lint = |utf8_error: Utf8Error| {
let label = arg.span;
let lint = |label, utf8_error: Utf8Error| {
let method = diag_item.as_str().strip_prefix("str_").unwrap();
let method = format!("std::str::{method}");
let valid_up_to = utf8_error.valid_up_to();
Expand All @@ -78,22 +77,26 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
expr.span,
if is_unchecked_variant {
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
} else {
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
}
)
};

match &arg.kind {
let mut init = cx.expr_or_init(arg);
while let ExprKind::AddrOf(.., inner) = init.kind {
init = cx.expr_or_init(inner);
}
match init.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => {
if let LitKind::ByteStr(bytes, _) = &lit
&& let Err(utf8_error) = std::str::from_utf8(bytes)
{
lint(utf8_error);
lint(init.span, utf8_error);
}
},
ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
ExprKind::Array(args) => {
let elements = args.iter().map(|e|{
match &e.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
Expand All @@ -108,7 +111,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
if let Some(elements) = elements
&& let Err(utf8_error) = std::str::from_utf8(&elements)
{
lint(utf8_error);
lint(init.span, utf8_error);
}
}
_ => {}
Expand Down
27 changes: 27 additions & 0 deletions tests/ui/lint/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// check-pass

#![feature(inline_const)]
#![feature(concat_bytes)]

#![warn(invalid_from_utf8_unchecked)]
#![warn(invalid_from_utf8)]

Expand Down Expand Up @@ -90,4 +92,29 @@ pub fn from_utf8() {
}
}

pub fn from_utf8_with_indirections() { // Invalid bytes reach the calls via `let`/`const`/`static` bindings, not inline literals. Do not add or remove lines: the expected .stderr pins line numbers.
let mut a = [99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8_mut(&mut a);
//~^ WARN calls to `std::str::from_utf8_mut`
let mut b = &mut a; // first hop: a mutable borrow of the array binding
let mut c = b; // second hop: plain rebinding; the lint must trace `c` -> `b` -> `&mut a` -> the array
std::str::from_utf8_mut(c);
//~^ WARN calls to `std::str::from_utf8_mut`
let mut c = &[99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8(c);
//~^ WARN calls to `std::str::from_utf8`
const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8(&INVALID_1);
//~^ WARN calls to `std::str::from_utf8`
static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8(&INVALID_2);
//~^ WARN calls to `std::str::from_utf8`
const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8(INVALID_3);
//~^ WARN calls to `std::str::from_utf8`
const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] };
std::str::from_utf8(INVALID_4);
//~^ WARN calls to `std::str::from_utf8`
}

fn main() {} // Intentionally empty: this file is a `check-pass` UI test; only the emitted lint warnings are compared.
135 changes: 96 additions & 39 deletions tests/ui/lint/invalid_from_utf8.stderr
Original file line number Diff line number Diff line change
@@ -1,110 +1,167 @@
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:19:9
--> $DIR/invalid_from_utf8.rs:21:9
|
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
|
note: the lint level is defined here
--> $DIR/invalid_from_utf8.rs:4:9
--> $DIR/invalid_from_utf8.rs:6:9
|
LL | #![warn(invalid_from_utf8_unchecked)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:21:9
--> $DIR/invalid_from_utf8.rs:23:9
|
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:39:9
--> $DIR/invalid_from_utf8.rs:41:9
|
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:41:9
--> $DIR/invalid_from_utf8.rs:43:9
|
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:43:9
--> $DIR/invalid_from_utf8.rs:45:9
|
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:45:9
--> $DIR/invalid_from_utf8.rs:47:9
|
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:62:9
--> $DIR/invalid_from_utf8.rs:64:9
|
LL | std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
|
note: the lint level is defined here
--> $DIR/invalid_from_utf8.rs:5:9
--> $DIR/invalid_from_utf8.rs:7:9
|
LL | #![warn(invalid_from_utf8)]
| ^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:64:9
--> $DIR/invalid_from_utf8.rs:66:9
|
LL | std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:82:9
--> $DIR/invalid_from_utf8.rs:84:9
|
LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^-----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:84:9
--> $DIR/invalid_from_utf8.rs:86:9
|
LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:86:9
--> $DIR/invalid_from_utf8.rs:88:9
|
LL | std::str::from_utf8(b"cl\x82ippy");
| ^^^^^^^^^^^^^^^^^^^^-------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:88:9
--> $DIR/invalid_from_utf8.rs:90:9
|
LL | std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
| ^^^^^^^^^^^^^^^^^^^^---------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: 12 warnings emitted
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:97:5
|
LL | let mut a = [99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8_mut(&mut a);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:101:5
|
LL | let mut a = [99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
...
LL | std::str::from_utf8_mut(c);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:104:5
|
LL | let mut c = &[99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(c);
| ^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:107:5
|
LL | const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(&INVALID_1);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:110:5
|
LL | static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(&INVALID_2);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:113:5
|
LL | const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(INVALID_3);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:116:5
|
LL | const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] };
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(INVALID_4);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: 19 warnings emitted