Skip to content

Commit

Permalink
make it operate on the MIR
Browse files Browse the repository at this point in the history
  • Loading branch information
Centri3 committed Jul 22, 2023
1 parent 0e65248 commit 8c84a12
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 98 deletions.
221 changes: 163 additions & 58 deletions clippy_lints/src/ptr_to_temporary.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
use clippy_utils::consts::is_promotable;
use clippy_utils::diagnostics::span_lint_and_note;
use clippy_utils::{is_from_proc_macro, is_temporary, path_res};
use rustc_hir::{BorrowKind, Expr, ExprKind, ItemKind, OwnerNode};
use clippy_utils::diagnostics::{span_lint_and_note, span_lint_hir_and_then};
use clippy_utils::mir::local_assignments;
use clippy_utils::ty::has_drop;
use clippy_utils::{is_from_proc_macro, is_temporary, last_path_segment, path_res};
use itertools::Itertools;
use rustc_hir::def_id::LocalDefId;
use rustc_hir::intravisit::FnKind;
use rustc_hir::{Body, BorrowKind, Expr, ExprKind, FnDecl, ItemKind, OwnerNode};
use rustc_lint::{LateContext, LateLintPass, LintContext};
use rustc_middle::lint::in_external_macro;
use rustc_middle::ty::TypeVisitableExt;
use rustc_middle::mir::visit::Visitor;
use rustc_middle::mir::{
self, AggregateKind, BasicBlock, BasicBlockData, CallSource, Operand, Place, Rvalue, StatementKind, TerminatorKind,
};
use rustc_middle::ty::{self, TypeVisitableExt};
use rustc_session::{declare_lint_pass, declare_tool_lint};
use rustc_span::sym;
use rustc_span::{sym, Span};

declare_clippy_lint! {
/// ### What it does
Expand Down Expand Up @@ -45,11 +54,48 @@ impl<'tcx> LateLintPass<'tcx> for PtrToTemporary {
return;
}

_ = check_for_returning_raw_ptr(cx, expr) || check_for_dangling_as_ptr(cx, expr);
_ = check_for_returning_raw_ptr(cx, expr)
}

fn check_fn(
&mut self,
cx: &LateContext<'tcx>,
kind: FnKind<'_>,
decl: &FnDecl<'_>,
body: &Body<'_>,
span: Span,
def_id: LocalDefId,
) {
let mir = cx.tcx.optimized_mir(def_id);
let mut v = DanglingPtrVisitor {
cx,
body: mir,
results: vec![],
};
v.visit_body(mir);

for dangling_ptr_span in v.results {
// TODO: We need to lint on the call in question instead, so lint attributes work fine. I'm not sure
// how to though
span_lint_hir_and_then(
cx,
PTR_TO_TEMPORARY,
cx.tcx.local_def_id_to_hir_id(def_id),
dangling_ptr_span,
"calling `TODO` on a temporary value",
|diag| {
diag.note(
"usage of this pointer will cause Undefined Behavior as the temporary will be deallocated at \
the end of the statement, yet the pointer will continue pointing to it, resulting in a \
dangling pointer",
);
},
);
}
}
}

/// Check for returning raw pointers to temporaries that are not promoted to a constant.
/// Check for returning raw pointers to temporaries that are not promoted to a constant
fn check_for_returning_raw_ptr<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) -> bool {
// Get the final return statement if this is a return statement, or don't lint
let expr = if let ExprKind::Ret(Some(expr)) = expr.kind {
Expand Down Expand Up @@ -87,61 +133,120 @@ fn check_for_returning_raw_ptr<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'t
false
}

/// Check for calls to `as_ptr` or `as_mut_ptr` that will always result in a dangling pointer, under
/// the assumption of course that `as_ptr` will return a pointer to data owned by `self`, rather
/// than returning a raw pointer to new memory.
///
/// This only lints `std` types as anything else could potentially be wrong if the above assumption
/// doesn't hold (which it should for all `std` types).
///
/// We could perhaps extend this to some external crates as well, if we want.
fn check_for_dangling_as_ptr<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) -> bool {
if let ExprKind::MethodCall(seg, recv, [], _) = expr.kind
&& (seg.ident.name == sym::as_ptr || seg.ident.name == sym!(as_mut_ptr))
&& let Some(def_id) = cx.typeck_results().type_dependent_def_id(expr.hir_id)
&& cx.tcx.fn_sig(def_id).skip_binder().output().skip_binder().is_unsafe_ptr()
&& matches!(cx.tcx.crate_name(def_id.krate), sym::core | sym::alloc | sym::std)
&& is_temporary(cx, recv)
&& !is_from_proc_macro(cx, expr)
// The typeck table only contains `ReErased`.
&& let Some(def_id) = match recv.kind {
ExprKind::Call(path, _) => {
path_res(cx, path).opt_def_id()
},
ExprKind::MethodCall(..) => {
cx.typeck_results().type_dependent_def_id(recv.hir_id)
},
// Anything else will either always have a `'static` lifetime or won't be dropped at
// the end of the statement (i.e., will have a longer lifetime), so we can skip them
_ => None,
struct DanglingPtrVisitor<'a, 'tcx> {
cx: &'a LateContext<'tcx>,
body: &'a mir::Body<'tcx>,
results: Vec<Span>,
}

impl<'tcx> Visitor<'tcx> for DanglingPtrVisitor<'_, 'tcx> {
fn visit_basic_block_data(&mut self, block: BasicBlock, data: &BasicBlockData<'tcx>) {
// TODO: We really need to clean this up. I hope future me has any idea what it does!
if let Some(term) = &data.terminator
&& let TerminatorKind::Call {
// TODO: Check if this is `as_ptr`, or any of our specific functions we want to
// lint. Should be simple to implement.
func,
args,
destination,
target: Some(target),
call_source: CallSource::Normal,
..
} = &term.kind
&& destination.ty(&self.body.local_decls, self.cx.tcx).ty.is_unsafe_ptr()
&& let [recv] = args.as_slice()
&& let Some(recv) = recv.place()
&& let [assign] = &*local_assignments(self.body, recv.local)
&& let assign_stmt = &self.body.basic_blocks[assign.block].statements[assign.statement_index]
&& let StatementKind::Assign(box (_, rvalue)) = &assign_stmt.kind
&& let Rvalue::Ref(_, _, actual_recv) = rvalue
&& let [const_assign] = &*local_assignments(self.body, actual_recv.local)
&& if const_assign.statement_index == self.body.basic_blocks[const_assign.block].statements.len() {
if let TerminatorKind::Call {
func,
call_source: CallSource::Normal,
..
} = &self.body.basic_blocks[const_assign.block].terminator().kind
&& let Some((def_id, _)) = func.const_fn_def()
&& let ty = self.cx.tcx.fn_sig(def_id).subst_identity().skip_binder().output()
&& (ty.has_late_bound_vars() || !ty.is_ref())
&& !matches!(ty.kind(), ty::Ref(region, _, _) if region.is_static() )
{
true
} else {
false
}
} else {
let StatementKind::Assign(box (_, rvalue)) =
&self.body.basic_blocks[const_assign.block].statements[const_assign.statement_index].kind
else {
return;
};

let ops = match rvalue {
Rvalue::Use(op) => vec![op],
Rvalue::Repeat(op, _) => vec![op],
Rvalue::Cast(_, op, _) => vec![op],
Rvalue::Aggregate(box AggregateKind::Array(_) | box AggregateKind::Tuple, ops) => {
ops.into_iter().collect_vec()
},
_ => return,
};

!ops.iter().all(|op| matches!(op, Operand::Constant(_)))
}
{
check_for_dangling(
self.cx,
self.body,
*target,
&actual_recv,
destination,
term.source_info.span,
&mut self.results,
);
}
&& let ty = cx.tcx.fn_sig(def_id).subst_identity().output()
// afaik, any `ReLateBound` means it isn't static. Lifetimes are confusing under the
// hood tho so this is likely wrong, and we'll need a smarter heuristic
&& (ty.has_late_bound_regions() || !ty.skip_binder().is_ref())
}
}

fn check_for_dangling<'tcx>(
cx: &LateContext<'tcx>,
body: &mir::Body<'tcx>,
bb: BasicBlock,
recv: &Place<'tcx>,
ptr: &Place<'_>,
span: Span,
results: &mut Vec<Span>,
) {
let data = &body.basic_blocks[bb];

if let Some(term) = &data.terminator
&& let TerminatorKind::Drop { .. } = term.kind
{
span_lint_and_note(
cx,
PTR_TO_TEMPORARY,
expr.span,
"calling `as_ptr` on a temporary value",
None,
"usage of this pointer will cause Undefined Behavior as the temporary will be deallocated at the end of \
the statement, yet the pointer will continue pointing to it, resulting in a dangling pointer",
);
if !body.basic_blocks[bb].statements.iter().any(|stmt| {
matches!(stmt.kind, StatementKind::StorageDead(local) if ptr.local == local)
}) {
results.push(span);
}

return true;
return;
}

false
}
// No drop glue or it's not a temporary. Bummer. Instead let's inspect the order of dead locals
// to see if the pointer is dangling for even a single statement. If it's not, the reference to
// `self` should always be dead after the pointer.

// TODO: Add a check here for some blocklist for methods that return a raw pointer that we should
// lint. We can't bulk-deny these because we don't know whether it's returning something owned by
// `self` (and will thus be dropped at the end of the statement) or is returning a pointer to newly
// allocated memory, like what allocators do.
/*
fn check_for_denied_ptr_method<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) -> bool {
true
let mut recv_dead = false;

for stmt in &data.statements {
if let StatementKind::StorageDead(local) = stmt.kind {
match (local == recv.local, local == ptr.local) {
(true, false) => recv_dead = true,
(false, true) if recv_dead => {
results.push(span);
},
_ => continue,
}
}
}
}
*/
21 changes: 15 additions & 6 deletions tests/ui/ptr_to_temporary.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//@aux-build:proc_macros.rs:proc-macro
//#aux-build:proc_macros.rs:proc-macro
#![allow(
clippy::borrow_deref_ref,
clippy::deref_addrof,
Expand All @@ -9,8 +9,8 @@
#![warn(clippy::ptr_to_temporary)]
#![no_main]

#[macro_use]
extern crate proc_macros;
// #[macro_use]
// extern crate proc_macros;

use std::cell::{Cell, RefCell};
use std::ffi::CString;
Expand Down Expand Up @@ -51,6 +51,12 @@ fn bad_6() {
let pv = vec![1].as_ptr();
}

// TODO: Not linted yet. I believe this is because there's a cast in the MIR between the call to
// `helper` and `as_ptr`. We likely need to be smarter with traversing assignments backwards.
//
// We can also probably optimize it by doing one pass over the body to get local assignments, that
// way we don't need to call `local_assignments` a ton of times (and thus do a ton of completely
// avoidable passes as well).
fn bad_7() {
fn helper() -> [i32; 1] {
[1]
Expand Down Expand Up @@ -81,10 +87,13 @@ fn bad_12() {
}

fn bad_13() {
// TODO: Not linted. We need to normalize these
let ps = <[i32]>::as_ptr(Vec::as_slice(&vec![1]));
}

fn bad_14() {
CString::new("asd".to_owned()).unwrap().as_c_str().as_ptr();
}

// TODO: We need more tests here...

fn fine_1() -> *const i32 {
Expand Down Expand Up @@ -137,17 +146,16 @@ fn fine_8() {
fn fine_9() {
let pcstr = CString::new("asd".to_owned()).unwrap();
// Not UB, as `pcstr` is not a temporary
// TODO: Don't lint this! We need a check for whether a chain begins with a local...
pcstr.as_c_str().as_ptr();
}

fn fine_10() {
let pcstr = CString::new("asd".to_owned()).unwrap();
// Not UB, as `pcstr` is not a temporary
// TODO: Same here, but this time it needs to be normalized
CString::as_c_str(&pcstr).as_ptr();
}

/*
external! {
fn fine_external() -> *const i32 {
let a = 0i32;
Expand All @@ -162,3 +170,4 @@ with_span! {
&a as *const i32
}
}
*/

0 comments on commit 8c84a12

Please sign in to comment.