Skip to content

Commit

Permalink
[analyzer] Switch to PostStmt callbacks in ArrayBoundV2 (#72107)
Browse files Browse the repository at this point in the history
...instead of the currently used, more abstract Location callback. The
main advantage of this change is that after it the checker will check
`array[index].field` while the previous implementation ignored this
situation (because here the ElementRegion is wrapped in a FieldRegion
object). This improvement fixes PR #70187.

Note that after this change `&array[idx]` will be handled as an access
to the `idx`th element of `array`, which is technically incorrect but
matches programmers' intuition. In my opinion it's more helpful if
the report points to the source location where the indexing happens
(instead of the location where a pointer is finally dereferenced).

As a special case, this change allows code that forms the past-the-end
pointer of an array as `&arr[size]` (but still rejects code like
`if (idx >= size) return &array[idx];` and code that dereferences a
past-the-end pointer).

In addition to this primary improvement, this change tweaks the message
for the tainted index/offset case (using the more concrete information
that's available now) and clarifies/improves a few testcases.

The main change of this commit (replacing `check::Location` with
`check::PostStmt<...>` callbacks) was already proposed in my change
https://reviews.llvm.org/D150446 and in https://reviews.llvm.org/D159107
by steakhal. Those reviews were both abandoned, but the problems that led
to their abandonment were unrelated to the change that is introduced in
this PR.
  • Loading branch information
NagyDonat committed Dec 5, 2023
1 parent 9e4210f commit dfdedaf
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 47 deletions.
139 changes: 101 additions & 38 deletions clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "clang/AST/CharUnits.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Expand All @@ -34,20 +35,46 @@ using llvm::formatv;
namespace {
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };

class ArrayBoundCheckerV2 :
public Checker<check::Location> {
/// The pair of diagnostic texts produced for a single out-of-bounds report.
/// Member order matters: the message builders aggregate-initialize this
/// struct as `{Short, Full}`.
struct Messages {
  /// Short description handed to the bug report.
  std::string Short;
  /// Full description handed to the bug report.
  std::string Full;
};

// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
// instead of `PreStmt` because the current implementation passes the whole
// expression to `CheckerContext::getSVal()` which only works after the
// symbolic evaluation of the expression. (To turn them into `PreStmt`
// callbacks, we'd need to duplicate the logic that evaluates these
// expressions.) The `MemberExpr` callback would work as `PreStmt` but it's
// defined as `PostStmt` for the sake of consistency with the other callbacks.
class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>,
check::PostStmt<UnaryOperator>,
check::PostStmt<MemberExpr>> {
BugType BT{this, "Out-of-bound access"};
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};

void performCheck(const Expr *E, CheckerContext &C) const;

void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
NonLoc Offset, std::string RegName, std::string Msg) const;
NonLoc Offset, Messages Msgs) const;

static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);

static bool isInAddressOf(const Stmt *S, ASTContext &AC);

public:
void checkLocation(SVal l, bool isLoad, const Stmt *S,
CheckerContext &C) const;
/// Callback invoked after an array subscript expression has been evaluated;
/// every subscript expression is forwarded unconditionally to performCheck().
void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const {
performCheck(E, C);
}
/// Callback invoked after a unary operator has been evaluated. Only the
/// dereference operator (`UO_Deref`) is forwarded to performCheck(); every
/// other unary operator is ignored by this overload.
void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const {
  if (E->getOpcode() != UO_Deref)
    return;
  performCheck(E, C);
}
/// Callback invoked after a member expression has been evaluated. Only
/// arrow accesses (`base->member`) are considered, and the check is applied
/// to the base expression rather than to the member expression itself.
void checkPostStmt(const MemberExpr *E, CheckerContext &C) const {
  if (!E->isArrow())
    return;
  performCheck(E->getBase(), C);
}
};

} // anonymous namespace

/// For a given Location that can be represented as a symbolic expression
Expand Down Expand Up @@ -149,9 +176,11 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
// where the first one corresponds to "value below threshold" and the second
// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
// the case when the evaluation fails.
// If the optional argument CheckEquality is true, then use BO_EQ instead of
// the default BO_LT after consistently applying the same simplification steps.
static std::pair<ProgramStateRef, ProgramStateRef>
compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
SValBuilder &SVB) {
SValBuilder &SVB, bool CheckEquality = false) {
if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
}
Expand All @@ -167,8 +196,10 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
return {nullptr, State};
}
}
const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT;
auto BelowThreshold =
SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>();
SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType())
.getAs<NonLoc>();

if (BelowThreshold)
return State->assume(*BelowThreshold);
Expand Down Expand Up @@ -217,16 +248,19 @@ static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
return formatv(ShortMsgTemplates[Kind], RegName);
}

static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
std::string RegName = getRegionName(Region);
SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of " << RegName << " at negative byte offset";
if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
Out << ' ' << ConcreteIdx->getValue();
return std::string(Buf);
return {getShortMsg(OOB_Precedes, RegName), std::string(Buf)};
}
static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName,
NonLoc Offset, NonLoc Extent, SVal Location) {

static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
NonLoc Offset, NonLoc Extent, SVal Location) {
std::string RegName = getRegionName(Region);
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
assert(EReg && "this checker only handles element access");
QualType ElemType = EReg->getElementType();
Expand Down Expand Up @@ -273,20 +307,18 @@ static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName,
Out << "s";
}

return std::string(Buf);
}
static std::string getTaintMsg(std::string RegName) {
SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of " << RegName
<< " with a tainted offset that may be too large";
return std::string(Buf);
return {getShortMsg(OOB_Exceeds, RegName), std::string(Buf)};
}

void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
const Stmt *LoadS,
CheckerContext &C) const {
/// Builds the short and full diagnostic texts for an access whose offset is
/// tainted and may exceed the upper bound.
///
/// \param Region     The region being accessed; its printable name comes
///                   from getRegionName().
/// \param OffsetName The noun used for the tainted value in both messages
///                   (the caller passes either "offset" or "index").
static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) {
  const std::string RegName = getRegionName(Region);

  Messages Msgs;
  Msgs.Short = formatv("Potential out of bound access to {0} with tainted {1}",
                       RegName, OffsetName)
                   .str();
  Msgs.Full = formatv("Access of {0} with a tainted {1} that may be too large",
                      RegName, OffsetName)
                  .str();
  return Msgs;
}

void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
// some new logic here that reasons directly about memory region extents.
// Once that logic is more mature, we can bring it back to assumeInBound()
Expand All @@ -297,12 +329,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
// have some flexibility in defining the base region, we can achieve
// various levels of conservatism in our buffer overflow checking.

const SVal Location = C.getSVal(E);

// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
// #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
// and incomplete analysis of these leads to false positives. As even
// accurate reports would be confusing for the users, just disable reports
// from these macros:
if (isFromCtypeMacro(LoadS, C.getASTContext()))
if (isFromCtypeMacro(E, C.getASTContext()))
return;

ProgramStateRef State = C.getState();
Expand Down Expand Up @@ -331,9 +365,8 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,

if (PrecedesLowerBound && !WithinLowerBound) {
// We know that the index definitely precedes the lower bound.
std::string RegName = getRegionName(Reg);
std::string Msg = getPrecedesMsg(RegName, ByteOffset);
reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg);
Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs);
return;
}

Expand All @@ -350,17 +383,38 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
if (ExceedsUpperBound) {
if (!WithinUpperBound) {
// We know that the index definitely exceeds the upper bound.
std::string RegName = getRegionName(Reg);
std::string Msg = getExceedsMsg(C.getASTContext(), RegName, ByteOffset,
*KnownSize, Location);
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {
// ...but this is within an addressof expression, so we need to check
// for the exceptional case that `&array[size]` is valid.
auto [EqualsToThreshold, NotEqualToThreshold] =
compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize,
SVB, /*CheckEquality=*/true);
if (EqualsToThreshold && !NotEqualToThreshold) {
// We are definitely in the exceptional case, so return early
// instead of reporting a bug.
C.addTransition(EqualsToThreshold);
return;
}
}
Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset,
*KnownSize, Location);
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs);
return;
}
if (isTainted(State, ByteOffset)) {
// Both cases are possible, but the index is tainted, so report.
// Both cases are possible, but the offset is tainted, so report.
std::string RegName = getRegionName(Reg);
std::string Msg = getTaintMsg(RegName);
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);

// Diagnostic detail: "tainted offset" is always correct, but the
// common case is that 'idx' is tainted in 'arr[idx]' and then it's
// nicer to say "tainted index".
const char *OffsetName = "offset";
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
OffsetName = "index";

Messages Msgs = getTaintMsgs(Reg, OffsetName);
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs);
return;
}
}
Expand All @@ -374,17 +428,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,

void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
ProgramStateRef ErrorState, OOB_Kind Kind,
NonLoc Offset, std::string RegName,
std::string Msg) const {
NonLoc Offset, Messages Msgs) const {

ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
if (!ErrorNode)
return;

std::string ShortMsg = getShortMsg(Kind, RegName);

auto BR = std::make_unique<PathSensitiveBugReport>(
Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode);
Kind == OOB_Taint ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);

// Track back the propagation of taintedness.
if (Kind == OOB_Taint)
Expand Down Expand Up @@ -413,6 +464,18 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
(MacroName == "isupper") || (MacroName == "isxdigit"));
}

bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
ParentMapContext &ParentCtx = ACtx.getParentMapContext();
do {
const DynTypedNodeList Parents = ParentCtx.getParents(*S);
if (Parents.empty())
return false;
S = Parents[0].get<Stmt>();
} while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);
return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
}

/// Registration hook: registers ArrayBoundCheckerV2 with the given checker
/// manager.
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
mgr.registerChecker<ArrayBoundCheckerV2>();
}
Expand Down
Loading

0 comments on commit dfdedaf

Please sign in to comment.