Skip to content

Commit

Permalink
[analyzer] Improve diagnostics from ArrayBoundCheckerV2 (#70056)
Browse files Browse the repository at this point in the history
Previously alpha.security.ArrayBoundV2 produced very spartan bug
reports; this commit ensures that the relevant (and known) details are
reported to the user.

The logic for detecting bugs is not changed, after this commit the
checker will report the same set of issues, but with better messages.

To test the details of the message generation this commit adds a new
test file 'out-of-bounds-diagnostics.c'. Three of the testcases are
added with FIXME notes because they reveal shortcomings of the existing
modeling and bounds checking code. I will try to fix them in separate
follow-up commits.
  • Loading branch information
NagyDonat committed Nov 7, 2023
1 parent d0ef94b commit 16ef496
Show file tree
Hide file tree
Showing 7 changed files with 349 additions and 100 deletions.
208 changes: 154 additions & 54 deletions clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Expand Up @@ -22,23 +22,25 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace clang;
using namespace ento;
using namespace taint;
using llvm::formatv;

namespace {
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };

class ArrayBoundCheckerV2 :
public Checker<check::Location> {
BugType BT{this, "Out-of-bound access"};
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};

enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };

void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
SVal TaintedSVal = UnknownVal()) const;
NonLoc Offset, std::string RegName, std::string Msg) const;

static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);

Expand All @@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
/// Arr and the distance of Location from the beginning of Arr (expressed in a
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
/// cannot be determined.
std::optional<std::pair<const SubRegion *, NonLoc>>
static std::optional<std::pair<const SubRegion *, NonLoc>>
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
QualType T = SVB.getArrayIndexType();
auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
Expand Down Expand Up @@ -174,9 +176,116 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
return {nullptr, nullptr};
}

void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
const Stmt* LoadS,
CheckerContext &checkerContext) const {
static std::string getRegionName(const SubRegion *Region) {
if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
return RegName;

// Field regions only have descriptive names when their parent has a
// descriptive name; so we provide a fallback representation for them:
if (const auto *FR = Region->getAs<FieldRegion>()) {
if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
return formatv("the field '{0}'", Name);
return "the unnamed field";
}

if (isa<AllocaRegion>(Region))
return "the memory returned by 'alloca'";

if (isa<SymbolicRegion>(Region) &&
isa<HeapSpaceRegion>(Region->getMemorySpace()))
return "the heap area";

if (isa<StringRegion>(Region))
return "the string literal";

return "the region";
}

static std::optional<int64_t> getConcreteValue(NonLoc SV) {
if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
return ConcreteVal->getValue().tryExtValue();
}
return std::nullopt;
}

static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
static const char *ShortMsgTemplates[] = {
"Out of bound access to memory preceding {0}",
"Out of bound access to memory after the end of {0}",
"Potential out of bound access to {0} with tainted offset"};

return formatv(ShortMsgTemplates[Kind], RegName);
}

static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of " << RegName << " at negative byte offset";
if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
Out << ' ' << ConcreteIdx->getValue();
return std::string(Buf);
}
static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName,
NonLoc Offset, NonLoc Extent, SVal Location) {
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
assert(EReg && "this checker only handles element access");
QualType ElemType = EReg->getElementType();

std::optional<int64_t> OffsetN = getConcreteValue(Offset);
std::optional<int64_t> ExtentN = getConcreteValue(Extent);

bool UseByteOffsets = true;
if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) {
const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
if (!OffsetHasRemainder && !ExtentHasRemainder) {
UseByteOffsets = false;
if (OffsetN)
*OffsetN /= ElemSize;
if (ExtentN)
*ExtentN /= ElemSize;
}
}

SmallString<256> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of ";
if (!ExtentN && !UseByteOffsets)
Out << "'" << ElemType.getAsString() << "' element in ";
Out << RegName << " at ";
if (OffsetN) {
Out << (UseByteOffsets ? "byte offset " : "index ") << *OffsetN;
} else {
Out << "an overflowing " << (UseByteOffsets ? "byte offset" : "index");
}
if (ExtentN) {
Out << ", while it holds only ";
if (*ExtentN != 1)
Out << *ExtentN;
else
Out << "a single";
if (UseByteOffsets)
Out << " byte";
else
Out << " '" << ElemType.getAsString() << "' element";

if (*ExtentN > 1)
Out << "s";
}

return std::string(Buf);
}
static std::string getTaintMsg(std::string RegName) {
SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of " << RegName
<< " with a tainted offset that may be too large";
return std::string(Buf);
}

void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
const Stmt *LoadS,
CheckerContext &C) const {

// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
// some new logic here that reasons directly about memory region extents.
Expand All @@ -193,14 +302,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
// and incomplete analysis of these leads to false positives. As even
// accurate reports would be confusing for the users, just disable reports
// from these macros:
if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
if (isFromCtypeMacro(LoadS, C.getASTContext()))
return;

ProgramStateRef state = checkerContext.getState();
SValBuilder &svalBuilder = checkerContext.getSValBuilder();
ProgramStateRef State = C.getState();
SValBuilder &SVB = C.getSValBuilder();

const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
computeOffset(state, svalBuilder, location);
computeOffset(State, SVB, Location);

if (!RawOffset)
return;
Expand All @@ -217,80 +326,71 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
// MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
// non-symbolic regions (e.g. a field subregion of a symbolic region) in
// unknown space.
auto [state_precedesLowerBound, state_withinLowerBound] =
compareValueToThreshold(state, ByteOffset,
svalBuilder.makeZeroArrayIndex(), svalBuilder);
auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);

if (state_precedesLowerBound && !state_withinLowerBound) {
if (PrecedesLowerBound && !WithinLowerBound) {
// We know that the index definitely precedes the lower bound.
reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
std::string RegName = getRegionName(Reg);
std::string Msg = getPrecedesMsg(RegName, ByteOffset);
reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg);
return;
}

if (state_withinLowerBound)
state = state_withinLowerBound;
if (WithinLowerBound)
State = WithinLowerBound;
}

// CHECK UPPER BOUND
DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
if (auto KnownSize = Size.getAs<NonLoc>()) {
auto [state_withinUpperBound, state_exceedsUpperBound] =
compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
auto [WithinUpperBound, ExceedsUpperBound] =
compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);

if (state_exceedsUpperBound) {
if (!state_withinUpperBound) {
if (ExceedsUpperBound) {
if (!WithinUpperBound) {
// We know that the index definitely exceeds the upper bound.
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
std::string RegName = getRegionName(Reg);
std::string Msg = getExceedsMsg(C.getASTContext(), RegName, ByteOffset,
*KnownSize, Location);
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
return;
}
if (isTainted(state, ByteOffset)) {
if (isTainted(State, ByteOffset)) {
// Both cases are possible, but the index is tainted, so report.
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
ByteOffset);
std::string RegName = getRegionName(Reg);
std::string Msg = getTaintMsg(RegName);
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);
return;
}
}

if (state_withinUpperBound)
state = state_withinUpperBound;
if (WithinUpperBound)
State = WithinUpperBound;
}

checkerContext.addTransition(state);
C.addTransition(State);
}

void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
ProgramStateRef ErrorState, OOB_Kind Kind,
SVal TaintedSVal) const {
NonLoc Offset, std::string RegName,
std::string Msg) const {

ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
if (!ErrorNode)
return;

// FIXME: These diagnostics are preliminary, and they'll be replaced soon by
// a follow-up commit.
std::string ShortMsg = getShortMsg(Kind, RegName);

SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Out of bound memory access ";

switch (Kind) {
case OOB_Precedes:
Out << "(accessed memory precedes memory block)";
break;
case OOB_Exceeds:
Out << "(access exceeds upper limit of memory block)";
break;
case OOB_Taint:
Out << "(index is tainted)";
break;
}
auto BR = std::make_unique<PathSensitiveBugReport>(
Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
// Track back the propagation of taintedness, or do nothing if TaintedSVal is
// the default UnknownVal().
for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
BR->markInteresting(Sym);
}
Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode);

// Track back the propagation of taintedness.
if (Kind == OOB_Taint)
for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset))
BR->markInteresting(Sym);

C.emitReport(std::move(BR));
}

Expand Down

0 comments on commit 16ef496

Please sign in to comment.