Skip to content

Commit

Permalink
[analyzer][NFC] Simplifications in ArrayBoundV2 (#67572)
Browse files Browse the repository at this point in the history
I'm planning to improve diagnostics generation in `ArrayBoundCheckerV2`
but before that I'm refactoring the source code to clean up some
over-complicated code and an inaccurate comment.

Changes in this commit:
- Remove the `mutable std::unique_ptr<BugType>` boilerplate, because
it's no longer needed.
- Remove the code duplication between the methods `reportOOB()` and
`reportTaintedOOB()`.
- Eliminate the class `RegionRawOffsetV2` because it's just a "reinvent
the wheel" version of `std::pair` and it was used only once, as a
temporary object that was immediately decomposed. (I suspect that
`RegionRawOffset` in MemRegion.cpp could also be eliminated.)
- Flatten the code of `computeOffset()` which had contained six nested
indentation levels before this commit.
- Ensure that `computeOffset()` returns `std::nullopt` instead of a
`{Region, <zero array index>}` pair in the case when it encounters a
`Location` that is not an `ElementRegion`. This ensures that the
`checkLocation` callback returns early when it handles a memory access
where it has "nothing to do" (no subscript operation or equivalent
pointer arithmetic). Note that this is still NFC because zero is a
valid index everywhere, so the old logic without this shortcut
eventually reached the same conclusion.
- Correct a wrong explanation comment in `getSimplifiedOffsets()`.
  • Loading branch information
NagyDonat committed Oct 24, 2023
1 parent b796eac commit aceb34c
Showing 1 changed file with 91 additions and 136 deletions.
227 changes: 91 additions & 136 deletions clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,42 +32,76 @@ using namespace taint;
namespace {
class ArrayBoundCheckerV2 :
public Checker<check::Location> {
mutable std::unique_ptr<BugType> BT;
mutable std::unique_ptr<BugType> TaintBT;
BugType BT{this, "Out-of-bound access"};
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};

enum OOB_Kind { OOB_Precedes, OOB_Excedes };
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };

void reportOOB(CheckerContext &C, ProgramStateRef errorState,
OOB_Kind kind) const;
void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
SVal TaintedSVal) const;
void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
SVal TaintedSVal = UnknownVal()) const;

static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);

public:
void checkLocation(SVal l, bool isLoad, const Stmt *S,
CheckerContext &C) const;
};
} // anonymous namespace

// FIXME: Eventually replace RegionRawOffset with this class.
class RegionRawOffsetV2 {
private:
const SubRegion *baseRegion;
NonLoc byteOffset;

public:
RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
: baseRegion(base), byteOffset(offset) { assert(base); }
/// For a given Location that can be represented as a symbolic expression
/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
/// Arr and the distance of Location from the beginning of Arr (expressed in a
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
/// cannot be determined.
std::optional<std::pair<const SubRegion *, NonLoc>>
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
QualType T = SVB.getArrayIndexType();
auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
// We will use this utility to add and multiply values.
return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();
};

NonLoc getByteOffset() const { return byteOffset; }
const SubRegion *getRegion() const { return baseRegion; }
const SubRegion *OwnerRegion = nullptr;
std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();

const ElementRegion *CurRegion =
dyn_cast_or_null<ElementRegion>(Location.getAsRegion());

while (CurRegion) {
const auto Index = CurRegion->getIndex().getAs<NonLoc>();
if (!Index)
return std::nullopt;

QualType ElemType = CurRegion->getElementType();

// FIXME: The following early return was presumably added to safeguard the
// getTypeSizeInChars() call (which doesn't accept an incomplete type), but
// it seems that `ElemType` cannot be incomplete at this point.
if (ElemType->isIncompleteType())
return std::nullopt;

// Calculate Delta = Index * sizeof(ElemType).
NonLoc Size = SVB.makeArrayIndex(
SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
auto Delta = EvalBinOp(BO_Mul, *Index, Size);
if (!Delta)
return std::nullopt;

// Perform Offset += Delta.
Offset = EvalBinOp(BO_Add, *Offset, *Delta);
if (!Offset)
return std::nullopt;

OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();
// When this is just another ElementRegion layer, we need to continue the
// offset calculations:
CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
}

static std::optional<RegionRawOffsetV2>
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
if (OwnerRegion)
return std::make_pair(OwnerRegion, *Offset);

void dump() const;
void dumpToStream(raw_ostream &os) const;
};
return std::nullopt;
}

// TODO: once the constraint manager is smart enough to handle non simplified
Expand All @@ -86,8 +120,8 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
APSIntType(extent.getValue()).convert(SIE->getRHS());
switch (SIE->getOpcode()) {
case BO_Mul:
// The constant should never be 0 here, since it the result of scaling
// based on the size of a type which is never 0.
// The constant should never be 0 here, becasue multiplication by zero
// is simplified by the engine.
if ((extent.getValue() % constant) != 0)
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
else
Expand Down Expand Up @@ -163,16 +197,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
return;

ProgramStateRef state = checkerContext.getState();

SValBuilder &svalBuilder = checkerContext.getSValBuilder();
const std::optional<RegionRawOffsetV2> &RawOffset =
RegionRawOffsetV2::computeOffset(state, svalBuilder, location);

const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
computeOffset(state, svalBuilder, location);

if (!RawOffset)
return;

NonLoc ByteOffset = RawOffset->getByteOffset();
const SubRegion *Reg = RawOffset->getRegion();
auto [Reg, ByteOffset] = *RawOffset;

// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace();
Expand Down Expand Up @@ -207,12 +240,13 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
if (state_exceedsUpperBound) {
if (!state_withinUpperBound) {
// We know that the index definitely exceeds the upper bound.
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
return;
}
if (isTainted(state, ByteOffset)) {
// Both cases are possible, but the index is tainted, so report.
reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
ByteOffset);
return;
}
}
Expand All @@ -224,58 +258,40 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
checkerContext.addTransition(state);
}

void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
ProgramStateRef errorState,
SVal TaintedSVal) const {
ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
if (!errorNode)
return;

if (!TaintBT)
TaintBT.reset(
new BugType(this, "Out-of-bound access", categories::TaintedData));

SmallString<256> buf;
llvm::raw_svector_ostream os(buf);
os << "Out of bound memory access (index is tainted)";
auto BR =
std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode);

// Track back the propagation of taintedness.
for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) {
BR->markInteresting(Sym);
}

checkerContext.emitReport(std::move(BR));
}

void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
ProgramStateRef errorState,
OOB_Kind kind) const {
void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
ProgramStateRef ErrorState, OOB_Kind Kind,
SVal TaintedSVal) const {

ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
if (!errorNode)
ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
if (!ErrorNode)
return;

if (!BT)
BT.reset(new BugType(this, "Out-of-bound access"));
// FIXME: These diagnostics are preliminary, and they'll be replaced soon by
// a follow-up commit.

// FIXME: This diagnostics are preliminary. We should get far better
// diagnostics for explaining buffer overruns.
SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Out of bound memory access ";

SmallString<256> buf;
llvm::raw_svector_ostream os(buf);
os << "Out of bound memory access ";
switch (kind) {
switch (Kind) {
case OOB_Precedes:
os << "(accessed memory precedes memory block)";
Out << "(accessed memory precedes memory block)";
break;
case OOB_Exceeds:
Out << "(access exceeds upper limit of memory block)";
break;
case OOB_Excedes:
os << "(access exceeds upper limit of memory block)";
case OOB_Taint:
Out << "(index is tainted)";
break;
}
auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
checkerContext.emitReport(std::move(BR));
auto BR = std::make_unique<PathSensitiveBugReport>(
Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
// Track back the propagation of taintedness, or do nothing if TaintedSVal is
// the default UnknownVal().
for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
BR->markInteresting(Sym);
}
C.emitReport(std::move(BR));
}

bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
Expand All @@ -297,67 +313,6 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
(MacroName == "isupper") || (MacroName == "isxdigit"));
}

#ifndef NDEBUG
LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
dumpToStream(llvm::errs());
}

void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
}
#endif

/// For a given Location that can be represented as a symbolic expression
/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
/// Arr and the distance of Location from the beginning of Arr (expressed in a
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
/// cannot be determined.
std::optional<RegionRawOffsetV2>
RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
SVal Location) {
QualType T = SVB.getArrayIndexType();
auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
// We will use this utility to add and multiply values.
return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
};

const MemRegion *Region = Location.getAsRegion();
NonLoc Offset = SVB.makeZeroArrayIndex();

while (Region) {
if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
QualType ElemType = ERegion->getElementType();
// If the element is an incomplete type, go no further.
if (ElemType->isIncompleteType())
return std::nullopt;

// Perform Offset += Index * sizeof(ElemType); then continue the offset
// calculations with SuperRegion:
NonLoc Size = SVB.makeArrayIndex(
SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
if (auto Delta = Calc(BO_Mul, *Index, Size)) {
if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
Offset = *NewOffset;
Region = ERegion->getSuperRegion();
continue;
}
}
}
} else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
// NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
// to see a MemSpaceRegion at this point.
// FIXME: We may return with {<Region>, 0} even if we didn't handle any
// ElementRegion layers. I think that this behavior was introduced
// accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
// it may be useful to review it in the future.
return RegionRawOffsetV2(SRegion, Offset);
}
return std::nullopt;
}
return std::nullopt;
}

void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
mgr.registerChecker<ArrayBoundCheckerV2>();
}
Expand Down

0 comments on commit aceb34c

Please sign in to comment.