Expand Up
@@ -33,7 +33,66 @@ using namespace taint;
using llvm::formatv;
namespace {
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
class StateUpdateReporter {
const SubRegion *Reg;
NonLoc ByteOffsetVal;
std::optional<QualType> ElementType = std::nullopt;
std::optional<int64_t > ElementSize = std::nullopt;
bool AssumedNonNegative = false ;
std::optional<NonLoc> AssumedUpperBound = std::nullopt;
public:
StateUpdateReporter (const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
CheckerContext &C)
: Reg(R), ByteOffsetVal(ByteOffsVal) {
initializeElementInfo (E, C);
}
void initializeElementInfo (const Expr *E, CheckerContext &C) {
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
const MemRegion *SubscriptBaseReg =
C.getSVal (ASE->getBase ()).getAsRegion ();
if (!SubscriptBaseReg)
return ;
SubscriptBaseReg = SubscriptBaseReg->StripCasts ();
if (!isa_and_nonnull<ElementRegion>(SubscriptBaseReg)) {
ElementType = ASE->getType ();
ElementSize =
C.getASTContext ().getTypeSizeInChars (*ElementType).getQuantity ();
}
}
}
void recordNonNegativeAssumption () { AssumedNonNegative = true ; }
void recordUpperBoundAssumption (NonLoc UpperBoundVal) {
AssumedUpperBound = UpperBoundVal;
}
const NoteTag *createNoteTag (CheckerContext &C) const ;
private:
std::string getMessage (PathSensitiveBugReport &BR) const ;
// / Return true if information about the value of `Sym` can put constraints
// / on some symbol which is interesting within the bug report `BR`.
// / In particular, this returns true when `Sym` is interesting within `BR`;
// / but it also returns true if `Sym` is an expression that contains integer
// / constants and a single symbolic operand which is interesting (in `BR`).
// / We need to use this instead of plain `BR.isInteresting()` because if we
// / are analyzing code like
// / int array[10];
// / int f(int arg) {
// / return array[arg] && array[arg + 10];
// / }
// / then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
// / sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
// / to detect this out of bounds access).
static bool providesInformationAboutInteresting (SymbolRef Sym,
PathSensitiveBugReport &BR);
static bool providesInformationAboutInteresting (SVal SV,
PathSensitiveBugReport &BR) {
return providesInformationAboutInteresting (SV.getAsSymbol (), BR);
}
};
struct Messages {
std::string Short, Full;
Expand All
@@ -54,11 +113,14 @@ class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>,
void performCheck (const Expr *E, CheckerContext &C) const ;
void reportOOB (CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind ,
NonLoc Offset, Messages Msgs ) const ;
void reportOOB (CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs ,
NonLoc Offset, bool IsTaintBug = false ) const ;
static bool isFromCtypeMacro (const Stmt *S, ASTContext &AC);
static bool isIdiomaticPastTheEndPtr (const Expr *E, ProgramStateRef State,
NonLoc Offset, NonLoc Limit,
CheckerContext &C);
static bool isInAddressOf (const Stmt *S, ASTContext &AC);
public:
Expand Down
Expand Up
@@ -133,12 +195,19 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
return std::nullopt;
}
// TODO: once the constraint manager is smart enough to handle non simplified
// symbolic expressions remove this function. Note that this can not be used in
// the constraint manager as is, since this does not handle overflows. It is
// safe to assume, however, that memory offsets will not overflow.
// NOTE: callers of this function need to be aware of the effects of overflows
// and signed<->unsigned conversions!
// NOTE: This function is the "heart" of this checker. It simplifies
// inequalities with transformations that are valid (and very elementary) in
// pure mathematics, but become invalid if we use them in C++ number model
// where the calculations may overflow.
// Due to the overflow issues I think it's impossible (or at least not
// practical) to integrate this kind of simplification into the resolution of
// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
// produces valid results if the arguments are memory offsets which are known
// to be << SIZE_MAX.
// TODO: This algorithm should be moved to a central location where it's
// available for other checkers that need to compare memory offsets.
// NOTE: When using the results of this function, don't forget that `evalBinOp`
// uses the evaluation rules of C++, so e.g. `(size_t)123 < -1`!
static std::pair<NonLoc, nonloc::ConcreteInt>
getSimplifiedOffsets (NonLoc offset, nonloc::ConcreteInt extent,
SValBuilder &svalBuilder) {
Expand Down
Expand Up
@@ -239,13 +308,8 @@ static std::optional<int64_t> getConcreteValue(NonLoc SV) {
return std::nullopt;
}
static std::string getShortMsg (OOB_Kind Kind, std::string RegName) {
static const char *ShortMsgTemplates[] = {
" Out of bound access to memory preceding {0}" ,
" Out of bound access to memory after the end of {0}" ,
" Potential out of bound access to {0} with tainted offset" };
return formatv (ShortMsgTemplates[Kind], RegName);
static std::optional<int64_t > getConcreteValue (std::optional<NonLoc> SV) {
return SV ? getConcreteValue (*SV) : std::nullopt;
}
static Messages getPrecedesMsgs (const SubRegion *Region, NonLoc Offset) {
Expand All
@@ -255,7 +319,28 @@ static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
Out << " Access of " << RegName << " at negative byte offset" ;
if (auto ConcreteIdx = Offset.getAs <nonloc::ConcreteInt>())
Out << ' ' << ConcreteIdx->getValue ();
return {getShortMsg (OOB_Precedes, RegName), std::string (Buf)};
return {formatv (" Out of bound access to memory preceding {0}" , RegName),
std::string (Buf)};
}
// / Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
// / it can be performed (`Divisor` is nonzero and there is no remainder). The
// / values `Val1` and `Val2` may be nullopt and in that case the corresponding
// / division is considered to be successful.
bool tryDividePair (std::optional<int64_t > &Val1, std::optional<int64_t > &Val2,
int64_t Divisor) {
if (!Divisor)
return false ;
const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
if (!Val1HasRemainder && !Val2HasRemainder) {
if (Val1)
*Val1 /= Divisor;
if (Val2)
*Val2 /= Divisor;
return true ;
}
return false ;
}
static Messages getExceedsMsgs (ASTContext &ACtx, const SubRegion *Region,
Expand All
@@ -268,18 +353,9 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
std::optional<int64_t > OffsetN = getConcreteValue (Offset);
std::optional<int64_t > ExtentN = getConcreteValue (Extent);
bool UseByteOffsets = true ;
if (int64_t ElemSize = ACtx.getTypeSizeInChars (ElemType).getQuantity ()) {
const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
if (!OffsetHasRemainder && !ExtentHasRemainder) {
UseByteOffsets = false ;
if (OffsetN)
*OffsetN /= ElemSize;
if (ExtentN)
*ExtentN /= ElemSize;
}
}
int64_t ElemSize = ACtx.getTypeSizeInChars (ElemType).getQuantity ();
bool UseByteOffsets = !tryDividePair (OffsetN, ExtentN, ElemSize);
SmallString<256 > Buf;
llvm::raw_svector_ostream Out (Buf);
Expand Down
Expand Up
@@ -307,7 +383,9 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
Out << " s" ;
}
return {getShortMsg (OOB_Exceeds, RegName), std::string (Buf)};
return {
formatv (" Out of bound access to memory after the end of {0}" , RegName),
std::string (Buf)};
}
static Messages getTaintMsgs (const SubRegion *Region, const char *OffsetName) {
Expand All
@@ -318,17 +396,87 @@ static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) {
RegName, OffsetName)};
}
void ArrayBoundCheckerV2::performCheck (const Expr *E, CheckerContext &C) const {
// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
// some new logic here that reasons directly about memory region extents.
// Once that logic is more mature, we can bring it back to assumeInBound()
// for all clients to use.
//
// The algorithm we are using here for bounds checking is to see if the
// memory access is within the extent of the base region. Since we
// have some flexibility in defining the base region, we can achieve
// various levels of conservatism in our buffer overflow checking.
const NoteTag *StateUpdateReporter::createNoteTag (CheckerContext &C) const {
// Don't create a note tag if we didn't assume anything:
if (!AssumedNonNegative && !AssumedUpperBound)
return nullptr ;
return C.getNoteTag (
[*this ](PathSensitiveBugReport &BR) -> std::string {
return getMessage (BR);
},
/* isPrunable=*/ false );
}
std::string StateUpdateReporter::getMessage (PathSensitiveBugReport &BR) const {
bool ShouldReportNonNegative = AssumedNonNegative;
if (!providesInformationAboutInteresting (ByteOffsetVal, BR)) {
if (AssumedUpperBound &&
providesInformationAboutInteresting (*AssumedUpperBound, BR))
ShouldReportNonNegative = false ;
else
return " " ;
}
std::optional<int64_t > OffsetN = getConcreteValue (ByteOffsetVal);
std::optional<int64_t > ExtentN = getConcreteValue (AssumedUpperBound);
const bool UseIndex =
ElementSize && tryDividePair (OffsetN, ExtentN, *ElementSize);
SmallString<256 > Buf;
llvm::raw_svector_ostream Out (Buf);
Out << " Assuming " ;
if (UseIndex) {
Out << " index " ;
if (OffsetN)
Out << " '" << OffsetN << " ' " ;
} else if (AssumedUpperBound) {
Out << " byte offset " ;
if (OffsetN)
Out << " '" << OffsetN << " ' " ;
} else {
Out << " offset " ;
}
Out << " is" ;
if (ShouldReportNonNegative) {
Out << " non-negative" ;
}
if (AssumedUpperBound) {
if (ShouldReportNonNegative)
Out << " and" ;
Out << " less than " ;
if (ExtentN)
Out << *ExtentN << " , " ;
if (UseIndex && ElementType)
Out << " the number of '" << ElementType->getAsString ()
<< " ' elements in " ;
else
Out << " the extent of " ;
Out << getRegionName (Reg);
}
return std::string (Out.str ());
}
bool StateUpdateReporter::providesInformationAboutInteresting (
SymbolRef Sym, PathSensitiveBugReport &BR) {
if (!Sym)
return false ;
for (SymbolRef PartSym : Sym->symbols ()) {
// The interestingess mark may appear on any layer as we're stripping off
// the SymIntExpr, UnarySymExpr etc. layers...
if (BR.isInteresting (PartSym))
return true ;
// ...but if both sides of the expression are symbolic (i.e. unknown), then
// the analyzer can't use the combined result to constrain the operands.
if (isa<SymSymExpr>(PartSym))
return false ;
}
return false ;
}
void ArrayBoundCheckerV2::performCheck (const Expr *E, CheckerContext &C) const {
const SVal Location = C.getSVal (E);
// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
Expand All
@@ -350,6 +498,10 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
auto [Reg, ByteOffset ] = *RawOffset;
// The state updates will be reported as a single note tag, which will be
// composed by this helper class.
StateUpdateReporter SUR (Reg, ByteOffset , E, C);
// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace ();
if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
Expand All
@@ -363,13 +515,22 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold (
State, ByteOffset , SVB.makeZeroArrayIndex (), SVB);
if (PrecedesLowerBound && !WithinLowerBound) {
// We know that the index definitely precedes the lower bound.
Messages Msgs = getPrecedesMsgs (Reg, ByteOffset );
reportOOB (C, PrecedesLowerBound, OOB_Precedes, ByteOffset , Msgs);
return ;
if (PrecedesLowerBound) {
// The offset may be invalid (negative)...
if (!WithinLowerBound) {
// ...and it cannot be valid (>= 0), so report an error.
Messages Msgs = getPrecedesMsgs (Reg, ByteOffset );
reportOOB (C, PrecedesLowerBound, Msgs, ByteOffset );
return ;
}
// ...but it can be valid as well, so the checker will (optimistically)
// assume that it's valid and mention this in the note tag.
SUR.recordNonNegativeAssumption ();
}
// Actually update the state. The "if" only fails in the extremely unlikely
// case when compareValueToThreshold returns {nullptr, nullptr} becasue
// evalBinOpNN fails to evaluate the less-than operator.
if (WithinLowerBound)
State = WithinLowerBound;
}
Expand All
@@ -381,66 +542,98 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
compareValueToThreshold (State, ByteOffset , *KnownSize, SVB);
if (ExceedsUpperBound) {
// The offset may be invalid (>= Size)...
if (!WithinUpperBound) {
// We know that the index definitely exceeds the upper bound.
if (isa<ArraySubscriptExpr>(E) && isInAddressOf (E, C.getASTContext ())) {
// ...but this is within an addressof expression, so we need to check
// for the exceptional case that `&array[size]` is valid.
auto [EqualsToThreshold, NotEqualToThreshold] =
compareValueToThreshold (ExceedsUpperBound, ByteOffset , *KnownSize,
SVB, /* CheckEquality=*/ true );
if (EqualsToThreshold && !NotEqualToThreshold) {
// We are definitely in the exceptional case, so return early
// instead of reporting a bug.
C.addTransition (EqualsToThreshold);
return ;
}
// ...and it cannot be within bounds, so report an error, unless we can
// definitely determine that this is an idiomatic `&array[size]`
// expression that calculates the past-the-end pointer.
if (isIdiomaticPastTheEndPtr (E, ExceedsUpperBound, ByteOffset ,
*KnownSize, C)) {
// FIXME: this duplicates the `addTransition` at the end of the
// function, but `goto` is taboo nowdays.
C.addTransition (ExceedsUpperBound, SUR.createNoteTag (C));
return ;
}
Messages Msgs = getExceedsMsgs (C.getASTContext (), Reg, ByteOffset ,
*KnownSize, Location);
reportOOB (C, ExceedsUpperBound, OOB_Exceeds , ByteOffset , Msgs );
reportOOB (C, ExceedsUpperBound, Msgs , ByteOffset );
return ;
}
// ...and it can be valid as well...
if (isTainted (State, ByteOffset )) {
// Both cases are possible, but the offset is tainted, so report.
std::string RegName = getRegionName (Reg);
// ...but it's tainted, so report an error.
// Diagnostic detail: "tainted offset" is always correct, but the
// common case is that 'idx' is tainted in 'arr[idx]' and then it's
// Diagnostic detail: saying "tainted offset" is always correct, but
// the common case is that 'idx' is tainted in 'arr[idx]' and then it's
// nicer to say "tainted index".
const char *OffsetName = " offset" ;
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
if (isTainted (State, ASE->getIdx (), C.getLocationContext ()))
OffsetName = " index" ;
Messages Msgs = getTaintMsgs (Reg, OffsetName);
reportOOB (C, ExceedsUpperBound, OOB_Taint , ByteOffset , Msgs );
reportOOB (C, ExceedsUpperBound, Msgs , ByteOffset , /* IsTaintBug= */ true );
return ;
}
// ...and it isn't tainted, so the checker will (optimistically) assume
// that the offset is in bounds and mention this in the note tag.
SUR.recordUpperBoundAssumption (*KnownSize);
}
// Actually update the state. The "if" only fails in the extremely unlikely
// case when compareValueToThreshold returns {nullptr, nullptr} becasue
// evalBinOpNN fails to evaluate the less-than operator.
if (WithinUpperBound)
State = WithinUpperBound;
}
C.addTransition (State);
// Add a transition, reporting the state updates that we accumulated.
C.addTransition (State, SUR.createNoteTag (C));
}
void ArrayBoundCheckerV2::reportOOB (CheckerContext &C,
ProgramStateRef ErrorState, OOB_Kind Kind ,
NonLoc Offset, Messages Msgs ) const {
ProgramStateRef ErrorState, Messages Msgs ,
NonLoc Offset, bool IsTaintBug ) const {
ExplodedNode *ErrorNode = C.generateErrorNode (ErrorState);
if (!ErrorNode)
return ;
auto BR = std::make_unique<PathSensitiveBugReport>(
Kind == OOB_Taint ? TaintBT : BT, Msgs.Short , Msgs.Full , ErrorNode);
IsTaintBug ? TaintBT : BT, Msgs.Short , Msgs.Full , ErrorNode);
// FIXME: ideally we would just call trackExpressionValue() and that would
// "do the right thing": mark the relevant symbols as interesting, track the
// control dependencies and statements storing the relevant values and add
// helpful diagnostic pieces. However, right now trackExpressionValue() is
// a heap of unreliable heuristics, so it would cause several issues:
// - Interestingness is not applied consistently, e.g. if `array[x+10]`
// causes an overflow, then `x` is not marked as interesting.
// - We get irrelevant diagnostic pieces, e.g. in the code
// `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;`
// it places a "Storing uninitialized value" note on the `malloc` call
// (which is technically true, but irrelevant).
// If trackExpressionValue() becomes reliable, it should be applied instead
// of the manual markInteresting() calls.
if (SymbolRef OffsetSym = Offset.getAsSymbol ()) {
// If the offset is a symbolic value, iterate over its "parts" with
// `SymExpr::symbols()` and mark each of them as interesting.
// For example, if the offset is `x*4 + y` then we put interestingness onto
// the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols
// `x` and `y`.
for (SymbolRef PartSym : OffsetSym->symbols ())
BR->markInteresting (PartSym);
}
// Track back the propagation of taintedness.
if (Kind == OOB_Taint)
if (IsTaintBug) {
// If the issue that we're reporting depends on the taintedness of the
// offset, then put interestingness onto symbols that could be the origin
// of the taint.
for (SymbolRef Sym : getTaintedSymbols (ErrorState, Offset))
BR->markInteresting (Sym);
}
C.emitReport (std::move (BR));
}
Expand Down
Expand Up
@@ -476,6 +669,18 @@ bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
return UnaryOp && UnaryOp->getOpcode () == UO_AddrOf;
}
bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr (const Expr *E,
ProgramStateRef State,
NonLoc Offset, NonLoc Limit,
CheckerContext &C) {
if (isa<ArraySubscriptExpr>(E) && isInAddressOf (E, C.getASTContext ())) {
auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold (
State, Offset, Limit, C.getSValBuilder (), /* CheckEquality=*/ true );
return EqualsToThreshold && !NotEqualToThreshold;
}
return false ;
}
void ento::registerArrayBoundCheckerV2 (CheckerManager &mgr) {
mgr.registerChecker <ArrayBoundCheckerV2>();
}
Expand Down