diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp index d35031b5c22df..6ad5acd4e76f2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp @@ -130,6 +130,20 @@ struct Messages { std::string Short, Full; }; +enum class BadOffsetKind { Negative, Overflowing, Indeterminate }; + +constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing", + "a negative or overflowing"}; +static StringRef asAdjective(BadOffsetKind Problem) { + return Adjectives[static_cast(Problem)]; +} + +constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of", + "around"}; +static StringRef asPreposition(BadOffsetKind Problem) { + return Prepositions[static_cast(Problem)]; +} + // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt` // instead of `PreStmt` because the current implementation passes the whole // expression to `CheckerContext::getSVal()` which only works after the @@ -388,18 +402,6 @@ static std::optional getConcreteValue(std::optional SV) { return SV ? getConcreteValue(*SV) : std::nullopt; } -static Messages getPrecedesMsgs(const MemSpaceRegion *Space, - const SubRegion *Region, NonLoc Offset) { - std::string RegName = getRegionName(Space, Region), OffsetStr = ""; - - if (auto ConcreteOffset = getConcreteValue(Offset)) - OffsetStr = formatv(" {0}", ConcreteOffset); - - return { - formatv("Out of bound access to memory preceding {0}", RegName), - formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)}; -} - /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if /// it can be performed (`Divisor` is nonzero and there is no remainder). The /// values `Val1` and `Val2` may be nullopt and in that case the corresponding @@ -419,10 +421,11 @@ static bool tryDividePair(std::optional &Val1, return true; } -static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, - const SubRegion *Region, NonLoc Offset, - NonLoc Extent, SVal Location, - bool AlsoMentionUnderflow) { +static Messages getNonTaintMsgs(const ASTContext &ACtx, + const MemSpaceRegion *Space, + const SubRegion *Region, NonLoc Offset, + std::optional Extent, SVal Location, + BadOffsetKind Problem) { std::string RegName = getRegionName(Space, Region); const auto *EReg = Location.getAsRegion()->getAs(); assert(EReg && "this checker only handles element access"); @@ -439,15 +442,21 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, SmallString<256> Buf; llvm::raw_svector_ostream Out(Buf); Out << "Access of "; - if (!ExtentN && !UseByteOffsets) + if (OffsetN && !ExtentN && !UseByteOffsets) { + // If the offset is reported as an index, then the report must mention the + // element type (because it is not always clear from the code). It's more + // natural to mention the element type later where the extent is described, + // but if the extent is unknown/irrelevant, then the element type can be + // inserted into the message at this point. Out << "'" << ElemType.getAsString() << "' element in "; + } Out << RegName << " at "; - if (AlsoMentionUnderflow) { - Out << "a negative or overflowing " << OffsetOrIndex; - } else if (OffsetN) { + if (OffsetN) { + if (Problem == BadOffsetKind::Negative) + Out << "negative "; Out << OffsetOrIndex << " " << *OffsetN; } else { - Out << "an overflowing " << OffsetOrIndex; + Out << asAdjective(Problem) << " " << OffsetOrIndex; } if (ExtentN) { Out << ", while it holds only "; @@ -465,8 +474,7 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, } return {formatv("Out of bound access to memory {0} {1}", - AlsoMentionUnderflow ? "around" : "after the end of", - RegName), + asPreposition(Problem), RegName), std::string(Buf)}; } @@ -635,7 +643,9 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { } else { if (!WithinLowerBound) { // ...and it cannot be valid (>= 0), so report an error. - Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset); + Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg, + ByteOffset, /*Extent=*/std::nullopt, + Location, BadOffsetKind::Negative); reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt); return; } @@ -677,9 +687,12 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { return; } + BadOffsetKind Problem = AlsoMentionUnderflow + ? BadOffsetKind::Indeterminate + : BadOffsetKind::Overflowing; Messages Msgs = - getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset, - *KnownSize, Location, AlsoMentionUnderflow); + getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset, + *KnownSize, Location, Problem); reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); return; } diff --git a/clang/test/Analysis/ArrayBound/assumption-reporting.c b/clang/test/Analysis/ArrayBound/assumption-reporting.c index 535e623baa815..bffd5d9bc35b5 100644 --- a/clang/test/Analysis/ArrayBound/assumption-reporting.c +++ b/clang/test/Analysis/ArrayBound/assumption-reporting.c @@ -70,7 +70,7 @@ int assumingUpper(int arg) { // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}} int b = TenElements[arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -99,7 +99,7 @@ int assumingUpperUnsigned(unsigned arg) { // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}} int b = TenElements[(int)arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -111,7 +111,7 @@ int assumingNothing(unsigned arg) { int a = TenElements[arg]; // no note here, we already know that 'arg' is in bounds int b = TenElements[(int)arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -145,7 +145,7 @@ int assumingConvertedToIntP(struct foo f, int arg) { // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 'f.b'}} int c = TenElements[arg-2]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b + c; } diff --git a/clang/test/Analysis/ArrayBound/verbose-tests.c b/clang/test/Analysis/ArrayBound/verbose-tests.c index 84d238ed1a2a4..e3416886d13e5 100644 --- a/clang/test/Analysis/ArrayBound/verbose-tests.c +++ b/clang/test/Analysis/ArrayBound/verbose-tests.c @@ -11,7 +11,7 @@ int TenElements[10]; void arrayUnderflow(void) { TenElements[-3] = 5; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -3}} } int underflowWithDeref(void) { @@ -19,9 +19,39 @@ int underflowWithDeref(void) { --p; return *p; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -1}} +} + +char underflowReportedAsChar(void) { + // Underflow is reported with the type of the accessed element (here 'char'), + // not the type that appears in the declaration of the original array (which + // would be 'int'). + return ((char *)TenElements)[-1]; + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'char' element in 'TenElements' at negative index -1}} } +struct TwoInts { + int a, b; +}; + +struct TwoInts underflowReportedAsStruct(void) { + // Another case where the accessed type is used for reporting the offset. + return *(struct TwoInts*)(TenElements - 4); + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'struct TwoInts' element in 'TenElements' at negative index -2}} +} + +struct TwoInts underflowOnlyByteOffset(void) { + // In this case the negative byte offset is not a multiple of the size of the + // accessed element, so the part "= -... * sizeof(type)" is omitted at the + // end of the message. + return *(struct TwoInts*)(TenElements - 3); + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}} +} + + int rng(void); int getIndex(void) { switch (rng()) { @@ -40,10 +70,10 @@ void gh86959(void) { while (rng()) TenElements[getIndex()] = 10; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -172}} } -int scanf(const char *restrict fmt, ...); +int scanf(const char *fmt, ...); void taintedIndex(void) { int index;