Skip to content

Commit de25473

Browse files
committed
[analyzer] Fix comparison logic in ArrayBoundCheckerV2
The prototype checker alpha.security.ArrayBoundV2 performs two comparisons to check that in an expression like Array[Index] 0 <= Index < length(Array) holds. These comparisons are handled by almost identical logic: the inequality is first rearranged by getSimplifiedOffsets(), then evaluated with evalBinOpNN(). However the simplification used "naive" elementary mathematical schematics, but evalBinOpNN() performed the signed -> unsigned conversions described in the C/C++ standards, and this confusion led to wildly inaccurate results: false positives from the lower bound check and false negatives from the upper bound check. This commit eliminates the code duplication by moving the comparison logic into a separate function, then adds an explicit check to this unified code path, which handles the problematic case separately. In addition to this, the commit also cleans up a testcase that was demonstrating the presence of this problem. Note that while that testcase was failing with an overflow error, its actual problem was in the underflow handler logic: (0) The testcase introduces a five-element array "char a[5]" and an unknown argument "size_t len"; then evaluates "a[len+1]". (1) The underflow check tries to determine whether "len+1 < 0" holds. (2) This inequality is rearranged to "len < -1". (3) evalBinOpNN() evaluates this with the schematics of C/C++ and converts -1 to the size_t value SIZE_MAX. (4) The engine concludes that len == SIZE_MAX, because otherwise we'd have an underflow here. (5) The overflow check tries to determine whether "len+1 >= 5". (6) This inequality is rearranged to "len >= 4". (7) The engine substitutes len == SIZE_MAX and reports that we have an overflow. Differential Revision: https://reviews.llvm.org/D135375
1 parent 8b39527 commit de25473

File tree

2 files changed

+70
-87
lines changed

2 files changed

+70
-87
lines changed

clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp

Lines changed: 67 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ class RegionRawOffsetV2 {
5757
: baseRegion(nullptr), byteOffset(UnknownVal()) {}
5858

5959
public:
60-
RegionRawOffsetV2(const SubRegion* base, SVal offset)
61-
: baseRegion(base), byteOffset(offset) {}
60+
RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
61+
: baseRegion(base), byteOffset(offset) { assert(base); }
6262

6363
NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
6464
const SubRegion *getRegion() const { return baseRegion; }
@@ -72,19 +72,12 @@ class RegionRawOffsetV2 {
7272
};
7373
}
7474

75-
static SVal computeExtentBegin(SValBuilder &svalBuilder,
76-
const MemRegion *region) {
77-
const MemSpaceRegion *SR = region->getMemorySpace();
78-
if (SR->getKind() == MemRegion::UnknownSpaceRegionKind)
79-
return UnknownVal();
80-
else
81-
return svalBuilder.makeZeroArrayIndex();
82-
}
83-
8475
// TODO: once the constraint manager is smart enough to handle non simplified
8576
// symbolic expressions remove this function. Note that this can not be used in
8677
// the constraint manager as is, since this does not handle overflows. It is
8778
// safe to assume, however, that memory offsets will not overflow.
79+
// NOTE: callers of this function need to be aware of the effects of overflows
80+
// and signed<->unsigned conversions!
8881
static std::pair<NonLoc, nonloc::ConcreteInt>
8982
getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
9083
SValBuilder &svalBuilder) {
@@ -117,6 +110,38 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
117110
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
118111
}
119112

113+
// Evaluate the comparison Value < Threshold with the help of the custom
114+
// simplification algorithm defined for this checker. Return a pair of states,
115+
// where the first one corresponds to "value below threshold" and the second
116+
// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
117+
// the case when the evaluation fails.
118+
static std::pair<ProgramStateRef, ProgramStateRef>
119+
compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
120+
SValBuilder &SVB) {
121+
if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
122+
std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
123+
}
124+
if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
125+
QualType T = Value.getType(SVB.getContext());
126+
if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) {
127+
// In this case we reduced the bound check to a comparison of the form
128+
// (symbol or value with unsigned type) < (negative number)
129+
// which is always false. We are handling these cases separately because
130+
// evalBinOpNN can perform a signed->unsigned conversion that turns the
131+
// negative number into a huge positive value and leads to wildly
132+
// inaccurate conclusions.
133+
return {nullptr, State};
134+
}
135+
}
136+
auto BelowThreshold =
137+
SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>();
138+
139+
if (BelowThreshold)
140+
return State->assume(*BelowThreshold);
141+
142+
return {nullptr, nullptr};
143+
}
144+
120145
void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
121146
const Stmt* LoadS,
122147
CheckerContext &checkerContext) const {
@@ -139,95 +164,55 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
139164
if (!rawOffset.getRegion())
140165
return;
141166

142-
NonLoc rawOffsetVal = rawOffset.getByteOffset();
167+
NonLoc ByteOffset = rawOffset.getByteOffset();
143168

144-
// CHECK LOWER BOUND: Is byteOffset < extent begin?
145-
// If so, we are doing a load/store
146-
// before the first valid offset in the memory region.
147-
148-
SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion());
149-
150-
if (std::optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) {
151-
if (auto ConcreteNV = NV->getAs<nonloc::ConcreteInt>()) {
152-
std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
153-
getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteNV,
154-
svalBuilder);
155-
rawOffsetVal = simplifiedOffsets.first;
156-
*NV = simplifiedOffsets.second;
157-
}
169+
// CHECK LOWER BOUND
170+
const MemSpaceRegion *SR = rawOffset.getRegion()->getMemorySpace();
171+
if (!llvm::isa<UnknownSpaceRegion>(SR)) {
172+
// A pointer to UnknownSpaceRegion may point to the middle of
173+
// an allocated region.
158174

159-
SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV,
160-
svalBuilder.getConditionType());
175+
auto [state_precedesLowerBound, state_withinLowerBound] =
176+
compareValueToThreshold(state, ByteOffset,
177+
svalBuilder.makeZeroArrayIndex(), svalBuilder);
161178

162-
std::optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>();
163-
if (!lowerBoundToCheck)
164-
return;
165-
166-
ProgramStateRef state_precedesLowerBound, state_withinLowerBound;
167-
std::tie(state_precedesLowerBound, state_withinLowerBound) =
168-
state->assume(*lowerBoundToCheck);
169-
170-
// Are we constrained enough to definitely precede the lower bound?
171179
if (state_precedesLowerBound && !state_withinLowerBound) {
180+
// We know that the index definitely precedes the lower bound.
172181
reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
173182
return;
174183
}
175184

176-
// Otherwise, assume the constraint of the lower bound.
177-
assert(state_withinLowerBound);
178-
state = state_withinLowerBound;
185+
if (state_withinLowerBound)
186+
state = state_withinLowerBound;
179187
}
180188

181-
do {
182-
// CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so,
183-
// we are doing a load/store after the last valid offset.
184-
const MemRegion *MR = rawOffset.getRegion();
185-
DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder);
186-
if (!isa<NonLoc>(Size))
187-
break;
188-
189-
if (auto ConcreteSize = Size.getAs<nonloc::ConcreteInt>()) {
190-
std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
191-
getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteSize,
192-
svalBuilder);
193-
rawOffsetVal = simplifiedOffsets.first;
194-
Size = simplifiedOffsets.second;
195-
}
196-
197-
SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal,
198-
Size.castAs<NonLoc>(),
199-
svalBuilder.getConditionType());
200-
201-
std::optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>();
202-
if (!upperboundToCheck)
203-
break;
204-
205-
ProgramStateRef state_exceedsUpperBound, state_withinUpperBound;
206-
std::tie(state_exceedsUpperBound, state_withinUpperBound) =
207-
state->assume(*upperboundToCheck);
208-
209-
// If we are under constrained and the index variables are tainted, report.
210-
if (state_exceedsUpperBound && state_withinUpperBound) {
211-
SVal ByteOffset = rawOffset.getByteOffset();
189+
// CHECK UPPER BOUND
190+
DefinedOrUnknownSVal Size =
191+
getDynamicExtent(state, rawOffset.getRegion(), svalBuilder);
192+
if (auto KnownSize = Size.getAs<NonLoc>()) {
193+
auto [state_withinUpperBound, state_exceedsUpperBound] =
194+
compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
195+
196+
if (state_exceedsUpperBound) {
197+
if (!state_withinUpperBound) {
198+
// We know that the index definitely exceeds the upper bound.
199+
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
200+
return;
201+
}
212202
if (isTainted(state, ByteOffset)) {
203+
// Both cases are possible, but the index is tainted, so report.
213204
reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
214205
return;
215206
}
216-
} else if (state_exceedsUpperBound) {
217-
// If we are constrained enough to definitely exceed the upper bound,
218-
// report.
219-
assert(!state_withinUpperBound);
220-
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
221-
return;
222207
}
223208

224-
assert(state_withinUpperBound);
225-
state = state_withinUpperBound;
209+
if (state_withinUpperBound)
210+
state = state_withinUpperBound;
226211
}
227-
while (false);
228212

229213
checkerContext.addTransition(state);
230214
}
215+
231216
void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
232217
ProgramStateRef errorState,
233218
SVal TaintedSVal) const {
@@ -336,9 +321,8 @@ RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
336321
switch (region->getKind()) {
337322
default: {
338323
if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
339-
offset = getValue(offset, svalBuilder);
340-
if (!offset.isUnknownOrUndef())
341-
return RegionRawOffsetV2(subReg, offset);
324+
if (auto Offset = getValue(offset, svalBuilder).getAs<NonLoc>())
325+
return RegionRawOffsetV2(subReg, *Offset);
342326
}
343327
return RegionRawOffsetV2();
344328
}

clang/test/Analysis/out-of-bounds-false-positive.c renamed to clang/test/Analysis/array-bound-v2-constraint-check.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@ typedef unsigned long long size_t;
88
const char a[] = "abcd"; // extent: 5 bytes
99

1010
void symbolic_size_t_and_int0(size_t len) {
11-
// FIXME: Should not warn for this.
12-
(void)a[len + 1]; // expected-warning {{Out of bound memory access}}
11+
(void)a[len + 1]; // no-warning
1312
// We infered that the 'len' must be in a specific range to make the previous indexing valid.
1413
// len: [0,3]
15-
clang_analyzer_eval(len <= 3); // expected - warning {{TRUE}}
16-
clang_analyzer_eval(len <= 2); // expected - warning {{UNKNOWN}}
14+
clang_analyzer_eval(len <= 3); // expected-warning {{TRUE}}
15+
clang_analyzer_eval(len <= 2); // expected-warning {{UNKNOWN}}
1716
}
1817

1918
void symbolic_size_t_and_int1(size_t len) {

0 commit comments

Comments
 (0)